From eb98ff014c6d5d1e16b19ea553cd4e0b61b5c317 Mon Sep 17 00:00:00 2001
From: Ville Rantanen
Date: Thu, 17 Nov 2016 12:30:01 +0200
Subject: [PATCH] manual tsvkit updater

---
Review notes:
 * tsvkit.update: each file is downloaded to "TARGET.new" and moved into
   place only after a successful, non-empty download. wget -O truncates its
   target up front, so a failed fetch used to leave empty tsvkit files
   behind. The path derived from BASH_SOURCE is now quoted.
 * tsv/tsvkit.sh, tsv/lib/tsvhead and tsv/lib/tsvtail are left as they were:
   they are the copies tsvkit.update fetches, so local edits to them would
   be overwritten by the next update.
 * This copy of the patch was rebuilt from an export that had lost its line
   breaks, its indentation and every angle-bracketed span. Indentation, the
   license URL, the bodies of tail_allbutfirst/tail_ordinary and the
   tsvquery separator (assumed to be a tab, written as $'\t') are
   reconstructed. Author e-mail addresses could not be recovered, and the
   blob ids on the "index" lines were not regenerated.

 tsv/lib/tsvhead   |  93 +++++++++++++++++
 tsv/lib/tsvtail   |  99 ++++++++++++++++++
 tsv/tsvhead       |   1 +
 tsv/tsvkit.sh     | 257 ++++++++++++++++++++++++++++++++++++++++++++++
 tsv/tsvkit.update |  38 +++++++
 tsv/tsvtail       |   1 +
 6 files changed, 489 insertions(+)
 create mode 100755 tsv/lib/tsvhead
 create mode 100755 tsv/lib/tsvtail
 create mode 120000 tsv/tsvhead
 create mode 100644 tsv/tsvkit.sh
 create mode 100755 tsv/tsvkit.update
 create mode 120000 tsv/tsvtail

diff --git a/tsv/lib/tsvhead b/tsv/lib/tsvhead
new file mode 100755
index 0000000..368b94f
--- /dev/null
+++ b/tsv/lib/tsvhead
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 Ville Rantanen
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+'''simple head for tsv/csv files.'''
+
+__author__ = "Ville Rantanen "
+
+__version__ = "0.1"
+
+import sys,os,argparse
+from argparse import ArgumentParser
+
+def setup_options():
+    ''' Create command line options '''
+    usage='''
+Simple implementation of head that keeps the header row.
+
+'''
+
+    parser=ArgumentParser(description=usage,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="\n".join(["Version: "+__version__,__author__]))
+    common_group=parser.add_argument_group('common', 'Common options')
+    common_group.add_argument("-v","--version",action="version",version=__version__)
+    common_group.add_argument("-n",type=int,dest="lines",default=False,
+        help="Lines to show from beginning of file. Negative value to show lines but the number. Default: 10")
+    parser.add_argument("file",type=str, nargs='*',
+        help="File(s) to be headed")
+    opts=parser.parse_args()
+    if not opts.lines:
+        try:
+            int(opts.file[0])
+            opts.lines=int(opts.file.pop(0))
+        except:
+            pass
+    if not opts.lines:
+        opts.lines=10
+    return opts
+
+def behead(fileob,opts):
+
+    header=fileob.readline()
+    sys.stdout.write(header)
+    if opts.lines>=0:
+        head_ordinary(fileob, opts.lines)
+    else:
+        head_allbutlast(fileob, opts.lines)
+
+def head_ordinary(fileob, lines):
+    for i,row in enumerate(fileob):
+        if i>lines-1:
+            break
+        sys.stdout.write(row)
+
+def head_allbutlast(fileob, lines):
+    lines=-lines
+    buf=[]
+    for row in fileob:
+        buf.append(row)
+        if len(buf)>lines:
+            sys.stdout.write(buf.pop(0))
+
+def main():
+    opts=setup_options()
+    try:
+        if (not sys.stdin.isatty()):
+            behead(sys.stdin, opts)
+        for fi in opts.file:
+            behead(open(fi,'r'), opts)
+    except IOError as (n,e):
+        if n==32:
+            pass
+        else:
+            import traceback
+            print traceback.format_exc()
+
+if __name__ == "__main__":
+    main()
diff --git a/tsv/lib/tsvtail b/tsv/lib/tsvtail
new file mode 100755
index 0000000..9b4c469
--- /dev/null
+++ b/tsv/lib/tsvtail
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 Ville Rantanen
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+'''simple tail for tsv/csv files.'''
+
+__author__ = "Ville Rantanen "
+
+__version__ = "0.1"
+
+import sys,os,argparse
+from argparse import ArgumentParser
+
+def setup_options():
+    ''' Create command line options '''
+    usage='''
+simple implementation of tail, keeping the header row
+
+'''
+
+    parser=ArgumentParser(description=usage,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="\n".join(["Version: "+__version__,__author__]))
+    common_group=parser.add_argument_group('common', 'Common options')
+    common_group.add_argument("-v","--version",action="version",version=__version__)
+    common_group.add_argument("-n",type=str,dest="lines",default=False,
+        help="Lines to show from end of file. +K to start output from the Kth. Default: 10")
+    parser.add_argument("file",type=str, nargs='*',
+        help="File(s) to be headed")
+    opts=parser.parse_args()
+    if not opts.lines:
+        try:
+            int(opts.file[0])
+            opts.lines=opts.file.pop(0)
+        except:
+            pass
+    if not opts.lines:
+        opts.lines="10"
+    if opts.lines.startswith("+"):
+        opts.lines=-int(opts.lines)
+    else:
+        opts.lines=int(opts.lines)
+    return opts
+
+def tail(fileob,opts):
+
+    header=fileob.readline()
+    sys.stdout.write(header)
+    if opts.lines>=0:
+        tail_ordinary(fileob, opts.lines)
+    else:
+        tail_allbutfirst(fileob, -opts.lines)
+
+def tail_allbutfirst(fileob, lines):
+    for i,row in enumerate(fileob):
+        if i<lines-1:
+            continue
+        sys.stdout.write(row)
+
+def tail_ordinary(fileob, lines):
+    buf=[]
+    for row in fileob:
+        buf.append(row)
+        if len(buf)>lines:
+            buf.pop(0)
+    [sys.stdout.write(l) for l in buf]
+
+def main():
+    opts=setup_options()
+    try:
+        if (not sys.stdin.isatty()):
+            tail(sys.stdin, opts)
+        for fi in opts.file:
+            tail(open(fi,'r'), opts)
+    except IOError as (n,e):
+        if n==32:
+            pass
+        else:
+            import traceback
+            print traceback.format_exc()
+    except KeyboardInterrupt:
+        pass
+
+if __name__ == "__main__":
+    main()
diff --git a/tsv/tsvhead b/tsv/tsvhead
new file mode 120000
index 0000000..4dbc5f8
--- /dev/null
+++ b/tsv/tsvhead
@@ -0,0 +1 @@
+lib/tsvhead
\ No newline at end of file
diff --git a/tsv/tsvkit.sh b/tsv/tsvkit.sh
new file mode 100644
index 0000000..b541c2e
--- /dev/null
+++ b/tsv/tsvkit.sh
@@ -0,0 +1,257 @@
+## Tab sebarated file functions, wrappers for csvkit
+# To use the tsv- commands, install csvkit: sudo apt-get install python-pip; sudo pip install csvkit
+
+TSVDIR=$( dirname "${BASH_SOURCE[0]}" )
+
+function c2t {
+# Convert comma separated stream in to tab separated stream
+# Usage: echo "foo,bar" | c2t
+
+    python -c 'import sys,csv
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, lineterminator="\n").writerows(csv.reader(sys.stdin, dialect=csv.excel))
+except IOError:
+    pass'
+}
+
+function header {
+# Print only the first line of input
+# Usage: header file.csv
+# Usage: cat file.csv | header
+
+    head -n 1 "$@"
+}
+
+function noheader {
+# Strip first row of input
+# Usage: noheader file.csv
+# Usage: cat file.csv | noheader
+
+    tail -n +2 "$@"
+}
+
+function tsvecho {
+# Echo with tab separated values, quoted
+# Usage: tsvecho value1 value2 "some value" > header.csv
+# Usage: echo value1 value2 | tsvecho
+
+    local HEAD
+    [[ -t 0 ]] && {
+        printf -v HEAD "\"%s\"\t" "$@"
+    } || {
+        printf -v HEAD "\"%s\"\t" "$@" $( cat - )
+    }
+    echo "${HEAD%?}"
+}
+
+function tsvstrip {
+# Strip tsv of quotes
+# Usage: cat file.csv | tsvstrip
+
+    python -c 'import sys,csv
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE).writerows(csv.reader(sys.stdin, dialect=csv.excel_tab))
+except IOError:
+    pass'
+}
+
+function tsvtranspose {
+# Transpose a tsv file
+# Usage: cat file.csv | tsvtranspose
+
+    python -c 'import sys,csv
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE).writerows(map(None,*csv.reader(sys.stdin, dialect=csv.excel_tab)))
+except IOError:
+    pass'
+}
+
+function tsvhead {
+# Head a file, but print also the header. header not counted in line numbers
+# Usage: cat file | tsvhead -n 30
+
+    if [ -t 0 ]; then
+        python "${TSVDIR}"/lib/tsvhead "$@"
+    else
+        cat - | python "${TSVDIR}"/lib/tsvhead "$@"
+    fi
+}
+
+function tsvtail {
+# Tail a file, but print also the header. header not counted in line numbers
+# Usage: cat file | tsvtail -n 30
+
+    if [ -t 0 ]; then
+        python "${TSVDIR}"/lib/tsvtail "$@"
+    else
+        cat - | python "${TSVDIR}"/lib/tsvtail "$@"
+    fi
+}
+
+which "csvjoin" > /dev/null && {
+
+function tsvcut {
+# csvcut with tab-delimited dialect, see original script for options
+# Usage: tsvcut -c Col1,Col3 input1.tsv
+
+
+    csvcut -t "$@" | c2t
+}
+
+function tsvformat {
+# csvformat with tab-delimited dialect, see original script for options
+# Usage: tsvformat -c Col2 -m searchString input1.tsv
+
+
+    csvformat -t -T "$@"
+}
+
+function tsvgrep {
+# csvgrep with tab-delimited dialect, see original script for options
+# Usage: tsvgrep -c Col2 -m searchString input1.tsv
+
+
+    csvgrep -t "$@" | c2t
+}
+
+function tsvjoin {
+# csvjoin with tab-delimited dialect, see original script for options
+# Usage: tsvjoin -c 1,1 input1.tsv input2.tsv
+
+
+    csvjoin -t "$@" | c2t
+}
+
+function tsvlook {
+# csvlook with tab-delimited dialect, see original script for options
+# Usage: tsvlook file1.tsv
+
+
+    csvlook -t "$@"
+}
+
+function tsvquery {
+# Simple SQL query for tab-delimited files. The tables are named tsv1,tsv2 ...,
+# unless named with NAME=FILE syntax. If defining database with -d
+# it is kept for later queries. A query is not optional(!), but it can be
+# an empty string.
+# Usage: tsvquery mytable=file1.tsv file2.tsv "SELECT * FROM tsv1,mytable WHERE C4 not like '%NA%' ORDER BY C4 DESC"
+# Usage: tsquery -d DB.sqlite tsv1=file1.tsv "SELECT * FROM tsv1;" # Will keep the database file, reusable later
+
+    local DBTEMP
+    local i
+    for (( i=1; i<=$(($#)); i++ ))
+    do [ "${!i}" = "-h" ] && {
+        echo 'Usage:
+    tsvquery [-d database.sqlite] tableName=data.tsv otherTable=data2.tsv "SQL QUERY"
+    When defining a database with -d it is kept, and can be
+    inserted with more data later. Otherwise the DB is created in /tmp/
+    and deleted afterwards.
+    If not using name=data.tsv syntax, tables are named tsv1, tsv2...
+    Note: You have to give an SQL query. If you just want to
+    populate a database, add " " as an empty query.
+    '
+        return 0
+       }
+       [ "${!i}" = "-d" ] && {
+        # User defined data base
+        local j
+        j=$(( $i + 1 ))
+        DBTEMP="${!j}"
+        shift 2
+       }
+    done
+    [ -z "$j" ] && DBTEMP=$( mktemp )
+    for (( i=1; i<=$(($#-1)); i++ ))
+    do [ -f "${!i}" ] && {
+        # Add table with unique numbering
+        local OLDTBLS=$( sqlite3 "$DBTEMP" ".tables" )
+        local TBLNO=1
+        while :
+        do echo $OLDTBLS | grep tsv$TBLNO > /dev/null || break
+           TBLNO=$(( $TBLNO + 1 ))
+        done
+        cat "${!i}" | csvsql -t --db "sqlite:///$DBTEMP" --insert --table tsv$TBLNO
+       } || {
+        # Add a user named table
+        local TBL
+        local FIL
+        TBL=$( echo ${!i} | sed 's,=.*,,' )
+        FIL=$( echo ${!i} | sed "s,^$TBL=,," )
+        [ -f "$FIL" ] && {
+            cat "$FIL" | csvsql -t --db "sqlite:///$DBTEMP" --insert --table "$TBL"
+        } || {
+            echo File "${!i}" not found
+            rm -f "$DBTEMP"
+            return 1
+        }
+       }
+    done
+    sqlite3 -list -separator $'\t' -nullvalue NA -header -batch "$DBTEMP" "${@: -1}"
+    local EC=$?
+    # remove DB if using temporary
+    [ -z "$j" ] && {
+        rm -f "$DBTEMP"
+    }
+    return $EC
+}
+
+function tsvsort {
+# csvsort with tab-delimited dialect, see original script for options
+# Usage: tsvsort -c Col3 input.tsv
+
+
+    csvsort -t "$@" | c2t
+}
+
+function tsvstack {
+# csvstack with tab-delimited dialect, see original script for options
+# Usage: tsvstack file1.tsv file2.tsv
+
+
+    csvstack -t "$@" | c2t
+}
+
+} || {
+    CSVKITERROR="no csvkit installed. [sudo pip install csvkit]"
+    tsvjoin () { echo $CSVKITERROR; return 1; }
+    tsvcut () { echo $CSVKITERROR; return 1; }
+    tsvgrep () { echo $CSVKITERROR; return 1; }
+    tsvquery () { echo $CSVKITERROR; return 1; }
+    tsvsort () { echo $CSVKITERROR; return 1; }
+    tsvstack () { echo $CSVKITERROR; return 1; }
+    tsvlook () { echo $CSVKITERROR; return 1; }
+}
+
+function tsvfold {
+# Folds a sequence of line separated arguments into a TSV table with a header you specify,
+# which means it may easily cause aliasing if you have the wrong number of them.
+# Any loop which uses "print" to produce field values should ideally work.
+# Usage: cat data | tsv Column1 Column2 ...
+# Usage: seq 100 | tsvfold First Second Third Fourth
+
+
+    echo "" | tsvecho "$@"
+    if ! [ -t 0 ]; then
+        mawk -F $'\t' '{ORS=(NR%'$#'?FS:RS)}1'
+    fi
+}
+
+function tsvdims {
+# Print dimensions of a TSV
+# Usage: tsvdims file.txt
+# Usage: cat file.txt | tsvdims
+
+    python -c 'import sys,csv
+if sys.argv[1]=="":
+    input=sys.stdin
+else:
+    input=open(sys.argv[1])
+rows=-1
+for row in csv.reader(input, dialect=csv.excel_tab):
+    if rows==-1:
+        cols=len(row)
+    rows+=1
+csv.writer(sys.stdout, dialect=csv.excel_tab).writerows([["Columns","Rows"],[cols,rows]])
+' "$1"
+}
diff --git a/tsv/tsvkit.update b/tsv/tsvkit.update
new file mode 100755
index 0000000..8116216
--- /dev/null
+++ b/tsv/tsvkit.update
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Refresh tsvkit.sh, lib/tsvhead and lib/tsvtail from the anduril repository.
+
+TOOLSPATH=$( readlink -f "$( dirname "${BASH_SOURCE[0]}" )/.." )
+
+function _fetch_tsvkit() {
+# Download one tsvkit file to "$TOOLSPATH"/tsv/$1.
+# wget -O truncates its target before any data arrives, so download next to
+# the target and replace the installed copy only when a non-empty file came in.
+# Returns 1 (and keeps the installed copy) when the download fails.
+# Usage: _fetch_tsvkit lib/tsvhead
+
+    local BASEURL=https://bitbucket.org/anduril-dev/anduril/raw/anduril2/lang/bash
+    local TARGET="$TOOLSPATH"/tsv/"$1"
+    wget -qO "$TARGET".new "$BASEURL/$1" && [ -s "$TARGET".new ] && mv "$TARGET".new "$TARGET" || {
+        rm -f "$TARGET".new
+        echo "tsvkit.update: failed to download $1" >&2
+        return 1
+    }
+}
+
+function _update_tsvkit() {
+# Fetch every tsvkit file; returns non-zero if any download failed.
+
+    local FILE
+    local EC=0
+    mkdir -p "$TOOLSPATH"/tsv/lib
+    for FILE in tsvkit.sh lib/tsvhead lib/tsvtail
+    do _fetch_tsvkit "$FILE" || EC=1
+    done
+    chmod +x "$TOOLSPATH"/tsv/lib/*
+    return $EC
+}
+
+[[ -d "$TOOLSPATH"/tsv/ ]] && {
+    _update_tsvkit
+}
+
+unset -f _update_tsvkit _fetch_tsvkit
diff --git a/tsv/tsvtail b/tsv/tsvtail
new file mode 120000
index 0000000..59f7ae6
--- /dev/null
+++ b/tsv/tsvtail
@@ -0,0 +1 @@
+lib/tsvtail
\ No newline at end of file