manual tsvkit updater

2016-11-17 12:30:01 +02:00
parent e0e719aa1f
commit eb98ff014c
6 changed files with 468 additions and 0 deletions
--- a/tsv/lib/tsvhead
+++ b/tsv/lib/tsvhead
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 Ville Rantanen
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+'''simple head for tsv/csv files.'''
+
+__author__ = "Ville Rantanen <ville.q.rantanen@gmail.com>"
+
+__version__ = "0.1"
+
+import sys,os,argparse
+from argparse import ArgumentParser 
+
+def setup_options():
+    ''' Parse the command line; the returned options always carry an int opts.lines. '''
+    usage='''
+Simple implementation of head that keeps the header row.
+
+'''
+    # -n defaults to None rather than False so that an explicit "-n 0" is honoured.
+    parser=ArgumentParser(description=usage,
+                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                          epilog="\n".join(["Version: "+__version__,__author__]))
+    common_group=parser.add_argument_group('common', 'Common options')
+    common_group.add_argument("-v","--version",action="version",version=__version__)
+    common_group.add_argument("-n",type=int,dest="lines",default=None,
+                     help="Lines to show from beginning of file. Negative value to show lines but the number. Default: 10")
+    parser.add_argument("file",type=str, nargs='*',
+                     help="File(s) to be headed")
+    opts=parser.parse_args()
+    if opts.lines is None:
+        try:
+            opts.lines=int(opts.file[0])   # "tsvhead 5 file": a leading integer is the count
+            opts.file.pop(0)
+        except (IndexError, ValueError):
+            pass
+    if opts.lines is None:
+        opts.lines=10
+    return opts
+
+def behead(fileob,opts):
+    '''Copy the first (header) row of fileob to stdout, then head the remaining rows.'''
+    sys.stdout.write(fileob.readline())
+    # A negative count means "everything except the last N rows".
+    if opts.lines<0:
+        head_allbutlast(fileob, opts.lines)
+    else:
+        head_ordinary(fileob, opts.lines)
+    
+def head_ordinary(fileob, lines):
+    '''Write the first `lines` rows of fileob to stdout.'''
+    for index,line in enumerate(fileob):
+        if index>=lines: break   # limit reached
+        sys.stdout.write(line)
+
+def head_allbutlast(fileob, lines):
+    '''Write every row of fileob except the last -lines ones (lines is negative).'''
+    keep,window=-lines,[]
+    for line in fileob:
+        window.append(line)
+        if len(window)>keep:
+            sys.stdout.write(window.pop(0))
+
+def main():
+    '''Head stdin (when it is not a terminal), then each file named on the command line.'''
+    opts=setup_options()
+    try:
+        if not sys.stdin.isatty():
+            behead(sys.stdin, opts)
+        for fi in opts.file:
+            with open(fi,'r') as fileob:   # close every input once it has been headed
+                behead(fileob, opts)
+    except IOError as err:   # portable spelling; "as (n,e)" is a syntax error on Python 3
+        if err.errno!=32:    # 32 is EPIPE: the downstream reader closed the pipe, stay quiet
+            import traceback
+            sys.stderr.write(traceback.format_exc())   # diagnostics go to stderr, not into the data
+
+if __name__ == "__main__":
+    main()
--- a/tsv/lib/tsvtail
+++ b/tsv/lib/tsvtail
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 Ville Rantanen
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+'''simple tail for tsv/csv files.'''
+
+__author__ = "Ville Rantanen <ville.q.rantanen@gmail.com>"
+
+__version__ = "0.1"
+
+import sys,os,argparse
+from argparse import ArgumentParser 
+
+def setup_options():
+    ''' Parse the command line; opts.lines is returned as an int, negative meaning "+K". '''
+    usage='''
+simple implementation of tail, keeping the header row
+
+'''
+    # -n is kept as a string until the end so that a leading "+" can be recognised.
+    parser=ArgumentParser(description=usage,
+                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                          epilog="\n".join(["Version: "+__version__,__author__]))
+    common_group=parser.add_argument_group('common', 'Common options')
+    common_group.add_argument("-v","--version",action="version",version=__version__)
+    common_group.add_argument("-n",type=str,dest="lines",default=False,
+                     help="Lines to show from end of file. +K to start output from the Kth. Default: 10")
+    parser.add_argument("file",type=str, nargs='*',
+                     help="File(s) to be headed")
+    opts=parser.parse_args()
+    if not opts.lines:
+        try:
+            int(opts.file[0])   # "tsvtail 5 file": a leading integer is the count
+            opts.lines=opts.file.pop(0)
+        except (IndexError, ValueError):
+            pass
+    if not opts.lines:
+        opts.lines="10"
+    try:
+        opts.lines=-int(opts.lines) if opts.lines.startswith("+") else int(opts.lines)
+    except ValueError:
+        parser.error("invalid number of lines: %r"%opts.lines)   # usage error instead of a traceback
+    return opts
+
+def tail(fileob,opts):
+    '''Copy the first (header) row of fileob to stdout, then tail the remaining rows.'''
+    sys.stdout.write(fileob.readline())
+    # A negative count stands for the "+K" form: start output at row K.
+    if opts.lines<0:
+        tail_allbutfirst(fileob, -opts.lines)
+    else:
+        tail_ordinary(fileob, opts.lines)
+    
+def tail_allbutfirst(fileob, lines):
+    '''Write the rows of fileob starting from row number `lines` (counted from 1).'''
+    for number,line in enumerate(fileob,1):
+        if number>=lines:
+            sys.stdout.write(line)
+
+def tail_ordinary(fileob, lines):
+    '''Write the last `lines` rows of fileob to stdout.
+
+    `lines` must be >= 0; at most that many rows are held in memory at a time.'''
+    from collections import deque
+    # maxlen makes the deque discard its oldest row in O(1) (list.pop(0) is O(n)).
+    sys.stdout.writelines(deque(fileob, maxlen=lines))
+
+def main():
+    '''Tail stdin (when it is not a terminal), then each file named on the command line.'''
+    opts=setup_options()
+    try:
+        if not sys.stdin.isatty():
+            tail(sys.stdin, opts)
+        for fi in opts.file:
+            with open(fi,'r') as fileob:   # close every input once it has been tailed
+                tail(fileob, opts)
+    except IOError as err:   # portable spelling; "as (n,e)" is a syntax error on Python 3
+        if err.errno!=32:    # 32 is EPIPE: the downstream reader closed the pipe, stay quiet
+            import traceback
+            sys.stderr.write(traceback.format_exc())   # diagnostics go to stderr, not into the data
+    except KeyboardInterrupt:
+        pass
+
+if __name__ == "__main__":
+    main()
--- a/tsv/tsvhead
+++ b/tsv/tsvhead
@@ -0,0 +1 @@
+lib/tsvhead
--- a/tsv/tsvkit.sh
+++ b/tsv/tsvkit.sh
@@ -0,0 +1,257 @@
+## Tab separated file functions, wrappers for csvkit
+# To use the tsv- commands, install csvkit: <pre>sudo apt-get install python-pip; sudo pip install csvkit</pre>
+
+TSVDIR=$( dirname "${BASH_SOURCE[0]}" )
+
+function c2t {
+# Convert a comma separated stream into a tab separated stream
+# Usage: echo "foo,bar" | c2t
+# Rows are written with "\n" line endings; an IOError (e.g. a closed pipe) is silently ignored.
+    python -c 'import sys,csv
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, lineterminator="\n").writerows(csv.reader(sys.stdin, dialect=csv.excel))
+except IOError:
+    pass'
+}
+
+function header {
+# Print only the first line of input
+# Usage:  header file.csv
+# Usage:  cat file.csv | header
+# All arguments are handed to head unchanged.
+    head -n 1 "$@"
+}
+
+function noheader {
+# Strip first row of input
+# Usage:  noheader file.csv
+# Usage:  cat file.csv | noheader
+# All arguments are handed to tail unchanged; -n +2 starts the output at the second line.
+    tail -n +2 "$@"
+}
+
+function tsvecho {
+# Echo with tab separated values, quoted
+# Usage: tsvecho value1 value2 "some value" > header.csv
+# Usage: echo value1 value2 | tsvecho 
+# Words read from stdin are split on whitespace but are never glob-expanded.
+    local HEAD
+    local -a WORDS=()
+    # stdin is only consumed when it is not a terminal; read fails at EOF, which is expected
+    [[ -t 0 ]] || read -r -d '' -a WORDS || true
+    printf -v HEAD "\"%s\"\t" "$@" "${WORDS[@]}"
+    # drop the trailing tab left behind by the last field
+    echo "${HEAD%?}"
+}
+
+function tsvstrip {
+# Strip tsv of quotes
+# Usage: cat file.csv | tsvstrip
+# Reads TSV on stdin and re-writes it with quoting disabled (csv.QUOTE_NONE); an IOError is ignored.
+    python -c 'import sys,csv
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE).writerows(csv.reader(sys.stdin, dialect=csv.excel_tab))
+except IOError:
+    pass'
+}
+
+function tsvtranspose {
+# Transpose a tsv file; rows shorter than the longest one are padded with empty fields
+# Usage: cat file.csv | tsvtranspose
+    python -c 'import sys,csv,itertools
+zip_longest=getattr(itertools,"zip_longest",None) or itertools.izip_longest  # Python 3 name, else Python 2 name
+try:
+    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE).writerows(zip_longest(*csv.reader(sys.stdin, dialect=csv.excel_tab)))
+except IOError:
+    pass'
+}
+
+function tsvhead {
+# Print the header row plus the first rows of a TSV; the header is not counted
+# Usage: cat file | tsvhead -n 30
+    local SCRIPT="${TSVDIR}"/lib/tsvhead
+    if ! [ -t 0 ]; then
+        cat - | python "$SCRIPT" "$@"
+    else
+        python "$SCRIPT" "$@"
+    fi
+}
+
+function tsvtail {
+# Print the header row plus the last rows of a TSV; the header is not counted
+# Usage: cat file | tsvtail -n 30
+    local SCRIPT="${TSVDIR}"/lib/tsvtail
+    if ! [ -t 0 ]; then
+        cat - | python "$SCRIPT" "$@"
+    else
+        python "$SCRIPT" "$@"
+    fi
+}
+
+which "csvjoin" > /dev/null && {
+
+function tsvcut {
+# csvcut with tab-delimited dialect, see original script for options
+# Usage: tsvcut -c Col1,Col3 input1.tsv
+
+    # All arguments go to csvcut -t; c2t turns the comma separated output back into tabs.
+    csvcut -t "$@" | c2t
+}
+
+function tsvformat {
+# csvformat with tab-delimited dialect, see original script for options
+# Usage: tsvformat -c Col2 -m searchString input1.tsv
+# NOTE(review): the usage line matches tsvgrep's; confirm csvformat really accepts -c/-m.
+    # All arguments go to csvformat -t -T; the output is not piped through c2t here.
+    csvformat -t -T "$@"
+}
+
+function tsvgrep {
+# csvgrep with tab-delimited dialect, see original script for options
+# Usage: tsvgrep -c Col2 -m searchString input1.tsv
+
+    # All arguments go to csvgrep -t; c2t turns the comma separated output back into tabs.
+    csvgrep -t "$@" | c2t
+}
+
+function tsvjoin {
+# csvjoin with tab-delimited dialect, see original script for options
+# Usage: tsvjoin -c 1,1 input1.tsv input2.tsv
+
+    # All arguments go to csvjoin -t; c2t turns the comma separated output back into tabs.
+    csvjoin -t "$@" | c2t
+}
+
+function tsvlook {
+# csvlook with tab-delimited dialect, see original script for options
+# Usage: tsvlook file1.tsv
+
+    # All arguments go to csvlook -t; its output is printed as-is (no c2t conversion).
+    csvlook -t "$@"
+}
+
+function tsvquery {
+# Simple SQL query for tab-delimited files. The tables are  named tsv1,tsv2 ...,
+# unless named with NAME=FILE syntax. If defining database with -d
+# it is kept for later queries. A query is not optional(!), but it can be
+# an empty string.
+# Usage: tsvquery mytable=file1.tsv file2.tsv "SELECT * FROM tsv1,mytable WHERE C4 not like '%NA%' ORDER BY C4 DESC"
+# Usage: tsvquery -d DB.sqlite tsv1=file1.tsv "SELECT * FROM tsv1;" # Will keep the database file, reusable later
+
+    local DBTEMP j
+    local i
+    for (( i=1; i<=$(($#)); i++ ))
+    do [ "${!i}" = "-h" ] && {
+        echo 'Usage:
+    tsvquery [-d database.sqlite] tableName=data.tsv otherTable=data2.tsv "SQL QUERY"
+    When defining a database with -d it is kept, and can be
+    inserted with more data later. Otherwise the DB is created in /tmp/
+    and deleted afterwards.
+    If not using name=data.tsv syntax, tables are named tsv1, tsv2...  
+    Note: You have to give an SQL query. If you just want to 
+          populate a database, add " "  as an empty query.
+        '
+        return 0
+        }
+       [ "${!i}" = "-d" ] && {
+        # User defined data base
+        # (j is declared local at the top so a caller's global $j cannot leak into the tests below)
+        j=$(( $i + 1 ))
+        DBTEMP="${!j}"
+        shift 2   # NOTE: removes $1 and $2, so "-d DB" has to be the first two arguments
+       }
+    done
+    [ -z "$j" ] && DBTEMP=$( mktemp )
+    for (( i=1; i<=$(($#-1)); i++ ))
+    do [ -f "${!i}" ] && {
+           # Add table with unique numbering
+           local OLDTBLS=$( sqlite3 "$DBTEMP" ".tables" )
+           local TBLNO=1
+           while : 
+           do echo $OLDTBLS | grep tsv$TBLNO > /dev/null || break
+              TBLNO=$(( $TBLNO + 1 ))
+           done
+           cat "${!i}" | csvsql -t --db "sqlite:///$DBTEMP" --insert --table tsv$TBLNO
+       } || {
+           # Add a user named table
+           local TBL
+           local FIL
+           TBL=$( echo ${!i} | sed 's,=.*,,' )
+           FIL=$( echo ${!i} | sed "s,^$TBL=,," )
+           [ -f "$FIL" ] && { 
+               cat "$FIL" | csvsql -t --db "sqlite:///$DBTEMP" --insert --table "$TBL"
+           } || {
+               echo File "${!i}" not found
+               [ -z "$j" ] && rm -f "$DBTEMP"   # only a temporary DB may be removed, never the user's -d database
+               return 1
+           }
+       }
+    done
+    sqlite3 -list -separator $'\t' -nullvalue NA -header -batch "$DBTEMP" "${@: -1}"
+    local EC=$?
+    # remove DB if using temporary
+    [ -z "$j" ] && {
+        rm -f "$DBTEMP" 
+    }
+    return $EC
+}
+
+function tsvsort {
+# csvsort with tab-delimited dialect, see original script for options
+# Usage: tsvsort -c Col3 input.tsv
+
+    # All arguments go to csvsort -t; c2t turns the comma separated output back into tabs.
+    csvsort -t "$@" | c2t
+}
+
+function tsvstack {
+# csvstack with tab-delimited dialect, see original script for options
+# Usage: tsvstack file1.tsv file2.tsv
+
+    # All arguments go to csvstack -t; c2t turns the comma separated output back into tabs.
+    csvstack -t "$@" | c2t
+}
+
+} || {
+    # csvkit is missing: replace every command defined in the csvkit branch above
+    # (tsvformat included) with a stub that prints an installation hint and fails.
+    CSVKITERROR="no csvkit installed.  [sudo pip install csvkit]"
+    for _TSVCMD in tsvcut tsvformat tsvgrep tsvjoin tsvlook tsvquery tsvsort tsvstack
+    do
+        eval "function $_TSVCMD { echo \"\$CSVKITERROR\"; return 1; }"
+    done
+    unset _TSVCMD
+}
+
+function tsvfold {
+# Folds a sequence of line separated arguments into a TSV table with a header you specify,
+# which means it may easily cause aliasing if you have the wrong number of them.
+# Any loop which uses "print" to produce field values should ideally work.
+# Usage: cat data | tsvfold Column1 Column2 ...
+# Usage: seq 100 | tsvfold First Second Third Fourth
+# The row width is the number of column names given ($#); the header is printed via tsvecho.
+# NOTE(review): needs mawk; with no column names the awk program computes NR%0 - confirm intended.
+    echo "" | tsvecho "$@"
+    if ! [ -t 0 ]; then
+        mawk -F $'\t' '{ORS=(NR%'$#'?FS:RS)}1'
+    fi
+}
+
+function tsvdims {
+# Print dimensions of a TSV (header row not counted in Rows); empty input gives 0 and 0
+# Usage: tsvdims file.txt
+# Usage: cat file.txt | tsvdims 
+    python -c 'import sys,csv
+if sys.argv[1]=="":
+    source=sys.stdin
+else:
+    source=open(sys.argv[1])
+cols=0
+rows=-1
+for row in csv.reader(source, dialect=csv.excel_tab):
+    if rows==-1:
+        cols=len(row)
+    rows+=1
+csv.writer(sys.stdout, dialect=csv.excel_tab).writerows([["Columns","Rows"],[cols,max(rows,0)]])
+' "$1"
+}
--- a/tsv/tsvkit.update
+++ b/tsv/tsvkit.update
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+TOOLSPATH=$( readlink -f "$( dirname "${BASH_SOURCE[0]}" )/.." )
+
+function _update_tsvkit() {   # re-download tsvkit.sh and the lib/ helpers into $TOOLSPATH/tsv
+    mkdir -p "$TOOLSPATH"/tsv/lib
+    wget -qO "$TOOLSPATH"/tsv/tsvkit.sh https://bitbucket.org/anduril-dev/anduril/raw/anduril2/lang/bash/tsvkit.sh   # NOTE(review): exit status is not checked; confirm a failed fetch cannot leave a truncated file
+    wget -qO "$TOOLSPATH"/tsv/lib/tsvhead https://bitbucket.org/anduril-dev/anduril/raw/anduril2/lang/bash/lib/tsvhead
+    wget -qO "$TOOLSPATH"/tsv/lib/tsvtail https://bitbucket.org/anduril-dev/anduril/raw/anduril2/lang/bash/lib/tsvtail
+    chmod +x "$TOOLSPATH"/tsv/lib/*   # make the downloaded lib scripts executable
+}
+
+[[ -d "$TOOLSPATH"/tsv/ ]] && {
+    _update_tsvkit
+}
+
+unset -f _update_tsvkit
--- a/tsv/tsvtail
+++ b/tsv/tsvtail
@@ -0,0 +1 @@
+lib/tsvtail