manual tsvkit updater
This commit is contained in:
93
tsv/lib/tsvhead
Executable file
93
tsv/lib/tsvhead
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2015 Ville Rantanen
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Lesser General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
'''simple head for tsv/csv files.'''
|
||||
|
||||
__author__ = "Ville Rantanen <ville.q.rantanen@gmail.com>"
|
||||
|
||||
__version__ = "0.1"
|
||||
|
||||
import sys,os,argparse
|
||||
from argparse import ArgumentParser
|
||||
|
||||
def setup_options(args=None):
    ''' Create command line options and parse them.

    args: optional list of argument strings; when None, argparse reads
          the process command line (sys.argv[1:]).

    Returns the parsed namespace with:
      lines -- int; number of rows to show after the header. A negative
               value means "all rows except the last N". Default: 10.
      file  -- list of file names to process (may be empty).

    The count may also be given positionally as the first argument
    ("tsvhead 5 file.tsv") when -n is not used.
    '''
    usage='''
Simple implementation of head that keeps the header row.

'''

    parser=ArgumentParser(description=usage,
                          formatter_class=argparse.RawDescriptionHelpFormatter,
                          epilog="\n".join(["Version: "+__version__,__author__]))
    common_group=parser.add_argument_group('common', 'Common options')
    common_group.add_argument("-v","--version",action="version",version=__version__)
    # default=None (not False) so that an explicit "-n 0" can be told apart
    # from "-n not given"; 0 is a valid request for "header only".
    common_group.add_argument("-n",type=int,dest="lines",default=None,
                              help="Lines to show from beginning of file. Negative value to show lines but the number. Default: 10")
    parser.add_argument("file",type=str, nargs='*',
                        help="File(s) to be headed")
    opts=parser.parse_args(args)
    if opts.lines is None:
        # No -n: accept a leading numeric positional as the line count.
        try:
            opts.lines=int(opts.file[0])
        except (IndexError, ValueError):
            # No positional at all, or the first one is a file name.
            opts.lines=10
        else:
            opts.file.pop(0)
    return opts
|
||||
|
||||
def behead(fileob,opts):
    ''' Copy the header row of fileob to stdout, then the requested rows.

    The first line is always written. opts.lines >= 0 selects that many
    leading rows; a negative opts.lines selects everything except the
    last -opts.lines rows.
    '''
    sys.stdout.write(fileob.readline())
    if opts.lines<0:
        head_allbutlast(fileob, opts.lines)
        return
    head_ordinary(fileob, opts.lines)
|
||||
|
||||
def head_ordinary(fileob, lines):
    ''' Write the first `lines` rows of fileob to stdout.

    fileob: iterable of text rows (an open file or stdin).
    lines:  number of rows to copy; zero or a negative value copies nothing.

    Stops pulling from fileob as soon as `lines` rows have been written,
    so no row beyond the limit is consumed from the stream.
    '''
    from itertools import islice

    # islice rejects negative bounds, so clamp to "nothing".
    sys.stdout.writelines(islice(fileob, max(lines, 0)))
|
||||
|
||||
def head_allbutlast(fileob, lines):
    ''' Write every row of fileob to stdout except the last -lines rows.

    fileob: iterable of text rows (an open file or stdin).
    lines:  the count as given on the command line, i.e. a negative
            number; its absolute value is how many trailing rows to drop.

    Rows are held back in a queue of at most -lines entries, and a row is
    released only once that many newer rows have been seen.
    '''
    from collections import deque

    holdback=-lines
    pending=deque()
    for row in fileob:
        pending.append(row)
        if len(pending)>holdback:
            # deque.popleft is O(1); list.pop(0) shifts the whole buffer.
            sys.stdout.write(pending.popleft())
|
||||
|
||||
def main():
    ''' Command line entry point: head stdin (when piped) and every named file.

    A broken pipe on output (e.g. the reader of our stdout exited) and a
    keyboard interrupt end the program quietly. Any other I/O error, such
    as an unreadable input file, prints a traceback to stderr and exits
    with status 1.
    '''
    import errno
    import traceback

    opts=setup_options()
    try:
        if not sys.stdin.isatty():
            behead(sys.stdin, opts)
        for fi in opts.file:
            # Close each input as soon as it has been processed.
            with open(fi,'r') as fileob:
                behead(fileob, opts)
    except IOError as err:
        # "except IOError as (n,e)" is Python 2 only; reading err.errno
        # works on both Python 2 and Python 3.
        if err.errno!=errno.EPIPE:
            # Report on stderr so the error never mixes into the data stream.
            traceback.print_exc()
            sys.exit(1)
    except KeyboardInterrupt:
        pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
99
tsv/lib/tsvtail
Executable file
99
tsv/lib/tsvtail
Executable file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2015 Ville Rantanen
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Lesser General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
'''simple tail for tsv/csv files.'''
|
||||
|
||||
__author__ = "Ville Rantanen <ville.q.rantanen@gmail.com>"
|
||||
|
||||
__version__ = "0.1"
|
||||
|
||||
import sys,os,argparse
|
||||
from argparse import ArgumentParser
|
||||
|
||||
def setup_options(args=None):
    ''' Create command line options and parse them.

    args: optional list of argument strings; when None, argparse reads
          the process command line (sys.argv[1:]).

    Returns the parsed namespace with:
      lines -- int; N >= 0 means "show the last N rows", a negative value
               -K means "start output from the Kth row" (given as +K on
               the command line). Default: 10.
      file  -- list of file names to process (may be empty).

    The count may also be given positionally as the first argument
    ("tsvtail 5 file.tsv") when -n is not used. A count that is not an
    integer ends the program with an argparse usage error.
    '''
    usage='''
simple implementation of tail, keeping the header row

'''

    parser=ArgumentParser(description=usage,
                          formatter_class=argparse.RawDescriptionHelpFormatter,
                          epilog="\n".join(["Version: "+__version__,__author__]))
    common_group=parser.add_argument_group('common', 'Common options')
    common_group.add_argument("-v","--version",action="version",version=__version__)
    # Kept as a string so that a leading "+" survives until it is inspected.
    common_group.add_argument("-n",type=str,dest="lines",default=None,
                              help="Lines to show from end of file. +K to start output from the Kth. Default: 10")
    parser.add_argument("file",type=str, nargs='*',
                        help="File(s) to be tailed")
    opts=parser.parse_args(args)
    if opts.lines is None:
        # No -n: accept a leading numeric positional as the line count.
        try:
            int(opts.file[0])
        except (IndexError, ValueError):
            # No positional at all, or the first one is a file name.
            opts.lines="10"
        else:
            opts.lines=opts.file.pop(0)
    try:
        count=int(opts.lines)
    except ValueError:
        parser.error("invalid number of lines: %r"%(opts.lines,))
    if opts.lines.startswith("+"):
        # "+K" is encoded as -K. "+0" is treated like "+1" (everything);
        # encoding it as 0 would mean "show the last 0 rows".
        opts.lines=-max(count, 1)
    else:
        opts.lines=count
    return opts
|
||||
|
||||
def tail(fileob,opts):
    ''' Copy the header row of fileob to stdout, then the requested rows.

    The first line is always written. opts.lines >= 0 selects that many
    trailing rows; a negative opts.lines (-K) selects the rows from the
    Kth one onwards.
    '''
    sys.stdout.write(fileob.readline())
    if opts.lines<0:
        tail_allbutfirst(fileob, -opts.lines)
        return
    tail_ordinary(fileob, opts.lines)
|
||||
|
||||
def tail_allbutfirst(fileob, lines):
    ''' Write the rows of fileob to stdout starting from row number `lines`.

    Rows are counted from 1, so lines=1 copies everything and lines=3
    drops the first two rows.
    '''
    skipped=lines-1
    for index,row in enumerate(fileob):
        if index>=skipped:
            sys.stdout.write(row)
|
||||
|
||||
def tail_ordinary(fileob, lines):
    ''' Write the last `lines` rows of fileob to stdout.

    fileob: iterable of text rows (an open file or stdin).
    lines:  number of trailing rows to show; zero or a negative value
            shows nothing.

    The whole input is read, but only the newest `lines` rows are kept
    in memory at any time.
    '''
    from collections import deque

    # A bounded deque discards the oldest row in O(1) once it is full;
    # maxlen must not be negative, hence the clamp.
    sys.stdout.writelines(deque(fileob, maxlen=max(lines, 0)))
|
||||
|
||||
def main():
    ''' Command line entry point: tail stdin (when piped) and every named file.

    A broken pipe on output (e.g. the reader of our stdout exited) and a
    keyboard interrupt end the program quietly. Any other I/O error, such
    as an unreadable input file, prints a traceback to stderr and exits
    with status 1.
    '''
    import errno
    import traceback

    opts=setup_options()
    try:
        if not sys.stdin.isatty():
            tail(sys.stdin, opts)
        for fi in opts.file:
            # Close each input as soon as it has been processed.
            with open(fi,'r') as fileob:
                tail(fileob, opts)
    except IOError as err:
        # "except IOError as (n,e)" is Python 2 only; reading err.errno
        # works on both Python 2 and Python 3.
        if err.errno!=errno.EPIPE:
            # Report on stderr so the error never mixes into the data stream.
            traceback.print_exc()
            sys.exit(1)
    except KeyboardInterrupt:
        pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
1
tsv/tsvhead
Symbolic link
1
tsv/tsvhead
Symbolic link
@@ -0,0 +1 @@
|
||||
lib/tsvhead
|
||||
257
tsv/tsvkit.sh
Normal file
257
tsv/tsvkit.sh
Normal file
@@ -0,0 +1,257 @@
|
||||
## Tab separated file functions, wrappers for csvkit
|
||||
# To use the tsv- commands, install csvkit: <pre>sudo apt-get install python-pip; sudo pip install csvkit</pre>
|
||||
|
||||
# Absolute directory of this file. Resolved once at source time so that
# tsvhead/tsvtail still find lib/ after the caller changes directory
# (sourcing via a relative path would otherwise leave a relative TSVDIR).
TSVDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null 2>&1 && pwd )
|
||||
|
||||
c2t() {
    # Re-encode a comma separated stream (stdin) as a tab separated stream (stdout).
    # Broken pipes on output are ignored.
    # Usage: echo "foo,bar" | c2t

    local PYCODE='import sys,csv
try:
    csv.writer(sys.stdout, dialect=csv.excel_tab, lineterminator="\n").writerows(csv.reader(sys.stdin, dialect=csv.excel))
except IOError:
    pass'
    python -c "$PYCODE"
}
|
||||
|
||||
header() {
    # Show the first line only. All arguments are handed to head(1),
    # so input comes from the named file(s) or from stdin.
    # Usage: header file.csv
    # Usage: cat file.csv | header

    head -n 1 "$@"
}
|
||||
|
||||
noheader() {
    # Drop the first line and pass the rest through. All arguments are
    # handed to tail(1), so input comes from the named file(s) or from stdin.
    # Usage: noheader file.csv
    # Usage: cat file.csv | noheader

    tail -n +2 "$@"
}
|
||||
|
||||
tsvecho() {
    # Print the arguments as one row of double-quoted, tab separated values.
    # When stdin is not a terminal, the whitespace separated words read
    # from it are appended after the arguments.
    # Usage: tsvecho value1 value2 "some value" > header.csv
    # Usage: echo value1 value2 | tsvecho

    local HEAD
    if [[ -t 0 ]]; then
        printf -v HEAD "\"%s\"\t" "$@"
    else
        # Deliberately unquoted: stdin is split into separate words.
        printf -v HEAD "\"%s\"\t" "$@" $( cat - )
    fi
    # Every value was followed by a tab; drop the final one.
    echo "${HEAD%?}"
}
|
||||
|
||||
function tsvstrip {
    # Strip tsv of quotes: parses stdin as tab separated values and writes
    # the fields back unquoted. Broken pipes on output are ignored.
    # Rows end with "\n", as in c2t; the csv dialect default would be "\r\n".
    # Usage: cat file.csv | tsvstrip

    python -c 'import sys,csv
try:
    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE, lineterminator="\n").writerows(csv.reader(sys.stdin, dialect=csv.excel_tab))
except IOError:
    pass'
}
|
||||
|
||||
function tsvtranspose {
    # Transpose a tsv file: rows of stdin become columns on stdout.
    # Short rows are padded with empty fields. Broken pipes are ignored.
    # Rows end with "\n", as in c2t; the csv dialect default would be "\r\n".
    # Usage: cat file.csv | tsvtranspose

    # zip_longest replaces the Python 2 only map(None, ...) idiom; the
    # import fallback keeps the snippet working on Python 2 as well.
    python -c 'import sys,csv
try:
    from itertools import zip_longest
except ImportError:
    from itertools import izip_longest as zip_longest
try:
    csv.writer(sys.stdout, dialect=csv.excel_tab, quoting=csv.QUOTE_NONE, lineterminator="\n").writerows(zip_longest(*csv.reader(sys.stdin, dialect=csv.excel_tab)))
except IOError:
    pass'
}
|
||||
|
||||
function tsvhead {
    # Head a file, but print also the header. header not counted in line numbers
    # Input comes from the files given as arguments and/or from stdin when
    # it is piped; the script itself checks whether stdin is a terminal.
    # Usage: cat file | tsvhead -n 30
    # Usage: tsvhead -n 30 file.tsv

    # The script inherits our stdin, so no "cat - |" relay is needed.
    python "${TSVDIR}"/lib/tsvhead "$@"
}
|
||||
|
||||
function tsvtail {
    # Tail a file, but print also the header. header not counted in line numbers
    # Input comes from the files given as arguments and/or from stdin when
    # it is piped; the script itself checks whether stdin is a terminal.
    # Usage: cat file | tsvtail -n 30
    # Usage: tsvtail -n 30 file.tsv

    # The script inherits our stdin, so no "cat - |" relay is needed.
    python "${TSVDIR}"/lib/tsvtail "$@"
}
|
||||
|
||||
which "csvjoin" > /dev/null && {
|
||||
|
||||
tsvcut() {
    # Run csvcut on tab-delimited input (-t) and convert its comma
    # separated output back to tabs. All options are passed to csvcut.
    # Usage: tsvcut -c Col1,Col3 input1.tsv

    csvcut -t "$@" | c2t
}
|
||||
|
||||
tsvformat() {
    # Run csvformat with tab-delimited input (-t) and tab-delimited
    # output (-T). All options are passed to csvformat.
    # Usage: tsvformat input1.tsv

    csvformat -t -T "$@"
}
|
||||
|
||||
tsvgrep() {
    # Run csvgrep on tab-delimited input (-t) and convert its comma
    # separated output back to tabs. All options are passed to csvgrep.
    # Usage: tsvgrep -c Col2 -m searchString input1.tsv

    csvgrep -t "$@" | c2t
}
|
||||
|
||||
tsvjoin() {
    # Run csvjoin on tab-delimited input (-t) and convert its comma
    # separated output back to tabs. All options are passed to csvjoin.
    # Usage: tsvjoin -c 1,1 input1.tsv input2.tsv

    csvjoin -t "$@" | c2t
}
|
||||
|
||||
tsvlook() {
    # Run csvlook on tab-delimited input (-t). All options are passed
    # to csvlook; its output is shown as is.
    # Usage: tsvlook file1.tsv

    csvlook -t "$@"
}
|
||||
|
||||
function tsvquery {
    # Simple SQL query for tab-delimited files. The tables are named tsv1,tsv2 ...,
    # unless named with NAME=FILE syntax. If defining database with -d
    # it is kept for later queries. A query is not optional(!), but it can be
    # an empty string.
    # Usage: tsvquery mytable=file1.tsv file2.tsv "SELECT * FROM tsv1,mytable WHERE C4 not like '%NA%' ORDER BY C4 DESC"
    # Usage: tsvquery -d DB.sqlite tsv1=file1.tsv "SELECT * FROM tsv1;" # Will keep the database file, reusable later
    #
    # Arguments: [-h] [-d DATABASE] [TABLE=FILE | FILE]... QUERY
    # Returns:   the exit status of sqlite3 for the query; 1 when a file is
    #            missing, an import fails, or the arguments are incomplete.
    # A database given with -d is never deleted; the temporary database
    # used otherwise is removed on every exit path.

    local DBTEMP="" KEEPDB=0 QUERY SPEC TBL FIL OLDTBLS TBLNO EC i
    local -a ARGS=()

    # Split the arguments into options and table/query arguments. -d and
    # its value may appear anywhere, not only at the front.
    for (( i=1; i<=$#; i++ )); do
        case "${!i}" in
            -h)
                echo 'Usage:
tsvquery [-d database.sqlite] tableName=data.tsv otherTable=data2.tsv "SQL QUERY"
When defining a database with -d it is kept, and can be
inserted with more data later. Otherwise the DB is created in /tmp/
and deleted afterwards.
If not using name=data.tsv syntax, tables are named tsv1, tsv2...
Note: You have to give an SQL query. If you just want to
populate a database, add " " as an empty query.
'
                return 0
                ;;
            -d)
                # User defined data base
                i=$(( i + 1 ))
                DBTEMP="${!i}"
                KEEPDB=1
                ;;
            *)
                ARGS+=( "${!i}" )
                ;;
        esac
    done

    if (( KEEPDB )) && [ -z "$DBTEMP" ]; then
        echo "tsvquery: -d requires a database file name" >&2
        return 1
    fi
    if (( ${#ARGS[@]} == 0 )); then
        echo "tsvquery: no SQL query given, see tsvquery -h" >&2
        return 1
    fi
    if ! (( KEEPDB )); then
        DBTEMP=$( mktemp ) || return 1
    fi

    # The last argument is the query, everything before it is a table.
    QUERY="${ARGS[${#ARGS[@]}-1]}"
    for SPEC in "${ARGS[@]:0:${#ARGS[@]}-1}"; do
        if [ -f "$SPEC" ]; then
            # Add table with unique numbering: first tsvN not yet in the DB.
            OLDTBLS=$( sqlite3 "$DBTEMP" ".tables" )
            TBLNO=1
            # -w so that an existing tsv10 does not count as tsv1.
            while grep -qw "tsv$TBLNO" <<< "$OLDTBLS"; do
                TBLNO=$(( TBLNO + 1 ))
            done
            TBL="tsv$TBLNO"
            FIL="$SPEC"
        else
            # Add a user named table: NAME=FILE, split at the first "=".
            TBL="${SPEC%%=*}"
            FIL="${SPEC#*=}"
            if ! [ -f "$FIL" ]; then
                echo File "$SPEC" not found >&2
                (( KEEPDB )) || rm -f "$DBTEMP"
                return 1
            fi
        fi
        if ! csvsql -t --db "sqlite:///$DBTEMP" --insert --table "$TBL" < "$FIL"; then
            echo "tsvquery: importing $FIL as table $TBL failed" >&2
            (( KEEPDB )) || rm -f "$DBTEMP"
            return 1
        fi
    done

    # NOTE(review): the separator is assumed to be a tab, matching the
    # tab-delimited input; confirm against the upstream script.
    sqlite3 -list -separator $'\t' -nullvalue NA -header -batch "$DBTEMP" "$QUERY"
    EC=$?
    # remove DB if using temporary
    (( KEEPDB )) || rm -f "$DBTEMP"
    return $EC
}
|
||||
|
||||
tsvsort() {
    # Run csvsort on tab-delimited input (-t) and convert its comma
    # separated output back to tabs. All options are passed to csvsort.
    # Usage: tsvsort -c Col3 input.tsv

    csvsort -t "$@" | c2t
}
|
||||
|
||||
tsvstack() {
    # Run csvstack on tab-delimited input (-t) and convert its comma
    # separated output back to tabs. All options are passed to csvstack.
    # Usage: tsvstack file1.tsv file2.tsv

    csvstack -t "$@" | c2t
}
|
||||
|
||||
} || {
|
||||
# csvkit is missing: define every csvkit-backed command as a stub that
# reports the problem on stderr and fails with status 1.
CSVKITERROR="no csvkit installed. [sudo pip install csvkit]"
tsvjoin () { echo "$CSVKITERROR" >&2; return 1; }
tsvcut () { echo "$CSVKITERROR" >&2; return 1; }
tsvformat () { echo "$CSVKITERROR" >&2; return 1; }
tsvgrep () { echo "$CSVKITERROR" >&2; return 1; }
tsvquery () { echo "$CSVKITERROR" >&2; return 1; }
tsvsort () { echo "$CSVKITERROR" >&2; return 1; }
tsvstack () { echo "$CSVKITERROR" >&2; return 1; }
tsvlook () { echo "$CSVKITERROR" >&2; return 1; }
|
||||
}
|
||||
|
||||
function tsvfold {
    # Folds a sequence of line separated arguments into a TSV table with a header you specify,
    # which means it may easily cause aliasing if you have the wrong number of them.
    # Any loop which uses "print" to produce field values should ideally work.
    # Usage: cat data | tsvfold Column1 Column2 ...
    # Usage: seq 100 | tsvfold First Second Third Fourth
    #
    # Arguments: one or more column names; their count is the table width.
    # Returns:   1 (with a message on stderr) when no column name is given.

    if [ $# -eq 0 ]; then
        # Without columns the row width would be zero (NR % 0 in awk).
        echo "tsvfold: at least one column name is required" >&2
        return 1
    fi

    # Header row. tsvecho gets an empty stdin of its own so that it does
    # not swallow the data waiting on ours.
    echo "" | tsvecho "$@"
    if ! [ -t 0 ]; then
        # Join every ncols input lines with tabs: the record separator is
        # a tab, except after each ncols-th line where it is a newline.
        awk -F $'\t' -v ncols="$#" '{ORS=(NR%ncols?FS:RS)}1'
    fi
}
|
||||
|
||||
function tsvdims {
    # Print dimensions of a TSV as a two-row table: a "Columns"/"Rows"
    # header, then the field count of the first line and the number of
    # lines after it. Empty input is reported as 0 columns and 0 rows.
    # Reads the file given as first argument, or stdin when there is none.
    # Usage: tsvdims file.txt
    # Usage: cat file.txt | tsvdims

    python -c 'import sys,csv
if len(sys.argv)<2 or sys.argv[1]=="":
    source=sys.stdin
else:
    source=open(sys.argv[1])
cols=0
rows=-1
try:
    for row in csv.reader(source, dialect=csv.excel_tab):
        if rows==-1:
            cols=len(row)
        rows+=1
finally:
    if source is not sys.stdin:
        source.close()
csv.writer(sys.stdout, dialect=csv.excel_tab, lineterminator="\n").writerows([["Columns","Rows"],[cols,max(rows,0)]])
' "$1"
}
|
||||
17
tsv/tsvkit.update
Executable file
17
tsv/tsvkit.update
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Absolute path of the directory one level above this script. Quoted
# throughout so that a checkout path containing spaces stays one word.
TOOLSPATH=$( readlink -f "$( dirname "${BASH_SOURCE[0]}" )/.." )
|
||||
|
||||
function _update_tsvkit() {
    # Download tsvkit.sh, lib/tsvhead and lib/tsvtail from the Anduril
    # repository into "$TOOLSPATH"/tsv and mark the lib scripts executable.
    # Each file is fetched to a temporary name and moved into place only
    # after a successful, non-empty download, so a failed download never
    # truncates the installed copy.
    # Returns: 1 as soon as one download fails, else the status of chmod.

    local BASEURL=https://bitbucket.org/anduril-dev/anduril/raw/anduril2/lang/bash
    local REL TARGET

    mkdir -p "$TOOLSPATH"/tsv/lib || return 1
    for REL in tsvkit.sh lib/tsvhead lib/tsvtail; do
        TARGET="$TOOLSPATH/tsv/$REL"
        if wget -qO "$TARGET.download" "$BASEURL/$REL" && [ -s "$TARGET.download" ]; then
            mv -f "$TARGET.download" "$TARGET"
        else
            rm -f "$TARGET.download"
            echo "tsvkit update: failed to download $REL" >&2
            return 1
        fi
    done
    chmod +x "$TOOLSPATH"/tsv/lib/*
}
|
||||
|
||||
# Run the update only when the tsv directory already exists under
# TOOLSPATH.
if [[ -d "$TOOLSPATH"/tsv/ ]]; then
    _update_tsvkit
fi

# The helper is only needed during this run; drop it from the shell.
unset -f _update_tsvkit
|
||||
1
tsv/tsvtail
Symbolic link
1
tsv/tsvtail
Symbolic link
@@ -0,0 +1 @@
|
||||
lib/tsvtail
|
||||
Reference in New Issue
Block a user