tsvsummary for bash
This commit is contained in:
61
tsv/tsvsummary
Executable file
61
tsv/tsvsummary
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
#
# tsvsummary - print summary statistics for a TSV file, optionally one
# output row per distinct value of a chosen column.
# Relies on the external commands from csvkit (csvcut, csvgrep) and on ncsv.

# Abort as soon as any command fails.
set -e
#######################################
# Print the help text of tsvsummary.
# Arguments: none
# Outputs:   the help text on stdout, followed by one empty line
#######################################
usage() {
  # A quoted here-doc prints the text verbatim (no escape or variable
  # expansion is needed for this message).
  cat <<'EOF'
 tsvsummary, a CSVSummary clone for the command line.
 Depends on csvkit and ncsv

 Usage: tsvsummary [-c column] [-t type] tsv_file
    -c  name of the column to use as clusterCol
        (produce as many lines as column has unique values)
    -t  Type of statistics: mean, max, min, sum, stdev, nas, unique
    -h  Help

EOF
}
# Defaults. LABEL is cleared explicitly so that a LABEL variable inherited
# from the environment cannot silently switch the script to per-cluster mode.
LABEL=
TYPEOF=mean
TYPELIST=( mean max min sum stdev nas unique )

# Parse the command line: -c <column>, -t <type>, -h.
while getopts c:ht: opt; do
  case "$opt" in
    c)
      LABEL=$OPTARG
      ;;
    t)
      # Lower-case the requested type so it can be compared with TYPELIST
      # regardless of how the user capitalised it.
      TYPEOF=$(printf '%s' "$OPTARG" | tr '[:upper:]' '[:lower:]')
      ;;
    h)
      usage
      exit 0
      ;;
    \?)
      # Unknown option or missing option argument: getopts has already
      # printed its own diagnostic. Report failure instead of exiting 0.
      usage >&2
      exit 1
      ;;
  esac
done

# Drop the parsed options so that $1 is the first non-option argument.
shift $(( OPTIND - 1 ))
# Check that the requested statistic (TYPEOF) is one of the supported types
# listed in TYPELIST. TYPEFOUND is reset first so that a value inherited from
# the environment cannot bypass the check.
TYPEFOUND=
for type in "${TYPELIST[@]}"; do
  if [[ "$type" == "$TYPEOF" ]]; then
    TYPEFOUND=1
    break
  fi
done

if [[ -z "$TYPEFOUND" ]]; then
  # Exit from the main shell (not from a subshell) so the abort does not
  # depend on 'set -e' being active; diagnostics go to stderr.
  printf 'Statistics type %s not recognized, valid ones: %s\n' \
    "$TYPEOF" "${TYPELIST[*]}" >&2
  exit 1
fi

# A file name is mandatory as the first remaining argument.
if [[ -z "${1:-}" ]]; then
  usage >&2
  echo No file name provided >&2
  exit 1
fi
if [[ -z "$LABEL" ]]; then
  # No cluster column requested: run ncsv --stat on the whole file and keep
  # its first output line plus every line starting with the requested
  # statistic name (case-insensitive; the 'I' flag is a GNU sed extension).
  ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip"
  exit
else
  # Collect the distinct values of the cluster column (first line of the
  # csvcut output dropped, version-sorted). Reading line by line avoids
  # changing IFS for the rest of the script and keeps values such as '*'
  # from being glob-expanded. Empty lines are skipped.
  UNIQUE=()
  while IFS= read -r value; do
    if [[ -n "$value" ]]; then
      UNIQUE+=( "$value" )
    fi
  done < <(csvcut -t -c "$LABEL" "$1" | tail -n +2 | sort -uV)

  # First line of the file without the cluster column, passed through ncsv.
  HEADER=$(csvcut -t -C "$LABEL" "$1" | head -n 1 | ncsv -i, -d"\t")

  # NOTE(review): ncsv receives the two characters '\t' as its -d argument.
  # In case it emits them verbatim, %b keeps expanding backslash escapes in
  # HEADER as before; LABEL is printed literally.
  printf '%s\tStatistic\t%b\n' "$LABEL" "$HEADER"

  # One output row per cluster value: the value, a tab, then the line(s) of
  # the requested statistic computed on the rows selected by csvgrep.
  # NOTE(review): csvgrep -m appears to be a substring match, so a value "a"
  # would also select rows whose column holds "ab" - confirm against the
  # csvkit documentation and consider an anchored -r pattern.
  for value in "${UNIQUE[@]}"; do
    printf '%s\t' "$value"
    csvgrep -t -c "$LABEL" -m "$value" "$1" \
      | csvcut -C "$LABEL" \
      | ncsv -i, --stat \
      | sed -n "/^$TYPEOF/Ip"
  done
fi
Reference in New Issue
Block a user