#!/bin/bash
#
# tsvsummary - print one summary statistic for every column of a TSV file,
# optionally producing one output row per distinct value of a clustering
# column.
#
# Usage: tsvsummary [-c column] [-t type] tsv_file
#
# External tools used: csvcut and csvgrep (csvkit), ncsv, sed with the
# case-insensitive `I` flag, and sort with `-V`.
#
# Exit status: 0 on success or -h; 1 on a bad option, an unknown statistic
# type or a missing file argument.

# Abort on an unhandled command failure. `pipefail` is intentionally left
# off: `head -n 1` below closes its pipe early, which could make the
# upstream producer fail and would then terminate the script.
set -e

# Statistic names accepted by -t (compared after lower-casing the argument).
readonly STAT_TYPES=(mean max min sum stdev nas unique)

#######################################
# Print the help text.
# Outputs: help text on stdout (callers redirect it for error paths).
#######################################
usage() {
  cat <<'EOF'
tsvsummary, a CSVSummary clone for the command line.
Depends on csvkit and ncsv

Usage: tsvsummary [-c column] [-t type] tsv_file
    -c  name of the column to use as clusterCol
        (produce as many lines as column has unique values)
    -t  Type of statistics: mean, max, min, sum, stdev, nas, unique
    -h  Help

EOF
}

#######################################
# Print a message on stderr and terminate the script with status 1.
# Arguments: message words
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Backslash-escape regular-expression metacharacters so that a literal
# string can be embedded in a pattern.
# Arguments: $1 - literal string
# Outputs:   escaped string on stdout
#######################################
regex_escape() {
  printf '%s' "$1" | sed 's/[][\\.^$*+?(){}|]/\\&/g'
}

#######################################
# Parse the command line and print the requested statistics.
# Arguments: the script's command-line arguments
# Outputs:   statistics table on stdout, diagnostics on stderr
#######################################
main() {
  local label='' stat_type='mean'
  local opt OPTARG OPTIND=1

  while getopts 'c:ht:' opt; do
    case "$opt" in
      c)
        label=$OPTARG
        ;;
      t)
        stat_type=$(printf '%s' "$OPTARG" | tr '[:upper:]' '[:lower:]')
        ;;
      h)
        usage
        exit 0
        ;;
      \?)
        # getopts has already reported the offending option on stderr.
        usage >&2
        exit 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Reject statistic names that are not in the supported list.
  local candidate known=0
  for candidate in "${STAT_TYPES[@]}"; do
    if [[ "$candidate" == "$stat_type" ]]; then
      known=1
      break
    fi
  done
  ((known)) \
    || die "Statistics type $stat_type not recognized, valid ones: ${STAT_TYPES[*]}"

  if [[ -z "${1:-}" ]]; then
    usage >&2
    die 'No file name provided'
  fi
  local tsv_file=$1

  # Without a cluster column: keep the first line of the ncsv output plus
  # the line whose first field starts with the requested statistic name.
  if [[ -z "$label" ]]; then
    ncsv --stat "$tsv_file" | sed -n "1p;/^${stat_type}/Ip"
    return
  fi

  # Collect the distinct values of the cluster column (first line skipped).
  # Reading line by line avoids pathname expansion of values such as `*`;
  # empty lines are dropped.
  local -a clusters=()
  local line
  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      clusters+=("$line")
    fi
  done < <(csvcut -t -c "$label" "$tsv_file" | tail -n +2 | sort -uV)

  # Header row: cluster column, statistic name, then the remaining columns.
  local header
  header=$(csvcut -t -C "$label" "$tsv_file" | head -n 1 | ncsv -i, -d"\t")
  printf '%s\tStatistic\t%s\n' "$label" "$header"

  # One row per cluster value. The value is matched as an anchored, escaped
  # regex so that e.g. `1` does not also select rows whose value is `10`.
  local value pattern
  for value in "${clusters[@]}"; do
    printf '%s\t' "$value"
    pattern="^$(regex_escape "$value")\$"
    csvgrep -t -c "$label" -r "$pattern" "$tsv_file" \
      | csvcut -C "$label" \
      | ncsv -i, --stat \
      | sed -n "/^${stat_type}/Ip"
  done
}

main "$@"