q-tools/tsv/tsvsummary

#!/bin/bash

set -e
# Print the help text for tsvsummary on stdout.
# Takes no arguments. Output is the description, the synopsis and one line
# per option, followed by a single blank line.
usage() {
    printf '%s\n' \
        ' tsvsummary, a CSVSummary clone for the command line.' \
        'Depends on csvkit and ncsv' \
        '' \
        'Usage:   tsvsummary [-c column] [-t type] tsv_file' \
        '   -c    name of the column to use as clusterCol' \
        '         (produce as many lines as column has unique values)' \
        '   -t    Type of statistics: mean, max, min, sum, stdev, nas, unique' \
        '   -h    Help' \
        ''
}
# --- Option parsing and validation -----------------------------------------
# Results left for the rest of the script:
#   TYPEOF   statistic to report, lower-cased (default: mean)
#   LABEL    name of the clustering column, empty when -c was not given
#   $1       first non-option argument once the options are shifted away
TYPEOF=mean
TYPELIST=( mean max min sum stdev nas unique )
# Start from known-empty values so that variables of the same name inherited
# from the environment can neither pick a column nor bypass the type check.
LABEL=
TYPEFOUND=

while getopts c:ht: opt
do  case "$opt" in
    c)
        LABEL=$OPTARG
    ;;
    t)
        # Lower-case the argument so that -t is case-insensitive. printf with
        # a quoted expansion avoids globbing/word splitting and copes with
        # values that echo would treat as its own options (e.g. "-n").
        TYPEOF=$( printf '%s\n' "$OPTARG" | tr '[:upper:]' '[:lower:]' )
    ;;
    h)
        usage
        exit 0
    ;;
    \?)
        # Unknown option or missing option argument; getopts has already
        # written its diagnostic to stderr. Report failure to the caller.
        exit 1
    ;;
    esac
done
shift $(( OPTIND - 1 ))

# Accept only statistics listed in TYPELIST.
for type in "${TYPELIST[@]}"
do  if [ "$type" = "$TYPEOF" ]
    then TYPEFOUND=1
         break
    fi
done

# Abort in the current shell (not a subshell) so the exit does not depend on
# `set -e`, and send the diagnostic to stderr.
if [[ -z "$TYPEFOUND" ]]
then echo "Statistics type $TYPEOF not recognized, valid ones: ${TYPELIST[*]}" >&2
     exit 1
fi

# A TSV file name is mandatory: when the first remaining positional argument
# is empty, show the help text plus a short reason and stop with status 1.
[[ -n "$1" ]] || {
    usage
    echo No file name provided
    exit 1
}

# --- Report -----------------------------------------------------------------
# Inputs: "$1" (TSV file), LABEL (cluster column, may be empty) and TYPEOF
# (statistic name). The sed filters keep the lines of `ncsv --stat` output
# that start with TYPEOF, matched case-insensitively.
# NOTE(review): presumably `ncsv --stat` prints one row per statistic,
# prefixed by its name — confirm against ncsv.
if [ -z "$LABEL" ]
then
     # No cluster column: one statistics row for the whole file, preceded by
     # the first line of the ncsv output.
     ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip"
     exit
else
     # Collect the distinct values of the cluster column (header skipped,
     # version-sorted). Reading line by line keeps values such as "*" from
     # being glob-expanded and needs no global IFS change; empty lines are
     # skipped.
     UNIQUE=()
     while IFS= read -r value
     do  [[ -n "$value" ]] || continue
         UNIQUE+=( "$value" )
     done < <( csvcut -t -c "$LABEL" "$1" | tail -n +2 | sort -uV )

     # Header: every column except the cluster column.
     # NOTE(review): how ncsv treats the literal "\t" passed to -d is not
     # visible here, so this line is still emitted through `echo -e`.
     HEADER=$( csvcut -t -C "$LABEL" "$1" | head -n 1 | ncsv -i, -d"\t" )
     echo -e "$LABEL\tStatistic\t$HEADER"

     for value in "${UNIQUE[@]}"
     do  # printf '%s' prints the cell value verbatim (no backslash or option
         # interpretation, unlike echo -ne).
         printf '%s\t' "$value"
         # csvgrep -m is a substring match, so the group for "1" would also
         # include rows whose value is "10" or "21". Select rows with an
         # anchored regular expression instead, escaping regex
         # metacharacters so the value is matched literally.
         pattern="^$( printf '%s' "$value" | sed 's/[][\.*^$+?(){}|]/\\&/g' )\$"
         csvgrep -t -c "$LABEL" -r "$pattern" "$1" | csvcut -C "$LABEL" | ncsv -i, --stat | sed -n "/^$TYPEOF/Ip"
     done
fi