#!/bin/bash
#
# tsvsummary - summary statistics for the columns of a TSV file.
#
# Without -c, prints the header line of `ncsv --stat` plus the line(s) whose
# first field starts with the requested statistic name. With -c COLUMN, prints
# one such statistics line per distinct value found in COLUMN.
#
# Requires the external tools csvcut/csvgrep (csvkit) and ncsv on PATH.
# pipefail is deliberately not enabled: `head -n 1` closes its input early and
# would otherwise turn a normal run into a pipeline failure.
set -e

# Print the help text to stdout.
usage() {
  cat <<'EOF'

tsvsummary, a CSVSummary clone for the command line.
Depends on csvkit and ncsv

Usage: tsvsummary [-c column] [-t type] tsv_file

  -c  name of the column to use as clusterCol
      (produce as many lines as column has unique values)
  -t  Type of statistics: mean, max, min, sum, stdev, nas, unique
  -h  Help

EOF
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Backslash-escape regular-expression metacharacters in $1 so the result
# matches $1 literally when used as a csvgrep -r pattern.
escape_regex() {
  printf '%s' "$1" | sed 's/[][\\.^$*+?(){}|]/\\&/g'
}

TYPEOF=mean
TYPELIST=( mean max min sum stdev nas unique )
# Initialise explicitly so values inherited from the environment cannot
# switch on grouped mode or bypass the statistics-type validation.
LABEL=
TYPEFOUND=

while getopts c:ht: opt; do
  case "$opt" in
    c) LABEL=$OPTARG ;;
    t) TYPEOF=$(printf '%s' "$OPTARG" | tr '[:upper:]' '[:lower:]') ;;
    h)
      usage
      exit 0
      ;;
    \?)
      # getopts has already reported the offending option on stderr.
      usage >&2
      exit 1
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# The statistic name is later interpolated into a sed address, so it must be
# one of the known plain-word types.
for type in "${TYPELIST[@]}"; do
  if [[ "$type" == "$TYPEOF" ]]; then
    TYPEFOUND=1
    break
  fi
done
if [[ -z "$TYPEFOUND" ]]; then
  die "Statistics type $TYPEOF not recognized, valid ones: ${TYPELIST[*]}"
fi

if [[ -z "${1:-}" ]]; then
  usage >&2
  die "No file name provided"
fi

if [[ -z "$LABEL" ]]; then
  # Ungrouped: keep the first line of the stats table and the requested row(s).
  ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip"
else
  # Collect the distinct values of the grouping column, skipping the header
  # row and empty values. Reading line by line avoids changing IFS globally
  # and keeps values such as "*" from being glob-expanded.
  UNIQUE=()
  while IFS= read -r value; do
    if [[ -n "$value" ]]; then
      UNIQUE+=( "$value" )
    fi
  done < <(csvcut -t -c "$LABEL" "$1" | tail -n +2 | sort -uV)

  # Header of the remaining columns, converted by ncsv from comma to tab.
  HEADER=$(csvcut -t -C "$LABEL" "$1" | head -n 1 | ncsv -i, -d"\t")
  printf '%s\tStatistic\t%s\n' "$LABEL" "$HEADER"

  for value in "${UNIQUE[@]}"; do
    printf '%s\t' "$value"
    # csvgrep -m does substring matching, which would merge the rows of "10"
    # into the group "1"; an anchored, escaped regex selects exact matches.
    csvgrep -t -c "$LABEL" -r "^$(escape_regex "$value")\$" "$1" \
      | csvcut -C "$LABEL" \
      | ncsv -i, --stat \
      | sed -n "/^$TYPEOF/Ip"
  done
fi