added column counts to tsvsummary
This commit is contained in:
@@ -5,21 +5,25 @@ function usage {
|
|||||||
echo -e ' tsvsummary, a CSVSummary clone for the command line.
|
echo -e ' tsvsummary, a CSVSummary clone for the command line.
|
||||||
Depends on csvkit and ncsv
|
Depends on csvkit and ncsv
|
||||||
|
|
||||||
Usage: tsvsummary [-c column] [-t type] tsv_file
|
Usage: tsvsummary [-c column] [ -n name ] [-t type] tsv_file
|
||||||
-c name of the column to use as clusterCol
|
-c name of the column to use as clusterCol
|
||||||
(produce as many lines as column has unique values)
|
(produce as many lines as column has unique values)
|
||||||
-t Type of statistics: mean, max, min, sum, stdev, nas, unique
|
-n Name of column to print an element count
|
||||||
-h Help
|
-t Type of statistics: mean, max, min, sum, stdev, nas, unique
|
||||||
|
-h Help
|
||||||
'
|
'
|
||||||
}
|
}
|
||||||
|
|
||||||
TYPEOF=mean
|
TYPEOF=mean
|
||||||
TYPELIST=( mean max min sum stdev nas unique )
|
TYPELIST=( mean max min sum stdev nas unique )
|
||||||
while getopts c:ht: opt
|
while getopts c:hn:t: opt
|
||||||
do case "$opt" in
|
do case "$opt" in
|
||||||
c)
|
c)
|
||||||
LABEL=$OPTARG
|
LABEL=$OPTARG
|
||||||
;;
|
;;
|
||||||
|
n)
|
||||||
|
NAMECOUNT=$OPTARG
|
||||||
|
;;
|
||||||
t)
|
t)
|
||||||
TYPEOF=$( echo $OPTARG | tr '[:upper:]' '[:lower:]' )
|
TYPEOF=$( echo $OPTARG | tr '[:upper:]' '[:lower:]' )
|
||||||
;;
|
;;
|
||||||
@@ -34,31 +38,42 @@ do case "$opt" in
|
|||||||
done
|
done
|
||||||
shift $(( ${OPTIND} - 1 ));
|
shift $(( ${OPTIND} - 1 ));
|
||||||
|
|
||||||
for type in "${TYPELIST[@]}"
|
for type in "${TYPELIST[@]}"; do
|
||||||
do if [ "$type" = "$TYPEOF" ]; then TYPEFOUND=1;fi
|
if [ "$type" = "$TYPEOF" ]; then TYPEFOUND=1;fi
|
||||||
done
|
done
|
||||||
[[ -z "$TYPEFOUND" ]] && ( echo Statistics type $TYPEOF not recognized, valid ones: ${TYPELIST[@]}; exit 1 )
|
[[ -z "$TYPEFOUND" ]] && { echo "Statistics type $TYPEOF not recognized, valid ones: ${TYPELIST[*]}" >&2; exit 1; }  # { } group, not ( ): exit must end the script, not just a subshell
|
||||||
|
|
||||||
if [ -z "$1" ]
|
if [ -z "$1" ]; then
|
||||||
then usage
|
usage
|
||||||
echo No file name provided
|
echo No file name provided
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
which ncsv &> /dev/null || ( echo ncsv required: "https://bitbucket.org/MoonQ/ncsv"; exit 1 )
|
command -v ncsv &> /dev/null || { echo ncsv required: "https://bitbucket.org/MoonQ/ncsv (install with pip)" >&2; exit 1; }  # abort the script itself when ncsv is missing; ( ) would only exit a subshell
|
||||||
which csvcut &> /dev/null || ( echo csvkit required: "https://csvkit.readthedocs.org/"; exit 1 )
|
command -v csvcut &> /dev/null || { echo csvkit required: "https://csvkit.readthedocs.org/ (install with pip)" >&2; exit 1; }  # abort the script itself when csvkit is missing; ( ) would only exit a subshell
|
||||||
|
|
||||||
if [ -z "$LABEL" ]
|
if [ -z "$LABEL" ]; then
|
||||||
then ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip"
|
if [ -n "$NAMECOUNT" ]; then
|
||||||
exit
|
paste <( ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip" ) <( echo "$NAMECOUNT"; cat "$1" | wc -l )
|
||||||
|
else
|
||||||
|
ncsv --stat "$1" | sed -n "1p;/^$TYPEOF/Ip"
|
||||||
|
fi
|
||||||
|
exit
|
||||||
else
|
else
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
UNIQUE=( $( csvcut -t -c "$LABEL" "$1" | tail -n +2 | sort -uV ) )
|
UNIQUE=( $( csvcut -t -c "$LABEL" "$1" | tail -n +2 | sort -uV ) )
|
||||||
HEADER=$( csvcut -t -C "$LABEL" "$1" | head -n 1 | ncsv -i, -d"\t" )
|
HEADER=$( csvcut -t -C "$LABEL" "$1" | head -n 1 | ncsv -i, -d"\t" )
|
||||||
echo -e "$LABEL\tStatistic\t$HEADER"
|
if [ -n "$NAMECOUNT" ]; then
|
||||||
|
HEADER+="\t$NAMECOUNT"
|
||||||
for (( i=0; i<${#UNIQUE[@]}; i++ ))
|
fi
|
||||||
do echo -ne "${UNIQUE[$i]}\t"
|
echo -e "$LABEL\tStatistic\t$HEADER"
|
||||||
csvgrep -t -c "$LABEL" -m "${UNIQUE[$i]}" "$1" | csvcut -C "$LABEL" | ncsv -i, --stat | sed -n "/^$TYPEOF/Ip"
|
|
||||||
done
|
for (( i=0; i<${#UNIQUE[@]}; i++ )); do
|
||||||
|
echo -ne "${UNIQUE[$i]}\t"
|
||||||
|
csvgrep -t -c "$LABEL" -m "${UNIQUE[$i]}" "$1" | csvcut -C "$LABEL" | ncsv -i, --stat | sed -n "/^$TYPEOF/Ip" | tr -d \\n
|
||||||
|
if [ -n "$NAMECOUNT" ]; then
|
||||||
|
printf "\t%s" $( csvgrep -t -c "$LABEL" -m "${UNIQUE[$i]}" "$1" | csvcut -C "$LABEL" | wc -l )
|
||||||
|
fi
|
||||||
|
printf "\n"
|
||||||
|
done
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user