From 242f4286603143bbcc6befc8bd507e25e3e382df Mon Sep 17 00:00:00 2001 From: Ville Rantanen Date: Wed, 18 May 2016 13:39:44 +0300 Subject: [PATCH] added python based tsvplot --- bin/tsvplot | 2 +- tsv/tsvplot | 41 ++++++--- tsv/tsvplot.py | 224 +++++++++++++++++++++++++++++++++++++++++++++++++ tsv/tsvsummary | 2 +- 4 files changed, 256 insertions(+), 13 deletions(-) create mode 100755 tsv/tsvplot.py diff --git a/bin/tsvplot b/bin/tsvplot index 6e32376..fa93546 120000 --- a/bin/tsvplot +++ b/bin/tsvplot @@ -1 +1 @@ -../tsv/tsvplot \ No newline at end of file +../tsv/tsvplot.py \ No newline at end of file diff --git a/tsv/tsvplot b/tsv/tsvplot index cae1bae..73bcc45 100755 --- a/tsv/tsvplot +++ b/tsv/tsvplot @@ -1,33 +1,52 @@ #!/bin/bash -[[ -z "$3" ]] && { +_helpexit() { + self=$( basename $0 ) echo "Plot2D for console. Arguments - tsvplot FILE xCol yCol [plotStyle] [preBlock] + $self FILE xCol yCol [-X] [plotStyle] [preBlock] - e.g. tsvplot cal.tsv 2 4 - or tsvplot cal.tsv Area Weight \"with lines;\" \"set title 'test plot';\" + e.g. $self cal.tsv 2 4 + or $self cal.tsv Area Weight \"with lines;\" \"set title 'test plot';\" preBlock may contain any GNUPlot commands. plotStyle contains any plotting style commands. 
def get_data_scatter(opts):
    '''Return the requested columns of the input file as comma separated rows.

    opts.csv is a sequence of path components that is joined with
    os.path.join, and opts.columns is the list of column names to extract.
    The file is parsed with the csv.excel_tab dialect.

    Returns a single string with one "x,y" record per line (a single value
    per line when only one column is requested).

    Prints a message to stderr and exits with status 1 if a requested
    column is not present in the file.
    '''
    path = os.path.join(*opts.csv)
    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    data = []
    with handle:
        reader = csv.DictReader(handle, dialect=csv.excel_tab, lineterminator="\n")
        try:
            for row in reader:
                values = [row[c] for c in opts.columns]
                # Short rows are padded with None by DictReader; such a row
                # cannot be plotted, so skip it instead of crashing in join().
                if None in values:
                    continue
                data.append(",".join(values))
        except KeyError as ME:
            print("Column {} not found.".format(ME), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return "\n".join(data)
def get_stats(opts):
    '''Return (minimum, maximum, count) of the numeric values in one column.

    opts.csv is a sequence of path components that is joined with
    os.path.join, and opts.columns[0] names the column to inspect. The file
    is parsed with the csv.excel_tab dialect.

    Cells that cannot be converted to float, cells missing from short rows,
    and NaN cells are ignored. When no usable value is found the result is
    (nan, nan, 0).

    Prints a message to stderr and exits with status 1 if the column is not
    present in the file.
    '''
    path = os.path.join(*opts.csv)
    column = opts.columns[0]
    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    values_min = None
    values_max = None
    values_n = 0
    with handle:
        reader = csv.DictReader(handle, dialect=csv.excel_tab, lineterminator="\n")
        try:
            for row in reader:
                try:
                    f = float(row[column])
                except (TypeError, ValueError):
                    # TypeError: DictReader pads short rows with None.
                    continue
                if f != f:
                    # NaN compares false with everything and would corrupt
                    # the running minimum and maximum.
                    continue
                values_n += 1
                if values_min is None or f < values_min:
                    values_min = f
                if values_max is None or f > values_max:
                    values_max = f
        except KeyError as ME:
            print("Column {} not found.".format(ME), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    if values_n == 0:
        return (float('nan'), float('nan'), 0)
    return (values_min, values_max, values_n)
CSV columns to 2D scatter plot, or a histogram. Uses GNUPLOT command line utility to draw plots.") + parser.add_argument("-v","--version",action='version', version=__version__) + parser.add_argument("-c",action="store",dest="columns",type=str,required=True, + help="Column name for histogram, or comma separated list of two names for scatter plot. First column is the x-axis.") + parser.add_argument("-d",action="store",dest="delimiter",default="\t", + help="File delimiter, default [tab].") + parser.add_argument("-n",action="store",type=int,dest="bins",default=False, + help="Bins for histograms. Default: min( console_width/4,sqrt(len(values)) )") + parser.add_argument("-p",action="store",dest="pre",default="", + help="Gnuplot commands to execute before plotting.") + parser.add_argument("-s",action="store",dest="style",default=None, + help="Plotting style, e.g. 'with lines'. Defaults to points for 2D plot, boxes for histogram.") + parser.add_argument("--width",action="store",type=int,dest="width",default=False, + help="Console width. Default: fullscreen") + parser.add_argument("--height",action="store",type=int,dest="height",default=False, + help="Console height. 
def termsize():
    '''Return the terminal size as a (rows, columns) tuple of ints.

    The size is read from the output of the "stty size" command. If the
    command cannot be run, or its output is not exactly two positive
    integers, the fallback size (25, 80) is returned.
    '''
    fallback = (25, 80)
    try:
        pipe = os.popen('stty size', 'r')
        try:
            fields = pipe.read().split()
        finally:
            # Always reap the child process and release the pipe.
            pipe.close()
        # Unpacking raises ValueError unless there are exactly two fields,
        # and int() raises ValueError for non-numeric output.
        rows, columns = [int(field) for field in fields]
    except (OSError, ValueError):
        return fallback
    if rows <= 0 or columns <= 0:
        # A zero-sized report cannot be used to lay out a plot.
        return fallback
    return (rows, columns)