added python based tsvplot

This commit is contained in:
Ville Rantanen
2016-05-18 13:39:44 +03:00
parent 979be83b70
commit 242f428660
4 changed files with 256 additions and 13 deletions

View File

@@ -1 +1 @@
../tsv/tsvplot
../tsv/tsvplot.py

View File

@@ -1,33 +1,52 @@
#!/bin/bash
[[ -z "$3" ]] && {
_helpexit() {
self=$( basename $0 )
echo "Plot2D for console.
Arguments
tsvplot FILE xCol yCol [plotStyle] [preBlock]
$self FILE xCol yCol [-X] [plotStyle] [preBlock]
e.g. tsvplot cal.tsv 2 4
or tsvplot cal.tsv Area Weight \"with lines;\" \"set title 'test plot';\"
e.g. $self cal.tsv 2 4
or $self cal.tsv Area Weight \"with lines;\" \"set title 'test plot';\"
preBlock may contain any GNUPlot commands.
plotStyle contains any plotting style commands.
-X use X11 instead of console
Requires csvkit and gnuplot
"
exit
}
fbase=$( basename "$1" )
if [ $# -lt 3 ]; then _helpexit; fi
csvcut -t -c "$2,$3" "$1" | gnuplot -e "
set term dumb $(tput cols) $(tput lines) enhanced;
# Walk the command line once. POS is the positional slot (1=FILE, 2=xCol,
# 3=yCol, 4=plotStyle, 5=preBlock) of the argument currently being examined.
POS=0
# POSADJUST is decremented for every -X flag seen, so that the flag does not
# consume a positional slot and may appear anywhere on the command line.
POSADJUST=0
for (( i=1; i<=$#; i++ )); do
POS=$(( $i + $POSADJUST ))
# ${!i} is indirect expansion: the value of the i-th positional parameter.
[[ ${!i} = "-h" ]] && _helpexit
[[ ${!i} = "help" ]] && _helpexit
# -X: set PLOTTERM to a non-empty placeholder so the "set term dumb" default
# below is skipped, then move on without assigning a positional slot.
[[ ${!i} = "-X" ]] && PLOTTERM=" " && POSADJUST=$(( $POSADJUST - 1 )) && continue
[[ $POS -eq 1 ]] && FILE="${!i}"
[[ $POS -eq 2 ]] && XCOL="${!i}"
[[ $POS -eq 3 ]] && YCOL="${!i}"
[[ $POS -eq 4 ]] && STYLE="${!i}"
[[ $POS -eq 5 ]] && PRE="${!i}"
done
fbase=$( basename "$FILE" )
# Without -X, draw an ASCII plot sized to the current terminal.
[[ -z "$PLOTTERM" ]] && PLOTTERM="set term dumb $(tput cols) $(tput lines) enhanced;"
# Without a preBlock, fall back to titling the plot with the file name.
[[ -z "$PRE" ]] && PRE="set title '$fbase';"
csvcut -t -c "$XCOL,$YCOL" "$FILE" | gnuplot -p -e "$PLOTTERM
set datafile separator \",\";
set xlabel '$2';
$5
plot '<cat' using 1:2 title '$fbase $3' $4"
set xlabel '$XCOL';
set ylabel '$YCOL';
$PRE
plot '<cat' using 1:2 title '' $STYLE"
[[ $? -ne 0 ]] && {
echo -n "Possible columns: "
head -n 1 "$1" | tr \\t ,
head -n 1 "$FILE" | tr \\t ,
} || {
true
}

224
tsv/tsvplot.py Executable file
View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python
#
# Copyright 2016 Ville Rantanen
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import print_function
import sys, os, math, csv, subprocess
from argparse import ArgumentParser
'''Plot CSV files with GNUPLOT.'''
__author__ = "Ville Rantanen"
__version__ = "0.1"
def get_data_scatter(opts):
    ''' Return the selected columns of the input file as CSV text for gnuplot.

    Reads the delimited file named by opts.csv (a (directory, filename) pair
    as produced by os.path.split) and returns one comma-joined line per data
    row, holding the values of opts.columns in the order given.

    The field delimiter is taken from opts.delimiter, falling back to a tab
    when the attribute is absent or empty.

    Prints the available column names to stderr and exits with status 1 when
    a requested column is not present in the header row.
    '''
    path = os.path.join(*opts.csv)
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    lines = []
    with handle:
        reader = csv.DictReader(
            handle,
            dialect=csv.excel_tab,
            delimiter=getattr(opts, 'delimiter', None) or "\t",
            lineterminator="\n",
        )
        try:
            for row in reader:
                # Rows shorter than the header yield None for the missing
                # fields; emit them as empty fields instead of crashing.
                lines.append(",".join([row[c] or "" for c in opts.columns]))
        except KeyError as missing:
            print("Column {} not found.".format(missing), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return "\n".join(lines)
def get_data_histogram(opts):
    ''' Return the numeric values of the histogram column, one per line.

    Reads the delimited file named by opts.csv (a (directory, filename) pair
    as produced by os.path.split) and keeps the original text of every value
    in column opts.columns[0] that parses as a finite float. Empty, missing,
    non-numeric, NaN and infinite values are skipped.

    The field delimiter is taken from opts.delimiter, falling back to a tab
    when the attribute is absent or empty.

    Prints the available column names to stderr and exits with status 1 when
    the column is not present in the header row.
    '''
    path = os.path.join(*opts.csv)
    column = opts.columns[0]
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    values = []
    with handle:
        reader = csv.DictReader(
            handle,
            dialect=csv.excel_tab,
            delimiter=getattr(opts, 'delimiter', None) or "\t",
            lineterminator="\n",
        )
        try:
            for row in reader:
                text = row[column]
                try:
                    number = float(text)
                except (TypeError, ValueError):
                    # TypeError: the row was shorter than the header (None).
                    # ValueError: the field is not a number.
                    continue
                # Non-finite values cannot be assigned to a bin.
                if math.isnan(number) or math.isinf(number):
                    continue
                values.append(text)
        except KeyError as missing:
            print("Column {} not found.".format(missing), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return "\n".join(values)
def get_stats(opts):
    ''' Return (minimum, maximum, count) of the histogram column.

    Scans column opts.columns[0] of the delimited file named by opts.csv
    (a (directory, filename) pair as produced by os.path.split). Only values
    that parse as finite floats are counted; empty, missing, non-numeric,
    NaN and infinite values are ignored.

    The field delimiter is taken from opts.delimiter, falling back to a tab
    when the attribute is absent or empty.

    When no usable value is found the result is (nan, nan, 0).

    Prints the available column names to stderr and exits with status 1 when
    the column is not present in the header row.
    '''
    path = os.path.join(*opts.csv)
    column = opts.columns[0]
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    # None marks "nothing seen yet"; seeding with NaN would make the result
    # depend on the argument order of min()/max().
    lowest = None
    highest = None
    count = 0
    with handle:
        reader = csv.DictReader(
            handle,
            dialect=csv.excel_tab,
            delimiter=getattr(opts, 'delimiter', None) or "\t",
            lineterminator="\n",
        )
        try:
            for row in reader:
                try:
                    number = float(row[column])
                except (TypeError, ValueError):
                    # TypeError: the row was shorter than the header (None).
                    # ValueError: the field is not a number.
                    continue
                # A NaN or infinity in the data would corrupt the range.
                if math.isnan(number) or math.isinf(number):
                    continue
                count += 1
                lowest = number if lowest is None else min(lowest, number)
                highest = number if highest is None else max(highest, number)
        except KeyError as missing:
            print("Column {} not found.".format(missing), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    if count == 0:
        return (float('nan'), float('nan'), 0)
    return (lowest, highest, count)
def get_plotterm(opts):
    ''' Build the gnuplot "set term" preamble for the requested output.

    Returns an empty string when opts.X11 is set, so no terminal is selected
    explicitly. Otherwise returns a "set term dumb WIDTH HEIGHT enhanced;"
    command sized from termsize(), where a truthy opts.width / opts.height
    replaces the detected value and two rows are added to the height.
    '''
    if opts.X11:
        return ""
    (rows, cols) = termsize()
    rows = opts.height if opts.height else rows
    cols = opts.width if opts.width else cols
    return "set term dumb {0} {1} enhanced;".format(cols, rows + 2)
def get_histogram_template(opts):
    ''' Build the gnuplot script that draws a histogram of one column.

    The value range and count come from get_stats(opts). The number of bins
    is opts.bins when given; otherwise it is ceil(sqrt(count)), limited to a
    quarter of the console width when plotting to the console. The generated
    script assigns each value to the centre of its bin and lets gnuplot sum
    a weight of 1.0 per value ("smooth freq").

    Exits with status 1 and a message on stderr when the column holds no
    numeric values, since there is nothing to bin.
    '''
    plotterm = get_plotterm(opts)
    (vmin, vmax, vnum) = get_stats(opts)
    if vnum == 0:
        print("Column {} contains no numeric values.".format(opts.columns[0]), file=sys.stderr)
        sys.exit(1)
    if opts.bins:
        bins = opts.bins
    else:
        bins = math.ceil(math.sqrt(vnum))
        if not opts.X11:
            (height, width) = termsize()
            if opts.width:
                width = opts.width
            bins = min(int(float(width) / 4), bins)
    # A bin count below one would make gnuplot divide by zero when it
    # computes the bin width (narrow console, or a non-positive -n).
    bins = max(1, bins)
    if vmax == vmin:
        # All values are identical: a zero-width range cannot be subdivided,
        # so draw a single unit-wide bin instead.
        vmax = vmin + 1.0
        bins = 1
    template = '''{}
set datafile separator ",";
set xlabel '{}';
Min = {};
Max = {};
n = {:f};
width = (Max-Min)/n;
set boxwidth width absolute;
bin(x) = width*(floor((x-Min)/width)+0.5) + Min;
{}
plot '<cat' u (bin($1)):(1.0) smooth freq title '' {};
'''.format(
        plotterm,
        opts.columns[0],
        vmin, vmax, bins,
        opts.pre,
        opts.style
    )
    return template
def get_scatterplot_template(opts):
    ''' Build the gnuplot script that draws a two-column scatter plot.

    The script starts with the terminal setup from get_plotterm(opts), labels
    the axes with opts.columns[0] (x) and opts.columns[1] (y), inserts the
    user's opts.pre commands, and plots comma-separated data read from
    standard input using opts.style.
    '''
    script_lines = [
        "{}".format(get_plotterm(opts)),
        'set datafile separator ",";',
        "set xlabel '{}';".format(opts.columns[0]),
        "set ylabel '{}';".format(opts.columns[1]),
        "{}".format(opts.pre),
        "plot '<cat' using 1:2 title '' {};".format(opts.style),
    ]
    return "\n".join(script_lines)
def run_gnuplot(opts, template, get_data):
    ''' Run gnuplot on the given script and print the resulting plot.

    opts     -- parsed options, passed through to get_data.
    template -- gnuplot commands, handed to gnuplot with -e.
    get_data -- callable taking opts and returning the data text that is
                written to gnuplot's standard input.

    Non-blank lines of gnuplot's standard output are printed. When gnuplot
    exits with a non-zero status, the script and gnuplot's standard error
    are printed as well to help diagnose the failure.

    Returns gnuplot's exit status.
    '''
    process = subprocess.Popen(
        ['gnuplot', '-p', '-e', template],
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes: the data is a str and the output is split on "\n",
        # which only works on Python 3 when the pipes carry text, not bytes.
        universal_newlines=True,
    )
    (output, errors) = process.communicate(input=get_data(opts))
    for line in output.split("\n"):
        if line.strip():
            print(line)
    # A negative status means gnuplot was killed by a signal; report that too.
    if process.returncode != 0:
        print(template)
        print('=' * 25)
        print(errors)
    return process.returncode
def which(program):
    ''' Return the path of an executable, or None when it cannot be found.

    When program contains a directory part it is checked as given. Otherwise
    each directory listed in the PATH environment variable is searched in
    order and the first match is returned. If PATH is not set at all, the
    platform default search path (os.defpath) is used.
    '''
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory = os.path.split(program)[0]
    if directory:
        return program if is_exe(program) else None
    # os.environ["PATH"] would raise KeyError in an environment without PATH.
    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate
    return None
def setup_options():
    ''' Parse the command line and return the options namespace.

    After parsing, options.columns is a list of one or two stripped column
    names, options.csv is the (directory, filename) pair from os.path.split,
    and options.style defaults to boxes for a single column or '*' points
    for two columns when -s was not given. More than two columns is reported
    as a usage error.
    '''
    parser = ArgumentParser(
        description="Plot CSV columns to 2D scatter plot, or a histogram. Uses GNUPLOT command line utility to draw plots."
    )
    add = parser.add_argument
    add("-v", "--version", action='version', version=__version__)
    add("-c", dest="columns", type=str, required=True,
        help="Column name for histogram, or comma separated list of two names for scatter plot. First column is the x-axis.")
    add("-d", dest="delimiter", default="\t",
        help="File delimiter, default [tab].")
    add("-n", dest="bins", type=int, default=False,
        help="Bins for histograms. Default: min( console_width/4,sqrt(len(values)) )")
    add("-p", dest="pre", default="",
        help="Gnuplot commands to execute before plotting.")
    add("-s", dest="style", default=None,
        help="Plotting style, e.g. 'with lines'. Defaults to points for 2D plot, boxes for histogram.")
    add("--width", dest="width", type=int, default=False,
        help="Console width. Default: fullscreen")
    add("--height", dest="height", type=int, default=False,
        help="Console height. Default: fullscreen")
    add("-X", dest="X11", action="store_true", default=False,
        help="Show plot in X11 window, instead of console.")
    add("-D", dest="debug", action="store_true", default=False,
        help="Print commands before execution")
    add("csv", type=str,
        help="CSV file to plot")

    options = parser.parse_args()
    options.columns = [name.strip() for name in options.columns.split(",")]
    if len(options.columns) > 2:
        parser.error("Only two columns supported.")
    options.csv = os.path.split(options.csv)
    if not options.style:
        # One column means a histogram, two a scatter plot.
        options.style = "with boxes;" if len(options.columns) == 1 else "pt '*'"
    return options
def termsize():
    ''' Return the console size as a (rows, columns) tuple of ints.

    The size is read from the output of "stty size". When that produces
    nothing usable (for example because standard input is not a terminal),
    the fallback (25, 80) is returned.
    '''
    try:
        with os.popen('stty size', 'r') as pipe:
            (rows, columns) = pipe.read().split()
        # Converting inside the try keeps non-numeric output on the
        # fallback path instead of crashing the caller.
        return (int(rows), int(columns))
    except (OSError, ValueError):
        return (25, 80)
# Script entry: refuse to continue when the gnuplot binary cannot be located.
if not which('gnuplot'):
    print('You don\'t seem to have "gnuplot" installed!')
    sys.exit(1)

opts = setup_options()

# A single column selects the histogram pipeline; otherwise draw a scatter plot.
if len(opts.columns) != 1:
    template = get_scatterplot_template(opts)
    get_data = get_data_scatter
else:
    template = get_histogram_template(opts)
    get_data = get_data_histogram

# -D: show the generated gnuplot script before running it.
if opts.debug:
    print(template)

# The process exit status mirrors gnuplot's.
sys.exit(run_gnuplot(opts, template, get_data))

View File

@@ -45,7 +45,7 @@ then usage
exit 1
fi
which ncsv &> /dev/null || ( echo ncsv required: "https://code.google.com/p/nicecsv/"; exit 1 )
which ncsv &> /dev/null || ( echo ncsv required: "https://bitbucket.org/MoonQ/ncsv"; exit 1 )
which csvcut &> /dev/null || ( echo csvkit required: "https://csvkit.readthedocs.org/"; exit 1 )
if [ -z "$LABEL" ]