Files
q-tools/tsv/tsvplot.py
2021-04-09 13:01:04 +03:00

227 lines
7.6 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright 2016 Ville Rantanen
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import print_function
from __future__ import division
from past.utils import old_div
import sys, os, math, csv, subprocess
from argparse import ArgumentParser
'''Plot CSV files with GNUPLOT.'''
__author__ = "Ville Rantanen"
__version__ = "0.1"
def get_data_scatter(opts):
    '''Read the requested columns and return them as comma separated lines.

    Args:
        opts: Parsed options. ``opts.csv`` holds the path components of the
            input file (they are re-joined with ``os.path.join``) and
            ``opts.columns`` the names of the columns to extract.

    Returns:
        A string with one line per data row; each line holds the selected
        values joined with ",". This text is what gets piped to gnuplot.

    Exits the program with status 1 when a requested column is missing.
    '''
    path = os.path.join(*opts.csv)
    # The csv module needs str rows: a file opened in binary mode makes the
    # reader fail on Python 3, so open in text mode. The context manager
    # also guarantees the handle is closed.
    # NOTE(review): opts.delimiter is parsed by setup_options but is not
    # applied here; the tab dialect is always used.
    with open(path, 'r') as handle:
        reader = csv.DictReader(handle, dialect=csv.excel_tab, lineterminator="\n")
        try:
            data = [",".join(row[c] for c in opts.columns) for row in reader]
        except KeyError as ME:
            print("Column {} not found.".format(ME), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return "\n".join(data)
def get_data_histogram(opts):
    '''Return the numeric cells of the histogram column, one per line.

    Args:
        opts: Parsed options. ``opts.csv`` holds the path components of the
            input file and ``opts.columns[0]`` names the column to read.

    Returns:
        A newline separated string of the original cell texts. Cells that
        cannot be converted with ``float()`` (including cells missing from
        short rows) are left out.

    Exits the program with status 1 when the column is missing.
    '''
    path = os.path.join(*opts.csv)
    column = opts.columns[0]
    data = []
    # Text mode is required by the csv module on Python 3; the context
    # manager closes the handle when reading is done.
    # NOTE(review): opts.delimiter is parsed by setup_options but is not
    # applied here; the tab dialect is always used.
    with open(path, 'r') as handle:
        reader = csv.DictReader(handle, dialect=csv.excel_tab, lineterminator="\n")
        try:
            for row in reader:
                value = row[column]
                try:
                    float(value)
                except (TypeError, ValueError):
                    # ValueError: not a number. TypeError: the row was
                    # shorter than the header, so the cell is None.
                    continue
                data.append(value)
        except KeyError as ME:
            print("Column {} not found.".format(ME), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return "\n".join(data)
def get_stats(opts):
    '''Compute the range and count of the numeric cells in the histogram column.

    Args:
        opts: Parsed options. ``opts.csv`` holds the path components of the
            input file and ``opts.columns[0]`` names the column to read.

    Returns:
        A tuple ``(minimum, maximum, count)``. ``count`` is the number of
        cells accepted by ``float()``. ``minimum`` and ``maximum`` are NaN
        when no ordinary number was found. NaN cells are counted but do not
        take part in the range.

    Exits the program with status 1 when the column is missing.
    '''
    path = os.path.join(*opts.csv)
    column = opts.columns[0]
    values_min = float('nan')
    values_max = float('nan')
    values_n = 0
    # Text mode is required by the csv module on Python 3; the context
    # manager closes the handle when reading is done.
    with open(path, 'r') as handle:
        reader = csv.DictReader(handle, dialect=csv.excel_tab, lineterminator="\n")
        try:
            for row in reader:
                try:
                    f = float(row[column])
                except (TypeError, ValueError):
                    # Non-numeric text, or a cell missing from a short row.
                    continue
                values_n += 1
                if math.isnan(f):
                    # A NaN would otherwise wipe out the range collected
                    # so far, because every comparison with it is False.
                    continue
                if math.isnan(values_min) or f < values_min:
                    values_min = f
                if math.isnan(values_max) or f > values_max:
                    values_max = f
        except KeyError as ME:
            print("Column {} not found.".format(ME), file=sys.stderr)
            print("Columns in the file:" + ",".join(reader.fieldnames), file=sys.stderr)
            sys.exit(1)
    return (values_min, values_max, values_n)
def get_plotterm(opts):
    '''Build the gnuplot "set term" prefix for the chosen output.

    Returns an empty string when ``opts.X11`` is set, so no terminal is
    selected explicitly. Otherwise returns a ``set term dumb`` command
    sized from the console, where ``opts.width`` / ``opts.height``
    override the detected size when given.
    '''
    if opts.X11:
        return ""
    detected_rows, detected_cols = termsize()
    rows = opts.height if opts.height else detected_rows
    cols = opts.width if opts.width else detected_cols
    # Two rows are added to the height before it is handed to gnuplot.
    # NOTE(review): presumably compensates for lines the dumb terminal
    # does not draw on; confirm.
    return "set term dumb {0} {1} enhanced;".format(cols, rows + 2)
def get_histogram_template(opts):
    '''Build the gnuplot script that draws a histogram of one column.

    Args:
        opts: Parsed options; reads ``columns[0]``, ``bins``, ``X11``,
            ``width``, ``pre`` and ``style`` (plus whatever get_plotterm
            and get_stats read).

    Returns:
        The gnuplot command text. The data itself is expected on gnuplot's
        standard input (``'<cat'``), one value per line.

    An explicit ``opts.bins`` is used as given. Otherwise the bin count is
    ceil(sqrt(number of values)), limited to a quarter of the console
    width when plotting to the console.
    '''
    plotterm = get_plotterm(opts)
    (vmin, vmax, vnum) = get_stats(opts)
    if opts.bins:
        bins = opts.bins
    else:
        bins = int(math.ceil(math.sqrt(vnum)))
        if not opts.X11:
            (height, width) = termsize()
            if opts.width:
                width = opts.width
            bins = min(width // 4, bins)
    # The script divides by the bin count, so it must never be zero
    # (no numeric values, or a console narrower than four columns).
    bins = max(1, int(bins))
    if vmin == vmax:
        # A single distinct value would give a zero bin width and a
        # division by zero inside bin(x); widen the range around it.
        vmin -= 0.5
        vmax += 0.5
    template = '''{}
set datafile separator ",";
set xlabel '{}';
Min = {};
Max = {};
n = {:f};
width = (Max-Min)/n;
set boxwidth width absolute;
bin(x) = width*(floor((x-Min)/width)+0.5) + Min;
{}
plot '<cat' u (bin($1)):(1.0) smooth freq title '' {};
'''.format(
        plotterm,
        opts.columns[0],
        vmin, vmax, bins,
        opts.pre,
        opts.style
    )
    return template
def get_scatterplot_template(opts):
    '''Build the gnuplot script for a two column scatter plot.

    The first entry of ``opts.columns`` labels the x-axis and the second
    the y-axis. ``opts.pre`` is inserted before the plot command and
    ``opts.style`` is appended to it. The data is expected on gnuplot's
    standard input as comma separated pairs.
    '''
    fields = (
        get_plotterm(opts),
        opts.columns[0],
        opts.columns[1],
        opts.pre,
        opts.style,
    )
    script = '''{}
set datafile separator ",";
set xlabel '{}';
set ylabel '{}';
{}
plot '<cat' using 1:2 title '' {};'''
    return script.format(*fields)
def run_gnuplot(opts, template, get_data):
    '''Run gnuplot with the given script and feed it the plot data.

    Args:
        opts: Parsed options, passed through to ``get_data``.
        template: gnuplot commands, handed to gnuplot with ``-e``.
        get_data: Callable taking ``opts`` and returning the data text
            that is written to gnuplot's standard input.

    Returns:
        gnuplot's exit status. Non-blank output lines are printed; on
        failure the script and gnuplot's stderr are printed as well.
    '''
    # Collect the data before starting gnuplot, so that a failure while
    # reading the file does not leave a child process behind.
    data = get_data(opts)
    # universal_newlines=True switches the pipes to text mode: on
    # Python 3 the default binary pipes reject str input and return bytes.
    process = subprocess.Popen(
        ['gnuplot', '-p', '-e', template],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True
    )
    out, err = process.communicate(input=data)
    for line in out.split("\n"):
        if line.strip():
            print(line)
    # A negative status means gnuplot was killed by a signal; report
    # that the same way as an ordinary error exit.
    if process.returncode != 0:
        print(template)
        print('=' * 25)
        print(err)
    return process.returncode
def which(program):
    '''Locate an executable, similar to the shell's "which".

    Args:
        program: A bare command name, or a path containing a directory
            part.

    Returns:
        ``program`` itself when it contains a directory part and points to
        an executable file; otherwise the first match found in the
        directories of the PATH environment variable; ``None`` when nothing
        executable is found.
    '''
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory = os.path.split(program)[0]
    if directory:
        # An explicit location was given: do not search PATH.
        return program if is_exe(program) else None
    # PATH may be unset (e.g. in a stripped-down environment); fall back
    # to the platform default search path instead of raising KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file
    return None
def setup_options():
    '''Parse the command line and return the normalised options.

    After parsing, ``columns`` is turned into a list of stripped names
    (at most two are accepted), ``csv`` is split into its directory and
    file name parts, and a missing ``style`` is replaced by the default
    for the plot type: boxes for one column, points for two.
    '''
    cli = ArgumentParser(
        description="Plot CSV columns to 2D scatter plot, or a histogram. Uses GNUPLOT command line utility to draw plots."
    )
    cli.add_argument("-v", "--version", action='version', version=__version__)
    cli.add_argument(
        "-c", action="store", dest="columns", type=str, required=True,
        help="Column name for histogram, or comma separated list of two names for scatter plot. First column is the x-axis."
    )
    cli.add_argument(
        "-d", action="store", dest="delimiter", default="\t",
        help="File delimiter, default [tab]."
    )
    cli.add_argument(
        "-n", action="store", type=int, dest="bins", default=False,
        help="Bins for histograms. Default: min( console_width/4,sqrt(len(values)) )"
    )
    cli.add_argument(
        "-p", action="store", dest="pre", default="",
        help="Gnuplot commands to execute before plotting."
    )
    cli.add_argument(
        "-s", action="store", dest="style", default=None,
        help="Plotting style, e.g. 'with lines'. Defaults to points for 2D plot, boxes for histogram."
    )
    cli.add_argument(
        "--width", action="store", type=int, dest="width", default=False,
        help="Console width. Default: fullscreen"
    )
    cli.add_argument(
        "--height", action="store", type=int, dest="height", default=False,
        help="Console height. Default: fullscreen"
    )
    cli.add_argument(
        "-X", action="store_true", dest="X11", default=False,
        help="Show plot in X11 window, instead of console."
    )
    cli.add_argument(
        "-D", action="store_true", dest="debug", default=False,
        help="Print commands before execution"
    )
    cli.add_argument(
        "csv", type=str, action="store",
        help="CSV file to plot"
    )
    parsed = cli.parse_args()

    # "x, y" -> ["x", "y"]; one name selects a histogram, two a scatter plot.
    parsed.columns = [name.strip() for name in parsed.columns.split(",")]
    if len(parsed.columns) > 2:
        cli.error("Only two columns supported.")

    # The path is stored as (directory, file name); readers join it again.
    parsed.csv = os.path.split(parsed.csv)

    if not parsed.style:
        parsed.style = "with boxes;" if len(parsed.columns) == 1 else "pt '*'"
    return parsed
def termsize():
    '''Return the console size as ``(rows, columns)``.

    The size is asked from ``stty size``. When that gives no usable answer
    (no terminal attached, unexpected output, or a non-positive size) the
    conventional 25 x 80 is returned instead.
    '''
    fallback = (25, 80)
    try:
        # The context manager closes the pipe and reaps the child process.
        with os.popen('stty size', 'r') as pipe:
            rows, columns = pipe.read().split()
        # Converting inside the try means that non-numeric output also
        # falls back instead of raising.
        size = (int(rows), int(columns))
    except (OSError, ValueError):
        return fallback
    if size[0] <= 0 or size[1] <= 0:
        # Some pseudo-terminals report "0 0", which is useless for sizing.
        return fallback
    return size
# Script body: check that gnuplot is available, parse the options, pick the
# histogram (one column) or scatter plot (two columns) pipeline and run it.
if not which('gnuplot'):
    # Errors go to stderr, like the other diagnostics in this file, so
    # they do not mix with plot output on stdout.
    print('You don\'t seem to have "gnuplot" installed!', file=sys.stderr)
    sys.exit(1)
opts = setup_options()
if len(opts.columns) == 1:
    template = get_histogram_template(opts)
    get_data = get_data_histogram
else:
    template = get_scatterplot_template(opts)
    get_data = get_data_scatter
if opts.debug:
    print(template)
rc = run_gnuplot(opts, template, get_data)
# The exit status of the script is gnuplot's exit status.
sys.exit(rc)