Installable TSV Filter tool

This commit is contained in:
Q
2021-10-29 21:31:12 +03:00
parent e9863883b4
commit e2c4b2c147
3 changed files with 213 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from TSVFilter.filter import TSVFilter
def main():
    """Run the TSV filter as a command line program.

    Constructing ``TSVFilter`` is all that is needed: its initializer parses
    the command line and performs the filtering.
    """
    TSVFilter()

View File

@@ -0,0 +1,192 @@
import sys
import csv
import os
import re
from argparse import ArgumentParser
class TSVFilter:
    """Command line filter for delimiter-separated files with a header row.

    Instantiating the class runs the whole program: command line options are
    parsed, the column selection and row filters are prepared, and the
    matching rows are written to standard output.
    """

    def __init__(self):
        """Parse the command line and filter the input immediately."""
        self.get_options()
        self.parse_columns()
        self.parse_filters()
        self.process()

    def get_options(self):
        """Build the argument parser and store the parsed options.

        Sets ``self.options_parser`` (kept so that usage help can be printed
        later) and ``self.options``.
        """
        parser = ArgumentParser()
        parser.add_argument(
            "-f",
            action="store",
            dest="num_filters",
            default=None,
            help="Comma separated list of floating point filters as: 'key[operator]value'. Valid operators are <, <=, >, >=, != and ==. Example: -f 'column1<4,column1>=0,column2==10'",
        )
        parser.add_argument(
            "-s",
            action="store",
            dest="str_filters",
            default=None,
            help="Comma separated list of string filters as: 'key[operator]value'. Valid operators are !=, == and ~=. The ~= is a regex fullmatch operator. Example: -s 'column1==value,column2!=othervalue,column3~=M[0-9]+'",
        )
        parser.add_argument(
            "-c",
            action="store",
            dest="columns",
            default=None,
            help="Comma separated list of column names to output. If empty, all columns are included.",
        )
        parser.add_argument(
            "-d",
            action="store",
            dest="delimiter",
            default="\t",
            help="Delimiter: defaults to tab.",
        )
        parser.add_argument(
            "file",
            action="store",
            help="Filename to process. If '-', stdin used.",
        )
        self.options_parser = parser
        self.options = parser.parse_args()

    def parse_columns(self):
        """Split the ``-c`` option into ``self.columns``.

        ``self.columns`` becomes a list of whitespace-stripped column names,
        or ``None`` when the option was not given (meaning: all columns).
        """
        if self.options.columns:
            self.columns = [c.strip() for c in self.options.columns.split(",")]
        else:
            self.columns = None

    def parse_filters(self):
        """Translate the ``-f`` and ``-s`` options into ``self.filters``.

        ``self.filters`` is ``None`` when no filter option was given.
        Otherwise it is a list of dicts with the keys ``col`` (column name),
        ``op`` (two-argument predicate called as ``op(cell, value)``),
        ``value`` (the right-hand side) and ``numeric`` (whether the cell is
        converted to a number before comparing).

        A filter that cannot be parsed prints the usage help and an error
        message, then exits with status 1.
        """
        # Local import: the comparison functions come from the standard
        # library instead of being re-implemented here.
        import operator

        def reg(value1, expression):
            return bool(re.fullmatch(expression, value1))

        foperators = {
            "<": operator.lt,
            "<=": operator.le,
            ">=": operator.ge,
            ">": operator.gt,
            "==": operator.eq,
            "!=": operator.ne,
        }
        soperators = {
            "==": operator.eq,
            "!=": operator.ne,
            "~=": reg,
        }
        self.filters = None
        if not (self.options.num_filters or self.options.str_filters):
            return

        self.filters = []
        try:
            if self.options.num_filters:
                for f in self.options.num_filters.split(","):
                    # Two-character operators are listed first so that "<="
                    # is not read as "<" followed by a value starting with "=".
                    col, op, value = re.findall(
                        r"(.+)(<=|>=|<|>|!=|==)(.+)", f.strip()
                    )[0]
                    value = try_num(value, force_num=True)
                    self.filters.append(
                        {
                            "col": col,
                            "op": foperators[op],
                            "value": value,
                            "numeric": True,
                        }
                    )
            if self.options.str_filters:
                for f in self.options.str_filters.split(","):
                    col, op, value = re.findall(
                        r"(.+)(!=|==|~=)(.+)", f.strip()
                    )[0]
                    self.filters.append(
                        {
                            "col": col,
                            "op": soperators[op],
                            "value": value,
                            "numeric": False,
                        }
                    )
        except (IndexError, ValueError):
            # IndexError: the pattern did not match at all.
            # ValueError: the value of a numeric filter is not a number.
            self.options_parser.print_help()
            sys.stderr.write("\nCannot parse filter: {}\n".format(f))
            sys.exit(1)

    def process(self):
        """Read the input, apply the filters and write matching rows.

        Input comes from standard input when the file option is ``-``,
        otherwise from the named file, which is closed again when done.

        Raises:
            ValueError: if a selected or filtered column is not present in
                the input header.
        """
        if self.options.file == "-":
            self._filter_stream(sys.stdin)
            return
        # newline="" lets the csv module do its own line-ending handling.
        with open(self.options.file, "rt", newline="") as fp:
            self._filter_stream(fp)

    def _filter_stream(self, fp):
        """Filter the rows of the open text stream ``fp`` to standard output."""
        reader = csv.DictReader(fp, delimiter=self.options.delimiter)
        available = reader.fieldnames
        if available is None:
            # Completely empty input: there is no header, so nothing to write.
            return

        fieldnames = self.columns if self.columns else available
        # Check every referenced column up front so that a typo produces a
        # clear message instead of a KeyError on the first data row.
        referenced = list(self.columns or [])
        referenced.extend(filt["col"] for filt in self.filters or [])
        for c in referenced:
            if c not in available:
                raise ValueError("No such column '{}'".format(c))

        writer = csv.DictWriter(
            sys.stdout,
            fieldnames=fieldnames,
            quoting=csv.QUOTE_NONNUMERIC,
            delimiter=self.options.delimiter,
        )
        selected = set(fieldnames)
        try:
            writer.writeheader()
            for row in reader:
                if not self._row_matches(row):
                    continue
                # Numeric-looking cells are converted so that
                # QUOTE_NONNUMERIC leaves them unquoted.
                writer.writerow(
                    {
                        col: try_num(str(row[col]))
                        for col in row
                        if col in selected
                    }
                )
        except BrokenPipeError:
            # The consumer (e.g. ``head``) closed the pipe; stop quietly.
            return

    def _row_matches(self, row):
        """Return True when ``row`` passes every configured filter."""
        if not self.filters:
            # No filters configured means every row is printed.
            return self.filters is None
        # Every filter is evaluated (no short-circuit) so that a non-numeric
        # cell in a numeric filter column is reported regardless of order.
        results = []
        for filt in self.filters:
            cell = row[filt["col"]]
            if filt["numeric"]:
                cell = try_num(cell, force_num=True)
            results.append(filt["op"](cell, filt["value"]))
        return all(results)
def try_num(s, force_num=False):
    """Convert ``s`` to a number when possible.

    An ``int`` conversion is attempted first, then ``float``.

    Args:
        s: The value to convert, normally a string read from the input.
        force_num: When true, a value that is not numeric is an error
            instead of being returned unchanged.

    Returns:
        The ``int`` or ``float`` value of ``s``; or ``s`` itself when it is
        not numeric and ``force_num`` is false.

    Raises:
        ValueError: if ``s`` is not numeric and ``force_num`` is true.
    """
    for convert in (int, float):
        try:
            return convert(s)
        except (TypeError, ValueError):
            # TypeError covers non-string input such as None, which the csv
            # reader produces for cells missing from a short row.
            continue
    if force_num:
        raise ValueError("Value '{}' can not be converted to numeric".format(s))
    return s

View File

@@ -0,0 +1,15 @@
from distutils.core import setup
# NOTE(review): ``entry_points`` is a setuptools feature that plain distutils
# does not recognise; this presumably relies on the installer (e.g. pip)
# substituting setuptools when building. Confirm before relying on the
# ``TSVFilter`` console command being generated.
setup(
    # Distribution identity.
    name="TSVFilter",
    version="1.0",
    description="TSV column filter.",
    keywords=["TSV", "data"],
    # Maintainer contact.
    author="Ville Rantanen",
    author_email="ville.q.rantanen@gmail.com",
    # Shipped code and the command that maps to ``TSVFilter.main``.
    packages=["TSVFilter"],
    entry_points={
        "console_scripts": [
            "TSVFilter=TSVFilter:main",
        ],
    },
)