working TSV and SWP and modules
This commit is contained in:
6
py-packages/TSVFilter/TSVFilter/__init__.py
Normal file
6
py-packages/TSVFilter/TSVFilter/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from TSVFilter.filter import TSVFilter
|
||||
|
||||
|
||||
def main():
    """Console entry point: constructing ``TSVFilter`` runs the whole tool."""
    TSVFilter()
|
||||
196
py-packages/TSVFilter/TSVFilter/filter.py
Normal file
196
py-packages/TSVFilter/TSVFilter/filter.py
Normal file
@@ -0,0 +1,196 @@
|
||||
import sys
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
from argparse import ArgumentParser
|
||||
|
||||
__version__ = "1.0"
|
||||
|
||||
|
||||
class TSVFilter:
    """Command line filter for delimited text files with a header row.

    Instantiating the class runs the complete pipeline: command line options
    are parsed, the optional output column list and the row filters are
    prepared, and the input is streamed to stdout with non-matching rows and
    unselected columns removed.
    """

    def __init__(self):
        self.get_options()
        self.parse_columns()
        self.parse_filters()
        self.process()

    def get_options(self):
        """Build the argument parser and parse the command line.

        Stores the parser in ``self.options_parser`` (it is reused to print
        help when a filter cannot be parsed) and the parsed arguments in
        ``self.options``.
        """
        parser = ArgumentParser()
        parser.add_argument("--version", action="version", version=__version__)
        parser.add_argument(
            "-f",
            action="store",
            dest="num_filters",
            default=None,
            help="Comma separated list of floating point filters as: 'key[operator]value'. Valid operators are <, <=, >, >=, != and ==. Example: -f 'column1<4,column1>=0,column2==10'",
        )
        parser.add_argument(
            "-s",
            action="store",
            dest="str_filters",
            default=None,
            help="Comma separated list of string filters as: 'key[operator]value'. Valid operators are !=, == and ~=. The ~= is a regex fullmatch operator. Example: -s 'column1==value,column2!=othervalue,column3~=M[0-9]+'",
        )
        parser.add_argument(
            "-c",
            action="store",
            dest="columns",
            default=None,
            help="Comma separated list of column names to output. If empty, all columns are included.",
        )
        parser.add_argument(
            "-d",
            action="store",
            dest="delimiter",
            default="\t",
            help="Delimiter: defaults to tab.",
        )
        parser.add_argument(
            "file",
            action="store",
            help="Filename to process. If '-', stdin used.",
        )
        self.options_parser = parser
        self.options = parser.parse_args()

    def parse_columns(self):
        """Split the ``-c`` option into ``self.columns``.

        ``self.columns`` becomes a list of whitespace-stripped column names,
        or ``None`` when the option was not given (meaning: all columns).
        """
        if self.options.columns:
            self.columns = [c.strip() for c in self.options.columns.split(",")]
        else:
            self.columns = None

    def parse_filters(self):
        """Parse the ``-f`` and ``-s`` options into ``self.filters``.

        ``self.filters`` is ``None`` when no filter option was given.
        Otherwise it is a list of dicts with the keys ``col`` (column name),
        ``op`` (two-argument predicate called as ``op(cell, value)``),
        ``value`` (comparison value; a number for ``-f`` filters, a string
        for ``-s`` filters) and ``numeric`` (whether the cell must be
        converted to a number before comparing).

        If a filter cannot be parsed (no recognised operator, non-numeric
        value in a numeric filter, or an invalid ``~=`` regular expression)
        the help text and an error message are printed and the process exits
        with status 1.
        """
        # Function-scope import so this block does not depend on the
        # file-level import section.
        import operator

        def reg(value1, expression):
            return bool(re.fullmatch(expression, value1))

        foperators = {
            "<": operator.lt,
            "<=": operator.le,
            ">=": operator.ge,
            ">": operator.gt,
            "==": operator.eq,
            "!=": operator.ne,
        }
        soperators = {
            "==": operator.eq,
            "!=": operator.ne,
            "~=": reg,
        }

        self.filters = None
        if not (self.options.num_filters or self.options.str_filters):
            return

        self.filters = []
        f = None
        try:
            if self.options.num_filters:
                for f in self.options.num_filters.split(","):
                    # findall yields one (col, op, value) tuple on success and
                    # an empty list otherwise, so [0] raises IndexError for
                    # text without a valid operator.
                    col, op, value = re.findall(
                        "(.+)(<=|>=|<|>|!=|==)(.+)", f.strip()
                    )[0]
                    value = try_num(value, force_num=True)
                    self.filters.append(
                        {
                            "col": col,
                            "op": foperators[op],
                            "value": value,
                            "numeric": True,
                        }
                    )

            if self.options.str_filters:
                for f in self.options.str_filters.split(","):
                    col, op, value = re.findall("(.+)(!=|==|~=)(.+)", f.strip())[0]
                    if op == "~=":
                        # Validate the pattern now so a bad regex is reported
                        # as a filter error instead of failing on the first row.
                        re.compile(value)
                    self.filters.append(
                        {
                            "col": col,
                            "op": soperators[op],
                            "value": value,
                            "numeric": False,
                        }
                    )
        except (IndexError, ValueError, re.error):
            self.options_parser.print_help()
            sys.stderr.write("\nCannot parse filter: {}\n".format(f))
            sys.exit(1)

    def process(self):
        """Read the input, apply filters and column selection, write to stdout.

        Input is ``sys.stdin`` when the file argument is ``-``; otherwise the
        named file is opened and closed again when processing ends.

        Raises:
            ValueError: if a requested output column or a filter column is
                not present in the input header, or (raised by ``try_num``)
                if a cell used by a numeric filter is not numeric.
        """
        if self.options.file == "-":
            self._filter_stream(sys.stdin)
            return
        # newline="" leaves newline handling to the csv module, as its
        # documentation requires for file objects.
        with open(self.options.file, "rt", newline="") as fp:
            self._filter_stream(fp)

    def _filter_stream(self, fp):
        """Filter the rows read from the open text stream *fp* to stdout."""
        reader = csv.DictReader(fp, delimiter=self.options.delimiter)
        header = reader.fieldnames
        # DictReader reports None as fieldnames when the input has no lines.
        empty_input = header is None
        available = [] if empty_input else header

        fieldnames = self.columns if self.columns else available

        # Fail early, and with the same message, for unknown output columns
        # and unknown filter columns.
        referenced = list(self.columns or [])
        referenced.extend(filt["col"] for filt in (self.filters or []))
        for name in referenced:
            if name not in available:
                raise ValueError("No such column '{}'".format(name))

        if empty_input:
            return

        writer = csv.DictWriter(
            sys.stdout,
            fieldnames=fieldnames,
            quoting=csv.QUOTE_NONNUMERIC,
            delimiter=self.options.delimiter,
        )
        writer.writeheader()
        wanted = set(fieldnames)
        try:
            for row in reader:
                printrow = self.filters is None
                if self.filters:
                    printrow = self._row_matches(row)
                if not printrow:
                    continue
                # Cells are converted to numbers where possible so that
                # QUOTE_NONNUMERIC leaves them unquoted. A missing cell
                # (None, from a short row) is written as an empty string
                # rather than the text "None".
                writer.writerow(
                    {
                        col: "" if cell is None else try_num(str(cell))
                        for col, cell in row.items()
                        if col in wanted
                    }
                )
        except BrokenPipeError:
            # The consumer of stdout went away; stop quietly.
            return

    def _row_matches(self, row):
        """Return True when *row* satisfies every filter in ``self.filters``."""
        results = []
        # Every filter is evaluated (no short-circuit) so a non-numeric cell
        # in any numerically filtered column is always reported.
        for filt in self.filters:
            cell = row[filt["col"]]
            if filt["numeric"]:
                cell = try_num(cell, force_num=True)
            results.append(filt["op"](cell, filt["value"]))
        return all(results)
|
||||
|
||||
|
||||
def try_num(s, force_num=False):
    """Convert *s* to a number when possible.

    ``int`` is tried first, then ``float``; the first successful conversion
    is returned.

    Args:
        s: Value to convert, typically a string read from a file or from the
            command line.
        force_num: When true, a value that cannot be converted is an error
            instead of being passed through.

    Returns:
        The converted ``int`` or ``float``, or *s* unchanged when it is not
        numeric and *force_num* is false.

    Raises:
        ValueError: if *force_num* is true and *s* cannot be converted.
    """
    for convert in (int, float):
        try:
            return convert(s)
        except (TypeError, ValueError):
            # TypeError covers non-string input such as None, which is
            # treated the same as a non-numeric string.
            continue
    if force_num:
        raise ValueError("Value '{}' can not be converted to numeric".format(s))
    return s
|
||||
24
py-packages/TSVFilter/setup.py
Normal file
24
py-packages/TSVFilter/setup.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from distutils.core import setup
|
||||
|
||||
|
||||
def version_reader(path):
    """Return the version assigned to ``__version__`` in the file at *path*.

    The file is scanned line by line for the first line that starts with
    ``__version__`` and contains ``=``. The text after the first ``=`` is
    returned with surrounding whitespace and all quote characters removed.

    Args:
        path: Path of the Python source file to scan.

    Returns:
        The version string, or ``None`` when no such line exists.
    """
    with open(path, "rt") as fp:
        for line in fp:
            if not line.startswith("__version__"):
                continue
            _, sep, value = line.partition("=")
            if not sep:
                # A line like "__version__" with no assignment; keep looking.
                continue
            return value.strip().replace('"', "").replace("'", "")
    return None
|
||||
|
||||
|
||||
# ``os`` is needed for the path join below and is not imported anywhere else
# in this setup script.
import os

# The package version is read from the ``__version__`` line in the module source.
version = version_reader(os.path.join("TSVFilter", "filter.py"))

# NOTE(review): ``entry_points`` is a setuptools option, while this file
# imports ``setup`` from distutils; the console script is presumably only
# created when installation goes through setuptools (e.g. via pip) -- confirm.
setup(
    name="TSVFilter",
    packages=["TSVFilter"],
    version=version,
    description="TSV column filter.",
    author="Ville Rantanen",
    author_email="ville.q.rantanen@gmail.com",
    keywords=["TSV", "data"],
    entry_points={
        "console_scripts": [
            "TSVFilter=TSVFilter:main",
        ],
    },
)
|
||||
Reference in New Issue
Block a user