Files
q-tools/files/daterake
2025-11-10 13:59:01 +02:00

163 lines
5.7 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import json
import os
import sys
from datetime import datetime
__version__ = "20251110.02"
def filter_entries(entries, years=6, months=6, weeks=6, days=6, head=5):
    """Split dated entries into the ones to keep and the ones to remove.

    Retention works in tiers.  The ``head`` newest entries are always kept.
    After that, four tiers run in order: one entry per calendar day, per ISO
    week, per month and per year.  Each tier is anchored at the date of the
    oldest entry kept so far, only considers entries strictly older than that
    anchor and younger than its window (``days``, ``weeks * 7``,
    ``months * 31`` and ``years * 366`` days respectively), keeps the oldest
    entry of every bucket and retains at most ``days`` / ``weeks`` /
    ``months`` / ``years`` buckets, newest first.

    Args:
        entries: Iterable of ``[name, datetime]`` lists.  It is not modified.
        years: Maximum number of yearly entries to keep.
        months: Maximum number of monthly entries to keep.
        weeks: Maximum number of weekly entries to keep.
        days: Maximum number of daily entries to keep.
        head: Number of newest entries that are kept unconditionally.

    Returns:
        A ``(kept, removed)`` tuple.  Both lists contain new 5-item lists
        ``[name, date, match, match_id, age]``.  ``kept`` is sorted newest
        first and ``match`` is one of ``"head"``, ``"day"``, ``"week"``,
        ``"month"`` or ``"year"``.  ``removed`` is sorted oldest first with
        ``match == "removed"``, an empty ``match_id`` and ``age`` of ``None``.
        Empty input yields ``([], [])``.

    Raises:
        ValueError: If ``head`` is smaller than 1; the tiers need at least
            one head entry to anchor on.
    """
    if head < 1:
        raise ValueError("head must be at least 1")

    def day_id(moment):
        return moment.strftime("%Y-%m-%d")

    def week_id(moment):
        # Pair the ISO week number with the ISO year, not the calendar year:
        # 2024-12-30 belongs to ISO week 1 of 2025 and must share a bucket
        # with 2025-01-01 instead of colliding with the first week of 2024.
        iso_year, iso_week, _ = moment.date().isocalendar()
        return f"{iso_year}w{iso_week}"

    def month_id(moment):
        return moment.strftime("%Y-%m")

    def year_id(moment):
        return moment.strftime("%Y")

    def pick_tier(candidates, bucket_of, anchor, label, window, limit):
        """Return up to ``limit`` annotated entries, one per bucket."""
        seen = set()
        picked = []
        # Candidates arrive oldest first, so the first entry seen for a
        # bucket (the one that is kept) is the oldest one in that bucket.
        for entry in candidates:
            age = (anchor - entry[1].date()).days
            if not 0 < age < window:
                continue
            bucket = bucket_of(entry[1])
            if bucket in seen:
                continue
            seen.add(bucket)
            picked.append(entry + [label, bucket, age])
        picked.sort(key=lambda item: item[1], reverse=True)
        return picked[: max(limit, 0)]

    ordered = sorted(entries, key=lambda item: item[1])
    if not ordered:
        # Nothing to rank; previously this crashed with IndexError.
        return [], []

    head_count = min(head, len(ordered))
    kept = [ordered[-rank] + ["head", str(rank), None] for rank in range(1, head_count + 1)]
    candidates = ordered[:-head_count]

    # (bucket function, label, number of buckets to keep, days per bucket)
    tiers = (
        (day_id, "day", days, 1),
        (week_id, "week", weeks, 7),
        (month_id, "month", months, 31),
        (year_id, "year", years, 366),
    )
    for bucket_of, label, limit, span in tiers:
        # The last appended entry is always the oldest one kept so far.
        anchor = kept[-1][1].date()
        kept.extend(pick_tier(candidates, bucket_of, anchor, label, limit * span, limit))

    kept.sort(key=lambda item: item[1], reverse=True)
    kept_names = {item[0] for item in kept}
    removed = [entry + ["removed", "", None] for entry in ordered if entry[0] not in kept_names]
    return kept, removed
def get_opts():
    """Build the command-line parser, parse ``sys.argv`` and validate it.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes ``head``,
        ``days``, ``weeks``, ``months``, ``years``, ``parse``, ``json``,
        ``verbose``, ``stderr``, ``drop`` and ``keep``.

    The function exits through ``parser.error`` when ``--head`` is below 1 or
    when none of ``--keep``, ``--drop``, ``--stderr``, ``--json`` is given.
    When stdin is a terminal it prints the help text and exits with status 0.
    """
    parser = argparse.ArgumentParser(
        description="Filter entries based on file timestamps, or parsed dates for keeping backups for example. Filenames read from stdin. Note: you must use any of --keep, --drop, --stderr, --json. ",
        epilog="Example usage (watch out for actual deletion): # ls | grep ^backup- | daterake -n 5 --drop -s | xargs echo rm -f ",
    )
    parser.add_argument("--version", action="version", version=__version__)

    # Integer retention limits: (long flag, short flag, default, help text).
    retention_limits = (
        ("--head", "-n", 1, "Number of latest entries to keep. Default: %(default)s"),
        ("--days", "-d", 7, "Number of daily entries to keep. Default: %(default)s"),
        ("--weeks", "-w", 6, "Number of weekly entries to keep. Default: %(default)s"),
        ("--months", "-m", 6, "Number of monthly entries to keep. Default: %(default)s"),
        ("--years", "-y", 4, "Number of annual entries to keep. Default: %(default)s"),
    )
    for long_flag, short_flag, fallback, description in retention_limits:
        parser.add_argument(long_flag, short_flag, type=int, default=fallback, help=description)

    parser.add_argument(
        "--parse", default=None, help="Parse names, instead of using file timestamps (ex: 'service-%%Y-%%m-%%d.log')"
    )

    # Boolean switches: (flag spellings, help text).
    switches = (
        (("--json",), "Print output as JSON"),
        (("--verbose", "-v"), "Print output as verbose table"),
        (("--stderr", "-s"), "Print full output in stderr"),
        (("--drop", "--invert"), "Print the names to drop"),
        (("--keep",), "Print the names to keep"),
    )
    for flags, description in switches:
        parser.add_argument(*flags, default=False, action="store_true", help=description)

    args = parser.parse_args()

    if args.head < 1:
        parser.error("HEAD must be at least 1")

    output_selected = args.keep or args.drop or args.stderr or args.json
    if not output_selected:
        parser.error("Not using any of keep, drop, json or stderr will not print out anything.")

    # Names are read from stdin; an interactive terminal means nothing was piped in.
    if sys.stdin.isatty():
        parser.print_help()
        sys.exit(0)

    return args
def printable_date(d):
    """Format ``d`` as ``YYYY-MM-DD Day HH:MM:SS`` (abbreviated weekday name)."""
    layout = "%Y-%m-%d %a %H:%M:%S"
    return d.strftime(layout)
def print_table(printable, verbose, file=None):
    """Print one line per entry, either just the name or a detail row.

    Args:
        printable: Iterable of entries.  Index 0 is the name; in verbose mode
            index 1 (date), index 2 (match label) and index 3 (match id) are
            printed as well.
        verbose: When true, print name, formatted date, match label and
            match id; otherwise print only the name.
        file: Stream to write to.  ``None`` (the default) means the current
            ``sys.stdout``, resolved at call time so that a redirected stdout
            is honoured instead of the stream captured at definition time.
    """
    for entry in printable:
        if verbose:
            line = f"{entry[0]:20s} {printable_date(entry[1])} {entry[2]:5s} {entry[3]}"
        else:
            line = entry[0]
        print(line, file=file)
def main():
    """Read names from stdin, decide what to keep, and print the result.

    Each non-empty stdin line is one entry name.  Its date comes from parsing
    the name with the ``--parse`` pattern when given, otherwise from the
    file's modification time.  With ``--json`` all entries (kept first, then
    removed) are printed as a JSON list; otherwise the kept and/or dropped
    names go to stdout according to ``--keep`` / ``--drop``, and ``--stderr``
    additionally writes the full verbose table to stderr.

    Raises:
        ValueError: If a name does not match the ``--parse`` pattern.
        OSError: If the modification time of a name cannot be read.
    """
    args = get_opts()

    entries = []
    for raw_line in sys.stdin:
        entry_file = raw_line.rstrip("\n")
        if not entry_file:
            # A blank line is never a valid name; skipping it avoids a crash
            # in getmtime("") / strptime on stray empty lines.
            continue
        if args.parse is not None:
            stamp = datetime.strptime(entry_file, args.parse)
        else:
            stamp = datetime.fromtimestamp(os.path.getmtime(entry_file))
        entries.append([entry_file, stamp])

    if entries:
        keep, remove = filter_entries(entries, args.years, args.months, args.weeks, args.days, args.head)
    else:
        # Empty input (e.g. a grep that matched nothing) is not an error.
        keep, remove = [], []

    if args.json:
        print(
            json.dumps(
                [
                    {"name": f[0], "date": printable_date(f[1]), "match": f[2], "match-id": f[3], "age": f[4]}
                    for f in keep + remove
                ],
                indent=2,
            )
        )
    else:
        if args.keep:
            print_table(keep, args.verbose)
        if args.drop:
            print_table(remove, args.verbose)
    if args.stderr:
        print_table(keep, True, file=sys.stderr)
        print_table(remove, True, file=sys.stderr)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()