#!/usr/bin/env python3
"""Pick which timestamped entries to keep under a tiered retention policy.

Entry names are read from stdin (one per line). Each entry is dated either by
its file modification time or by parsing its name with a ``strptime`` pattern.
The newest entries are kept unconditionally ("head"), followed by a limited
number of daily, weekly, monthly and yearly representatives.
"""
import argparse
import glob
import json
import os
import sys
from datetime import datetime, timedelta

__version__ = "20251107.01"


def filter_entries(entries, years=6, months=6, weeks=6, days=6, head=5):
    """Split *entries* into the ones to keep and the ones to remove.

    Args:
        entries: List of ``[name, datetime]`` lists. The input is not mutated.
        years: Maximum number of yearly representatives to keep.
        months: Maximum number of monthly representatives to keep.
        weeks: Maximum number of weekly representatives to keep.
        days: Maximum number of daily representatives to keep.
        head: Number of newest entries to keep unconditionally. If fewer
            entries exist, all of them are kept as head entries.

    Returns:
        A ``(kept, removed)`` tuple. ``kept`` holds
        ``[name, datetime, tier, bucket_id]`` lists sorted newest first, where
        ``tier`` is one of ``"head"``, ``"day"``, ``"week"``, ``"month"`` or
        ``"year"``. ``removed`` holds ``[name, datetime, "removed", ""]``
        lists for every entry whose name is not in ``kept``, oldest first.
    """

    def date2day(d):
        return d.strftime("%Y-%m-%d")

    def date2week(d):
        # Pair the ISO week number with the ISO year, not the calendar year:
        # 2024-12-30 belongs to ISO week 1 of 2025 and must not share a
        # bucket with the first week of January 2024.
        iso = d.isocalendar()
        return f"{iso[0]}w{iso[1]}"

    def date2month(d):
        return d.strftime("%Y-%m")

    def date2year(d):
        return d.strftime("%Y")

    def first_per_bucket(candidates, date2id, start_date, name):
        """Return one tagged entry per bucket, newest first.

        Only entries dated strictly before *start_date* (compared by calendar
        day) are eligible; ``None`` disables that restriction. Because
        *candidates* is sorted oldest first, the entry retained for each
        bucket is the oldest one in it.
        """
        seen = set()
        reduced = []
        for entry in candidates:
            if start_date is not None and (start_date - entry[1].date()).days <= 0:
                continue
            bucket = date2id(entry[1])
            if bucket in seen:
                continue
            seen.add(bucket)
            reduced.append(entry + [name, bucket])
        reduced.sort(key=lambda x: x[1], reverse=True)
        return reduced

    remaining = sorted(entries, key=lambda x: x[1])
    original_entries = remaining.copy()

    result = []
    # Never pop more entries than exist; a non-positive head keeps none.
    for i in range(max(0, min(head, len(remaining)))):
        result.append(remaining.pop() + ["head", str(i + 1)])

    tiers = (
        ("day", date2day, days),
        ("week", date2week, weeks),
        ("month", date2month, months),
        ("year", date2year, years),
    )
    for name, date2id, limit in tiers:
        # Each tier only considers entries older than the oldest entry kept
        # so far; with nothing kept yet, every remaining entry is eligible.
        start_date = result[-1][1].date() if result else None
        candidates = first_per_bucket(remaining, date2id, start_date, name)
        # max() keeps "negative limit keeps nothing" instead of slicing from
        # the end of the list.
        result.extend(candidates[: max(limit, 0)])

    result.sort(key=lambda x: x[1], reverse=True)
    kept_names = {r[0] for r in result}
    filtered = [f + ["removed", ""] for f in original_entries if f[0] not in kept_names]
    return result, filtered


def get_opts():
    """Parse command-line options.

    Exits via ``parser.error`` when ``--head`` is below 1, and prints the help
    text and exits with status 0 when stdin is a terminal (no piped input).
    """
    parser = argparse.ArgumentParser(
        description="Filter entries based on file timestamps, or parse dates. "
        "Filenames read from stdin"
    )
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--head", "-n", type=int, default=1, help="Number of latest entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--days", "-d", type=int, default=7, help="Number of daily entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--weeks", "-w", type=int, default=6, help="Number of weekly entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--months", "-m", type=int, default=6, help="Number of monthly entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--years", "-y", type=int, default=4, help="Number of annual entries to keep. Default: %(default)s"
    )
    parser.add_argument("--json", default=False, action="store_true", help="Print output as JSON")
    parser.add_argument("--verbose", "-v", default=False, action="store_true", help="Print output as verbose table")
    parser.add_argument("--invert", default=False, action="store_true", help="Print names to remove")
    parser.add_argument(
        "--parse", default=None, help="Parse names, instead of using file timestamps (ex: 'service-%%Y-%%m-%%d.log')"
    )
    args = parser.parse_args()
    if args.head < 1:
        parser.error("HEAD must be at least 1")
    if sys.stdin.isatty():
        parser.print_help()
        sys.exit(0)
    return args


def printable_date(d):
    """Format *d* as ``YYYY-MM-DD Day HH:MM:SS``."""
    return d.strftime("%Y-%m-%d %a %H:%M:%S")


def main():
    """Read entry names from stdin, apply the retention filter, print results."""
    args = get_opts()
    entries = []
    for line in sys.stdin:
        entry_file = line.rstrip("\n")
        if not entry_file:
            # A blank line names no entry; looking it up would only crash.
            continue
        if args.parse is not None:
            entries.append([entry_file, datetime.strptime(entry_file, args.parse)])
        else:
            entries.append([entry_file, datetime.fromtimestamp(os.path.getmtime(entry_file))])

    keep, remove = filter_entries(entries, args.years, args.months, args.weeks, args.days, args.head)
    if args.json:
        # JSON output always lists both kept and removed entries.
        print(
            json.dumps(
                [{"name": f[0], "date": printable_date(f[1]), "match": f[2], "id": f[3]} for f in keep + remove],
                indent=2,
            )
        )
    else:
        printable = remove if args.invert else keep
        for entry in printable:
            if args.verbose:
                print(f"{entry[0]:20s} {printable_date(entry[1])} {entry[2]:5s} {entry[3]}")
            else:
                print(entry[0])


if __name__ == "__main__":
    main()