#!/usr/bin/env python3
"""Thin out a list of dated entries, keeping recent/daily/weekly/monthly/yearly ones.

Names are read from stdin; each name is dated either by its file modification
time or by parsing the name with a ``strptime`` pattern (``--parse``).
"""
import argparse
import json
import os
import sys
from datetime import datetime

__version__ = "20251110.02"


def _day_id(d):
    """Return the calendar-day bucket id of datetime *d* (``YYYY-MM-DD``)."""
    return d.strftime("%Y-%m-%d")


def _week_id(d):
    """Return the ISO-week bucket id of datetime *d* (``<iso-year>w<iso-week>``).

    The ISO year is used together with the ISO week number so that days
    around New Year that belong to the same ISO week share one id.
    """
    iso_year, iso_week, _ = d.date().isocalendar()
    return f"{iso_year}w{iso_week}"


def _month_id(d):
    """Return the calendar-month bucket id of datetime *d* (``YYYY-MM``)."""
    return d.strftime("%Y-%m")


def _year_id(d):
    """Return the calendar-year bucket id of datetime *d* (``YYYY``)."""
    return d.strftime("%Y")


def _first_per_bucket(entries, date2id, start_date, name, max_age):
    """Pick one entry per bucket among entries strictly older than *start_date*.

    Args:
        entries: ``[name, datetime]`` lists, scanned in the given order; the
            first entry seen for a bucket id wins.
        date2id: function mapping a datetime to its bucket id.
        start_date: ``date`` that ages are measured from.
        name: tier label stored on each picked entry.
        max_age: exclusive upper bound on the age in whole days.

    Returns:
        New lists ``entry + [name, bucket_id, age]``, newest first.
    """
    seen = set()
    picked = []
    for entry in entries:
        age = (start_date - entry[1].date()).days
        # Only entries strictly older than start_date and inside the window.
        if not 0 < age < max_age:
            continue
        bucket_id = date2id(entry[1])
        if bucket_id in seen:
            continue
        seen.add(bucket_id)
        picked.append(entry + [name, bucket_id, age])
    picked.sort(key=lambda x: x[1], reverse=True)
    return picked


def filter_entries(entries, years=6, months=6, weeks=6, days=6, head=5):
    """Split *entries* into the ones to keep and the ones to remove.

    The newest *head* entries are always kept.  After that, each tier (day,
    week, month, year) keeps at most its configured number of entries, one
    per bucket, measured from the oldest entry kept so far.

    Args:
        entries: list of ``[name, datetime]`` lists.  Not modified.
        years, months, weeks, days: maximum number of entries per tier.
        head: number of newest entries kept unconditionally; must be >= 1.

    Returns:
        ``(kept, removed)``.  Kept items are
        ``[name, datetime, tier, tier_id, age_in_days]`` sorted newest first
        (age is ``None`` for "head" items).  Removed items are
        ``[name, datetime, "removed", "", None]`` sorted oldest first.

    Raises:
        ValueError: if *head* is smaller than 1.
    """
    if head < 1:
        raise ValueError("head must be at least 1")
    if not entries:
        # Nothing to rank; avoids indexing into an empty result below.
        return [], []

    original_entries = sorted(entries, key=lambda x: x[1])
    remaining = original_entries.copy()

    result = []
    for i in range(min(head, len(remaining))):
        result.append(remaining.pop() + ["head", str(i + 1), None])

    # (tier label, bucket id function, how many to keep, days per unit)
    tiers = (
        ("day", _day_id, days, 1),
        ("week", _week_id, weeks, 7),
        ("month", _month_id, months, 31),
        ("year", _year_id, years, 366),
    )
    for name, date2id, count, unit_days in tiers:
        # Each tier starts from the last (oldest) entry appended so far.
        start_date = result[-1][1].date()
        candidates = _first_per_bucket(remaining, date2id, start_date, name, count * unit_days)
        result.extend(candidates[: max(count, 0)])

    result.sort(key=lambda x: x[1], reverse=True)
    kept_names = {r[0] for r in result}
    filtered = [f + ["removed", "", None] for f in original_entries if f[0] not in kept_names]
    return result, filtered


def get_opts():
    """Parse and validate command-line options.

    Exits with a usage error when ``--head`` is below 1 or when no output
    option is selected, and prints the help text and exits with status 0
    when stdin is a terminal.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Filter entries based on file timestamps, or parsed dates for keeping backups for "
            "example. Filenames read from stdin. "
            "Note: you must use any of --keep, --drop, --stderr, --json. "
        ),
        epilog=(
            "Example usage (watch out for actual deletion): "
            "# ls | grep ^backup- | daterake -n 5 --drop -s | xargs echo rm -f "
        ),
    )
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--head", "-n", type=int, default=1, help="Number of latest entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--days", "-d", type=int, default=7, help="Number of daily entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--weeks", "-w", type=int, default=6, help="Number of weekly entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--months", "-m", type=int, default=6, help="Number of monthly entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--years", "-y", type=int, default=4, help="Number of annual entries to keep. Default: %(default)s"
    )
    parser.add_argument(
        "--parse", default=None, help="Parse names, instead of using file timestamps (ex: 'service-%%Y-%%m-%%d.log')"
    )
    parser.add_argument("--json", default=False, action="store_true", help="Print output as JSON")
    parser.add_argument("--verbose", "-v", default=False, action="store_true", help="Print output as verbose table")
    parser.add_argument("--stderr", "-s", default=False, action="store_true", help="Print full output in stderr")
    parser.add_argument("--drop", "--invert", default=False, action="store_true", help="Print the names to drop")
    parser.add_argument("--keep", default=False, action="store_true", help="Print the names to keep")
    args = parser.parse_args()

    if args.head < 1:
        parser.error("HEAD must be at least 1")
    if not any((args.keep, args.drop, args.stderr, args.json)):
        parser.error("Not using any of keep, drop, json or stderr will not print out anything.")
    if sys.stdin.isatty():
        # No piped input: show the help instead of waiting on the terminal.
        parser.print_help()
        sys.exit(0)
    return args


def printable_date(d):
    """Format datetime *d* as ``YYYY-MM-DD Day HH:MM:SS``."""
    return d.strftime("%Y-%m-%d %a %H:%M:%S")


def print_table(printable, verbose, file=sys.stdout):
    """Print entries to *file*, one per line.

    Args:
        printable: entries as returned by ``filter_entries``.
        verbose: when true print name, date, tier and tier id; otherwise
            print the name only.
        file: output stream.
    """
    for entry in printable:
        if verbose:
            print(f"{entry[0]:20s} {printable_date(entry[1])} {entry[2]:5s} {entry[3]}", file=file)
        else:
            print(entry[0], file=file)


def main():
    """Read names from stdin, classify them and print the requested output."""
    args = get_opts()

    entries = []
    for line in sys.stdin:
        entry_file = line.rstrip("\n")
        if not entry_file:
            # A blank line names nothing; skip it rather than fail on "".
            continue
        if args.parse is not None:
            stamp = datetime.strptime(entry_file, args.parse)
        else:
            stamp = datetime.fromtimestamp(os.path.getmtime(entry_file))
        entries.append([entry_file, stamp])

    keep, remove = filter_entries(entries, args.years, args.months, args.weeks, args.days, args.head)

    if args.json:
        print(
            json.dumps(
                [
                    {"name": f[0], "date": printable_date(f[1]), "match": f[2], "match-id": f[3], "age": f[4]}
                    for f in keep + remove
                ],
                indent=2,
            )
        )
    else:
        if args.keep:
            print_table(keep, args.verbose)
        if args.drop:
            print_table(remove, args.verbose)
    if args.stderr:
        print_table(keep, True, file=sys.stderr)
        print_table(remove, True, file=sys.stderr)


if __name__ == "__main__":
    main()