diff --git a/files/file_list.py b/files/file_list.py index d0374b4..d0eacff 100755 --- a/files/file_list.py +++ b/files/file_list.py @@ -7,7 +7,7 @@ import sqlite3 import subprocess import hashlib import magic -from argparse import ArgumentParser +from argparse import ArgumentParser import ConfigParser,StringIO,io import datetime @@ -37,6 +37,10 @@ def setup_options(): help="Depth of summarization for --du.") parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]") + parser.add_argument("--dup-order",action="store",dest="duplicate_order",default='path', + help = "Order duplicates by a method. (length = path str length)", + choices = ('age','length','file','path') + ) parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False, help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False, @@ -67,6 +71,7 @@ def setup_options(): options.sqlpath=os.path.dirname(os.path.realpath(options.sqlfile)) return options + def add_recurse(options): conn=sqlite3.connect(options.sqlfile) conn.text_factory=str @@ -101,6 +106,7 @@ def add_recurse(options): sys.stdout.write("\n") return + def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False): try: fsize=os.path.getsize(filename) @@ -119,7 +125,7 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False): return except UnicodeDecodeError: mime="NA" - + if change: db.execute("UPDATE list SET date=?, hash=?, size=?, mime=? \ WHERE file=?",(ftime,hash,fsize,mime,filename)) @@ -130,6 +136,7 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False): sys.stdout.write('\r') return + def checkdb(options): needle=options.search if len(needle)==0: @@ -177,14 +184,14 @@ def checkdb(options): print(("%-"+pad+"s (%s %7s)")%(f, humanize_date(os.path.getmtime(f)), humanize_size(os.path.getsize(f)))) - + print("----\nFile check summary:") print("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),)) print("Checksum matches : %d"%(OK_count,)) print("Checksum mismatch: %d"%(len(differing),)) print("Files missing : %d"%(len(missing),)) print("Files added : %d"%(len(added),)) - + def clean_dirs(dirs): for s in dirs[:]: @@ -192,6 +199,7 @@ def clean_dirs(dirs): dirs.remove(s) return dirs + def clean_syms(files,path): nonsyms=[] for f in files: @@ -208,8 +216,8 @@ def createdb(options): size INTEGER, mime TEXT)') db.execute('CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\ object TEXT)') - conn.commit() - + conn.commit() + config = ConfigParser.RawConfigParser() config.add_section("General") config.set("General","Relative",str(options.relative)) @@ -217,9 +225,10 @@ def createdb(options): store=StringIO.StringIO() config.write(store) db.execute("INSERT INTO config (object) values (?)",(store.getvalue(),)) - conn.commit() + conn.commit() return + def delete_nonexisting(sqlfile,options): conn=sqlite3.connect(sqlfile) conn.text_factory=str @@ -236,10 +245,11 @@ def delete_nonexisting(sqlfile,options): delete=True if delete: print('removing.. '+row[0]) - dbdel.execute("DELETE FROM list where file == ?",(row[0],)) + dbdel.execute("DELETE FROM list where file == ?",(row[0],)) conn.commit() return + def disk_used(options): conn=sqlite3.connect(options.sqlfile) conn.text_factory=str @@ -262,17 +272,19 @@ def disk_used(options): else: sizes[ entries.index(start_path) ]+=row[0] for entry in zip(sizes,entries): - print("| ".join([ str(entry[0]).ljust(14), - humanize_size(entry[0]).rjust(8), + print("| ".join([ str(entry[0]).ljust(14), + humanize_size(entry[0]).rjust(8), entry[1]])) - + + def filename_join(path,name,options): filename=os.path.realpath(os.path.join(path,name)) if options.relative: return os.path.relpath(filename, options.sqlpath) return filename -def find_duplicates(sqlfile): + +def find_duplicates(sqlfile, order): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() @@ -285,7 +297,7 @@ def find_duplicates(sqlfile): flist=[] for row in dbh: flist.append(row) - flist.sort(key=lambda file: file[0]) + sort_by_method(flist, order) duphash.append((hash, flist)) duphash.sort(key=lambda file: file[1][0]) return duphash @@ -317,6 +329,7 @@ def get_folder_contents(db,path): files.append(base) return files + def get_md5(filename,fullfile=False): ''' returns content based hash, only first 50Mb is read, unless user wants the whole file ''' fsize=os.path.getsize(filename) @@ -326,8 +339,8 @@ def get_md5(filename,fullfile=False): block_size=2**24 percents_per_block=100/(float(fsize)/block_size) md5 = hashlib.md5() - with open(filename,'rb') as f: - for chunk in iter(lambda: f.read(block_size), b''): + with open(filename,'rb') as f: + for chunk in iter(lambda: f.read(block_size), b''): sys.stderr.write('\r %s (%02d%%)'%(ANIM[anim_i%anim_len],int(anim_i*percents_per_block))) sys.stderr.flush() anim_i+=1 @@ -347,7 +360,7 @@ def has_changes(options): has_changes_deleted(db) if options.hasadditions or options.haschanges: has_changes_additions(db,options) - + def has_changes_deleted(db,exit=True): db.execute('SELECT file FROM list') deleted=[] @@ -360,6 +373,7 @@ def has_changes_deleted(db,exit=True): deleted.append(row[0]) return deleted + def has_changes_additions(db,options,exit=True): added=[] changed=[] @@ -389,7 +403,7 @@ def has_changes_additions(db,options,exit=True): sys.exit(1) else: changed.append(filename) - + return (added,changed) #~ def hash_match(db,filename,hash): @@ -402,6 +416,7 @@ def humanize_date(date): return '' return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S') + def humanize_size(size,precision=1): if size==None: return 'nan' @@ -414,11 +429,13 @@ def humanize_size(size,precision=1): defPrecision=precision return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) + def is_listed(db,filename): db.execute("SELECT COUNT(*) FROM list where file == ?",(filename,)) count=db.fetchall() return count[0][0]>0 + def matchdb(sqlfile,needle,helper): needle=needle.lower() import difflib as dl @@ -441,20 +458,27 @@ def matchdb(sqlfile,needle,helper): best_match=row[0] print(best_match) -def print_structure(files): +def print_duplicates(files): for hash in files: #print(hash[0]) i=1 for f in hash[1]: - print "%(i)d: %(x)d:%(f)s " % {'i':i, 'f':f[0], 'x':f[1]} + print("%(i)d|%(s)s|%(d)s|%(f)s " % { + 'i':i, + 'f':f[0], + 'd': humanize_date(f[2]), + 's': humanize_size(f[1]) + }) i+=1 return + def print_stderr(s): sys.stderr.write(s) sys.stderr.write("\n") sys.stderr.flush() + def searchdb(sqlfile,needle): needle=['%'+i+'%' for i in needle] like_query=' OR '.join(['file LIKE ?' for i in needle]) @@ -465,6 +489,18 @@ def searchdb(sqlfile,needle): for row in db: print(row[0]) + +def sort_by_method(flist, order): + if order == 'path': + flist.sort(key=lambda file: file[0]) + if order == 'file': + flist.sort(key=lambda file: os.path.basename(file[0])) + if order == 'age': + flist.sort(key=lambda file: file[2]) + if order == 'length': + flist.sort(key=lambda file: len(file[0])) + + def stored_options(options): try: conn=sqlite3.connect(options.sqlfile) @@ -483,6 +519,7 @@ def stored_options(options): return options + def main(): options=setup_options(); @@ -507,15 +544,15 @@ def main(): disk_used(options) sys.exit(0) if options.delete: - print('Deleting entries...') + print('Deleting entries...') delete_nonexisting(options.sqlfile,options) if options.add or options.changed: print('Adding '+options.startpath+' entries...') add_recurse(options) if options.duplicate: - files=find_duplicates(options.sqlfile) - print_structure(files) - + files=find_duplicates(options.sqlfile, options.duplicate_order) + print_duplicates(files) + sys.exit(0) main()