From 7662a59a7ee7296bc6c55ba7b334004b600cef66 Mon Sep 17 00:00:00 2001
From: Ville Rantanen
Date: Wed, 21 Dec 2016 13:59:15 +0200
Subject: [PATCH] file listing with more advanced checkup

---
Review notes (text in this section is ignored when the patch is applied):

What the patch does
  * checkdb(options) now takes the parsed options object and reads
    options.search, options.sqlfile and options.fullfile from it.  It still
    prints "<file> <status>" for every matching row on stdout, and additionally
    writes to stderr: a list of files whose checksum differs (stored date/size
    => current date/size), a list of files missing from disk, a list of files
    on disk that are not in the database, and a summary with counts.
  * has_changes_deleted(db, exit=True) and
    has_changes_additions(db, options, exit=True): with exit=True (default)
    they keep the old behaviour, printing 'True' and exiting with status 1 on
    the first hit.  With exit=False they collect instead:
    has_changes_deleted returns the list of missing paths,
    has_changes_additions returns the tuple (added, changed).  'changed' is
    only filled when options.changed is set.
  * humanize_date(date) returns '' for None, otherwise the timestamp
    (truncated to int) formatted as 'YYYY-MM-DD HH:MM:SS' via
    datetime.fromtimestamp, i.e. in local time.
  * print_stderr(s) writes s plus a newline to stderr and flushes.

Changes made during review (both are in-place edits of added lines, so the
hunk line counts and the diffstat below are unchanged):
  * checkdb, "Added files" report: the pad width was computed with
    len(x[0]).  'added' holds plain filename strings, not database rows, so
    x[0] is the first character and the width was always 1.  Now len(x).
  * humanize_date: 'date==None' replaced by the identity test 'date is None'.

Caveats
  * The line structure and indentation of this patch were reconstructed from a
    whitespace-collapsed copy.  Context-line indentation is best effort; if a
    hunk fails to match, apply with whitespace differences ignored.  Runs of
    spaces inside string literals (e.g. the summary labels) could not be
    recovered and are kept as single spaces.
  * Because two added lines were edited, the post-image blob id on the
    'index' line no longer describes the resulting file.

 files/file_list.py | 91 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 16 deletions(-)

diff --git a/files/file_list.py b/files/file_list.py
index d0d6853..21eadc6 100755
--- a/files/file_list.py
+++ b/files/file_list.py
@@ -8,6 +8,7 @@ import hashlib
 import magic
 from argparse import ArgumentParser
 import ConfigParser,StringIO,io
+import datetime
 
 SQLFILE='list_of_files.sqlite'
 IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
@@ -24,7 +25,7 @@ def setup_options():
     parser.add_argument("-c",action="store_true",dest="changed",default=False,
                         help="Modify changed files [%(default)s]")
     parser.add_argument("--check",action="store_true",dest="check",default=False,
-                        help="Check md5sums of files. Note that --full affects the comparison. Limit check with -s.")
+                        help="Check md5sums of files. Limit check with -s.")
     parser.add_argument("-d",action="store_true",dest="delete",default=False,
                         help="Delete non-existing entries [%(default)s]")
     parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
@@ -119,24 +120,60 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
                 VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
     return
 
-def checkdb(sqlfile,fullFile,needle):
+def checkdb(options):
+    needle=options.search
     if len(needle)==0:
         needle.append('%')
     needle=['%'+i+'%' for i in needle]
     like_query=' OR '.join(['file LIKE ?' for i in needle])
-    conn=sqlite3.connect(sqlfile)
+    conn=sqlite3.connect(options.sqlfile)
     conn.text_factory=str
     db=conn.cursor()
-    db.execute("SELECT file,hash FROM list WHERE "+like_query+" ORDER BY file",needle)
+    db.execute("SELECT file,hash,size,date FROM list WHERE "+like_query+" ORDER BY file",needle)
+    missing=[]
+    differing=[]
+    OK_count=0
     for row in db:
         status='OK'
         if os.path.exists(row[0]):
-            md5f=get_md5(row[0],fullFile)
+            md5f=get_md5(row[0],options.fullfile)
             if row[1]!=md5f:
                 status='Checksum-difference'
+                differing.append(row)
         else:
             status='Not-found'
+            missing.append(row)
         print("%s %s"%(row[0],status))
+        if status=='OK':
+            OK_count+=1
+    if len(differing)>0:
+        print_stderr("----\nDiffering files:")
+        pad=str(max([len(x[0]) for x in differing]))
+        for f in differing:
+            print_stderr(("%-"+pad+"s (%s %7s => %s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]),
+                                                      humanize_date(os.path.getmtime(f[0])),
+                                                      humanize_size(os.path.getsize(f[0]))))
+    if len(missing)>0:
+        print_stderr("----\nMissing files:")
+        pad=str(max([len(x[0]) for x in missing]))
+        for f in missing:
+            print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
+    (added,changed)=has_changes_additions(db,options,False)
+    if len(added)>0:
+        print_stderr("----\nAdded files:")
+        pad=str(max([len(x) for x in added]))
+        for f in added:
+            print_stderr(("%-"+pad+"s (%s %7s)")%(f,
+                                                  humanize_date(os.path.getmtime(f)),
+                                                  humanize_size(os.path.getsize(f))))
+
+    print_stderr("----\nFile check summary:")
+    print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),))
+    print_stderr("Checksum matches : %d"%(OK_count,))
+    print_stderr("Checksum mismatch: %d"%(len(differing),))
+    print_stderr("Files missing : %d"%(len(missing),))
+    print_stderr("Files added : %d"%(len(added),))
+
 
 def clean_dirs(dirs):
     for s in dirs[:]:
@@ -293,15 +330,21 @@ def has_changes(options):
     if options.hasadditions or options.haschanges:
         has_changes_additions(db,options)
 
-def has_changes_deleted(db):
+def has_changes_deleted(db,exit=True):
     db.execute('SELECT file FROM list')
+    deleted=[]
     for row in db:
         if not os.path.exists(row[0]):
-            print('True')
-            sys.exit(1)
-    return
+            if exit:
+                print('True')
+                sys.exit(1)
+            else:
+                deleted.append(row[0])
+    return deleted
 
-def has_changes_additions(db,options):
+def has_changes_additions(db,options,exit=True):
+    added=[]
+    changed=[]
     for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
         dirs=clean_dirs(dirs)
         db_files=get_folder_contents(db,filename_join(path,"",options)+"/")
@@ -313,23 +356,34 @@ def has_changes_additions(db,options):
                 continue
             #if not is_listed(db,filename):
             if file not in db_files:
-                print('True')
-                sys.exit(1)
+                if exit:
+                    print('True')
+                    sys.exit(1)
+                else:
+                    added.append(filename)
             else:
                 if options.changed:
                     ftime=os.path.getmtime(filename)
                     if not ftime_match(db,filename,ftime):
                         #file content changed
-                        print('True')
-                        sys.exit(1)
+                        if exit:
+                            print('True')
+                            sys.exit(1)
+                        else:
+                            changed.append(filename)
 
-    return
+    return (added,changed)
 
 #~ def hash_match(db,filename,hash):
 #~     db.execute("SELECT hash FROM list where file == ?",(filename,))
 #~     count=db.fetchall()
 #~     return count[0][0]==hash
 
+def humanize_date(date):
+    if date is None:
+        return ''
+    return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
+
 def humanize_size(size,precision=1):
     if size==None:
         return 'nan'
@@ -378,6 +432,11 @@ def print_structure(files):
         i+=1
     return
 
+def print_stderr(s):
+    sys.stderr.write(s)
+    sys.stderr.write("\n")
+    sys.stderr.flush()
+
 def searchdb(sqlfile,needle):
     needle=['%'+i+'%' for i in needle]
     like_query=' OR '.join(['file LIKE ?' for i in needle])
@@ -418,7 +477,7 @@ def main():
         has_changes(options)
         sys.exit(0)
     if options.check:
-        checkdb(options.sqlfile,options.fullfile,options.search)
+        checkdb(options)
         sys.exit(0)
     if len(options.search)>0 and not options.match:
         searchdb(options.sqlfile,options.search)