file listing with more advanced checkup

This commit is contained in:
Ville Rantanen
2016-12-21 13:59:15 +02:00
parent 120d5547f5
commit 7662a59a7e

View File

@@ -8,6 +8,7 @@ import hashlib
import magic import magic
from argparse import ArgumentParser from argparse import ArgumentParser
import ConfigParser,StringIO,io import ConfigParser,StringIO,io
import datetime
SQLFILE='list_of_files.sqlite' SQLFILE='list_of_files.sqlite'
IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I) IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
@@ -24,7 +25,7 @@ def setup_options():
parser.add_argument("-c",action="store_true",dest="changed",default=False, parser.add_argument("-c",action="store_true",dest="changed",default=False,
help="Modify changed files [%(default)s]") help="Modify changed files [%(default)s]")
parser.add_argument("--check",action="store_true",dest="check",default=False, parser.add_argument("--check",action="store_true",dest="check",default=False,
help="Check md5sums of files. Note that --full affects the comparison. Limit check with -s.") help="Check md5sums of files. Limit check with -s.")
parser.add_argument("-d",action="store_true",dest="delete",default=False, parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]") help="Delete non-existing entries [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
@@ -119,24 +120,60 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime)) VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
return return
def checkdb(options):
    """Check database entries against the files on disk and print a report.

    Every row of the `list` table whose `file` column matches one of the
    search terms is checked: the file must exist and its checksum (as
    computed by get_md5) must equal the stored hash. One "<file> <status>"
    line per row goes to stdout; the detailed listings of differing,
    missing and added files plus a summary go to stderr.

    Attributes read from `options`:
        search    -- list of substrings to match against `file`
                     (empty means "check everything")
        sqlfile   -- path of the SQLite database
        fullfile  -- passed through to get_md5 as its second argument
        (has_changes_additions reads further attributes, e.g. startpath)
    """
    # Use a fallback instead of appending to options.search, so the caller's
    # list is never mutated. An empty search becomes the pattern '%%%',
    # which matches every file name.
    terms=options.search or ['%']
    needle=['%'+i+'%' for i in terms]
    like_query=' OR '.join(['file LIKE ?']*len(needle))
    conn=sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory=str
        db=conn.cursor()
        db.execute("SELECT file,hash,size,date FROM list WHERE "+like_query+" ORDER BY file",needle)
        missing=[]
        differing=[]
        OK_count=0
        for row in db:
            status='OK'
            if os.path.exists(row[0]):
                md5f=get_md5(row[0],options.fullfile)
                if row[1]!=md5f:
                    status='Checksum-difference'
                    differing.append(row)
            else:
                status='Not-found'
                missing.append(row)
            print("%s %s"%(row[0],status))
            if status=='OK':
                OK_count+=1
        if differing:
            print_stderr("----\nDiffering files:")
            # Rows are (file,hash,size,date) tuples: pad to the longest file name.
            pad=str(max([len(x[0]) for x in differing]))
            for f in differing:
                # Stored date/size on the left, current on-disk values on the right.
                print_stderr(("%-"+pad+"s (%s %7s => %s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]),
                                                                 humanize_date(os.path.getmtime(f[0])),
                                                                 humanize_size(os.path.getsize(f[0]))))
        if missing:
            print_stderr("----\nMissing files:")
            pad=str(max([len(x[0]) for x in missing]))
            for f in missing:
                print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
        # exit=False makes the helper collect results instead of exiting on
        # the first hit. Only the additions are reported here.
        (added,_changed)=has_changes_additions(db,options,False)
        if added:
            print_stderr("----\nAdded files:")
            # `added` holds plain filename strings, not rows, so measure the
            # whole string. len(x[0]) would always give a width of 1.
            pad=str(max([len(x) for x in added]))
            for f in added:
                print_stderr(("%-"+pad+"s (%s %7s)")%(f,
                                                      humanize_date(os.path.getmtime(f)),
                                                      humanize_size(os.path.getsize(f))))
        print_stderr("----\nFile check summary:")
        print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),))
        print_stderr("Checksum matches : %d"%(OK_count,))
        print_stderr("Checksum mismatch: %d"%(len(differing),))
        print_stderr("Files missing : %d"%(len(missing),))
        print_stderr("Files added : %d"%(len(added),))
    finally:
        # Release the database handle even if a check or report step raises.
        conn.close()
def clean_dirs(dirs): def clean_dirs(dirs):
for s in dirs[:]: for s in dirs[:]:
@@ -293,15 +330,21 @@ def has_changes(options):
if options.hasadditions or options.haschanges: if options.hasadditions or options.haschanges:
has_changes_additions(db,options) has_changes_additions(db,options)
def has_changes_deleted(db,exit=True):
    """Find database entries whose file no longer exists on disk.

    db   -- cursor-like object: must support execute() and iteration over
            rows whose first column is the file path.
    exit -- when true (default), print 'True' and terminate the process
            with exit status 1 at the first missing file. When false,
            collect the missing paths instead.

    Returns the list of missing file paths (always empty when `exit` is
    true, because the function only returns if nothing was missing).
    """
    db.execute('SELECT file FROM list')
    deleted=[]
    for row in db:
        path=row[0]
        if os.path.exists(path):
            continue
        if exit:
            # Query mode: the first deleted file already answers the question.
            print('True')
            sys.exit(1)
        deleted.append(path)
    return deleted
def has_changes_additions(db,options): def has_changes_additions(db,options,exit=True):
added=[]
changed=[]
for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks): for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
dirs=clean_dirs(dirs) dirs=clean_dirs(dirs)
db_files=get_folder_contents(db,filename_join(path,"",options)+"/") db_files=get_folder_contents(db,filename_join(path,"",options)+"/")
@@ -313,23 +356,34 @@ def has_changes_additions(db,options):
continue continue
#if not is_listed(db,filename): #if not is_listed(db,filename):
if file not in db_files: if file not in db_files:
if exit:
print('True') print('True')
sys.exit(1) sys.exit(1)
else:
added.append(filename)
else: else:
if options.changed: if options.changed:
ftime=os.path.getmtime(filename) ftime=os.path.getmtime(filename)
if not ftime_match(db,filename,ftime): if not ftime_match(db,filename,ftime):
#file content changed #file content changed
if exit:
print('True') print('True')
sys.exit(1) sys.exit(1)
else:
changed.append(filename)
return return (added,changed)
#~ def hash_match(db,filename,hash): #~ def hash_match(db,filename,hash):
#~ db.execute("SELECT hash FROM list where file == ?",(filename,)) #~ db.execute("SELECT hash FROM list where file == ?",(filename,))
#~ count=db.fetchall() #~ count=db.fetchall()
#~ return count[0][0]==hash #~ return count[0][0]==hash
def humanize_date(date):
    """Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS' in local time.

    date -- seconds since the epoch (anything int() accepts; fractions are
            truncated), or None.

    Returns '' for None so that missing dates print as an empty field.
    """
    # Identity test: only the real None means "no date".
    if date is None:
        return ''
    return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
def humanize_size(size,precision=1): def humanize_size(size,precision=1):
if size==None: if size==None:
return 'nan' return 'nan'
@@ -378,6 +432,11 @@ def print_structure(files):
i+=1 i+=1
return return
def print_stderr(s):
    """Write the string s to standard error, add a newline, and flush."""
    err=sys.stderr
    for piece in (s,"\n"):
        err.write(piece)
    err.flush()
def searchdb(sqlfile,needle): def searchdb(sqlfile,needle):
needle=['%'+i+'%' for i in needle] needle=['%'+i+'%' for i in needle]
like_query=' OR '.join(['file LIKE ?' for i in needle]) like_query=' OR '.join(['file LIKE ?' for i in needle])
@@ -418,7 +477,7 @@ def main():
has_changes(options) has_changes(options)
sys.exit(0) sys.exit(0)
if options.check: if options.check:
checkdb(options.sqlfile,options.fullfile,options.search) checkdb(options)
sys.exit(0) sys.exit(0)
if len(options.search)>0 and not options.match: if len(options.search)>0 and not options.match:
searchdb(options.sqlfile,options.search) searchdb(options.sqlfile,options.search)