From cc495f18470fd9b4f5050e554af10e2125d84074 Mon Sep 17 00:00:00 2001 From: q Date: Wed, 11 Feb 2015 21:45:34 +0200 Subject: [PATCH] image_list ready for production.. --- image_list.py | 333 ++++++++++++----- image_list_beta.py | 903 --------------------------------------------- 2 files changed, 231 insertions(+), 1005 deletions(-) delete mode 100755 image_list_beta.py diff --git a/image_list.py b/image_list.py index 5e46a69..733691e 100755 --- a/image_list.py +++ b/image_list.py @@ -23,6 +23,8 @@ def setup_options(): help="Modify changed files [%(default)s]") parser.add_argument("-d",action="store_true",dest="delete",default=False, help="Delete non-existing entries [%(default)s]") + parser.add_argument("-D",action="store_true",dest="delete_data",default=False, + help="Delete unused metadata [%(default)s]") parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, help="Print directory sizes. Argument is the path where directories are listed from.") parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, @@ -52,7 +54,9 @@ def setup_options(): "If value is a filename, search similar to that image. "+ "Append with ',value' to limit similarity. "+ "The output columns: SD SimilarityDiff., CD ColorDiff., "+ - "RD AspectRatioDiff.,Shp SharpnessIndex.") + "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.") + parser.add_argument("-t",type=str,dest="tag",default=None, + help="Give file a tag. If argument is a file name, print the tags of the file.") parser.add_argument("--viewer",type=str,dest="viewer",default=None, help="Program to view images, %%f refers to filename(s)."+ "If '1', defaults to: 'geeqie -l %%f'") @@ -62,21 +66,29 @@ def setup_options(): options=parser.parse_args() BADDIRS.extend(options.exclude) - if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search or options.diskused: + if options.duplicate or \ + options.searchsmall or \ + options.measure or \ + options.nearestcolor or \ + options.similarity!=None or \ + options.search or \ + options.diskused: options.add=not options.add + if options.tag: + options.add=False return options def createdb(sqlfile): conn=sqlite3.connect(sqlfile) db=conn.cursor() conn.text_factory=str - db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\ - file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\ - width INTEGER,height INTEGER,size INTEGER,\ + db.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,\ + description TEXT,portrait NUMERIC, \ + width INTEGER,height INTEGER,\ fingerprint TEXT,sharpness NUMERIC,\ R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') - db.execute('CREATE TABLE descriptions (file TEXT,path TEXT, hash TEXT,\ - description TEXT)') + db.execute('CREATE TABLE list (file TEXT,hash TEXT,date INTEGER,size INTEGER)') + db.execute('CREATE TABLE tags (tag TEXT,hash TEXT)') conn.commit() return @@ -93,7 +105,22 @@ def delete_nonexisting(sqlfile): dbdel.execute("DELETE FROM list where file == ?",(row[0],)) conn.commit() return - + +def delete_data(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbdel=conn.cursor() + db.execute('''SELECT hash FROM data EXCEPT SELECT hash FROM list''') + for row in db: + dbdel.execute("DELETE FROM data where hash == ?",(row[0],)) + conn.commit() + db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''') + for row in db: + dbdel.execute("DELETE FROM tags where hash == ?",(row[0],)) + conn.commit() + return + def delete_files(files): ''' Actually deletes files! ''' print_structure(files) @@ -122,7 +149,7 @@ def add_recurse(options): for file in files: if IMGMATCH.match(file): filename=os.path.realpath(os.path.join(path,file)) - #if not is_listed(db,filename): + if file not in db_files: if options.add: try: @@ -133,7 +160,7 @@ def add_recurse(options): sys.exit(1) else: if options.changed: - ftime=os.path.getmtime(filename) + ftime=int(os.path.getmtime(filename)) #hash=get_md5(filename) #if not hash_match(db,filename,hash): if not ftime_match(db,filename,ftime): @@ -160,28 +187,61 @@ def add_single(conn,filename,change=False,hash=None,minsize=0): db=conn.cursor() if hash==None: hash=get_md5(filename) - ftime=os.path.getmtime(filename) + ftime=int(os.path.getmtime(filename)) fsize=os.path.getsize(filename) if change: - db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=?, \ - fingerprint=NULL, sharpness=NULL, R=NULL, G=NULL, B=NULL, BR=NULL, BG=NULL, BB=NULL, \ - size=? WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],fsize,filename)) + db.execute("UPDATE list SET hash=?, date=? ,size=? \ + WHERE file=?",(hash,ftime,fsize,filename)) print("changing: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) else: - db.execute("INSERT INTO list(file,date,portrait,hash,width,height,size)\ - VALUES(?,?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1],fsize)) + db.execute("INSERT INTO list(file,hash,size,date)\ + VALUES(?,?,?,?)",(filename,hash,fsize,ftime)) print("adding: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) + + if hash_in_data(conn.cursor(),hash): + if change: + db.execute("UPDATE data SET portrait=?, width=? ,height=?, \ + fingerprint=NULL, sharpness=NULL, R=NULL, G=NULL, B=NULL, BR=NULL, BG=NULL, BB=NULL \ + WHERE hash = ?",(portrait,dims[0],dims[1],hash)) + else: + db.execute("INSERT INTO data(hash,portrait,width,height) \ + VALUES(?,?,?,?)",(hash,portrait,dims[0],dims[1])) + return +def add_tag(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + hash=file2hash(conn.cursor(), os.path.realpath(options.startpath)) + if hash==None: + print("Image not found "+os.path.realpath(options.startpath)) + return + db=conn.cursor() + db.execute("INSERT INTO tags(hash,tag) \ + VALUES(?,?)",(hash,options.tag)) + conn.commit() + print(options.startpath+":\""+options.tag+"\"") + +def print_tag(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + hash=file2hash(conn.cursor(), os.path.realpath(options.tag)) + if hash==None: + print("Image not found "+os.path.realpath(options.tag)) + return + db=conn.cursor() + db.execute("SELECT DISTINCT tag FROM tags WHERE hash = ?",(hash,)) + print( ",".join( row[0] for row in db )) + def random_lists(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() - db.execute('SELECT file FROM list WHERE portrait=0') + db.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=0') lfile=open('landscape.list.s','w') for row in db: lfile.write(row[0]+'\n') - db.execute('SELECT file FROM list WHERE portrait=1') + db.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=1') pfile=open('portrait.list.s','w') for row in db: pfile.write(row[0]+'\n') @@ -211,7 +271,7 @@ def get_folder_contents(db,path): return files def ftime_match(db,filename,ftime): - db.execute("SELECT date FROM list where file == ?",(filename,)) + db.execute("SELECT date FROM list WHERE file == ?",(filename,)) count=db.fetchall() return count[0][0]==ftime @@ -220,9 +280,35 @@ def hash_match(db,filename,hash): count=db.fetchall() return count[0][0]==hash +def hash2file(db,hash,firstOnly=True): + db.execute("SELECT file FROM list where hash == ?",(hash,)) + names=db.fetchall() + if len(names)==0: + return None + if firstOnly: + return names[0][0] + else: + return [x[0] for x in names] + +def file2hash(db,filename): + db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,)) + names=db.fetchall() + if len(names)==0: + return None + return names[0][0] + +def hash_in_data(db,hash): + db.execute("SELECT hash FROM data where hash == ? LIMIT 1",(hash,)) + hashes=db.fetchall() + return len(hashes)!=0 +def hash_in_list(db,hash): + db.execute("SELECT hash FROM list where hash == ? LIMIT 1",(hash,)) + hashes=db.fetchall() + return len(hashes)!=0 + def get_md5(filename): - ''' Return hash of the first 5 megabytes of the file ''' - return hashlib.md5(open(filename,'rb').read(1024*1024*5)).hexdigest() + ''' Return hash of the first 15 megabytes of the file ''' + return hashlib.md5(open(filename,'rb').read(1024*1024*15)).hexdigest() def get_dims(filename): idargs=['identify','-format','%wx%h',filename+'[0]'] @@ -243,22 +329,24 @@ def append_colors(sqlfile): conn.text_factory=str db=conn.cursor() dbh=conn.cursor() - db.execute("SELECT file,R FROM list WHERE R IS NULL ORDER BY file") - i=0 + count=dbh.execute("SELECT COUNT(hash) FROM data WHERE R IS NULL").fetchall()[0][0] + db.execute("SELECT hash,R FROM data WHERE R IS NULL") dirname_old="" - for row in db: - colors=get_colors(row[0]) - dbh.execute("UPDATE list SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ - WHERE file=?",(colors[0][0],colors[0][1],colors[0][2], + for i,row in enumerate(db): + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + colors=get_colors(filename) + dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ + WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2], colors[1][0],colors[1][1],colors[1][2],row[0])) - filename=" "+os.path.basename(row[0]) - dirname=os.path.dirname(row[0]) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) if dirname!=dirname_old: dirname_old=dirname - filename=row[0] - print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f':filename, 'r':colors[0][0], + filebase=filename + print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0], 'g':colors[0][1], 'b':colors[0][2]}) - i+=1 if (i%50==0): conn.commit(); conn.commit() @@ -273,19 +361,24 @@ def find_color_nearest(opts): src=[float(i) for i in src.strip().strip('"').split(',')] if len(src)==3: src.append(1) - db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", + db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", (src[0],src[1],src[2],src[3])) hits=[] for hit in db: - hits.append(hit) - + fs=hash2file(conn.cursor(),hit[0],False) + if fs==None: + continue + if hit[1]==None: + print("Color information not found. Run again with --measure.") + return + for f in fs: + hits.append((f,hit[1],hit[2],hit[3],hit[4])) + file_len=str(max([len(x[0]) for x in hits])) - for c in range(len(hits)): - print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], - hits[c][1], - hits[c][2], - hits[c][3], - hits[c][4])) + for h in range(len(hits)): + if h>=src[3]: + break + print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h])) if opts.viewer: fnames=[x[0] for x in hits] call_viewer(opts, fnames) @@ -295,41 +388,49 @@ def find_color_nearest_file(opts): """ Find closest matching images to given files with RGB color """ cmp=opts.nearestcolor.rsplit(",",1) if len(cmp)==1: - thr=1 + thr=2 else: - thr=int(cmp[1]) - cmp=cmp[0] + thr=int(cmp[1])+1 + cmp=os.path.realpath(cmp[0]) conn=sqlite3.connect(opts.sqlfile) conn.text_factory=str db=conn.cursor() if is_listed(db, cmp): - db1.execute("SELECT file,fingerprint,sharpness,width,height,BR,BG,BB FROM list WHERE file=?",(cmp,)) - for hit1 in db: + hash=file2hash(conn.cursor(), cmp) + db1=conn.cursor() + db1.execute("SELECT hash,fingerprint,sharpness,width,height,BR,BG,BB FROM data WHERE hash=?",(hash,)) + for hit1 in db1: fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] src=hit1[5:8] else: + hash=get_md5(cmp) fp=int(get_fingerprint(cmp)) sp=get_sharpness(cmp) dims=get_dims(cmp) src=get_colors(cmp)[1] src=[float(i) for i in src] - db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", + db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", (src[0],src[1],src[2],thr)) hits=[] for hit in db: - if hit[0]==cmp: + fs=hash2file(conn.cursor(),hit[0],False) + if fs==None: continue - hits.append(hit) + if hit[1]==None: + print("Color information not found. Run again with --measure.") + return + for f in fs: + if f==cmp: + continue + hits.append((f,hit[1],hit[2],hit[3],hit[4])) file_len=str(max([len(x[0]) for x in hits])) - for c in range(len(hits)): - print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], - hits[c][1], - hits[c][2], - hits[c][3], - hits[c][4])) + for h in range(len(hits)): + if h>=thr-1: + break + print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]) ) if opts.viewer: fnames=[x[0] for x in hits] call_viewer(opts, fnames) @@ -371,20 +472,22 @@ def append_fingerprints(sqlfile): conn.text_factory=str db=conn.cursor() dbh=conn.cursor() - db.execute("SELECT file FROM list WHERE fingerprint IS NULL ORDER BY file") - i=0 + count=dbh.execute("SELECT COUNT(hash) FROM data WHERE fingerprint IS NULL").fetchall()[0][0] + db.execute("SELECT hash FROM data WHERE fingerprint IS NULL") dirname_old="" - for row in db: - fp=get_fingerprint(row[0]) - dbh.execute("UPDATE list SET fingerprint=? \ - WHERE file=?",(fp,row[0])) - filename=" "+os.path.basename(row[0]) - dirname=os.path.dirname(row[0]) + for i,row in enumerate(db): + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + fp=get_fingerprint(filename) + dbh.execute("UPDATE data SET fingerprint=? \ + WHERE hash=?",(fp,row[0])) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) if dirname!=dirname_old: dirname_old=dirname - filename=row[0] - i+=1 - print("%(nr)i %(f)s" % {'f':filename, 'nr':i}) + filebase=filename + print("%(nr)i %(f)s" % {'f':filebase, 'nr':count-i}) if (i%50==0): conn.commit(); conn.commit() @@ -413,21 +516,24 @@ def find_fingerprint_similar(opts): conn.text_factory=str db1=conn.cursor() db2=conn.cursor() - db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") hits=[] hit_list=[] i=None for i,hit1 in enumerate(db1): - if hit1[0] in hit_list: - continue cmp=hit1[0] + cmpf=hash2file(conn.cursor(),hit1[0]) + if cmpf==None: + continue + if cmpf in hit_list: + continue fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] pixels=dims[0]*dims[1] colors=hit1[5:8] - db2.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") - this1=[ [cmp, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] + db2.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") + this1=[ [cmpf, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] for hit2 in db2: if hit2[0]==cmp: continue @@ -438,8 +544,14 @@ def find_fingerprint_similar(opts): get_color_diff(hit2[5:8],colors), int(hit2[3])*int(hit2[4]), get_ratio_diff(hit2[3:5],dims)] - this1.append(this2) - hit_list.append(hit2[0]) + fs=hash2file(conn.cursor(), hit2[0], False) + if fs==None: + continue + for f in fs: + thisf=this2 + thisf[0]=f + this1.append(thisf) + hit_list.append(f) this1.sort(key=lambda x: x[1]) if len(this1)>1: hits.append(this1) @@ -473,28 +585,30 @@ def find_fingerprint_nearest(opts): conn.text_factory=str db1=conn.cursor() if is_listed(db1, cmp): - db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE file=?",(cmp,)) + hash=file2hash(conn.cursor(),cmp) + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,)) for hit1 in db1: fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] colors=hit1[5:8] else: + hash=get_md5(cmp) fp=int(get_fingerprint(cmp)) sp=get_sharpness(cmp) dims=get_dims(cmp) colors=get_colors(cmp)[0] - db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") this=['',thr,0,0,0,0,0] hit1=None for i,hit1 in enumerate(db1): - if hit1[0] == cmp: + if hit1[0] == hash: continue similarity=bin(fp^int(hit1[1])).count('1') if similarity4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) @@ -515,28 +630,29 @@ def find_fingerprint_nearest(opts): if opts.viewer: call_viewer(opts, (cmp,this[0])) - def append_sharpness(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() - db.execute("SELECT file FROM list WHERE sharpness IS NULL ORDER BY file") - i=0 + count=dbh.execute("SELECT COUNT(hash) FROM data WHERE sharpness IS NULL").fetchall()[0][0] + db.execute("SELECT hash FROM data WHERE sharpness IS NULL") dirname_old="" - for row in db: - sp=get_sharpness(row[0]) - dbh.execute("UPDATE list SET sharpness=? \ - WHERE file=?",(sp,row[0])) - filename=" "+os.path.basename(row[0]) - dirname=os.path.dirname(row[0]) + for i,row in enumerate(db): + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + sp=get_sharpness(filename) + dbh.execute("UPDATE data SET sharpness=? \ + WHERE hash=?",(sp,row[0])) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) if dirname!=dirname_old: dirname_old=dirname - filename=row[0] - i+=1 - print("%(nr)i %(f)s %(s)f" % {'f':filename, 'nr':i,'s':sp}) - if (i%50==0): + filebase=filename + print("%(nr)i %(f)s %(s)f" % {'f':filebase, 'nr':count-i,'s':sp}) + if (i%25==0): conn.commit(); conn.commit() @@ -561,11 +677,12 @@ def searchdb(sqlfile,needle): conn.text_factory=str db=conn.cursor() dbh=conn.cursor() - db.execute("SELECT file,width,height,date FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) + db.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) results=[] flist=[] for row in db: - results.append(row) + data=dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(row[1],)).fetchall() + results.append([row[0], data[0][1],data[0][2]]) flist.append(('search',results)) return flist @@ -616,14 +733,17 @@ def find_duplicates(sqlfile,search): conn.text_factory=str db=conn.cursor() dbh=conn.cursor() + dbf=conn.cursor() db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,)) duphash=[] for row in db: hash=row[0] - dbh.execute("SELECT file,width,height,date FROM list WHERE hash = ?",(hash,)) + dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(hash,)) flist=[] - for row in dbh: - flist.append(row) + for h in dbh: + dbf.execute("SELECT file,date FROM list WHERE hash = ?",(hash,)) + for f in dbf: + flist.append([f[0],h[1],h[2],f[1]]) flist.sort(key=lambda file: file[3]) duphash.append((hash, flist)) duphash.sort(key=lambda file: file[1][0]) @@ -633,11 +753,15 @@ def find_smalls(minsize,sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() - db.execute("SELECT file,width,height FROM list WHERE width < ? OR height < ?",(minsize,minsize)) + db.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?",(minsize,minsize)) smalls=[] flist=[] for row in db: - smalls.append(row) + fs=hash2file(conn.cursor(), row[0], False) + if fs==None: + continue + for f in fs: + smalls.append([f, row[1], row[2]]) flist.append(('smalls',smalls)) return flist @@ -667,10 +791,8 @@ def disk_used(options): def print_structure(files): for hash in files: #print(hash[0]) - i=1 - for f in hash[1]: - print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i, 'f':f[0], 'x':f[1], 'y':f[2]}) - i+=1 + for i,f in enumerate(hash[1]): + print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i+1, 'f':f[0], 'x':f[1], 'y':f[2]}) return def print_dup_structure(files,opts): @@ -723,6 +845,9 @@ def main(): if options.delete: print('Deleting entries...') delete_nonexisting(options.sqlfile) + if options.delete_data: + print('Deleting metadata...') + delete_data(options.sqlfile) if options.add or options.changed: print('Adding entries...') add_recurse(options) @@ -763,7 +888,11 @@ def main(): if options.diskused: disk_used(options) #print(files) - + if options.tag: + if options.startpath==".": + print_tag(options) + else: + add_tag(options) sys.exit(0) if __name__ == "__main__": diff --git a/image_list_beta.py b/image_list_beta.py deleted file mode 100755 index 80f47c8..0000000 --- a/image_list_beta.py +++ /dev/null @@ -1,903 +0,0 @@ -#!/usr/bin/python -from __future__ import print_function -import sys -import os -import re -import sqlite3 -import subprocess -import hashlib -import traceback -from argparse import ArgumentParser - -SQLFILE='list_of_images.sqlite' -DESCFILE='descriptions.csv' -IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$',re.I) -BADDIRS=['_tn','_med'] -MINSIZE=0 - -def setup_options(): - parser=ArgumentParser(description="Maintains the list of images sqlite file") - parser.add_argument("-a",action="store_false",dest="add",default=True, - help="Do not add new files [%(default)s]") - parser.add_argument("-c",action="store_true",dest="changed",default=False, - help="Modify changed files [%(default)s]") - parser.add_argument("-d",action="store_true",dest="delete",default=False, - help="Delete non-existing entries [%(default)s]") - parser.add_argument("-D",action="store_true",dest="delete_data",default=False, - help="Delete unused metadata [%(default)s]") - parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, - help="Print directory sizes. Argument is the path where directories are listed from.") - parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, - help="Depth of summarization for --du.") - parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, - help="SQL file name to use [%(default)s]") - parser.add_argument("-l",action="store_true",dest="symlinks",default=False, - help="Follow symbolic links [%(default)s]") - parser.add_argument("-m",type=int,dest="minsize",default=MINSIZE, - help="Minimum pixel width/height of stored image [%(default)s]") - parser.add_argument("-r",action="store_true",dest="random",default=False, - help="Create randomized files for landscape and portrait images [%(default)s]") - parser.add_argument("-s",type=str,dest="search",default=False, - help="Search list based on path pattern") - parser.add_argument("--measure",action="store_true",dest="measure",default=False, - help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]") - parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False, - help="Search list for nearest ambient color. format: R,G,B in float 0-1. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.") - parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, - help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]") - parser.add_argument("--del",action="store_true",dest="deleteFiles",default=False, - help="Delete files listed with --small. [%(default)s]") - parser.add_argument("--small",action="store_true",dest="searchsmall",default=False, - help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]") - parser.add_argument("--similar",type=str,dest="similarity",default=None, - help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "+ - "If value is a filename, search similar to that image. "+ - "Append with ',value' to limit similarity. "+ - "The output columns: SD SimilarityDiff., CD ColorDiff., "+ - "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.") - parser.add_argument("-t",type=str,dest="tag",default=None, - help="Give file a tag. If argument is a file name, print the tags of the file.") - parser.add_argument("--viewer",type=str,dest="viewer",default=None, - help="Program to view images, %%f refers to filename(s)."+ - "If '1', defaults to: 'geeqie -l %%f'") - parser.add_argument("-x",action="append",dest="exclude",default=[], - help="Exclude folder name from the lists. This option may be issued several times.") - parser.add_argument('startpath', action="store",default='.', nargs='?') - - options=parser.parse_args() - BADDIRS.extend(options.exclude) - if options.duplicate or \ - options.searchsmall or \ - options.measure or \ - options.nearestcolor or \ - options.similarity!=None or \ - options.search or \ - options.diskused: - options.add=not options.add - if options.tag: - options.add=False - return options - -def createdb(sqlfile): - conn=sqlite3.connect(sqlfile) - db=conn.cursor() - conn.text_factory=str - db.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,\ - description TEXT,portrait NUMERIC, \ - width INTEGER,height INTEGER,\ - fingerprint TEXT,sharpness NUMERIC,\ - R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') - db.execute('CREATE TABLE list (file TEXT,hash TEXT,date INTEGER,size INTEGER)') - db.execute('CREATE TABLE tags (tag TEXT,hash TEXT)') - conn.commit() - return - -def delete_nonexisting(sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - #conn.row_factory=sqlite3.Row - db=conn.cursor() - dbdel=conn.cursor() - db.execute('SELECT file FROM list') - for row in db: - if not os.path.exists(row[0]): - print('removing.. '+row[0]) - dbdel.execute("DELETE FROM list where file == ?",(row[0],)) - conn.commit() - return - -def delete_data(sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbdel=conn.cursor() - db.execute('''SELECT hash FROM data EXCEPT SELECT hash FROM list''') - for row in db: - dbdel.execute("DELETE FROM data where hash == ?",(row[0],)) - conn.commit() - db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''') - for row in db: - dbdel.execute("DELETE FROM tags where hash == ?",(row[0],)) - conn.commit() - return - -def delete_files(files): - ''' Actually deletes files! ''' - print_structure(files) - - doit=confirm(prompt="Sure to delete these files?") - if doit: - print("now delling") - for hash in files: - for f in hash[1]: - print(f[0]) - os.remove(f[0]) - return - -def add_recurse(options): - conn=sqlite3.connect(options.sqlfile) - conn.text_factory=str - db=conn.cursor() - for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks): - print('Checking '+path) - dirs=clean_dirs(dirs) - if not options.symlinks: - files=clean_syms(files) - files.sort() - dirs.sort() - db_files=get_folder_contents(db,os.path.realpath(path)+'/') - for file in files: - if IMGMATCH.match(file): - filename=os.path.realpath(os.path.join(path,file)) - - if file not in db_files: - if options.add: - try: - add_single(conn,filename,change=False,minsize=options.minsize) - except: - print('error adding file: '+filename) - traceback.print_exc(file=sys.stdout) - sys.exit(1) - else: - if options.changed: - ftime=int(os.path.getmtime(filename)) - #hash=get_md5(filename) - #if not hash_match(db,filename,hash): - if not ftime_match(db,filename,ftime): - #file content changed - try: - add_single(conn,filename,change=True,minsize=options.minsize) - except: - print('error changing file: '+filename) - traceback.print_exc(file=sys.stdout) - sys.exit(1) - # if file mentioned, and hash same, no need to change entry - conn.commit() - return - -def add_single(conn,filename,change=False,hash=None,minsize=0): - dims=get_dims(filename) - if int(dims[0])0 - -def get_folder_contents(db,path): - ''' return the contents of the folder ''' - files=[] - db.execute("SELECT file FROM list where file LIKE ?",(path+'%',)) - for row in db: - base=row[0].replace(path,'',1) - if base.find('/')==-1: - files.append(base) - return files - -def ftime_match(db,filename,ftime): - db.execute("SELECT date FROM list WHERE file == ?",(filename,)) - count=db.fetchall() - return count[0][0]==ftime - -def hash_match(db,filename,hash): - db.execute("SELECT hash FROM list where file == ?",(filename,)) - count=db.fetchall() - return count[0][0]==hash - -def hash2file(db,hash,firstOnly=True): - db.execute("SELECT file FROM list where hash == ?",(hash,)) - names=db.fetchall() - if len(names)==0: - return None - if firstOnly: - return names[0][0] - else: - return [x[0] for x in names] - -def file2hash(db,filename): - db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,)) - names=db.fetchall() - if len(names)==0: - return None - return names[0][0] - -def hash_in_data(db,hash): - db.execute("SELECT hash FROM data where hash == ? LIMIT 1",(hash,)) - hashes=db.fetchall() - return len(hashes)!=0 -def hash_in_list(db,hash): - db.execute("SELECT hash FROM list where hash == ? LIMIT 1",(hash,)) - hashes=db.fetchall() - return len(hashes)!=0 - -def get_md5(filename): - ''' Return hash of the first 15 megabytes of the file ''' - return hashlib.md5(open(filename,'rb').read(1024*1024*15)).hexdigest() - -def get_dims(filename): - idargs=['identify','-format','%wx%h',filename+'[0]'] - p=subprocess.Popen(idargs,stdout=subprocess.PIPE) - out, err = p.communicate() - return (out.strip().split('x')) - -def call_viewer(opts, files): - """ Runs the viewer program, contains defaults """ - - if opts.viewer=="1": - opts.viewer="geeqie -l %f" - devnull = open('/dev/null', 'w') - subprocess.call(opts.viewer.replace('%f', " ".join(files)), stderr=devnull, shell=True) - -def append_colors(sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbh=conn.cursor() - db.execute("SELECT hash,R FROM data WHERE R IS NULL") - i=0 - dirname_old="" - for row in db: - filename=hash2file(conn.cursor(),row[0]) - if filename==None: - continue - colors=get_colors(filename) - dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ - WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2], - colors[1][0],colors[1][1],colors[1][2],row[0])) - filebase=" "+os.path.basename(filename) - dirname=os.path.dirname(filename) - if dirname!=dirname_old: - dirname_old=dirname - filebase=filename - print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f':filename, 'r':colors[0][0], - 'g':colors[0][1], 'b':colors[0][2]}) - i+=1 - if (i%50==0): - conn.commit(); - conn.commit() - return - -def find_color_nearest(opts): - """ Find closest matching images to given RGB color """ - src=opts.nearestcolor - conn=sqlite3.connect(opts.sqlfile) - conn.text_factory=str - db=conn.cursor() - src=[float(i) for i in src.strip().strip('"').split(',')] - if len(src)==3: - src.append(1) - db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", - (src[0],src[1],src[2],src[3])) - hits=[] - for hit in db: - fs=hash2file(conn.cursor(),hit[0],False) - if fs==None: - continue - if hit[1]==None: - print("Color information not found. Run again with --measure.") - return - for f in fs: - hits.append((f,hit[1],hit[2],hit[3],hit[4])) - - file_len=str(max([len(x[0]) for x in hits])) - for h in range(len(hits)): - if h>=src[3]: - break - print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h])) - if opts.viewer: - fnames=[x[0] for x in hits] - call_viewer(opts, fnames) - return - -def find_color_nearest_file(opts): - """ Find closest matching images to given files with RGB color """ - cmp=opts.nearestcolor.rsplit(",",1) - if len(cmp)==1: - thr=2 - else: - thr=int(cmp[1])+1 - cmp=os.path.realpath(cmp[0]) - conn=sqlite3.connect(opts.sqlfile) - conn.text_factory=str - db=conn.cursor() - if is_listed(db, cmp): - hash=file2hash(conn.cursor(), cmp) - db1=conn.cursor() - db1.execute("SELECT hash,fingerprint,sharpness,width,height,BR,BG,BB FROM data WHERE hash=?",(hash,)) - for hit1 in db1: - fp=int(hit1[1]) - sp=hit1[2] - dims=hit1[3:5] - src=hit1[5:8] - else: - hash=get_md5(cmp) - fp=int(get_fingerprint(cmp)) - sp=get_sharpness(cmp) - dims=get_dims(cmp) - src=get_colors(cmp)[1] - src=[float(i) for i in src] - db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", - (src[0],src[1],src[2],thr)) - hits=[] - for hit in db: - fs=hash2file(conn.cursor(),hit[0],False) - if fs==None: - continue - if hit[1]==None: - print("Color information not found. Run again with --measure.") - return - for f in fs: - if f==cmp: - continue - hits.append((f,hit[1],hit[2],hit[3],hit[4])) - - file_len=str(max([len(x[0]) for x in hits])) - for h in range(len(hits)): - if h>=thr-1: - break - print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]) ) - if opts.viewer: - fnames=[x[0] for x in hits] - call_viewer(opts, fnames) - return - - -def get_colors(filename): - small_args=['convert','-define','jpeg:size=64x64',filename+'[0]','-resize','10x10!','TEXT:-'] - p=subprocess.Popen(small_args,stdout=subprocess.PIPE) - img, err = p.communicate() - mean_args=['convert','-','-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] - p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) - mean, err = p.communicate(input=img) - mean_args=['convert','-', - '(','+clone','-gravity','North','-crop','10x1+0+0','-write','mpr:top','+delete',')', - '(','+clone','-gravity','South','-crop','10x1+0+0','-write','mpr:bot','+delete',')', - '(','+clone','-gravity','West','-crop','1x10+0+0','-rotate','90','-write','mpr:lef','+delete',')', - '(','+clone','-gravity','East','-crop','1x10+0+0','-rotate','90','-write','mpr:rig','+delete',')', - '+delete','mpr:top','mpr:bot','mpr:lef','mpr:rig','+append', - '-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] - p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) - border, err = p.communicate(input=img) - mean=[float(i) for i in mean.strip().strip('"').split(',')] - border=[float(i) for i in border.strip().strip('"').split(',')] - return (mean,border) - -def get_color_diff(c1,c2): - """ Return color difference from two RGB triplets """ - - return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] ) - -def get_ratio_diff(d1,d2): - """ Return ratio difference from two w,h dimension tuplets """ - - return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) ) - -def append_fingerprints(sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbh=conn.cursor() - db.execute("SELECT hash FROM data WHERE fingerprint IS NULL") - i=0 - dirname_old="" - for row in db: - filename=hash2file(conn.cursor(),row[0]) - if filename==None: - continue - fp=get_fingerprint(filename) - dbh.execute("UPDATE data SET fingerprint=? \ - WHERE hash=?",(fp,row[0])) - filebase=" "+os.path.basename(filename) - dirname=os.path.dirname(filename) - if dirname!=dirname_old: - dirname_old=dirname - filebase=filename - i+=1 - print("%(nr)i %(f)s" % {'f':filebase, 'nr':i}) - if (i%50==0): - conn.commit(); - conn.commit() - -def get_fingerprint(filename): - small_args=['convert','-define','jpeg:size=256x256',filename+'[0]','-resize','160x160!', - '-colorspace','Gray','-blur','2x2','-normalize','-equalize','-resize','16x16','-depth','1','TEXT:-'] - p=subprocess.Popen(small_args,stdout=subprocess.PIPE) - img, err = p.communicate() - values='' - for row in img.split('\n'): - gray=row.split(',') - if len(gray)<3: - continue - if gray[2]=="255": - values+='1' - else: - values+='0' - return str(int(values,2)) - -def find_fingerprint_similar(opts): - ''' Find all similar images, nearest match more similar than thr ''' - - thr=int(opts.similarity) - conn=sqlite3.connect(opts.sqlfile) - conn.text_factory=str - db1=conn.cursor() - db2=conn.cursor() - db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") - hits=[] - hit_list=[] - i=None - for i,hit1 in enumerate(db1): - cmp=hit1[0] - cmpf=hash2file(conn.cursor(),hit1[0]) - if cmpf==None: - continue - if cmpf in hit_list: - continue - fp=int(hit1[1]) - sp=hit1[2] - dims=hit1[3:5] - pixels=dims[0]*dims[1] - colors=hit1[5:8] - db2.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") - this1=[ [cmpf, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] - for hit2 in db2: - if hit2[0]==cmp: - continue - similarity=bin(fp^int(hit2[1])).count('1') - if similarity1: - hits.append(this1) - hit_list.append(cmp) - - if i==None: - print("No measurements found") - sys.exit(1) - - for src in hits: - file_len=str(max([len(x[0]) for x in src])) - print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) - for c in range(len(src)): - print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1], - "%.2f"%src[c][5],"%.2f"%src[c][7], - "%.1f" % src[c][2],src[c][3],src[c][4])) - if opts.viewer: - fnames=[x[0] for x in src] - call_viewer(opts, fnames) - - return - -def find_fingerprint_nearest(opts): - ''' Find nearest match to given file ''' - - cmp=os.path.realpath(opts.similarity.rsplit(",")[0]) - thr=sys.maxint - if len(opts.similarity.rsplit(","))>1: - thr=int(opts.similarity.rsplit(",",1)[1]) - conn=sqlite3.connect(opts.sqlfile) - conn.text_factory=str - db1=conn.cursor() - if is_listed(db1, cmp): - hash=file2hash(conn.cursor(),cmp) - db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,)) - for hit1 in db1: - fp=int(hit1[1]) - sp=hit1[2] - dims=hit1[3:5] - colors=hit1[5:8] - else: - hash=get_md5(cmp) - fp=int(get_fingerprint(cmp)) - sp=get_sharpness(cmp) - dims=get_dims(cmp) - colors=get_colors(cmp)[0] - - db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") - this=['',thr,0,0,0,0,0] - hit1=None - for i,hit1 in enumerate(db1): - if hit1[0] == hash: - continue - similarity=bin(fp^int(hit1[1])).count('1') - if similarity4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) - print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5], - "%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) - - if opts.viewer: - call_viewer(opts, (cmp,this[0])) - - -def append_sharpness(sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbh=conn.cursor() - db.execute("SELECT hash FROM data WHERE sharpness IS NULL") - i=0 - dirname_old="" - for row in db: - filename=hash2file(conn.cursor(),row[0]) - if filename==None: - continue - sp=get_sharpness(filename) - dbh.execute("UPDATE data SET sharpness=? \ - WHERE hash=?",(sp,row[0])) - filebase=" "+os.path.basename(filename) - dirname=os.path.dirname(filename) - if dirname!=dirname_old: - dirname_old=dirname - filebase=filename - i+=1 - print("%(nr)i %(f)s %(s)f" % {'f':filebase, 'nr':i,'s':sp}) - if (i%50==0): - conn.commit(); - conn.commit() - -def get_sharpness(filename): - """ Difference in X, Difference in Y, get smaller diff = smaller sharpness. - May change if bugs found.. """ - - # Resize to 1024 smaller axis, crop with golden ratio - # grayscale and equalize histogram. calculate difference between x neighbor - # and y neigbor. smaller difference = less sharp. - small_args="convert ( -define jpeg:size=2048x2048 -resize 1024^ -gravity center -crop 62%x62%+0+0 -colorspace gray XXX -equalize ) \ -( -clone 0 ( -clone 0 -roll +1+0 ) -compose Difference -composite -shave 1x1 ) \ -( -clone 0 ( -clone 0 -roll +0+1 ) -compose Difference -composite -shave 1x1 ) \ --delete 0 -compose Darken -composite -format %[fx:mean*1000] info:".split(" ") - small_args[12]=filename+'[0]' - p=subprocess.Popen(small_args,stdout=subprocess.PIPE) - sharpness, err = p.communicate() - return float(sharpness.strip()) - -def searchdb(sqlfile,needle): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbh=conn.cursor() - db.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) - results=[] - flist=[] - for row in db: - data=dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(row[1],)).fetchall() - results.append([row[0], data[0][1],data[0][2]]) - flist.append(('search',results)) - return flist - -def clean_dirs(dirs): - for s in dirs[:]: - if (s in BADDIRS) or (s.startswith(".")): - dirs.remove(s) - return dirs - -def clean_syms(files): - for f in files[:]: - if os.path.islink(f): - files.remove(f) - return files - -def confirm(prompt=None, resp=False): - """prompts for yes or no response from the user. Returns True for yes and - False for no. - 'resp' should be set to the default value assumed by the caller when - user simply types ENTER. - """ - - if prompt is None: - prompt = 'Confirm' - if resp: - prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') - else: - prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') - - while True: - ans = raw_input(prompt) - if not ans: - return resp - if ans not in ['y', 'Y', 'n', 'N']: - print('please enter y or n.') - continue - if ans == 'y' or ans == 'Y': - return True - if ans == 'n' or ans == 'N': - return False - -def find_duplicates(sqlfile,search): - if (search=='.'): - search='%' - else: - search='%'+search+'%' - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - dbh=conn.cursor() - dbf=conn.cursor() - db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,)) - duphash=[] - for row in db: - hash=row[0] - dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(hash,)) - flist=[] - for h in dbh: - dbf.execute("SELECT file,date FROM list WHERE hash = ?",(hash,)) - for f in dbf: - flist.append([f[0],h[1],h[2],f[1]]) - flist.sort(key=lambda file: file[3]) - duphash.append((hash, flist)) - duphash.sort(key=lambda file: file[1][0]) - return duphash - -def find_smalls(minsize,sqlfile): - conn=sqlite3.connect(sqlfile) - conn.text_factory=str - db=conn.cursor() - db.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?",(minsize,minsize)) - smalls=[] - flist=[] - for row in db: - fs=hash2file(conn.cursor(), row[0], False) - if fs==None: - continue - for f in fs: - smalls.append([f, row[1], row[2]]) - flist.append(('smalls',smalls)) - return flist - -def disk_used(options): - conn=sqlite3.connect(options.sqlfile) - conn.text_factory=str - db=conn.cursor() - db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?', - (os.path.realpath(options.diskused)+"/", - os.path.realpath(options.diskused)+"%", - )) - entries=[] - sizes=[] - for row in db: - start_path=row[1].split('/') - start_path="/".join(start_path[0:int(options.diskused_depth)]) - if start_path not in entries: - entries.append(start_path) - sizes.append(row[0]) - else: - sizes[ entries.index(start_path) ]+=row[0] - for entry in zip(sizes,entries): - print("| ".join([ str(entry[0]).ljust(14), - humanize_size(entry[0]).rjust(8), - entry[1]])) - -def print_structure(files): - for hash in files: - #print(hash[0]) - for i,f in enumerate(hash[1]): - print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i+1, 'f':f[0], 'x':f[1], 'y':f[2]}) - return - -def print_dup_structure(files,opts): - i=1 - for hash in files: - #print(hash[0]) - fnames=[] - for f in hash[1]: - fnames.append(' "'+f[0]+'"') - print("%(i)d:%(n)d:%(f)s " % {'i':i, 'n':len(fnames), 'f':",".join(fnames)}) - if opts.viewer: - fnames=[x[0] for x in hash[1]] - subprocess.call(opts.viewer.replace('%f', " ".join(fnames)), shell=True) - i+=1 - return - -def humanize_size(size,precision=1): - if size==None: - return 'nan' - suffixes=['B','KB','MB','GB','TB'] - suffixIndex = 0 - defPrecision=0 - while size > 1024: - suffixIndex += 1 - size = size/1024.0 - defPrecision=precision - return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) - -def import_descriptions(options): - """ Walk through the path from given [startpath] and read - any DESCFILE, importing the contents in the DB """ - pass - -def export_descriptions(options): - """ Get unique paths from DB, matching [startpath], write - DESCFILE for each file found. Export gets a format argument: - %wx%h %n %d """ - # width, height, basename, description - #%R%G%B %S %F %D - # Red Green Blue Sharpness Fingerprint Date(formatting?) - # %s %H - # filesize Hash - pass - - -def main(): - options=setup_options(); - if not os.path.exists(options.sqlfile): - createdb(options.sqlfile); - if options.delete: - print('Deleting entries...') - delete_nonexisting(options.sqlfile) - if options.delete_data: - print('Deleting metadata...') - delete_data(options.sqlfile) - if options.add or options.changed: - print('Adding entries...') - add_recurse(options) - if options.search: - print_structure(searchdb(options.sqlfile,options.search)) - if options.measure: - print('Measure colors...') - append_colors(options.sqlfile) - print('Measure fingerprints...') - append_fingerprints(options.sqlfile) - print('Measure sharpness...') - append_sharpness(options.sqlfile) - if options.random: - print('Random lists...') - random_lists(options.sqlfile) - if options.nearestcolor: - if os.path.exists(options.nearestcolor.rsplit(",")[0]): - find_color_nearest_file(options) - else: - find_color_nearest(options) - - if options.similarity!=None: - if os.path.exists(options.similarity.rsplit(",")[0]): - find_fingerprint_nearest(options) - else: - find_fingerprint_similar(options) - if options.duplicate: - files=find_duplicates(options.sqlfile,options.startpath) - print_dup_structure(files,options) - if options.searchsmall: - files=find_smalls(options.minsize,options.sqlfile) - if options.deleteFiles: - if len(files[0][1])>0: - delete_files(files) - delete_nonexisting(options.sqlfile) - else: - print_structure(files) - if options.diskused: - disk_used(options) - #print(files) - if options.tag: - if options.startpath==".": - print_tag(options) - else: - add_tag(options) - sys.exit(0) - -if __name__ == "__main__": - main() -