#!/usr/bin/python from __future__ import print_function import sys import os import re import sqlite3 import subprocess import hashlib from argparse import ArgumentParser SQLFILE='list_of_images.sqlite' IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$',re.I) BADDIRS=['_tn','_med'] MINSIZE=0 def setup_options(): parser=ArgumentParser(description="Maintains the list of images sqlite file") parser.add_argument("-a",action="store_false",dest="add",default=True, help="Do not add new files [%(default)s]") parser.add_argument("-c",action="store_true",dest="changed",default=False, help="Modify changed files [%(default)s]") parser.add_argument("-d",action="store_true",dest="delete",default=False, help="Delete non-existing entries [%(default)s]") parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, help="SQL file name to use [%(default)s]") parser.add_argument("-l",action="store_true",dest="symlinks",default=False, help="Follow symbolic links [%(default)s]") parser.add_argument("-m",type=int,dest="minsize",default=MINSIZE, help="Minimum pixel width/height of stored image [%(default)s]") parser.add_argument("-r",action="store_true",dest="random",default=False, help="Create randomized files for landscape and portrait images [%(default)s]") parser.add_argument("-s",type=str,dest="search",default=False, help="Search list based on path pattern") parser.add_argument("--measure",action="store_true",dest="measure",default=False, help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]") parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False, help="Search list for nearest ambient color. format: R,G,B in float 0-1. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.") parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]") parser.add_argument("--del",action="store_true",dest="deleteFiles",default=False, help="Delete files listed with --small. [%(default)s]") parser.add_argument("--small",action="store_true",dest="searchsmall",default=False, help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]") parser.add_argument("--similar",type=str,dest="similarity",default=None, help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "+ "If value is a filename, search similar to that image. "+ "Append with ',value' to limit similarity. "+ "The output columns: SD SimilarityDiff., CD ColorDiff., "+ "RD AspectRatioDiff.,Shp SharpnessIndex.") parser.add_argument("--viewer",type=str,dest="viewer",default=None, help="Program to view images, %%f refers to filename(s)."+ "If '1', defaults to: 'geeqie -l %%f'") parser.add_argument("-x",action="append",dest="exclude",default=[], help="Exclude folder name from the lists. This option may be issued several times.") parser.add_argument('startpath', action="store",default='.', nargs='?') options=parser.parse_args() BADDIRS.extend(options.exclude) if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search: options.add=not options.add return options def createdb(sqlfile): conn=sqlite3.connect(sqlfile) db=conn.cursor() conn.text_factory=str db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\ file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\ width INTEGER,height INTEGER,fingerprint TEXT,sharpness NUMERIC,\ R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') conn.commit() return def delete_nonexisting(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str #conn.row_factory=sqlite3.Row db=conn.cursor() dbdel=conn.cursor() db.execute('SELECT file FROM list') for row in db: if not os.path.exists(row[0]): print('removing.. '+row[0]) dbdel.execute("DELETE FROM list where file == ?",(row[0],)) conn.commit() return def delete_files(files): ''' Actually deletes files! ''' print_structure(files) doit=confirm(prompt="Sure to delete these files?") if doit: print("now delling") for hash in files: for f in hash[1]: print(f[0]) os.remove(f[0]) return def add_recurse(options): conn=sqlite3.connect(options.sqlfile) conn.text_factory=str db=conn.cursor() for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks): print('Checking '+path) dirs=clean_dirs(dirs) if not options.symlinks: files=clean_syms(files) files.sort() dirs.sort() db_files=get_folder_contents(db,os.path.abspath(path)+'/') for file in files: if IMGMATCH.match(file): filename=os.path.abspath(os.path.join(path,file)) #if not is_listed(db,filename): if file not in db_files: if options.add: try: add_single(conn,filename,change=False,minsize=options.minsize) except: print('error adding file: '+filename) sys.exit(1) else: if options.changed: ftime=os.path.getmtime(filename) #hash=get_md5(filename) #if not hash_match(db,filename,hash): if not ftime_match(db,filename,ftime): #file content changed try: add_single(conn,filename,change=True,minsize=options.minsize) except: print('error changing file: '+filename) sys.exit(1) # if file mentioned, and hash same, no need to change entry conn.commit() return def add_single(conn,filename,change=False,hash=None,minsize=0): dims=get_dims(filename) if int(dims[0])0 def get_folder_contents(db,path): ''' return the contents of the folder ''' files=[] db.execute("SELECT file FROM list where file LIKE ?",(path+'%',)) for row in db: base=row[0].replace(path,'',1) if base.find('/')==-1: files.append(base) return files def ftime_match(db,filename,ftime): db.execute("SELECT date FROM list where file == ?",(filename,)) count=db.fetchall() return count[0][0]==ftime def hash_match(db,filename,hash): db.execute("SELECT hash FROM list where file == ?",(filename,)) count=db.fetchall() return count[0][0]==hash def get_md5(filename): ''' Return hash of the first 5 megabytes of the file ''' return hashlib.md5(open(filename,'rb').read(1024*1024*5)).hexdigest() def get_dims(filename): idargs=['identify','-format','%wx%h',filename+'[0]'] p=subprocess.Popen(idargs,stdout=subprocess.PIPE) out, err = p.communicate() return (out.strip().split('x')) def call_viewer(opts, files): """ Runs the viewer program, contains defaults """ if opts.viewer=="1": opts.viewer="geeqie -l %f" devnull = open('/dev/null', 'w') subprocess.call(opts.viewer.replace('%f', " ".join(files)), stderr=devnull, shell=True) def append_colors(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() db.execute("SELECT file,R FROM list WHERE R IS NULL ORDER BY file") i=0 dirname_old="" for row in db: colors=get_colors(row[0]) dbh.execute("UPDATE list SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ WHERE file=?",(colors[0][0],colors[0][1],colors[0][2], colors[1][0],colors[1][1],colors[1][2],row[0])) filename=" "+os.path.basename(row[0]) dirname=os.path.dirname(row[0]) if dirname!=dirname_old: dirname_old=dirname filename=row[0] print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f':filename, 'r':colors[0][0], 'g':colors[0][1], 'b':colors[0][2]}) i+=1 if (i%50==0): conn.commit(); conn.commit() return def find_color_nearest(opts): """ Find closest matching images to given RGB color """ src=opts.nearestcolor conn=sqlite3.connect(opts.sqlfile) conn.text_factory=str db=conn.cursor() src=[float(i) for i in src.strip().strip('"').split(',')] if len(src)==3: src.append(1) db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", (src[0],src[1],src[2],src[3])) hits=[] for hit in db: hits.append(hit) file_len=str(max([len(x[0]) for x in hits])) for c in range(len(hits)): print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], hits[c][1], hits[c][2], hits[c][3], hits[c][4])) if opts.viewer: fnames=[x[0] for x in hits] call_viewer(opts, fnames) return def find_color_nearest_file(opts): """ Find closest matching images to given files with RGB color """ cmp=opts.nearestcolor.rsplit(",",1) if len(cmp)==1: thr=1 else: thr=int(cmp[1]) cmp=cmp[0] conn=sqlite3.connect(opts.sqlfile) conn.text_factory=str db=conn.cursor() if is_listed(db, cmp): db1.execute("SELECT file,fingerprint,sharpness,width,height,BR,BG,BB FROM list WHERE file=?",(cmp,)) for hit1 in db: fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] src=hit1[5:8] else: fp=int(get_fingerprint(cmp)) sp=get_sharpness(cmp) dims=get_dims(cmp) src=get_colors(cmp)[1] src=[float(i) for i in src] db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", (src[0],src[1],src[2],thr)) hits=[] for hit in db: if hit[0]==cmp: continue hits.append(hit) file_len=str(max([len(x[0]) for x in hits])) for c in range(len(hits)): print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], hits[c][1], hits[c][2], hits[c][3], hits[c][4])) if opts.viewer: fnames=[x[0] for x in hits] call_viewer(opts, fnames) return def get_colors(filename): small_args=['convert','-define','jpeg:size=64x64',filename+'[0]','-resize','10x10!','TEXT:-'] p=subprocess.Popen(small_args,stdout=subprocess.PIPE) img, err = p.communicate() mean_args=['convert','-','-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) mean, err = p.communicate(input=img) mean_args=['convert','-', '(','+clone','-gravity','North','-crop','10x1+0+0','-write','mpr:top','+delete',')', '(','+clone','-gravity','South','-crop','10x1+0+0','-write','mpr:bot','+delete',')', '(','+clone','-gravity','West','-crop','1x10+0+0','-rotate','90','-write','mpr:lef','+delete',')', '(','+clone','-gravity','East','-crop','1x10+0+0','-rotate','90','-write','mpr:rig','+delete',')', '+delete','mpr:top','mpr:bot','mpr:lef','mpr:rig','+append', '-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) border, err = p.communicate(input=img) mean=[float(i) for i in mean.strip().strip('"').split(',')] border=[float(i) for i in border.strip().strip('"').split(',')] return (mean,border) def get_color_diff(c1,c2): """ Return color difference from two RGB triplets """ return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] ) def get_ratio_diff(d1,d2): """ Return ratio difference from two w,h dimension tuplets """ return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) ) def append_fingerprints(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() db.execute("SELECT file FROM list WHERE fingerprint IS NULL ORDER BY file") i=0 dirname_old="" for row in db: fp=get_fingerprint(row[0]) dbh.execute("UPDATE list SET fingerprint=? \ WHERE file=?",(fp,row[0])) filename=" "+os.path.basename(row[0]) dirname=os.path.dirname(row[0]) if dirname!=dirname_old: dirname_old=dirname filename=row[0] i+=1 print("%(nr)i %(f)s" % {'f':filename, 'nr':i}) if (i%50==0): conn.commit(); conn.commit() def get_fingerprint(filename): small_args=['convert','-define','jpeg:size=256x256',filename+'[0]','-resize','160x160!', '-colorspace','Gray','-blur','2x2','-normalize','-equalize','-resize','16x16','-depth','1','TEXT:-'] p=subprocess.Popen(small_args,stdout=subprocess.PIPE) img, err = p.communicate() values='' for row in img.split('\n'): gray=row.split(',') if len(gray)<3: continue if gray[2]=="255": values+='1' else: values+='0' return str(int(values,2)) def find_fingerprint_similar(opts): ''' Find all similar images, nearest match more similar than thr ''' thr=int(opts.similarity) conn=sqlite3.connect(opts.sqlfile) conn.text_factory=str db1=conn.cursor() db2=conn.cursor() db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") hits=[] hit_list=[] i=None for i,hit1 in enumerate(db1): if hit1[0] in hit_list: continue cmp=hit1[0] fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] pixels=dims[0]*dims[1] colors=hit1[5:8] db2.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") this1=[ [cmp, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] for hit2 in db2: if hit2[0]==cmp: continue similarity=bin(fp^int(hit2[1])).count('1') if similarity1: hits.append(this1) hit_list.append(cmp) if i==None: print("No measurements found") sys.exit(1) for src in hits: file_len=str(max([len(x[0]) for x in src])) print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) for c in range(len(src)): print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1], "%.2f"%src[c][5],"%.2f"%src[c][7], "%.1f" % src[c][2],src[c][3],src[c][4])) if opts.viewer: fnames=[x[0] for x in src] call_viewer(opts, fnames) return def find_fingerprint_nearest(opts): ''' Find nearest match to given file ''' cmp=os.path.abspath(opts.similarity.rsplit(",")[0]) thr=sys.maxint if len(opts.similarity.rsplit(","))>1: thr=int(opts.similarity.rsplit(",",1)[1]) conn=sqlite3.connect(opts.sqlfile) conn.text_factory=str db1=conn.cursor() if is_listed(db1, cmp): db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE file=?",(cmp,)) for hit1 in db1: fp=int(hit1[1]) sp=hit1[2] dims=hit1[3:5] colors=hit1[5:8] else: fp=int(get_fingerprint(cmp)) sp=get_sharpness(cmp) dims=get_dims(cmp) colors=get_colors(cmp)[0] db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") this=['',thr,0,0,0,0,0] hit1=None for i,hit1 in enumerate(db1): if hit1[0] == cmp: continue similarity=bin(fp^int(hit1[1])).count('1') if similarity4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5], "%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) if opts.viewer: call_viewer(opts, (cmp,this[0])) def append_sharpness(sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() db.execute("SELECT file FROM list WHERE sharpness IS NULL ORDER BY file") i=0 dirname_old="" for row in db: sp=get_sharpness(row[0]) dbh.execute("UPDATE list SET sharpness=? \ WHERE file=?",(sp,row[0])) filename=" "+os.path.basename(row[0]) dirname=os.path.dirname(row[0]) if dirname!=dirname_old: dirname_old=dirname filename=row[0] i+=1 print("%(nr)i %(f)s %(s)f" % {'f':filename, 'nr':i,'s':sp}) if (i%50==0): conn.commit(); conn.commit() def get_sharpness(filename): """ Difference in X, Difference in Y, get smaller diff = smaller sharpness. May change if bugs found.. """ # Resize to 1024 smaller axis, crop with golden ratio # grayscale and equalize histogram. calculate difference between x neighbor # and y neigbor. smaller difference = less sharp. small_args="convert ( -define jpeg:size=2048x2048 -resize 1024^ -gravity center -crop 62%x62%+0+0 -colorspace gray XXX -equalize ) \ ( -clone 0 ( -clone 0 -roll +1+0 ) -compose Difference -composite -shave 1x1 ) \ ( -clone 0 ( -clone 0 -roll +0+1 ) -compose Difference -composite -shave 1x1 ) \ -delete 0 -compose Darken -composite -format %[fx:mean*1000] info:".split(" ") small_args[12]=filename+'[0]' p=subprocess.Popen(small_args,stdout=subprocess.PIPE) sharpness, err = p.communicate() return float(sharpness.strip()) def searchdb(sqlfile,needle): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() db.execute("SELECT file,width,height,date FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) results=[] flist=[] for row in db: results.append(row) flist.append(('search',results)) return flist def clean_dirs(dirs): for s in dirs[:]: if (s in BADDIRS) or (s.startswith(".")): dirs.remove(s) return dirs def clean_syms(files): for f in files[:]: if os.path.islink(f): files.remove(f) return files def confirm(prompt=None, resp=False): """prompts for yes or no response from the user. Returns True for yes and False for no. 'resp' should be set to the default value assumed by the caller when user simply types ENTER. """ if prompt is None: prompt = 'Confirm' if resp: prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') else: prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') while True: ans = raw_input(prompt) if not ans: return resp if ans not in ['y', 'Y', 'n', 'N']: print('please enter y or n.') continue if ans == 'y' or ans == 'Y': return True if ans == 'n' or ans == 'N': return False def find_duplicates(sqlfile,search): if (search=='.'): search='%' else: search='%'+search+'%' conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() dbh=conn.cursor() db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,)) duphash=[] for row in db: hash=row[0] dbh.execute("SELECT file,width,height,date FROM list WHERE hash = ?",(hash,)) flist=[] for row in dbh: flist.append(row) flist.sort(key=lambda file: file[3]) duphash.append((hash, flist)) duphash.sort(key=lambda file: file[1][0]) return duphash def find_smalls(minsize,sqlfile): conn=sqlite3.connect(sqlfile) conn.text_factory=str db=conn.cursor() db.execute("SELECT file,width,height FROM list WHERE width < ? OR height < ?",(minsize,minsize)) smalls=[] flist=[] for row in db: smalls.append(row) flist.append(('smalls',smalls)) return flist def print_structure(files): for hash in files: #print(hash[0]) i=1 for f in hash[1]: print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i, 'f':f[0], 'x':f[1], 'y':f[2]}) i+=1 return def print_dup_structure(files,opts): i=1 for hash in files: #print(hash[0]) fnames=[] for f in hash[1]: fnames.append(' "'+f[0]+'"') print("%(i)d:%(n)d:%(f)s " % {'i':i, 'n':len(fnames), 'f':",".join(fnames)}) if opts.viewer: fnames=[x[0] for x in hash[1]] subprocess.call(opts.viewer.replace('%f', " ".join(fnames)), shell=True) i+=1 return def main(): options=setup_options(); if not os.path.exists(options.sqlfile): createdb(options.sqlfile); if options.delete: print('Deleting entries...') delete_nonexisting(options.sqlfile) if options.add or options.changed: print('Adding entries...') add_recurse(options) if options.search: print_structure(searchdb(options.sqlfile,options.search)) if options.measure: print('Measure colors...') append_colors(options.sqlfile) print('Measure fingerprints...') append_fingerprints(options.sqlfile) print('Measure sharpness...') append_sharpness(options.sqlfile) if options.random: print('Random lists...') random_lists(options.sqlfile) if options.nearestcolor: if os.path.exists(options.nearestcolor.rsplit(",")[0]): find_color_nearest_file(options) else: find_color_nearest(options) if options.similarity!=None: if os.path.exists(options.similarity.rsplit(",")[0]): find_fingerprint_nearest(options) else: find_fingerprint_similar(options) if options.duplicate: files=find_duplicates(options.sqlfile,options.startpath) print_dup_structure(files,options) if options.searchsmall: files=find_smalls(options.minsize,options.sqlfile) if options.deleteFiles: if len(files[0][1])>0: delete_files(files) delete_nonexisting(options.sqlfile) else: print_structure(files) #print(files) sys.exit(0) if __name__ == "__main__": main()