From bad98cb13a1c4a152f37bff0e781f1466693c4e4 Mon Sep 17 00:00:00 2001 From: ville rantanen Date: Wed, 11 Feb 2015 09:48:07 +0200 Subject: [PATCH] restructuring database --- image_list_beta.py | 863 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 863 insertions(+) create mode 100755 image_list_beta.py diff --git a/image_list_beta.py b/image_list_beta.py new file mode 100755 index 0000000..bd63fc2 --- /dev/null +++ b/image_list_beta.py @@ -0,0 +1,863 @@ +#!/usr/bin/python +from __future__ import print_function +import sys +import os +import re +import sqlite3 +import subprocess +import hashlib +import traceback +from argparse import ArgumentParser + +SQLFILE='list_of_images.sqlite' +DESCFILE='descriptions.csv' +IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$',re.I) +BADDIRS=['_tn','_med'] +MINSIZE=0 + +def setup_options(): + parser=ArgumentParser(description="Maintains the list of images sqlite file") + parser.add_argument("-a",action="store_false",dest="add",default=True, + help="Do not add new files [%(default)s]") + parser.add_argument("-c",action="store_true",dest="changed",default=False, + help="Modify changed files [%(default)s]") + parser.add_argument("-d",action="store_true",dest="delete",default=False, + help="Delete non-existing entries [%(default)s]") + parser.add_argument("-D",action="store_true",dest="delete_data",default=False, + help="Delete unused metadata [%(default)s]") + parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, + help="Print directory sizes. Argument is the path where directories are listed from.") + parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, + help="Depth of summarization for --du.") + parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, + help="SQL file name to use [%(default)s]") + parser.add_argument("-l",action="store_true",dest="symlinks",default=False, + help="Follow symbolic links [%(default)s]") + parser.add_argument("-m",type=int,dest="minsize",default=MINSIZE, + help="Minimum pixel width/height of stored image [%(default)s]") + parser.add_argument("-r",action="store_true",dest="random",default=False, + help="Create randomized files for landscape and portrait images [%(default)s]") + parser.add_argument("-s",type=str,dest="search",default=False, + help="Search list based on path pattern") + parser.add_argument("--measure",action="store_true",dest="measure",default=False, + help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]") + parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False, + help="Search list for nearest ambient color. format: R,G,B in float 0-1. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.") + parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, + help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]") + parser.add_argument("--del",action="store_true",dest="deleteFiles",default=False, + help="Delete files listed with --small. [%(default)s]") + parser.add_argument("--small",action="store_true",dest="searchsmall",default=False, + help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]") + parser.add_argument("--similar",type=str,dest="similarity",default=None, + help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "+ + "If value is a filename, search similar to that image. "+ + "Append with ',value' to limit similarity. "+ + "The output columns: SD SimilarityDiff., CD ColorDiff., "+ + "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.") +## -t string add tag to [file] + + parser.add_argument("--viewer",type=str,dest="viewer",default=None, + help="Program to view images, %%f refers to filename(s)."+ + "If '1', defaults to: 'geeqie -l %%f'") + parser.add_argument("-x",action="append",dest="exclude",default=[], + help="Exclude folder name from the lists. This option may be issued several times.") + parser.add_argument('startpath', action="store",default='.', nargs='?') + + options=parser.parse_args() + BADDIRS.extend(options.exclude) + if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search or options.diskused: + options.add=not options.add + return options + +def createdb(sqlfile): + conn=sqlite3.connect(sqlfile) + db=conn.cursor() + conn.text_factory=str + db.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,\ + description TEXT,portrait NUMERIC, \ + width INTEGER,height INTEGER,\ + fingerprint TEXT,sharpness NUMERIC,\ + R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') + db.execute('CREATE TABLE list (file TEXT,hash TEXT,date INTEGER,size INTEGER)') + db.execute('CREATE TABLE tags (tag TEXT,hash TEXT)') + conn.commit() + return + +def delete_nonexisting(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + #conn.row_factory=sqlite3.Row + db=conn.cursor() + dbdel=conn.cursor() + db.execute('SELECT file FROM list') + for row in db: + if not os.path.exists(row[0]): + print('removing.. '+row[0]) + dbdel.execute("DELETE FROM list where file == ?",(row[0],)) + conn.commit() + return + +def delete_data(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbdel=conn.cursor() + db.execute('''SELECT hash FROM data EXCEPT SELECT hash FROM list''') + for row in db: + dbdel.execute("DELETE FROM data where hash == ?",(row[0],)) + conn.commit() + return + +def delete_files(files): + ''' Actually deletes files! ''' + print_structure(files) + + doit=confirm(prompt="Sure to delete these files?") + if doit: + print("now delling") + for hash in files: + for f in hash[1]: + print(f[0]) + os.remove(f[0]) + return + +def add_recurse(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + db=conn.cursor() + for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks): + print('Checking '+path) + dirs=clean_dirs(dirs) + if not options.symlinks: + files=clean_syms(files) + files.sort() + dirs.sort() + db_files=get_folder_contents(db,os.path.realpath(path)+'/') + for file in files: + if IMGMATCH.match(file): + filename=os.path.realpath(os.path.join(path,file)) + + if file not in db_files: + if options.add: + try: + add_single(conn,filename,change=False,minsize=options.minsize) + except: + print('error adding file: '+filename) + traceback.print_exc(file=sys.stdout) + sys.exit(1) + else: + if options.changed: + ftime=int(os.path.getmtime(filename)) + #hash=get_md5(filename) + #if not hash_match(db,filename,hash): + if not ftime_match(db,filename,ftime): + #file content changed + try: + add_single(conn,filename,change=True,minsize=options.minsize) + except: + print('error changing file: '+filename) + traceback.print_exc(file=sys.stdout) + sys.exit(1) + # if file mentioned, and hash same, no need to change entry + conn.commit() + return + +def add_single(conn,filename,change=False,hash=None,minsize=0): + dims=get_dims(filename) + if int(dims[0])0 + +def get_folder_contents(db,path): + ''' return the contents of the folder ''' + files=[] + db.execute("SELECT file FROM list where file LIKE ?",(path+'%',)) + for row in db: + base=row[0].replace(path,'',1) + if base.find('/')==-1: + files.append(base) + return files + +def ftime_match(db,filename,ftime): + db.execute("SELECT date FROM list WHERE file == ?",(filename,)) + count=db.fetchall() + return count[0][0]==ftime + +def hash_match(db,filename,hash): + db.execute("SELECT hash FROM list where file == ?",(filename,)) + count=db.fetchall() + return count[0][0]==hash + +def hash2file(db,hash,firstOnly=True): + db.execute("SELECT file FROM list where hash == ?",(hash,)) + names=db.fetchall() + if len(names)==0: + return None + if firstOnly: + return names[0][0] + else: + return [x[0] for x in names] + +def file2hash(db,filename): + db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,)) + names=db.fetchall() + if len(names)==0: + return None + return names[0][0] + +def hash_in_data(db,hash): + db.execute("SELECT hash FROM data where hash == ? LIMIT 1",(hash,)) + hashes=db.fetchall() + return len(hashes)!=0 +def hash_in_list(db,hash): + db.execute("SELECT hash FROM list where hash == ? LIMIT 1",(hash,)) + hashes=db.fetchall() + return len(hashes)!=0 + +def get_md5(filename): + ''' Return hash of the first 15 megabytes of the file ''' + return hashlib.md5(open(filename,'rb').read(1024*1024*15)).hexdigest() + +def get_dims(filename): + idargs=['identify','-format','%wx%h',filename+'[0]'] + p=subprocess.Popen(idargs,stdout=subprocess.PIPE) + out, err = p.communicate() + return (out.strip().split('x')) + +def call_viewer(opts, files): + """ Runs the viewer program, contains defaults """ + + if opts.viewer=="1": + opts.viewer="geeqie -l %f" + devnull = open('/dev/null', 'w') + subprocess.call(opts.viewer.replace('%f', " ".join(files)), stderr=devnull, shell=True) + +def append_colors(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbh=conn.cursor() + db.execute("SELECT hash,R FROM data WHERE R IS NULL") + i=0 + dirname_old="" + for row in db: + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + colors=get_colors(filename) + dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ + WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2], + colors[1][0],colors[1][1],colors[1][2],row[0])) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) + if dirname!=dirname_old: + dirname_old=dirname + filebase=filename + print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f':filename, 'r':colors[0][0], + 'g':colors[0][1], 'b':colors[0][2]}) + i+=1 + if (i%50==0): + conn.commit(); + conn.commit() + return + +def find_color_nearest(opts): + """ Find closest matching images to given RGB color """ + src=opts.nearestcolor + conn=sqlite3.connect(opts.sqlfile) + conn.text_factory=str + db=conn.cursor() + src=[float(i) for i in src.strip().strip('"').split(',')] + if len(src)==3: + src.append(1) + db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", + (src[0],src[1],src[2],src[3])) + hits=[] + for hit in db: + fs=hash2file(conn.cursor(),hit[0],False) + if fs==None: + continue + if hit[1]==None: + print("Color information not found. Run again with --measure.") + return + for f in fs: + hits.append((f,hit[1],hit[2],hit[3],hit[4])) + + file_len=str(max([len(x[0]) for x in hits])) + for h in range(len(hits)): + if h>=src[3]: + break + print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h])) + if opts.viewer: + fnames=[x[0] for x in hits] + call_viewer(opts, fnames) + return + +def find_color_nearest_file(opts): + """ Find closest matching images to given files with RGB color """ + cmp=opts.nearestcolor.rsplit(",",1) + if len(cmp)==1: + thr=2 + else: + thr=int(cmp[1])+1 + cmp=os.path.realpath(cmp[0]) + conn=sqlite3.connect(opts.sqlfile) + conn.text_factory=str + db=conn.cursor() + if is_listed(db, cmp): + hash=file2hash(conn.cursor(), cmp) + db1=conn.cursor() + db1.execute("SELECT hash,fingerprint,sharpness,width,height,BR,BG,BB FROM data WHERE hash=?",(hash,)) + for hit1 in db1: + fp=int(hit1[1]) + sp=hit1[2] + dims=hit1[3:5] + src=hit1[5:8] + else: + hash=get_md5(cmp) + fp=int(get_fingerprint(cmp)) + sp=get_sharpness(cmp) + dims=get_dims(cmp) + src=get_colors(cmp)[1] + src=[float(i) for i in src] + db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?", + (src[0],src[1],src[2],thr)) + hits=[] + for hit in db: + fs=hash2file(conn.cursor(),hit[0],False) + if fs==None: + continue + if hit[1]==None: + print("Color information not found. Run again with --measure.") + return + for f in fs: + if f==cmp: + continue + hits.append((f,hit[1],hit[2],hit[3],hit[4])) + + file_len=str(max([len(x[0]) for x in hits])) + for h in range(len(hits)): + if h>=thr-1: + break + print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]) ) + if opts.viewer: + fnames=[x[0] for x in hits] + call_viewer(opts, fnames) + return + + +def get_colors(filename): + small_args=['convert','-define','jpeg:size=64x64',filename+'[0]','-resize','10x10!','TEXT:-'] + p=subprocess.Popen(small_args,stdout=subprocess.PIPE) + img, err = p.communicate() + mean_args=['convert','-','-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] + p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) + mean, err = p.communicate(input=img) + mean_args=['convert','-', + '(','+clone','-gravity','North','-crop','10x1+0+0','-write','mpr:top','+delete',')', + '(','+clone','-gravity','South','-crop','10x1+0+0','-write','mpr:bot','+delete',')', + '(','+clone','-gravity','West','-crop','1x10+0+0','-rotate','90','-write','mpr:lef','+delete',')', + '(','+clone','-gravity','East','-crop','1x10+0+0','-rotate','90','-write','mpr:rig','+delete',')', + '+delete','mpr:top','mpr:bot','mpr:lef','mpr:rig','+append', + '-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-'] + p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE) + border, err = p.communicate(input=img) + mean=[float(i) for i in mean.strip().strip('"').split(',')] + border=[float(i) for i in border.strip().strip('"').split(',')] + return (mean,border) + +def get_color_diff(c1,c2): + """ Return color difference from two RGB triplets """ + + return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] ) + +def get_ratio_diff(d1,d2): + """ Return ratio difference from two w,h dimension tuplets """ + + return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) ) + +def append_fingerprints(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbh=conn.cursor() + db.execute("SELECT hash FROM data WHERE fingerprint IS NULL") + i=0 + dirname_old="" + for row in db: + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + fp=get_fingerprint(filename) + dbh.execute("UPDATE data SET fingerprint=? \ + WHERE hash=?",(fp,row[0])) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) + if dirname!=dirname_old: + dirname_old=dirname + filebase=filename + i+=1 + print("%(nr)i %(f)s" % {'f':filebase, 'nr':i}) + if (i%50==0): + conn.commit(); + conn.commit() + +def get_fingerprint(filename): + small_args=['convert','-define','jpeg:size=256x256',filename+'[0]','-resize','160x160!', + '-colorspace','Gray','-blur','2x2','-normalize','-equalize','-resize','16x16','-depth','1','TEXT:-'] + p=subprocess.Popen(small_args,stdout=subprocess.PIPE) + img, err = p.communicate() + values='' + for row in img.split('\n'): + gray=row.split(',') + if len(gray)<3: + continue + if gray[2]=="255": + values+='1' + else: + values+='0' + return str(int(values,2)) + +def find_fingerprint_similar(opts): + ''' Find all similar images, nearest match more similar than thr ''' + + thr=int(opts.similarity) + conn=sqlite3.connect(opts.sqlfile) + conn.text_factory=str + db1=conn.cursor() + db2=conn.cursor() + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") + hits=[] + hit_list=[] + i=None + for i,hit1 in enumerate(db1): + cmp=hit1[0] + cmpf=hash2file(conn.cursor(),hit1[0]) + if cmpf==None: + continue + if cmpf in hit_list: + continue + fp=int(hit1[1]) + sp=hit1[2] + dims=hit1[3:5] + pixels=dims[0]*dims[1] + colors=hit1[5:8] + db2.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") + this1=[ [cmpf, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] + for hit2 in db2: + if hit2[0]==cmp: + continue + similarity=bin(fp^int(hit2[1])).count('1') + if similarity1: + hits.append(this1) + hit_list.append(cmp) + + if i==None: + print("No measurements found") + sys.exit(1) + + for src in hits: + file_len=str(max([len(x[0]) for x in src])) + print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) + for c in range(len(src)): + print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1], + "%.2f"%src[c][5],"%.2f"%src[c][7], + "%.1f" % src[c][2],src[c][3],src[c][4])) + if opts.viewer: + fnames=[x[0] for x in src] + call_viewer(opts, fnames) + + return + +def find_fingerprint_nearest(opts): + ''' Find nearest match to given file ''' + + cmp=os.path.realpath(opts.similarity.rsplit(",")[0]) + thr=sys.maxint + if len(opts.similarity.rsplit(","))>1: + thr=int(opts.similarity.rsplit(",",1)[1]) + conn=sqlite3.connect(opts.sqlfile) + conn.text_factory=str + db1=conn.cursor() + if is_listed(db1, cmp): + hash=file2hash(conn.cursor(),cmp) + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,)) + for hit1 in db1: + fp=int(hit1[1]) + sp=hit1[2] + dims=hit1[3:5] + colors=hit1[5:8] + else: + hash=get_md5(cmp) + fp=int(get_fingerprint(cmp)) + sp=get_sharpness(cmp) + dims=get_dims(cmp) + colors=get_colors(cmp)[0] + + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") + this=['',thr,0,0,0,0,0] + hit1=None + for i,hit1 in enumerate(db1): + if hit1[0] == hash: + continue + similarity=bin(fp^int(hit1[1])).count('1') + if similarity4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) + print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5], + "%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) + + if opts.viewer: + call_viewer(opts, (cmp,this[0])) + + +def append_sharpness(sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbh=conn.cursor() + db.execute("SELECT hash FROM data WHERE sharpness IS NULL") + i=0 + dirname_old="" + for row in db: + filename=hash2file(conn.cursor(),row[0]) + if filename==None: + continue + sp=get_sharpness(filename) + dbh.execute("UPDATE data SET sharpness=? \ + WHERE hash=?",(sp,row[0])) + filebase=" "+os.path.basename(filename) + dirname=os.path.dirname(filename) + if dirname!=dirname_old: + dirname_old=dirname + filebase=filename + i+=1 + print("%(nr)i %(f)s %(s)f" % {'f':filebase, 'nr':i,'s':sp}) + if (i%50==0): + conn.commit(); + conn.commit() + +def get_sharpness(filename): + """ Difference in X, Difference in Y, get smaller diff = smaller sharpness. + May change if bugs found.. """ + + # Resize to 1024 smaller axis, crop with golden ratio + # grayscale and equalize histogram. calculate difference between x neighbor + # and y neigbor. smaller difference = less sharp. + small_args="convert ( -define jpeg:size=2048x2048 -resize 1024^ -gravity center -crop 62%x62%+0+0 -colorspace gray XXX -equalize ) \ +( -clone 0 ( -clone 0 -roll +1+0 ) -compose Difference -composite -shave 1x1 ) \ +( -clone 0 ( -clone 0 -roll +0+1 ) -compose Difference -composite -shave 1x1 ) \ +-delete 0 -compose Darken -composite -format %[fx:mean*1000] info:".split(" ") + small_args[12]=filename+'[0]' + p=subprocess.Popen(small_args,stdout=subprocess.PIPE) + sharpness, err = p.communicate() + return float(sharpness.strip()) + +def searchdb(sqlfile,needle): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbh=conn.cursor() + db.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) + results=[] + flist=[] + for row in db: + data=dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(row[1],)).fetchall() + results.append([row[0], data[0][1],data[0][2]]) + flist.append(('search',results)) + return flist + +def clean_dirs(dirs): + for s in dirs[:]: + if (s in BADDIRS) or (s.startswith(".")): + dirs.remove(s) + return dirs + +def clean_syms(files): + for f in files[:]: + if os.path.islink(f): + files.remove(f) + return files + +def confirm(prompt=None, resp=False): + """prompts for yes or no response from the user. Returns True for yes and + False for no. + 'resp' should be set to the default value assumed by the caller when + user simply types ENTER. + """ + + if prompt is None: + prompt = 'Confirm' + if resp: + prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') + else: + prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') + + while True: + ans = raw_input(prompt) + if not ans: + return resp + if ans not in ['y', 'Y', 'n', 'N']: + print('please enter y or n.') + continue + if ans == 'y' or ans == 'Y': + return True + if ans == 'n' or ans == 'N': + return False + +def find_duplicates(sqlfile,search): + if (search=='.'): + search='%' + else: + search='%'+search+'%' + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + dbh=conn.cursor() + dbf=conn.cursor() + db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,)) + duphash=[] + for row in db: + hash=row[0] + dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(hash,)) + flist=[] + for h in dbh: + dbf.execute("SELECT file,date FROM list WHERE hash = ?",(hash,)) + for f in dbf: + flist.append([f[0],h[1],h[2],f[1]]) + flist.sort(key=lambda file: file[3]) + duphash.append((hash, flist)) + duphash.sort(key=lambda file: file[1][0]) + return duphash + +def find_smalls(minsize,sqlfile): + conn=sqlite3.connect(sqlfile) + conn.text_factory=str + db=conn.cursor() + db.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?",(minsize,minsize)) + smalls=[] + flist=[] + for row in db: + fs=hash2file(conn.cursor(), row[0], False) + if fs==None: + continue + for f in fs: + smalls.append([f, row[1], row[2]]) + flist.append(('smalls',smalls)) + return flist + +def disk_used(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + db=conn.cursor() + db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?', + (os.path.realpath(options.diskused)+"/", + os.path.realpath(options.diskused)+"%", + )) + entries=[] + sizes=[] + for row in db: + start_path=row[1].split('/') + start_path="/".join(start_path[0:int(options.diskused_depth)]) + if start_path not in entries: + entries.append(start_path) + sizes.append(row[0]) + else: + sizes[ entries.index(start_path) ]+=row[0] + for entry in zip(sizes,entries): + print("| ".join([ str(entry[0]).ljust(14), + humanize_size(entry[0]).rjust(8), + entry[1]])) + +def print_structure(files): + for hash in files: + #print(hash[0]) + for i,f in enumerate(hash[1]): + print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i+1, 'f':f[0], 'x':f[1], 'y':f[2]}) + return + +def print_dup_structure(files,opts): + i=1 + for hash in files: + #print(hash[0]) + fnames=[] + for f in hash[1]: + fnames.append(' "'+f[0]+'"') + print("%(i)d:%(n)d:%(f)s " % {'i':i, 'n':len(fnames), 'f':",".join(fnames)}) + if opts.viewer: + fnames=[x[0] for x in hash[1]] + subprocess.call(opts.viewer.replace('%f', " ".join(fnames)), shell=True) + i+=1 + return + +def humanize_size(size,precision=1): + if size==None: + return 'nan' + suffixes=['B','KB','MB','GB','TB'] + suffixIndex = 0 + defPrecision=0 + while size > 1024: + suffixIndex += 1 + size = size/1024.0 + defPrecision=precision + return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) + +def import_descriptions(options): + """ Walk through the path from given [startpath] and read + any DESCFILE, importing the contents in the DB """ + pass + +def export_descriptions(options): + """ Get unique paths from DB, matching [startpath], write + DESCFILE for each file found. Export gets a format argument: + %wx%h %n %d """ + # width, height, basename, description + #%R%G%B %S %F %D + # Red Green Blue Sharpness Fingerprint Date(formatting?) + # %s %H + # filesize Hash + pass + + +def main(): + options=setup_options(); + if not os.path.exists(options.sqlfile): + createdb(options.sqlfile); + if options.delete: + print('Deleting entries...') + delete_nonexisting(options.sqlfile) + if options.delete_data: + print('Deleting metadata...') + delete_data(options.sqlfile) + if options.add or options.changed: + print('Adding entries...') + add_recurse(options) + if options.search: + print_structure(searchdb(options.sqlfile,options.search)) + if options.measure: + print('Measure colors...') + append_colors(options.sqlfile) + print('Measure fingerprints...') + append_fingerprints(options.sqlfile) + print('Measure sharpness...') + append_sharpness(options.sqlfile) + if options.random: + print('Random lists...') + random_lists(options.sqlfile) + if options.nearestcolor: + if os.path.exists(options.nearestcolor.rsplit(",")[0]): + find_color_nearest_file(options) + else: + find_color_nearest(options) + + if options.similarity!=None: + if os.path.exists(options.similarity.rsplit(",")[0]): + find_fingerprint_nearest(options) + else: + find_fingerprint_similar(options) + if options.duplicate: + files=find_duplicates(options.sqlfile,options.startpath) + print_dup_structure(files,options) + if options.searchsmall: + files=find_smalls(options.minsize,options.sqlfile) + if options.deleteFiles: + if len(files[0][1])>0: + delete_files(files) + delete_nonexisting(options.sqlfile) + else: + print_structure(files) + if options.diskused: + disk_used(options) + #print(files) + + sys.exit(0) + +if __name__ == "__main__": + main() +