From a93b73b76b701255aa0e77d3432b24146e4ab6d0 Mon Sep 17 00:00:00 2001 From: ville rantanen Date: Mon, 29 Apr 2019 12:28:09 +0300 Subject: [PATCH] fixed fingerprinting --- files/image_list.py | 102 +++++++++++++++++++------------------ files/image_list_tagger.py | 1 + 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/files/image_list.py b/files/image_list.py index 00cb415..825c565 100755 --- a/files/image_list.py +++ b/files/image_list.py @@ -9,7 +9,7 @@ import hashlib import traceback import csv import datetime -from argparse import ArgumentParser +from argparse import ArgumentParser SQLFILE='list_of_images.sqlite' DESCFILE='descriptions.csv' @@ -23,8 +23,8 @@ def setup_options(): db=parser.add_argument_group('database', 'Database management') search=parser.add_argument_group('search', 'Search images from database') - - + + db.add_argument("-a",action="store_false",dest="add",default=True, help="Do not add new files [%(default)s]") db.add_argument("-c",action="store_true",dest="changed",default=False, @@ -83,7 +83,7 @@ def setup_options(): parser.add_argument("-x",action="append",dest="exclude",default=[], help="Exclude folder name from the lists. This option may be issued several times.") parser.add_argument('startpath', action="store",default='.', nargs='?') - + options=parser.parse_args() BADDIRS.extend(options.exclude) if options.duplicate or \ @@ -101,7 +101,7 @@ def setup_options(): return options def checkdb(options): - + if options.search: needle='%'+options.search+'%' else: @@ -139,7 +139,7 @@ def checkdb(options): pad=str(max([len(x[0]) for x in missing])) for f in missing: print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]))) - + print_stderr("----\nFile check summary:") print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),)) print_stderr("Checksum matches : %d"%(OK_count,)) @@ -157,7 +157,7 @@ def createdb(sqlfile): R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') db.execute('CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)') db.execute('CREATE TABLE tags (hash TEXT,tag TEXT)') - conn.commit() + conn.commit() return def delete_nonexisting(sqlfile): @@ -188,7 +188,7 @@ def delete_data(sqlfile): dbdel.execute("DELETE FROM data where hash == ?",(row[0],)) conn.commit() print('Removed {0} metadata'.format(i)) - + tagsbefore=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''') for row in db: @@ -198,13 +198,13 @@ def delete_data(sqlfile): conn.commit() tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] print('Removed {0} tags'.format(tagsbefore-tagsafter)) - + return - + def delete_files(files): ''' Actually deletes files! ''' print_structure(files) - + doit=confirm(prompt="Sure to delete these files?") if doit: print("now delling") @@ -230,7 +230,7 @@ def add_recurse(options): if IMGMATCH.match(file): filename=os.path.realpath(os.path.join(path,file)) - if file not in db_files: + if file not in db_files: if options.add: try: add_single(conn,filename,change=False) @@ -346,7 +346,7 @@ def random_lists(sqlfile): sortargs=['sort','-R','-o','portrait.list','portrait.list.s'] p=subprocess.call(sortargs) os.unlink('portrait.list.s') - os.unlink('landscape.list.s') + os.unlink('landscape.list.s') return def is_listed(db,filename): @@ -383,7 +383,7 @@ def hash2file(db,hash,firstOnly=True): return names[0][0] else: return [x[0] for x in names] - + def file2hash(db,filename): db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,)) names=db.fetchall() @@ -412,7 +412,7 @@ def get_dims(filename): def call_viewer(opts, files): """ Runs the viewer program, contains defaults """ - + if opts.viewer=="1": opts.viewer="geeqie -l %f" devnull = open('/dev/null', 'w') @@ -435,7 +435,7 @@ def append_colors(opt): dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2], colors[1][0],colors[1][1],colors[1][2],row[0])) - print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0], + print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0], 'g':colors[0][1], 'b':colors[0][2]}) if (i%50==0): conn.commit(); @@ -517,7 +517,7 @@ def find_color_nearest_file(opts): if f==cmp: continue hits.append((f,hit[1],hit[2],hit[3],hit[4])) - + file_len=str(max([len(x[0]) for x in hits])) for h in range(len(hits)): if h>=thr-1: @@ -551,12 +551,12 @@ def get_colors(filename): def get_color_diff(c1,c2): """ Return color difference from two RGB triplets """ - + return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] ) def get_ratio_diff(d1,d2): """ Return ratio difference from two w,h dimension tuplets """ - + return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) ) def append_fingerprints(opt): @@ -587,13 +587,15 @@ def get_fingerprint(filename): img, err = p.communicate() values='' for row in img.split('\n'): - gray=row.split(',') - if len(gray)<3: - continue - if gray[2]=="255": - values+='1' - else: - values+='0' + if row.find("gray(0)") > 0: + values += '0' + continue + if row.find("gray(255)") > 0: + values += '1' + continue + if row.find("gray(65535)") > 0: + values += '1' + continue return str(int(values,2)) def find_fingerprint_similar(opts): @@ -644,14 +646,14 @@ def find_fingerprint_similar(opts): if len(this1)>1: hits.append(this1) hit_list.append(cmp) - + if i==None: print("No measurements found") sys.exit(1) for src in hits: file_len=str(max([len(x[0]) for x in src])) - print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) + print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) for c in range(len(src)): print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1], "%.2f"%src[c][5],"%.2f"%src[c][7], @@ -686,7 +688,7 @@ def find_fingerprint_nearest(opts): sp=get_sharpness(cmp) dims=get_dims(cmp) colors=get_colors(cmp)[0] - + db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") this=['',thr,0,0,0,0,0] hit1=None @@ -710,14 +712,14 @@ def find_fingerprint_nearest(opts): return this[0]=hash2file(conn.cursor(),this[0]) file_len=str(max(len(cmp), len(this[0]))) - print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) - print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) + print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) + print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5], - "%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) - + "%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) + if opts.viewer: call_viewer(opts, (cmp,this[0])) - + def append_sharpness(opt): conn=sqlite3.connect(opt.sqlfile) @@ -743,7 +745,7 @@ def append_sharpness(opt): def get_sharpness(filename): """ Difference in X, Difference in Y, get smaller diff = smaller sharpness. May change if bugs found.. """ - + # Resize to 1024 smaller axis, crop with golden ratio # grayscale and equalize histogram. calculate difference between x neighbor # and y neigbor. smaller difference = less sharp. @@ -788,14 +790,14 @@ def confirm(prompt=None, resp=False): 'resp' should be set to the default value assumed by the caller when user simply types ENTER. """ - + if prompt is None: prompt = 'Confirm' if resp: prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') else: prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') - + while True: ans = raw_input(prompt) if not ans: @@ -868,15 +870,15 @@ def disk_used(options): else: sizes[ entries.index(start_path) ]+=row[0] for entry in zip(sizes,entries): - print("| ".join([ str(entry[0]).ljust(14), - humanize_size(entry[0]).rjust(8), + print("| ".join([ str(entry[0]).ljust(14), + humanize_size(entry[0]).rjust(8), entry[1]])) def print_stderr(s): sys.stderr.write(s) sys.stderr.write("\n") sys.stderr.flush() - + def print_structure(files): for hash in files: #print(hash[0]) @@ -916,7 +918,7 @@ def humanize_date(date): return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S') def import_descriptions(options): - """ Walk through the path from given [startpath] and read + """ Walk through the path from given [startpath] and read any DESCFILE, importing the contents in the DB """ conn=sqlite3.connect(options.sqlfile) conn.text_factory=str @@ -997,14 +999,14 @@ def description_parse(s,l,d,t): name=os.path.basename(l[0]), tags=t, ) - + def import_metadata(options): """ import data table from another sqlite file""" if not os.path.exists(options.importfile): print("SQLite file {:} missing".format(options.importfile)) sys.exit(1) - + conn=sqlite3.connect(options.sqlfile) conn.text_factory=str db=conn.cursor() @@ -1018,14 +1020,14 @@ def import_metadata(options): conn.commit() count=db.execute("SELECT COUNT(hash) FROM fromDB.data").fetchall()[0][0] tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] - + print("Imported %d metadata, %d tags." % (count,tagsafter-tagsbefore)) def export_database(options): """ export data to new sqlite file. Minimize file size of sqlite.""" if not os.path.exists(options.exportfile): createdb(options.exportfile) - + conn=sqlite3.connect(options.sqlfile) conn.text_factory=str db=conn.cursor() @@ -1037,7 +1039,7 @@ def export_database(options): count=db.execute("SELECT COUNT(hash) FROM toDB.list").fetchall()[0][0] metacount=db.execute("SELECT COUNT(hash) FROM toDB.data").fetchall()[0][0] tagscount=db.execute("SELECT COUNT(hash) FROM toDB.tags").fetchall()[0][0] - + print("Exported %d files, %d metadata, %d tags." % (count,metacount,tagscount)) def check_path(path,opt): @@ -1047,7 +1049,7 @@ def check_path(path,opt): if not opt.relative: if not os.path.isfile(path): raise IOError("File %s not found. try -R for relative search" % path) - + sqlpath=os.path.dirname(os.path.realpath(opt.sqlfile)) rel=os.path.join(sqlpath, path) searchpath=path.split(os.sep) @@ -1065,13 +1067,13 @@ def main(): if not os.path.exists(options.sqlfile): createdb(options.sqlfile); if options.delete: - print('Deleting entries...') + print('Deleting entries...') delete_nonexisting(options.sqlfile) if options.add or options.changed: print('Adding entries...') add_recurse(options) if options.delete_data: - print('Deleting metadata...') + print('Deleting metadata...') delete_data(options.sqlfile) if options.search and not options.check: print_structure(searchdb(options.sqlfile,options.search)) @@ -1095,7 +1097,7 @@ def main(): if options.similarity!=None: if os.path.exists(options.similarity.rsplit(",")[0]): find_fingerprint_nearest(options) - else: + else: find_fingerprint_similar(options) if options.duplicate: files=find_duplicates(options.sqlfile,options.startpath) diff --git a/files/image_list_tagger.py b/files/image_list_tagger.py index 5c41004..01fe1b2 100755 --- a/files/image_list_tagger.py +++ b/files/image_list_tagger.py @@ -56,6 +56,7 @@ class Click: if t<34: self.tags.append(Button(self.top,text="[%s] %s"%(self._hotkey(t+1),self.tagTexts[t]), command=return_func)) self.root.bind(self._hotkey(t+1), self._tag_key) + sys.stdout.write("\n%s: %s"%( self._hotkey(t+1), self.tagTexts[t]) ) #self.top.bind(str(t), self._tag_key) continue self.tags.append(Button(self.top,text=self.tagTexts[t], command=return_func))