fixed fingerprinting

This commit is contained in:
ville rantanen
2019-04-29 12:28:09 +03:00
parent 5991f6b9b9
commit a93b73b76b
2 changed files with 53 additions and 50 deletions

View File

@@ -9,7 +9,7 @@ import hashlib
import traceback import traceback
import csv import csv
import datetime import datetime
from argparse import ArgumentParser from argparse import ArgumentParser
SQLFILE='list_of_images.sqlite' SQLFILE='list_of_images.sqlite'
DESCFILE='descriptions.csv' DESCFILE='descriptions.csv'
@@ -23,8 +23,8 @@ def setup_options():
db=parser.add_argument_group('database', 'Database management') db=parser.add_argument_group('database', 'Database management')
search=parser.add_argument_group('search', 'Search images from database') search=parser.add_argument_group('search', 'Search images from database')
db.add_argument("-a",action="store_false",dest="add",default=True, db.add_argument("-a",action="store_false",dest="add",default=True,
help="Do not add new files [%(default)s]") help="Do not add new files [%(default)s]")
db.add_argument("-c",action="store_true",dest="changed",default=False, db.add_argument("-c",action="store_true",dest="changed",default=False,
@@ -83,7 +83,7 @@ def setup_options():
parser.add_argument("-x",action="append",dest="exclude",default=[], parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times.") help="Exclude folder name from the lists. This option may be issued several times.")
parser.add_argument('startpath', action="store",default='.', nargs='?') parser.add_argument('startpath', action="store",default='.', nargs='?')
options=parser.parse_args() options=parser.parse_args()
BADDIRS.extend(options.exclude) BADDIRS.extend(options.exclude)
if options.duplicate or \ if options.duplicate or \
@@ -101,7 +101,7 @@ def setup_options():
return options return options
def checkdb(options): def checkdb(options):
if options.search: if options.search:
needle='%'+options.search+'%' needle='%'+options.search+'%'
else: else:
@@ -139,7 +139,7 @@ def checkdb(options):
pad=str(max([len(x[0]) for x in missing])) pad=str(max([len(x[0]) for x in missing]))
for f in missing: for f in missing:
print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]))) print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
print_stderr("----\nFile check summary:") print_stderr("----\nFile check summary:")
print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),)) print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),))
print_stderr("Checksum matches : %d"%(OK_count,)) print_stderr("Checksum matches : %d"%(OK_count,))
@@ -157,7 +157,7 @@ def createdb(sqlfile):
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
db.execute('CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)') db.execute('CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)')
db.execute('CREATE TABLE tags (hash TEXT,tag TEXT)') db.execute('CREATE TABLE tags (hash TEXT,tag TEXT)')
conn.commit() conn.commit()
return return
def delete_nonexisting(sqlfile): def delete_nonexisting(sqlfile):
@@ -188,7 +188,7 @@ def delete_data(sqlfile):
dbdel.execute("DELETE FROM data where hash == ?",(row[0],)) dbdel.execute("DELETE FROM data where hash == ?",(row[0],))
conn.commit() conn.commit()
print('Removed {0} metadata'.format(i)) print('Removed {0} metadata'.format(i))
tagsbefore=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] tagsbefore=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''') db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''')
for row in db: for row in db:
@@ -198,13 +198,13 @@ def delete_data(sqlfile):
conn.commit() conn.commit()
tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
print('Removed {0} tags'.format(tagsbefore-tagsafter)) print('Removed {0} tags'.format(tagsbefore-tagsafter))
return return
def delete_files(files): def delete_files(files):
''' Actually deletes files! ''' ''' Actually deletes files! '''
print_structure(files) print_structure(files)
doit=confirm(prompt="Sure to delete these files?") doit=confirm(prompt="Sure to delete these files?")
if doit: if doit:
print("now delling") print("now delling")
@@ -230,7 +230,7 @@ def add_recurse(options):
if IMGMATCH.match(file): if IMGMATCH.match(file):
filename=os.path.realpath(os.path.join(path,file)) filename=os.path.realpath(os.path.join(path,file))
if file not in db_files: if file not in db_files:
if options.add: if options.add:
try: try:
add_single(conn,filename,change=False) add_single(conn,filename,change=False)
@@ -346,7 +346,7 @@ def random_lists(sqlfile):
sortargs=['sort','-R','-o','portrait.list','portrait.list.s'] sortargs=['sort','-R','-o','portrait.list','portrait.list.s']
p=subprocess.call(sortargs) p=subprocess.call(sortargs)
os.unlink('portrait.list.s') os.unlink('portrait.list.s')
os.unlink('landscape.list.s') os.unlink('landscape.list.s')
return return
def is_listed(db,filename): def is_listed(db,filename):
@@ -383,7 +383,7 @@ def hash2file(db,hash,firstOnly=True):
return names[0][0] return names[0][0]
else: else:
return [x[0] for x in names] return [x[0] for x in names]
def file2hash(db,filename): def file2hash(db,filename):
db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,)) db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,))
names=db.fetchall() names=db.fetchall()
@@ -412,7 +412,7 @@ def get_dims(filename):
def call_viewer(opts, files): def call_viewer(opts, files):
""" Runs the viewer program, contains defaults """ """ Runs the viewer program, contains defaults """
if opts.viewer=="1": if opts.viewer=="1":
opts.viewer="geeqie -l %f" opts.viewer="geeqie -l %f"
devnull = open('/dev/null', 'w') devnull = open('/dev/null', 'w')
@@ -435,7 +435,7 @@ def append_colors(opt):
dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \
WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2], WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2],
colors[1][0],colors[1][1],colors[1][2],row[0])) colors[1][0],colors[1][1],colors[1][2],row[0]))
print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0], print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0],
'g':colors[0][1], 'b':colors[0][2]}) 'g':colors[0][1], 'b':colors[0][2]})
if (i%50==0): if (i%50==0):
conn.commit(); conn.commit();
@@ -517,7 +517,7 @@ def find_color_nearest_file(opts):
if f==cmp: if f==cmp:
continue continue
hits.append((f,hit[1],hit[2],hit[3],hit[4])) hits.append((f,hit[1],hit[2],hit[3],hit[4]))
file_len=str(max([len(x[0]) for x in hits])) file_len=str(max([len(x[0]) for x in hits]))
for h in range(len(hits)): for h in range(len(hits)):
if h>=thr-1: if h>=thr-1:
@@ -551,12 +551,12 @@ def get_colors(filename):
def get_color_diff(c1,c2): def get_color_diff(c1,c2):
""" Return color difference from two RGB triplets """ """ Return color difference from two RGB triplets """
return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] ) return abs( c1[0] - c2[0] )+abs( c1[1] - c2[1] )+abs( c1[2] - c2[2] )
def get_ratio_diff(d1,d2): def get_ratio_diff(d1,d2):
""" Return ratio difference from two w,h dimension tuplets """ """ Return ratio difference from two w,h dimension tuplets """
return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) ) return abs( float(d1[0])/float(d1[1]) - float(d2[0])/float(d2[1]) )
def append_fingerprints(opt): def append_fingerprints(opt):
@@ -587,13 +587,15 @@ def get_fingerprint(filename):
img, err = p.communicate() img, err = p.communicate()
values='' values=''
for row in img.split('\n'): for row in img.split('\n'):
gray=row.split(',') if row.find("gray(0)") > 0:
if len(gray)<3: values += '0'
continue continue
if gray[2]=="255": if row.find("gray(255)") > 0:
values+='1' values += '1'
else: continue
values+='0' if row.find("gray(65535)") > 0:
values += '1'
continue
return str(int(values,2)) return str(int(values,2))
def find_fingerprint_similar(opts): def find_fingerprint_similar(opts):
@@ -644,14 +646,14 @@ def find_fingerprint_similar(opts):
if len(this1)>1: if len(this1)>1:
hits.append(this1) hits.append(this1)
hit_list.append(cmp) hit_list.append(cmp)
if i==None: if i==None:
print("No measurements found") print("No measurements found")
sys.exit(1) sys.exit(1)
for src in hits: for src in hits:
file_len=str(max([len(x[0]) for x in src])) file_len=str(max([len(x[0]) for x in src]))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
for c in range(len(src)): for c in range(len(src)):
print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1], print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1],
"%.2f"%src[c][5],"%.2f"%src[c][7], "%.2f"%src[c][5],"%.2f"%src[c][7],
@@ -686,7 +688,7 @@ def find_fingerprint_nearest(opts):
sp=get_sharpness(cmp) sp=get_sharpness(cmp)
dims=get_dims(cmp) dims=get_dims(cmp)
colors=get_colors(cmp)[0] colors=get_colors(cmp)[0]
db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0") db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
this=['',thr,0,0,0,0,0] this=['',thr,0,0,0,0,0]
hit1=None hit1=None
@@ -710,14 +712,14 @@ def find_fingerprint_nearest(opts):
return return
this[0]=hash2file(conn.cursor(),this[0]) this[0]=hash2file(conn.cursor(),this[0])
file_len=str(max(len(cmp), len(this[0]))) file_len=str(max(len(cmp), len(this[0])))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5], print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5],
"%.2f"%this[6], "%.1f" % this[2],this[3], this[4])) "%.2f"%this[6], "%.1f" % this[2],this[3], this[4]))
if opts.viewer: if opts.viewer:
call_viewer(opts, (cmp,this[0])) call_viewer(opts, (cmp,this[0]))
def append_sharpness(opt): def append_sharpness(opt):
conn=sqlite3.connect(opt.sqlfile) conn=sqlite3.connect(opt.sqlfile)
@@ -743,7 +745,7 @@ def append_sharpness(opt):
def get_sharpness(filename): def get_sharpness(filename):
""" Difference in X, Difference in Y, get smaller diff = smaller sharpness. """ Difference in X, Difference in Y, get smaller diff = smaller sharpness.
May change if bugs found.. """ May change if bugs found.. """
# Resize to 1024 smaller axis, crop with golden ratio # Resize to 1024 smaller axis, crop with golden ratio
# grayscale and equalize histogram. calculate difference between x neighbor # grayscale and equalize histogram. calculate difference between x neighbor
# and y neigbor. smaller difference = less sharp. # and y neigbor. smaller difference = less sharp.
@@ -788,14 +790,14 @@ def confirm(prompt=None, resp=False):
'resp' should be set to the default value assumed by the caller when 'resp' should be set to the default value assumed by the caller when
user simply types ENTER. user simply types ENTER.
""" """
if prompt is None: if prompt is None:
prompt = 'Confirm' prompt = 'Confirm'
if resp: if resp:
prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
else: else:
prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
while True: while True:
ans = raw_input(prompt) ans = raw_input(prompt)
if not ans: if not ans:
@@ -868,15 +870,15 @@ def disk_used(options):
else: else:
sizes[ entries.index(start_path) ]+=row[0] sizes[ entries.index(start_path) ]+=row[0]
for entry in zip(sizes,entries): for entry in zip(sizes,entries):
print("| ".join([ str(entry[0]).ljust(14), print("| ".join([ str(entry[0]).ljust(14),
humanize_size(entry[0]).rjust(8), humanize_size(entry[0]).rjust(8),
entry[1]])) entry[1]]))
def print_stderr(s): def print_stderr(s):
sys.stderr.write(s) sys.stderr.write(s)
sys.stderr.write("\n") sys.stderr.write("\n")
sys.stderr.flush() sys.stderr.flush()
def print_structure(files): def print_structure(files):
for hash in files: for hash in files:
#print(hash[0]) #print(hash[0])
@@ -916,7 +918,7 @@ def humanize_date(date):
return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S') return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
def import_descriptions(options): def import_descriptions(options):
""" Walk through the path from given [startpath] and read """ Walk through the path from given [startpath] and read
any DESCFILE, importing the contents in the DB """ any DESCFILE, importing the contents in the DB """
conn=sqlite3.connect(options.sqlfile) conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str conn.text_factory=str
@@ -997,14 +999,14 @@ def description_parse(s,l,d,t):
name=os.path.basename(l[0]), name=os.path.basename(l[0]),
tags=t, tags=t,
) )
def import_metadata(options): def import_metadata(options):
""" import data table from another sqlite file""" """ import data table from another sqlite file"""
if not os.path.exists(options.importfile): if not os.path.exists(options.importfile):
print("SQLite file {:} missing".format(options.importfile)) print("SQLite file {:} missing".format(options.importfile))
sys.exit(1) sys.exit(1)
conn=sqlite3.connect(options.sqlfile) conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
@@ -1018,14 +1020,14 @@ def import_metadata(options):
conn.commit() conn.commit()
count=db.execute("SELECT COUNT(hash) FROM fromDB.data").fetchall()[0][0] count=db.execute("SELECT COUNT(hash) FROM fromDB.data").fetchall()[0][0]
tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0] tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
print("Imported %d metadata, %d tags." % (count,tagsafter-tagsbefore)) print("Imported %d metadata, %d tags." % (count,tagsafter-tagsbefore))
def export_database(options): def export_database(options):
""" export data to new sqlite file. Minimize file size of sqlite.""" """ export data to new sqlite file. Minimize file size of sqlite."""
if not os.path.exists(options.exportfile): if not os.path.exists(options.exportfile):
createdb(options.exportfile) createdb(options.exportfile)
conn=sqlite3.connect(options.sqlfile) conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
@@ -1037,7 +1039,7 @@ def export_database(options):
count=db.execute("SELECT COUNT(hash) FROM toDB.list").fetchall()[0][0] count=db.execute("SELECT COUNT(hash) FROM toDB.list").fetchall()[0][0]
metacount=db.execute("SELECT COUNT(hash) FROM toDB.data").fetchall()[0][0] metacount=db.execute("SELECT COUNT(hash) FROM toDB.data").fetchall()[0][0]
tagscount=db.execute("SELECT COUNT(hash) FROM toDB.tags").fetchall()[0][0] tagscount=db.execute("SELECT COUNT(hash) FROM toDB.tags").fetchall()[0][0]
print("Exported %d files, %d metadata, %d tags." % (count,metacount,tagscount)) print("Exported %d files, %d metadata, %d tags." % (count,metacount,tagscount))
def check_path(path,opt): def check_path(path,opt):
@@ -1047,7 +1049,7 @@ def check_path(path,opt):
if not opt.relative: if not opt.relative:
if not os.path.isfile(path): if not os.path.isfile(path):
raise IOError("File %s not found. try -R for relative search" % path) raise IOError("File %s not found. try -R for relative search" % path)
sqlpath=os.path.dirname(os.path.realpath(opt.sqlfile)) sqlpath=os.path.dirname(os.path.realpath(opt.sqlfile))
rel=os.path.join(sqlpath, path) rel=os.path.join(sqlpath, path)
searchpath=path.split(os.sep) searchpath=path.split(os.sep)
@@ -1065,13 +1067,13 @@ def main():
if not os.path.exists(options.sqlfile): if not os.path.exists(options.sqlfile):
createdb(options.sqlfile); createdb(options.sqlfile);
if options.delete: if options.delete:
print('Deleting entries...') print('Deleting entries...')
delete_nonexisting(options.sqlfile) delete_nonexisting(options.sqlfile)
if options.add or options.changed: if options.add or options.changed:
print('Adding entries...') print('Adding entries...')
add_recurse(options) add_recurse(options)
if options.delete_data: if options.delete_data:
print('Deleting metadata...') print('Deleting metadata...')
delete_data(options.sqlfile) delete_data(options.sqlfile)
if options.search and not options.check: if options.search and not options.check:
print_structure(searchdb(options.sqlfile,options.search)) print_structure(searchdb(options.sqlfile,options.search))
@@ -1095,7 +1097,7 @@ def main():
if options.similarity!=None: if options.similarity!=None:
if os.path.exists(options.similarity.rsplit(",")[0]): if os.path.exists(options.similarity.rsplit(",")[0]):
find_fingerprint_nearest(options) find_fingerprint_nearest(options)
else: else:
find_fingerprint_similar(options) find_fingerprint_similar(options)
if options.duplicate: if options.duplicate:
files=find_duplicates(options.sqlfile,options.startpath) files=find_duplicates(options.sqlfile,options.startpath)

View File

@@ -56,6 +56,7 @@ class Click:
if t<34: if t<34:
self.tags.append(Button(self.top,text="[%s] %s"%(self._hotkey(t+1),self.tagTexts[t]), command=return_func)) self.tags.append(Button(self.top,text="[%s] %s"%(self._hotkey(t+1),self.tagTexts[t]), command=return_func))
self.root.bind(self._hotkey(t+1), self._tag_key) self.root.bind(self._hotkey(t+1), self._tag_key)
sys.stdout.write("\n%s: %s"%( self._hotkey(t+1), self.tagTexts[t]) )
#self.top.bind(str(t), self._tag_key) #self.top.bind(str(t), self._tag_key)
continue continue
self.tags.append(Button(self.top,text=self.tagTexts[t], command=return_func)) self.tags.append(Button(self.top,text=self.tagTexts[t], command=return_func))