fixed fingerprinting

This commit is contained in:
ville rantanen
2019-04-29 12:28:09 +03:00
parent 5991f6b9b9
commit a93b73b76b
2 changed files with 53 additions and 50 deletions

View File

@@ -9,7 +9,7 @@ import hashlib
import traceback
import csv
import datetime
from argparse import ArgumentParser
from argparse import ArgumentParser
SQLFILE='list_of_images.sqlite'
DESCFILE='descriptions.csv'
@@ -23,8 +23,8 @@ def setup_options():
db=parser.add_argument_group('database', 'Database management')
search=parser.add_argument_group('search', 'Search images from database')
db.add_argument("-a",action="store_false",dest="add",default=True,
help="Do not add new files [%(default)s]")
db.add_argument("-c",action="store_true",dest="changed",default=False,
@@ -83,7 +83,7 @@ def setup_options():
parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times.")
parser.add_argument('startpath', action="store",default='.', nargs='?')
options=parser.parse_args()
BADDIRS.extend(options.exclude)
if options.duplicate or \
@@ -101,7 +101,7 @@ def setup_options():
return options
def checkdb(options):
if options.search:
needle='%'+options.search+'%'
else:
@@ -139,7 +139,7 @@ def checkdb(options):
pad=str(max([len(x[0]) for x in missing]))
for f in missing:
print_stderr(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
print_stderr("----\nFile check summary:")
print_stderr("Database modified: %s"%(humanize_date(os.path.getmtime(options.sqlfile)),))
print_stderr("Checksum matches : %d"%(OK_count,))
@@ -157,7 +157,7 @@ def createdb(sqlfile):
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
db.execute('CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)')
db.execute('CREATE TABLE tags (hash TEXT,tag TEXT)')
conn.commit()
conn.commit()
return
def delete_nonexisting(sqlfile):
@@ -188,7 +188,7 @@ def delete_data(sqlfile):
dbdel.execute("DELETE FROM data where hash == ?",(row[0],))
conn.commit()
print('Removed {0} metadata'.format(i))
tagsbefore=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''')
for row in db:
@@ -198,13 +198,13 @@ def delete_data(sqlfile):
conn.commit()
tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
print('Removed {0} tags'.format(tagsbefore-tagsafter))
return
def delete_files(files):
''' Actually deletes files! '''
print_structure(files)
doit=confirm(prompt="Sure to delete these files?")
if doit:
print("now delling")
@@ -230,7 +230,7 @@ def add_recurse(options):
if IMGMATCH.match(file):
filename=os.path.realpath(os.path.join(path,file))
if file not in db_files:
if file not in db_files:
if options.add:
try:
add_single(conn,filename,change=False)
@@ -346,7 +346,7 @@ def random_lists(sqlfile):
sortargs=['sort','-R','-o','portrait.list','portrait.list.s']
p=subprocess.call(sortargs)
os.unlink('portrait.list.s')
os.unlink('landscape.list.s')
os.unlink('landscape.list.s')
return
def is_listed(db,filename):
@@ -383,7 +383,7 @@ def hash2file(db,hash,firstOnly=True):
return names[0][0]
else:
return [x[0] for x in names]
def file2hash(db,filename):
db.execute("SELECT hash FROM list where file == ? LIMIT 1",(filename,))
names=db.fetchall()
@@ -412,7 +412,7 @@ def get_dims(filename):
def call_viewer(opts, files):
""" Runs the viewer program, contains defaults """
if opts.viewer=="1":
opts.viewer="geeqie -l %f"
devnull = open('/dev/null', 'w')
@@ -435,7 +435,7 @@ def append_colors(opt):
dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \
WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2],
colors[1][0],colors[1][1],colors[1][2],row[0]))
print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0],
print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0],
'g':colors[0][1], 'b':colors[0][2]})
if (i%50==0):
conn.commit();
@@ -517,7 +517,7 @@ def find_color_nearest_file(opts):
if f==cmp:
continue
hits.append((f,hit[1],hit[2],hit[3],hit[4]))
file_len=str(max([len(x[0]) for x in hits]))
for h in range(len(hits)):
if h>=thr-1:
@@ -551,12 +551,12 @@ def get_colors(filename):
def get_color_diff(c1,c2):
    """Return the color difference between two RGB triplets.

    The result is the sum of the absolute differences of the first
    three items (indices 0, 1 and 2) of ``c1`` and ``c2``.
    """
    first = abs(c1[0] - c2[0])
    second = abs(c1[1] - c2[1])
    third = abs(c1[2] - c2[2])
    # Same left-to-right summation order as a single chained expression.
    return first + second + third
def get_ratio_diff(d1,d2):
    """Return the ratio difference between two w,h dimension tuples.

    Each argument is indexed as ``[0] / [1]`` after converting both
    items to float; the absolute difference of the two quotients is
    returned.
    """
    ratio_first = float(d1[0]) / float(d1[1])
    ratio_second = float(d2[0]) / float(d2[1])
    return abs(ratio_first - ratio_second)
def append_fingerprints(opt):
@@ -587,13 +587,15 @@ def get_fingerprint(filename):
img, err = p.communicate()
values=''
for row in img.split('\n'):
gray=row.split(',')
if len(gray)<3:
continue
if gray[2]=="255":
values+='1'
else:
values+='0'
if row.find("gray(0)") > 0:
values += '0'
continue
if row.find("gray(255)") > 0:
values += '1'
continue
if row.find("gray(65535)") > 0:
values += '1'
continue
return str(int(values,2))
def find_fingerprint_similar(opts):
@@ -644,14 +646,14 @@ def find_fingerprint_similar(opts):
if len(this1)>1:
hits.append(this1)
hit_list.append(cmp)
if i==None:
print("No measurements found")
sys.exit(1)
for src in hits:
file_len=str(max([len(x[0]) for x in src]))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
for c in range(len(src)):
print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1],
"%.2f"%src[c][5],"%.2f"%src[c][7],
@@ -686,7 +688,7 @@ def find_fingerprint_nearest(opts):
sp=get_sharpness(cmp)
dims=get_dims(cmp)
colors=get_colors(cmp)[0]
db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
this=['',thr,0,0,0,0,0]
hit1=None
@@ -710,14 +712,14 @@ def find_fingerprint_nearest(opts):
return
this[0]=hash2file(conn.cursor(),this[0])
file_len=str(max(len(cmp), len(this[0])))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5],
"%.2f"%this[6], "%.1f" % this[2],this[3], this[4]))
"%.2f"%this[6], "%.1f" % this[2],this[3], this[4]))
if opts.viewer:
call_viewer(opts, (cmp,this[0]))
def append_sharpness(opt):
conn=sqlite3.connect(opt.sqlfile)
@@ -743,7 +745,7 @@ def append_sharpness(opt):
def get_sharpness(filename):
""" Difference in X, Difference in Y, get smaller diff = smaller sharpness.
May change if bugs found.. """
# Resize to 1024 smaller axis, crop with golden ratio
# grayscale and equalize histogram. calculate difference between x neighbor
# and y neigbor. smaller difference = less sharp.
@@ -788,14 +790,14 @@ def confirm(prompt=None, resp=False):
'resp' should be set to the default value assumed by the caller when
user simply types ENTER.
"""
if prompt is None:
prompt = 'Confirm'
if resp:
prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
else:
prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
while True:
ans = raw_input(prompt)
if not ans:
@@ -868,15 +870,15 @@ def disk_used(options):
else:
sizes[ entries.index(start_path) ]+=row[0]
for entry in zip(sizes,entries):
print("| ".join([ str(entry[0]).ljust(14),
humanize_size(entry[0]).rjust(8),
print("| ".join([ str(entry[0]).ljust(14),
humanize_size(entry[0]).rjust(8),
entry[1]]))
def print_stderr(s):
    """Write ``s`` and a trailing newline to standard error, then flush."""
    stream = sys.stderr
    # Two separate writes (text, then newline), followed by an explicit flush.
    stream.write(s)
    stream.write("\n")
    stream.flush()
def print_structure(files):
for hash in files:
#print(hash[0])
@@ -916,7 +918,7 @@ def humanize_date(date):
return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
def import_descriptions(options):
""" Walk through the path from given [startpath] and read
""" Walk through the path from given [startpath] and read
any DESCFILE, importing the contents in the DB """
conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str
@@ -997,14 +999,14 @@ def description_parse(s,l,d,t):
name=os.path.basename(l[0]),
tags=t,
)
def import_metadata(options):
""" import data table from another sqlite file"""
if not os.path.exists(options.importfile):
print("SQLite file {:} missing".format(options.importfile))
sys.exit(1)
conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str
db=conn.cursor()
@@ -1018,14 +1020,14 @@ def import_metadata(options):
conn.commit()
count=db.execute("SELECT COUNT(hash) FROM fromDB.data").fetchall()[0][0]
tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
print("Imported %d metadata, %d tags." % (count,tagsafter-tagsbefore))
def export_database(options):
""" export data to new sqlite file. Minimize file size of sqlite."""
if not os.path.exists(options.exportfile):
createdb(options.exportfile)
conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str
db=conn.cursor()
@@ -1037,7 +1039,7 @@ def export_database(options):
count=db.execute("SELECT COUNT(hash) FROM toDB.list").fetchall()[0][0]
metacount=db.execute("SELECT COUNT(hash) FROM toDB.data").fetchall()[0][0]
tagscount=db.execute("SELECT COUNT(hash) FROM toDB.tags").fetchall()[0][0]
print("Exported %d files, %d metadata, %d tags." % (count,metacount,tagscount))
def check_path(path,opt):
@@ -1047,7 +1049,7 @@ def check_path(path,opt):
if not opt.relative:
if not os.path.isfile(path):
raise IOError("File %s not found. try -R for relative search" % path)
sqlpath=os.path.dirname(os.path.realpath(opt.sqlfile))
rel=os.path.join(sqlpath, path)
searchpath=path.split(os.sep)
@@ -1065,13 +1067,13 @@ def main():
if not os.path.exists(options.sqlfile):
createdb(options.sqlfile);
if options.delete:
print('Deleting entries...')
print('Deleting entries...')
delete_nonexisting(options.sqlfile)
if options.add or options.changed:
print('Adding entries...')
add_recurse(options)
if options.delete_data:
print('Deleting metadata...')
print('Deleting metadata...')
delete_data(options.sqlfile)
if options.search and not options.check:
print_structure(searchdb(options.sqlfile,options.search))
@@ -1095,7 +1097,7 @@ def main():
if options.similarity!=None:
if os.path.exists(options.similarity.rsplit(",")[0]):
find_fingerprint_nearest(options)
else:
else:
find_fingerprint_similar(options)
if options.duplicate:
files=find_duplicates(options.sqlfile,options.startpath)

View File

@@ -56,6 +56,7 @@ class Click:
if t<34:
self.tags.append(Button(self.top,text="[%s] %s"%(self._hotkey(t+1),self.tagTexts[t]), command=return_func))
self.root.bind(self._hotkey(t+1), self._tag_key)
sys.stdout.write("\n%s: %s"%( self._hotkey(t+1), self.tagTexts[t]) )
#self.top.bind(str(t), self._tag_key)
continue
self.tags.append(Button(self.top,text=self.tagTexts[t], command=return_func))