image similarity metrics

This commit is contained in:
q
2013-12-23 17:35:42 +02:00
parent e7e699b6eb
commit 56a2d21d7b

View File

@@ -30,10 +30,8 @@ def setup_options():
help="Create randomized files for landscape and portrait images [%(default)s]") help="Create randomized files for landscape and portrait images [%(default)s]")
parser.add_argument("-s",type=str,dest="search",default=False, parser.add_argument("-s",type=str,dest="search",default=False,
help="Search list based on path pattern") help="Search list based on path pattern")
parser.add_argument("--color",action="store_true",dest="colors",default=False, parser.add_argument("--measure",action="store_true",dest="measure",default=False,
help="Append list with mean color information This option will flip the 'Add new files' option. [%(default)s]") help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--finger",action="store_true",dest="finger",default=False,
help="Append list with fingerprint information. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False, parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False,
help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number") help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number")
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
@@ -42,7 +40,7 @@ def setup_options():
help="Delete files listed with --small. [%(default)s]") help="Delete files listed with --small. [%(default)s]")
parser.add_argument("--small",action="store_true",dest="searchsmall",default=False, parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]") help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--similar",type=int,dest="similarity",default=False, parser.add_argument("--similar",type=str,dest="similarity",default=None,
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity") help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity")
parser.add_argument("-x",action="append",dest="exclude",default=[], parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times") help="Exclude folder name from the lists. This option may be issued several times")
@@ -50,7 +48,7 @@ def setup_options():
options=parser.parse_args() options=parser.parse_args()
BADDIRS.extend(options.exclude) BADDIRS.extend(options.exclude)
if options.duplicate or options.searchsmall or options.colors or options.finger: if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search:
options.add=not options.add options.add=not options.add
return options return options
@@ -298,8 +296,8 @@ def get_fingerprint(filename):
values+='0' values+='0'
return str(int(values,2)) return str(int(values,2))
def find_fingerprint_nearest(sqlfile,thr): def find_fingerprint_similar(sqlfile,thr):
# thr either file, or threshold? ''' Find all similar images, nearest match more similar than thr '''
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
db1=conn.cursor() db1=conn.cursor()
@@ -325,12 +323,33 @@ def find_fingerprint_nearest(sqlfile,thr):
this[0]=foo this[0]=foo
hits.append(hit2[0]) hits.append(hit2[0])
if this[2]<thr: if this[2]<thr:
print('"%s" "%s"' % ( this[0], this[1] )) print('"%s" "%s":%i' % ( this[0], this[1], this[2] ))
#for row in hits:
# print(row)
return return
def find_fingerprint_nearest(sqlfile,cmp):
    ''' Find nearest match

    Print the entry of the `list` table whose fingerprint differs from the
    fingerprint of `cmp` in the fewest bits (XOR the two integers and count
    the set bits).

    sqlfile: path of the sqlite database holding the `list` table.
    cmp:     path of the reference image. It is made absolute before it is
             compared against the stored file names.

    Output is one line, '"<file>":<differing bits>'. Nothing is printed when
    there is no other entry with a fingerprint to compare against.
    '''
    cmp=os.path.abspath(cmp)
    conn=sqlite3.connect(sqlfile)
    try:
        conn.text_factory=str
        db1=conn.cursor()
        fp=None
        if is_listed(db1, cmp):
            # Reuse the stored fingerprint instead of recomputing it.
            db1.execute("SELECT fingerprint FROM list WHERE file=?",(cmp,))
            for hit1 in db1:
                if hit1[0] is not None:
                    fp=int(hit1[0])
        if fp is None:
            # Not listed, or listed without a stored fingerprint
            # (NOTE(review): presumably entries that were never measured).
            fp=int(get_fingerprint(cmp))
        # ORDER BY keeps tie-breaking deterministic: the first file in name
        # order wins among equally distant candidates.
        db1.execute("SELECT file,fingerprint FROM list ORDER BY file")
        nearest_file=None
        nearest_bits=None
        for filename,fingerprint in db1:
            # Skip the reference file itself and rows that have no
            # fingerprint; int(None) would raise TypeError.
            if filename == cmp or fingerprint is None:
                continue
            similarity=bin(fp^int(fingerprint)).count('1')
            # A None sentinel replaces sys.maxint, which Python 3 lacks.
            if nearest_bits is None or similarity<nearest_bits:
                nearest_bits=similarity
                nearest_file=filename
    finally:
        # Always release the database handle, even if fingerprinting fails.
        conn.close()
    if nearest_file is not None:
        print('"%s":%i' % (nearest_file, nearest_bits))
def searchdb(sqlfile,needle): def searchdb(sqlfile,needle):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
@@ -440,30 +459,29 @@ def main():
options=setup_options(); options=setup_options();
if not os.path.exists(options.sqlfile): if not os.path.exists(options.sqlfile):
createdb(options.sqlfile); createdb(options.sqlfile);
if options.search:
print_structure(searchdb(options.sqlfile,options.search))
sys.exit(0)
if options.nearestcolor:
find_color_nearest(options.sqlfile,options.nearestcolor)
sys.exit(0)
if options.similarity:
find_fingerprint_nearest(options.sqlfile,options.similarity)
sys.exit(0)
if options.delete: if options.delete:
print('Deleting entries...') print('Deleting entries...')
delete_nonexisting(options.sqlfile) delete_nonexisting(options.sqlfile)
if options.add or options.changed: if options.add or options.changed:
print('Adding entries...') print('Adding entries...')
add_recurse(options) add_recurse(options)
if options.colors: if options.search:
print_structure(searchdb(options.sqlfile,options.search))
if options.measure:
print('Adding colors...') print('Adding colors...')
append_colors(options.sqlfile) append_colors(options.sqlfile)
if options.finger:
print('Adding fingerprints...') print('Adding fingerprints...')
append_fingerprints(options.sqlfile) append_fingerprints(options.sqlfile)
if options.random: if options.random:
print('Random lists...') print('Random lists...')
random_lists(options.sqlfile) random_lists(options.sqlfile)
if options.nearestcolor:
find_color_nearest(options.sqlfile,options.nearestcolor)
if options.similarity!=None:
if os.path.exists(options.similarity):
find_fingerprint_nearest(options.sqlfile,options.similarity)
else:
find_fingerprint_similar(options.sqlfile,int(options.similarity))
if options.duplicate: if options.duplicate:
files=find_duplicates(options.sqlfile,options.startpath) files=find_duplicates(options.sqlfile,options.startpath)
print_dup_structure(files) print_dup_structure(files)