image similarity metrics

This commit is contained in:
q
2013-12-22 23:39:27 +02:00
parent 4f8063e72b
commit e7e699b6eb

View File

@@ -32,6 +32,8 @@ def setup_options():
help="Search list based on path pattern") help="Search list based on path pattern")
parser.add_argument("--color",action="store_true",dest="colors",default=False, parser.add_argument("--color",action="store_true",dest="colors",default=False,
help="Append list with mean color information This option will flip the 'Add new files' option. [%(default)s]") help="Append list with mean color information This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--finger",action="store_true",dest="finger",default=False,
help="Append list with fingerprint information. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False, parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False,
help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number") help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number")
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False, parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
@@ -40,13 +42,15 @@ def setup_options():
help="Delete files listed with --small. [%(default)s]") help="Delete files listed with --small. [%(default)s]")
parser.add_argument("--small",action="store_true",dest="searchsmall",default=False, parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]") help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--similar",type=int,dest="similarity",default=False,
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity")
parser.add_argument("-x",action="append",dest="exclude",default=[], parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times") help="Exclude folder name from the lists. This option may be issued several times")
parser.add_argument('startpath', action="store",default='.', nargs='?') parser.add_argument('startpath', action="store",default='.', nargs='?')
options=parser.parse_args() options=parser.parse_args()
BADDIRS.extend(options.exclude) BADDIRS.extend(options.exclude)
if options.duplicate or options.searchsmall or options.colors: if options.duplicate or options.searchsmall or options.colors or options.finger:
options.add=not options.add options.add=not options.add
return options return options
@@ -56,7 +60,7 @@ def createdb(sqlfile):
conn.text_factory=str conn.text_factory=str
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\ db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\ file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\
width INTEGER,height INTEGER,\ width INTEGER,height INTEGER,fingerprint TEXT,\
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
conn.commit() conn.commit()
return return
@@ -261,6 +265,72 @@ def get_colors(filename):
border=[float(i) for i in border.strip().strip('"').split(',')] border=[float(i) for i in border.strip().strip('"').split(',')]
return (mean,border) return (mean,border)
def append_fingerprints(sqlfile):
    """Compute and store a fingerprint for every row that lacks one.

    Selects all rows of the ``list`` table whose ``fingerprint`` is NULL,
    runs ``get_fingerprint`` on each file and writes the result back.
    Progress is committed every 50 processed files so an interrupted run
    keeps most of its work.

    Args:
        sqlfile: path of the SQLite database file.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    try:
        db = conn.cursor()
        db.execute("SELECT file FROM list WHERE fingerprint IS NULL ORDER BY file")
        # Materialize the work list first: updating and committing on the same
        # connection while a SELECT cursor is still being iterated is fragile.
        pending = [row[0] for row in db.fetchall()]
        for count, filename in enumerate(pending, 1):
            try:
                fp = get_fingerprint(filename)
            except ValueError:
                # No usable pixel data for this file; leave it NULL and move on
                # instead of aborting the whole run.
                fp = None
            if fp is not None:
                db.execute("UPDATE list SET fingerprint=? WHERE file=?",
                           (fp, filename))
            print(" %(f)s" % {'f': filename})
            if count % 50 == 0:
                conn.commit()
        conn.commit()
    finally:
        conn.close()
def get_fingerprint(filename):
    """Return a bit-pattern fingerprint of an image as a decimal string.

    The image is handed to ImageMagick ``convert``: first frame only,
    resized to 160x160, converted to gray, blurred, normalized, equalized,
    shrunk to 16x16 at depth 1 and dumped in ``TEXT:`` format.  Every output
    line with at least three comma-separated fields contributes one bit,
    which is 1 when the third field is exactly ``"255"`` and 0 otherwise.
    The bit string is then read as a base-2 integer.

    Args:
        filename: path of the image file.

    Returns:
        The fingerprint as a decimal string, or ``None`` when ``convert``
        produced no usable lines (for example an unreadable image).
    """
    small_args = ['convert', '-define', 'jpeg:size=256x256', filename + '[0]',
                  '-resize', '160x160!', '-colorspace', 'Gray', '-blur', '2x2',
                  '-normalize', '-equalize', '-resize', '16x16',
                  '-depth', '1', 'TEXT:-']
    # universal_newlines makes the output text on both Python 2 and 3.
    p = subprocess.Popen(small_args, stdout=subprocess.PIPE,
                         universal_newlines=True)
    output = p.communicate()[0]
    # NOTE(review): any line with three or more comma-separated fields is
    # counted, which may include a header line of the TEXT format.  Kept as
    # is so fingerprints already stored stay comparable -- confirm.
    bits = []
    for line in output.splitlines():
        fields = line.split(',')
        if len(fields) < 3:
            continue
        bits.append('1' if fields[2] == "255" else '0')
    if not bits:
        # int('', 2) would raise ValueError; report "no fingerprint" instead.
        return None
    return str(int(''.join(bits), 2))
def find_fingerprint_nearest(sqlfile, thr):
    """Print pairs of images whose fingerprints are nearly identical.

    For every file in the ``list`` table (ordered by file name) the closest
    other file is searched, where distance is the number of differing bits
    between the two integer fingerprints.  A pair is printed as
    ``"file1" "file2"`` when that distance is strictly below ``thr``; the
    file with the smaller pixel area (width * height) is printed first when
    the match is smaller than the query file.  Files that were picked as a
    best match are not used as a query again.

    Args:
        sqlfile: path of the SQLite database file.
        thr: integer distance threshold; only distances below it match.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    try:
        db = conn.cursor()
        # Rows without a fingerprint cannot be compared (int(None) raises),
        # so they are left out of the search entirely.
        db.execute("SELECT file,fingerprint,width,height FROM list "
                   "WHERE fingerprint IS NOT NULL ORDER BY file")
        # Load once and pre-convert, instead of re-running the query and
        # re-parsing every fingerprint for each outer row.
        images = [(name, int(fp), (width or 0) * (height or 0))
                  for name, fp, width, height in db]
    finally:
        conn.close()

    matched = set()
    for name, fp, area in images:
        if name in matched:
            continue
        best_dist = thr  # only distances strictly below thr may match
        best = None
        for other_name, other_fp, other_area in images:
            if other_name == name:
                continue
            dist = bin(fp ^ other_fp).count('1')
            if dist < best_dist:
                best_dist = dist
                best = (other_name, other_area)
                matched.add(other_name)
        if best is None:
            continue
        # Decide the print order only after the search, so the query name
        # stays intact for the self-comparison skip above.
        first, second = name, best[0]
        if best[1] < area:
            first, second = second, first
        print('"%s" "%s"' % (first, second))
    return
def searchdb(sqlfile,needle): def searchdb(sqlfile,needle):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
@@ -346,8 +416,6 @@ def find_smalls(minsize,sqlfile):
flist.append(('smalls',smalls)) flist.append(('smalls',smalls))
return flist return flist
def print_structure(files): def print_structure(files):
for hash in files: for hash in files:
#print(hash[0]) #print(hash[0])
@@ -378,6 +446,9 @@ def main():
if options.nearestcolor: if options.nearestcolor:
find_color_nearest(options.sqlfile,options.nearestcolor) find_color_nearest(options.sqlfile,options.nearestcolor)
sys.exit(0) sys.exit(0)
if options.similarity:
find_fingerprint_nearest(options.sqlfile,options.similarity)
sys.exit(0)
if options.delete: if options.delete:
print('Deleting entries...') print('Deleting entries...')
delete_nonexisting(options.sqlfile) delete_nonexisting(options.sqlfile)
@@ -387,6 +458,9 @@ def main():
if options.colors: if options.colors:
print('Adding colors...') print('Adding colors...')
append_colors(options.sqlfile) append_colors(options.sqlfile)
if options.finger:
print('Adding fingerprints...')
append_fingerprints(options.sqlfile)
if options.random: if options.random:
print('Random lists...') print('Random lists...')
random_lists(options.sqlfile) random_lists(options.sqlfile)