image similarity metrics
This commit is contained in:
@@ -32,6 +32,8 @@ def setup_options():
|
|||||||
help="Search list based on path pattern")
|
help="Search list based on path pattern")
|
||||||
parser.add_argument("--color",action="store_true",dest="colors",default=False,
|
parser.add_argument("--color",action="store_true",dest="colors",default=False,
|
||||||
help="Append list with mean color information This option will flip the 'Add new files' option. [%(default)s]")
|
help="Append list with mean color information This option will flip the 'Add new files' option. [%(default)s]")
|
||||||
|
parser.add_argument("--finger",action="store_true",dest="finger",default=False,
|
||||||
|
help="Append list with fingerprint information. This option will flip the 'Add new files' option. [%(default)s]")
|
||||||
parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False,
|
parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False,
|
||||||
help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number")
|
help="Search list for nearest mean color. format: R,G,B in float 0-1. Add fourth value to limit search to number")
|
||||||
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
|
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
|
||||||
@@ -40,13 +42,15 @@ def setup_options():
|
|||||||
help="Delete files listed with --small. [%(default)s]")
|
help="Delete files listed with --small. [%(default)s]")
|
||||||
parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
|
parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
|
||||||
help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
|
help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
|
||||||
|
parser.add_argument("--similar",type=int,dest="similarity",default=False,
|
||||||
|
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity")
|
||||||
parser.add_argument("-x",action="append",dest="exclude",default=[],
|
parser.add_argument("-x",action="append",dest="exclude",default=[],
|
||||||
help="Exclude folder name from the lists. This option may be issued several times")
|
help="Exclude folder name from the lists. This option may be issued several times")
|
||||||
parser.add_argument('startpath', action="store",default='.', nargs='?')
|
parser.add_argument('startpath', action="store",default='.', nargs='?')
|
||||||
|
|
||||||
options=parser.parse_args()
|
options=parser.parse_args()
|
||||||
BADDIRS.extend(options.exclude)
|
BADDIRS.extend(options.exclude)
|
||||||
if options.duplicate or options.searchsmall or options.colors:
|
if options.duplicate or options.searchsmall or options.colors or options.finger:
|
||||||
options.add=not options.add
|
options.add=not options.add
|
||||||
return options
|
return options
|
||||||
|
|
||||||
@@ -56,7 +60,7 @@ def createdb(sqlfile):
|
|||||||
conn.text_factory=str
|
conn.text_factory=str
|
||||||
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
||||||
file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\
|
file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\
|
||||||
width INTEGER,height INTEGER,\
|
width INTEGER,height INTEGER,fingerprint TEXT,\
|
||||||
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
|
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
|
||||||
conn.commit()
|
conn.commit()
|
||||||
return
|
return
|
||||||
@@ -261,6 +265,72 @@ def get_colors(filename):
|
|||||||
border=[float(i) for i in border.strip().strip('"').split(',')]
|
border=[float(i) for i in border.strip().strip('"').split(',')]
|
||||||
return (mean,border)
|
return (mean,border)
|
||||||
|
|
||||||
|
def append_fingerprints(sqlfile):
    """Compute and store fingerprints for rows that do not have one yet.

    Selects every ``file`` in table ``list`` whose ``fingerprint`` column is
    NULL, computes its fingerprint with ``get_fingerprint`` and writes the
    value back with an UPDATE keyed on the file name. Each processed file
    name is printed.

    Args:
        sqlfile: Path of the SQLite database file.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    db = conn.cursor()
    try:
        db.execute("SELECT file FROM list WHERE fingerprint IS NULL ORDER BY file")
        # Materialize the work list first: updating and committing on the
        # same connection while a SELECT cursor is still being iterated can
        # reset that cursor on some sqlite3 versions.
        pending = [row[0] for row in db.fetchall()]
        for count, filename in enumerate(pending, 1):
            fp = get_fingerprint(filename)
            db.execute("UPDATE list SET fingerprint=? WHERE file=?",
                       (fp, filename))
            print(" %(f)s" % {'f': filename})
            # Commit in batches so an interrupted run keeps most of its work.
            if count % 50 == 0:
                conn.commit()
    finally:
        # Keep the fingerprints computed so far even if one image failed,
        # then release the database handle.
        conn.commit()
        conn.close()
|
||||||
|
|
||||||
|
def get_fingerprint(filename):
    """Return a black/white thumbnail fingerprint of an image.

    Runs ImageMagick ``convert`` to reduce the image to a 16x16, 1-bit-depth
    grayscale thumbnail dumped in the textual pixel format, then turns the
    output into a bit string: every output line with at least three
    comma-separated fields contributes ``1`` when its third field is exactly
    ``"255"`` and ``0`` otherwise.

    Args:
        filename: Path of the image file. ``[0]`` is appended to it
            (presumably to select the first frame of multi-frame files;
            see the ImageMagick documentation).

    Returns:
        The bit string interpreted as a base-2 integer, formatted as a
        decimal string, or ``None`` when ``convert`` produced no usable
        pixel lines (e.g. unreadable or non-image file).
    """
    convert_args = ['convert', '-define', 'jpeg:size=256x256', filename + '[0]',
                    '-resize', '160x160!', '-colorspace', 'Gray',
                    '-blur', '2x2', '-normalize', '-equalize',
                    '-resize', '16x16', '-depth', '1', 'TEXT:-']
    p = subprocess.Popen(convert_args, stdout=subprocess.PIPE)
    img, _ = p.communicate()
    # On Python 3 the pipe yields bytes; normalize to text before parsing.
    if not isinstance(img, str):
        img = img.decode('ascii', 'replace')

    bits = []
    for line in img.split('\n'):
        fields = line.split(',')
        if len(fields) < 3:
            continue
        bits.append('1' if fields[2] == "255" else '0')

    # int('', 2) would raise ValueError; report "no fingerprint" instead.
    if not bits:
        return None
    return str(int(''.join(bits), 2))
|
||||||
|
|
||||||
|
def find_fingerprint_nearest(sqlfile, thr):
    """Print pairs of images whose fingerprints are nearly identical.

    Reads ``file``, ``fingerprint``, ``width`` and ``height`` from table
    ``list`` (rows without a fingerprint are ignored). For each image, in
    file-name order, the other image with the smallest Hamming distance
    between fingerprints is looked up; if that distance is strictly below
    ``thr`` the pair is printed as ``"<first>" "<second>"``, with the match
    printed first when its pixel area (width*height) is smaller than the
    reference image's, otherwise the reference image first.

    Every image that became the current best match of some reference image
    is not used as a reference image itself later on.

    Args:
        sqlfile: Path of the SQLite database file.
        thr: Exclusive upper bound for the number of differing fingerprint
            bits; lower means more similar.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    try:
        cursor = conn.cursor()
        # Rows that were never fingerprinted hold NULL and cannot be compared.
        cursor.execute("SELECT file,fingerprint,width,height FROM list "
                       "WHERE fingerprint IS NOT NULL ORDER BY file")
        # Load once and parse fingerprints once instead of re-running the
        # query and re-parsing for every reference image.
        images = [(name, int(fp), width * height)
                  for name, fp, width, height in cursor]
    finally:
        conn.close()

    matched = set()
    for name, fp, area in images:
        if name in matched:
            continue
        best_name = None
        best_area = None
        # Starting at thr folds the "below threshold" and "better than the
        # best so far" tests into a single comparison.
        best_dist = thr
        for other_name, other_fp, other_area in images:
            # Always compare against the reference image's own name, so the
            # image is never matched with itself.
            if other_name == name:
                continue
            # Hamming distance: number of bits that differ.
            dist = bin(fp ^ other_fp).count('1')
            if dist < best_dist:
                best_dist = dist
                best_name = other_name
                best_area = other_area
                matched.add(other_name)
        if best_name is None:
            continue
        # Decide the print order only once the final best match is known.
        if best_area < area:
            print('"%s" "%s"' % (best_name, name))
        else:
            print('"%s" "%s"' % (name, best_name))
    return
|
||||||
|
|
||||||
def searchdb(sqlfile,needle):
|
def searchdb(sqlfile,needle):
|
||||||
conn=sqlite3.connect(sqlfile)
|
conn=sqlite3.connect(sqlfile)
|
||||||
conn.text_factory=str
|
conn.text_factory=str
|
||||||
@@ -346,8 +416,6 @@ def find_smalls(minsize,sqlfile):
|
|||||||
flist.append(('smalls',smalls))
|
flist.append(('smalls',smalls))
|
||||||
return flist
|
return flist
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def print_structure(files):
|
def print_structure(files):
|
||||||
for hash in files:
|
for hash in files:
|
||||||
#print(hash[0])
|
#print(hash[0])
|
||||||
@@ -378,6 +446,9 @@ def main():
|
|||||||
if options.nearestcolor:
|
if options.nearestcolor:
|
||||||
find_color_nearest(options.sqlfile,options.nearestcolor)
|
find_color_nearest(options.sqlfile,options.nearestcolor)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
if options.similarity:
|
||||||
|
find_fingerprint_nearest(options.sqlfile,options.similarity)
|
||||||
|
sys.exit(0)
|
||||||
if options.delete:
|
if options.delete:
|
||||||
print('Deleting entries...')
|
print('Deleting entries...')
|
||||||
delete_nonexisting(options.sqlfile)
|
delete_nonexisting(options.sqlfile)
|
||||||
@@ -387,6 +458,9 @@ def main():
|
|||||||
if options.colors:
|
if options.colors:
|
||||||
print('Adding colors...')
|
print('Adding colors...')
|
||||||
append_colors(options.sqlfile)
|
append_colors(options.sqlfile)
|
||||||
|
if options.finger:
|
||||||
|
print('Adding fingerprints...')
|
||||||
|
append_fingerprints(options.sqlfile)
|
||||||
if options.random:
|
if options.random:
|
||||||
print('Random lists...')
|
print('Random lists...')
|
||||||
random_lists(options.sqlfile)
|
random_lists(options.sqlfile)
|
||||||
|
|||||||
Reference in New Issue
Block a user