image_list ready for production.

This commit is contained in:
q
2015-02-11 21:45:34 +02:00
parent f5c6d59bd4
commit cc495f1847
2 changed files with 231 additions and 1005 deletions

View File

@@ -23,6 +23,8 @@ def setup_options():
help="Modify changed files [%(default)s]") help="Modify changed files [%(default)s]")
parser.add_argument("-d",action="store_true",dest="delete",default=False, parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]") help="Delete non-existing entries [%(default)s]")
parser.add_argument("-D",action="store_true",dest="delete_data",default=False,
help="Delete unused metadata [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
help="Print directory sizes. Argument is the path where directories are listed from.") help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
@@ -52,7 +54,9 @@ def setup_options():
"If value is a filename, search similar to that image. "+ "If value is a filename, search similar to that image. "+
"Append with ',value' to limit similarity. "+ "Append with ',value' to limit similarity. "+
"The output columns: SD SimilarityDiff., CD ColorDiff., "+ "The output columns: SD SimilarityDiff., CD ColorDiff., "+
"RD AspectRatioDiff.,Shp SharpnessIndex.") "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.")
parser.add_argument("-t",type=str,dest="tag",default=None,
help="Give file a tag. If argument is a file name, print the tags of the file.")
parser.add_argument("--viewer",type=str,dest="viewer",default=None, parser.add_argument("--viewer",type=str,dest="viewer",default=None,
help="Program to view images, %%f refers to filename(s)."+ help="Program to view images, %%f refers to filename(s)."+
"If '1', defaults to: 'geeqie -l %%f'") "If '1', defaults to: 'geeqie -l %%f'")
@@ -62,21 +66,29 @@ def setup_options():
options=parser.parse_args() options=parser.parse_args()
BADDIRS.extend(options.exclude) BADDIRS.extend(options.exclude)
if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search or options.diskused: if options.duplicate or \
options.searchsmall or \
options.measure or \
options.nearestcolor or \
options.similarity!=None or \
options.search or \
options.diskused:
options.add=not options.add options.add=not options.add
if options.tag:
options.add=False
return options return options
def createdb(sqlfile): def createdb(sqlfile):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
db=conn.cursor() db=conn.cursor()
conn.text_factory=str conn.text_factory=str
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\ db.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,\
file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\ description TEXT,portrait NUMERIC, \
width INTEGER,height INTEGER,size INTEGER,\ width INTEGER,height INTEGER,\
fingerprint TEXT,sharpness NUMERIC,\ fingerprint TEXT,sharpness NUMERIC,\
R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
db.execute('CREATE TABLE descriptions (file TEXT,path TEXT, hash TEXT,\ db.execute('CREATE TABLE list (file TEXT,hash TEXT,date INTEGER,size INTEGER)')
description TEXT)') db.execute('CREATE TABLE tags (tag TEXT,hash TEXT)')
conn.commit() conn.commit()
return return
@@ -94,6 +106,21 @@ def delete_nonexisting(sqlfile):
conn.commit() conn.commit()
return return
def delete_data(sqlfile):
    ''' Remove metadata and tags whose hash no longer appears in `list`.

    sqlfile is the path of the SQLite database.  Rows of `data` and of
    `tags` are deleted when their hash is not referenced by any row of
    `list`.  Each table is committed separately, so a failure on `tags`
    does not undo the cleanup of `data`.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        # One statement per table: the EXCEPT sub-select yields exactly the
        # orphaned hashes, so no Python-side loop is needed.
        db.execute("DELETE FROM data WHERE hash IN "
                   "(SELECT hash FROM data EXCEPT SELECT hash FROM list)")
        conn.commit()
        db.execute("DELETE FROM tags WHERE hash IN "
                   "(SELECT hash FROM tags EXCEPT SELECT hash FROM list)")
        conn.commit()
    finally:
        # Always release the database handle, even if a statement fails.
        conn.close()
    return
def delete_files(files): def delete_files(files):
''' Actually deletes files! ''' ''' Actually deletes files! '''
print_structure(files) print_structure(files)
@@ -122,7 +149,7 @@ def add_recurse(options):
for file in files: for file in files:
if IMGMATCH.match(file): if IMGMATCH.match(file):
filename=os.path.realpath(os.path.join(path,file)) filename=os.path.realpath(os.path.join(path,file))
#if not is_listed(db,filename):
if file not in db_files: if file not in db_files:
if options.add: if options.add:
try: try:
@@ -133,7 +160,7 @@ def add_recurse(options):
sys.exit(1) sys.exit(1)
else: else:
if options.changed: if options.changed:
ftime=os.path.getmtime(filename) ftime=int(os.path.getmtime(filename))
#hash=get_md5(filename) #hash=get_md5(filename)
#if not hash_match(db,filename,hash): #if not hash_match(db,filename,hash):
if not ftime_match(db,filename,ftime): if not ftime_match(db,filename,ftime):
@@ -160,28 +187,61 @@ def add_single(conn,filename,change=False,hash=None,minsize=0):
db=conn.cursor() db=conn.cursor()
if hash==None: if hash==None:
hash=get_md5(filename) hash=get_md5(filename)
ftime=os.path.getmtime(filename) ftime=int(os.path.getmtime(filename))
fsize=os.path.getsize(filename) fsize=os.path.getsize(filename)
if change: if change:
db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=?, \ db.execute("UPDATE list SET hash=?, date=? ,size=? \
fingerprint=NULL, sharpness=NULL, R=NULL, G=NULL, B=NULL, BR=NULL, BG=NULL, BB=NULL, \ WHERE file=?",(hash,ftime,fsize,filename))
size=? WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],fsize,filename))
print("changing: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) print("changing: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]})
else: else:
db.execute("INSERT INTO list(file,date,portrait,hash,width,height,size)\ db.execute("INSERT INTO list(file,hash,size,date)\
VALUES(?,?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1],fsize)) VALUES(?,?,?,?)",(filename,hash,fsize,ftime))
print("adding: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) print("adding: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]})
if hash_in_data(conn.cursor(),hash):
if change:
db.execute("UPDATE data SET portrait=?, width=? ,height=?, \
fingerprint=NULL, sharpness=NULL, R=NULL, G=NULL, B=NULL, BR=NULL, BG=NULL, BB=NULL \
WHERE hash = ?",(portrait,dims[0],dims[1],hash))
else:
db.execute("INSERT INTO data(hash,portrait,width,height) \
VALUES(?,?,?,?)",(hash,portrait,dims[0],dims[1]))
return return
def add_tag(options):
    ''' Attach options.tag to the image given as options.startpath.

    The image is looked up in `list` by its real path; when it is not
    listed a message is printed and nothing is stored.  Otherwise a
    (hash, tag) row is inserted into `tags` and committed.
    '''
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        target = os.path.realpath(options.startpath)
        image_hash = file2hash(conn.cursor(), target)
        if image_hash is None:
            print("Image not found " + target)
            return
        db = conn.cursor()
        db.execute("INSERT INTO tags(hash,tag) VALUES(?,?)",
                   (image_hash, options.tag))
        conn.commit()
        print(options.startpath + ":\"" + options.tag + "\"")
    finally:
        # Close the handle on every path, including the early return.
        conn.close()
def print_tag(options):
    ''' Print the distinct tags of the image named by options.tag.

    options.tag is treated as a file name here: it is resolved to a real
    path and looked up in `list`.  When the image is not listed a message
    is printed; otherwise its tags are printed comma-separated on one line.
    '''
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        target = os.path.realpath(options.tag)
        image_hash = file2hash(conn.cursor(), target)
        if image_hash is None:
            print("Image not found " + target)
            return
        db = conn.cursor()
        db.execute("SELECT DISTINCT tag FROM tags WHERE hash = ?", (image_hash,))
        print(",".join(row[0] for row in db))
    finally:
        # Close the handle on every path, including the early return.
        conn.close()
def random_lists(sqlfile): def random_lists(sqlfile):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
db.execute('SELECT file FROM list WHERE portrait=0') db.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=0')
lfile=open('landscape.list.s','w') lfile=open('landscape.list.s','w')
for row in db: for row in db:
lfile.write(row[0]+'\n') lfile.write(row[0]+'\n')
db.execute('SELECT file FROM list WHERE portrait=1') db.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=1')
pfile=open('portrait.list.s','w') pfile=open('portrait.list.s','w')
for row in db: for row in db:
pfile.write(row[0]+'\n') pfile.write(row[0]+'\n')
@@ -211,7 +271,7 @@ def get_folder_contents(db,path):
return files return files
def ftime_match(db,filename,ftime): def ftime_match(db,filename,ftime):
db.execute("SELECT date FROM list where file == ?",(filename,)) db.execute("SELECT date FROM list WHERE file == ?",(filename,))
count=db.fetchall() count=db.fetchall()
return count[0][0]==ftime return count[0][0]==ftime
@@ -220,9 +280,35 @@ def hash_match(db,filename,hash):
count=db.fetchall() count=db.fetchall()
return count[0][0]==hash return count[0][0]==hash
def hash2file(db, hash, firstOnly=True):
    ''' Map a content hash to the file name(s) listed for it.

    db is a cursor on the image database.  With firstOnly (the default)
    the first matching file name is returned; otherwise a list of all
    matching file names.  Returns None when no file has that hash.
    '''
    db.execute("SELECT file FROM list where hash == ?", (hash,))
    if firstOnly:
        # Only one row is needed, so do not pull the whole result set.
        row = db.fetchone()
        return None if row is None else row[0]
    names = [row[0] for row in db.fetchall()]
    return names if names else None
def file2hash(db, filename):
    ''' Return the hash stored in `list` for filename, or None if unlisted.

    db is a cursor on the image database; filename must match the stored
    `file` value exactly.
    '''
    db.execute("SELECT hash FROM list where file == ? LIMIT 1", (filename,))
    row = db.fetchone()
    return None if row is None else row[0]
def hash_in_data(db, hash):
    ''' Return True when the `data` table has a row for this hash. '''
    db.execute("SELECT hash FROM data where hash == ? LIMIT 1", (hash,))
    return db.fetchone() is not None
def hash_in_list(db, hash):
    ''' Return True when at least one row of `list` carries this hash. '''
    db.execute("SELECT hash FROM list where hash == ? LIMIT 1", (hash,))
    return db.fetchone() is not None
def get_md5(filename): def get_md5(filename):
''' Return hash of the first 5 megabytes of the file ''' ''' Return hash of the first 15 megabytes of the file '''
return hashlib.md5(open(filename,'rb').read(1024*1024*5)).hexdigest() return hashlib.md5(open(filename,'rb').read(1024*1024*15)).hexdigest()
def get_dims(filename): def get_dims(filename):
idargs=['identify','-format','%wx%h',filename+'[0]'] idargs=['identify','-format','%wx%h',filename+'[0]']
@@ -243,22 +329,24 @@ def append_colors(sqlfile):
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
dbh=conn.cursor() dbh=conn.cursor()
db.execute("SELECT file,R FROM list WHERE R IS NULL ORDER BY file") count=dbh.execute("SELECT COUNT(hash) FROM data WHERE R IS NULL").fetchall()[0][0]
i=0 db.execute("SELECT hash,R FROM data WHERE R IS NULL")
dirname_old="" dirname_old=""
for row in db: for i,row in enumerate(db):
colors=get_colors(row[0]) filename=hash2file(conn.cursor(),row[0])
dbh.execute("UPDATE list SET R=?, G=?, B=?, BR=?, BG=?, BB=? \ if filename==None:
WHERE file=?",(colors[0][0],colors[0][1],colors[0][2], continue
colors=get_colors(filename)
dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \
WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2],
colors[1][0],colors[1][1],colors[1][2],row[0])) colors[1][0],colors[1][1],colors[1][2],row[0]))
filename=" "+os.path.basename(row[0]) filebase=" "+os.path.basename(filename)
dirname=os.path.dirname(row[0]) dirname=os.path.dirname(filename)
if dirname!=dirname_old: if dirname!=dirname_old:
dirname_old=dirname dirname_old=dirname
filename=row[0] filebase=filename
print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f':filename, 'r':colors[0][0], print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0],
'g':colors[0][1], 'b':colors[0][2]}) 'g':colors[0][1], 'b':colors[0][2]})
i+=1
if (i%50==0): if (i%50==0):
conn.commit(); conn.commit();
conn.commit() conn.commit()
@@ -273,19 +361,24 @@ def find_color_nearest(opts):
src=[float(i) for i in src.strip().strip('"').split(',')] src=[float(i) for i in src.strip().strip('"').split(',')]
if len(src)==3: if len(src)==3:
src.append(1) src.append(1)
db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
(src[0],src[1],src[2],src[3])) (src[0],src[1],src[2],src[3]))
hits=[] hits=[]
for hit in db: for hit in db:
hits.append(hit) fs=hash2file(conn.cursor(),hit[0],False)
if fs==None:
continue
if hit[1]==None:
print("Color information not found. Run again with --measure.")
return
for f in fs:
hits.append((f,hit[1],hit[2],hit[3],hit[4]))
file_len=str(max([len(x[0]) for x in hits])) file_len=str(max([len(x[0]) for x in hits]))
for c in range(len(hits)): for h in range(len(hits)):
print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], if h>=src[3]:
hits[c][1], break
hits[c][2], print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]))
hits[c][3],
hits[c][4]))
if opts.viewer: if opts.viewer:
fnames=[x[0] for x in hits] fnames=[x[0] for x in hits]
call_viewer(opts, fnames) call_viewer(opts, fnames)
@@ -295,41 +388,49 @@ def find_color_nearest_file(opts):
""" Find closest matching images to given files with RGB color """ """ Find closest matching images to given files with RGB color """
cmp=opts.nearestcolor.rsplit(",",1) cmp=opts.nearestcolor.rsplit(",",1)
if len(cmp)==1: if len(cmp)==1:
thr=1 thr=2
else: else:
thr=int(cmp[1]) thr=int(cmp[1])+1
cmp=cmp[0] cmp=os.path.realpath(cmp[0])
conn=sqlite3.connect(opts.sqlfile) conn=sqlite3.connect(opts.sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
if is_listed(db, cmp): if is_listed(db, cmp):
db1.execute("SELECT file,fingerprint,sharpness,width,height,BR,BG,BB FROM list WHERE file=?",(cmp,)) hash=file2hash(conn.cursor(), cmp)
for hit1 in db: db1=conn.cursor()
db1.execute("SELECT hash,fingerprint,sharpness,width,height,BR,BG,BB FROM data WHERE hash=?",(hash,))
for hit1 in db1:
fp=int(hit1[1]) fp=int(hit1[1])
sp=hit1[2] sp=hit1[2]
dims=hit1[3:5] dims=hit1[3:5]
src=hit1[5:8] src=hit1[5:8]
else: else:
hash=get_md5(cmp)
fp=int(get_fingerprint(cmp)) fp=int(get_fingerprint(cmp))
sp=get_sharpness(cmp) sp=get_sharpness(cmp)
dims=get_dims(cmp) dims=get_dims(cmp)
src=get_colors(cmp)[1] src=get_colors(cmp)[1]
src=[float(i) for i in src] src=[float(i) for i in src]
db.execute("SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM list ORDER BY K LIMIT ?", db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
(src[0],src[1],src[2],thr)) (src[0],src[1],src[2],thr))
hits=[] hits=[]
for hit in db: for hit in db:
if hit[0]==cmp: fs=hash2file(conn.cursor(),hit[0],False)
if fs==None:
continue continue
hits.append(hit) if hit[1]==None:
print("Color information not found. Run again with --measure.")
return
for f in fs:
if f==cmp:
continue
hits.append((f,hit[1],hit[2],hit[3],hit[4]))
file_len=str(max([len(x[0]) for x in hits])) file_len=str(max([len(x[0]) for x in hits]))
for c in range(len(hits)): for h in range(len(hits)):
print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(hits[c][0], if h>=thr-1:
hits[c][1], break
hits[c][2], print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]) )
hits[c][3],
hits[c][4]))
if opts.viewer: if opts.viewer:
fnames=[x[0] for x in hits] fnames=[x[0] for x in hits]
call_viewer(opts, fnames) call_viewer(opts, fnames)
@@ -371,20 +472,22 @@ def append_fingerprints(sqlfile):
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
dbh=conn.cursor() dbh=conn.cursor()
db.execute("SELECT file FROM list WHERE fingerprint IS NULL ORDER BY file") count=dbh.execute("SELECT COUNT(hash) FROM data WHERE fingerprint IS NULL").fetchall()[0][0]
i=0 db.execute("SELECT hash FROM data WHERE fingerprint IS NULL")
dirname_old="" dirname_old=""
for row in db: for i,row in enumerate(db):
fp=get_fingerprint(row[0]) filename=hash2file(conn.cursor(),row[0])
dbh.execute("UPDATE list SET fingerprint=? \ if filename==None:
WHERE file=?",(fp,row[0])) continue
filename=" "+os.path.basename(row[0]) fp=get_fingerprint(filename)
dirname=os.path.dirname(row[0]) dbh.execute("UPDATE data SET fingerprint=? \
WHERE hash=?",(fp,row[0]))
filebase=" "+os.path.basename(filename)
dirname=os.path.dirname(filename)
if dirname!=dirname_old: if dirname!=dirname_old:
dirname_old=dirname dirname_old=dirname
filename=row[0] filebase=filename
i+=1 print("%(nr)i %(f)s" % {'f':filebase, 'nr':count-i})
print("%(nr)i %(f)s" % {'f':filename, 'nr':i})
if (i%50==0): if (i%50==0):
conn.commit(); conn.commit();
conn.commit() conn.commit()
@@ -413,21 +516,24 @@ def find_fingerprint_similar(opts):
conn.text_factory=str conn.text_factory=str
db1=conn.cursor() db1=conn.cursor()
db2=conn.cursor() db2=conn.cursor()
db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
hits=[] hits=[]
hit_list=[] hit_list=[]
i=None i=None
for i,hit1 in enumerate(db1): for i,hit1 in enumerate(db1):
if hit1[0] in hit_list:
continue
cmp=hit1[0] cmp=hit1[0]
cmpf=hash2file(conn.cursor(),hit1[0])
if cmpf==None:
continue
if cmpf in hit_list:
continue
fp=int(hit1[1]) fp=int(hit1[1])
sp=hit1[2] sp=hit1[2]
dims=hit1[3:5] dims=hit1[3:5]
pixels=dims[0]*dims[1] pixels=dims[0]*dims[1]
colors=hit1[5:8] colors=hit1[5:8]
db2.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") db2.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
this1=[ [cmp, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ] this1=[ [cmpf, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ]
for hit2 in db2: for hit2 in db2:
if hit2[0]==cmp: if hit2[0]==cmp:
continue continue
@@ -438,8 +544,14 @@ def find_fingerprint_similar(opts):
get_color_diff(hit2[5:8],colors), get_color_diff(hit2[5:8],colors),
int(hit2[3])*int(hit2[4]), int(hit2[3])*int(hit2[4]),
get_ratio_diff(hit2[3:5],dims)] get_ratio_diff(hit2[3:5],dims)]
this1.append(this2) fs=hash2file(conn.cursor(), hit2[0], False)
hit_list.append(hit2[0]) if fs==None:
continue
for f in fs:
thisf=this2
thisf[0]=f
this1.append(thisf)
hit_list.append(f)
this1.sort(key=lambda x: x[1]) this1.sort(key=lambda x: x[1])
if len(this1)>1: if len(this1)>1:
hits.append(this1) hits.append(this1)
@@ -473,28 +585,30 @@ def find_fingerprint_nearest(opts):
conn.text_factory=str conn.text_factory=str
db1=conn.cursor() db1=conn.cursor()
if is_listed(db1, cmp): if is_listed(db1, cmp):
db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE file=?",(cmp,)) hash=file2hash(conn.cursor(),cmp)
db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,))
for hit1 in db1: for hit1 in db1:
fp=int(hit1[1]) fp=int(hit1[1])
sp=hit1[2] sp=hit1[2]
dims=hit1[3:5] dims=hit1[3:5]
colors=hit1[5:8] colors=hit1[5:8]
else: else:
hash=get_md5(cmp)
fp=int(get_fingerprint(cmp)) fp=int(get_fingerprint(cmp))
sp=get_sharpness(cmp) sp=get_sharpness(cmp)
dims=get_dims(cmp) dims=get_dims(cmp)
colors=get_colors(cmp)[0] colors=get_colors(cmp)[0]
db1.execute("SELECT file,fingerprint,sharpness,width,height,R,G,B FROM list WHERE sharpness > 0 ORDER BY file") db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
this=['',thr,0,0,0,0,0] this=['',thr,0,0,0,0,0]
hit1=None hit1=None
for i,hit1 in enumerate(db1): for i,hit1 in enumerate(db1):
if hit1[0] == cmp: if hit1[0] == hash:
continue continue
similarity=bin(fp^int(hit1[1])).count('1') similarity=bin(fp^int(hit1[1])).count('1')
if similarity<this[1]: if similarity<this[1]:
this[1]=similarity
this[0]=hit1[0] this[0]=hit1[0]
this[1]=similarity
this[2]=hit1[2] this[2]=hit1[2]
this[3]=int(hit1[3]) this[3]=int(hit1[3])
this[4]=int(hit1[4]) this[4]=int(hit1[4])
@@ -506,6 +620,7 @@ def find_fingerprint_nearest(opts):
if this[1]==thr: if this[1]==thr:
print("No similarities < "+str(thr)+": "+cmp, file=sys.stderr) print("No similarities < "+str(thr)+": "+cmp, file=sys.stderr)
return return
this[0]=hash2file(conn.cursor(),this[0])
file_len=str(max(len(cmp), len(this[0]))) file_len=str(max(len(cmp), len(this[0])))
print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H")) print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1])) print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
@@ -516,27 +631,28 @@ def find_fingerprint_nearest(opts):
call_viewer(opts, (cmp,this[0])) call_viewer(opts, (cmp,this[0]))
def append_sharpness(sqlfile): def append_sharpness(sqlfile):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
dbh=conn.cursor() dbh=conn.cursor()
db.execute("SELECT file FROM list WHERE sharpness IS NULL ORDER BY file") count=dbh.execute("SELECT COUNT(hash) FROM data WHERE sharpness IS NULL").fetchall()[0][0]
i=0 db.execute("SELECT hash FROM data WHERE sharpness IS NULL")
dirname_old="" dirname_old=""
for row in db: for i,row in enumerate(db):
sp=get_sharpness(row[0]) filename=hash2file(conn.cursor(),row[0])
dbh.execute("UPDATE list SET sharpness=? \ if filename==None:
WHERE file=?",(sp,row[0])) continue
filename=" "+os.path.basename(row[0]) sp=get_sharpness(filename)
dirname=os.path.dirname(row[0]) dbh.execute("UPDATE data SET sharpness=? \
WHERE hash=?",(sp,row[0]))
filebase=" "+os.path.basename(filename)
dirname=os.path.dirname(filename)
if dirname!=dirname_old: if dirname!=dirname_old:
dirname_old=dirname dirname_old=dirname
filename=row[0] filebase=filename
i+=1 print("%(nr)i %(f)s %(s)f" % {'f':filebase, 'nr':count-i,'s':sp})
print("%(nr)i %(f)s %(s)f" % {'f':filename, 'nr':i,'s':sp}) if (i%25==0):
if (i%50==0):
conn.commit(); conn.commit();
conn.commit() conn.commit()
@@ -561,11 +677,12 @@ def searchdb(sqlfile,needle):
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
dbh=conn.cursor() dbh=conn.cursor()
db.execute("SELECT file,width,height,date FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',)) db.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',))
results=[] results=[]
flist=[] flist=[]
for row in db: for row in db:
results.append(row) data=dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(row[1],)).fetchall()
results.append([row[0], data[0][1],data[0][2]])
flist.append(('search',results)) flist.append(('search',results))
return flist return flist
@@ -616,14 +733,17 @@ def find_duplicates(sqlfile,search):
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
dbh=conn.cursor() dbh=conn.cursor()
dbf=conn.cursor()
db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,)) db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,))
duphash=[] duphash=[]
for row in db: for row in db:
hash=row[0] hash=row[0]
dbh.execute("SELECT file,width,height,date FROM list WHERE hash = ?",(hash,)) dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(hash,))
flist=[] flist=[]
for row in dbh: for h in dbh:
flist.append(row) dbf.execute("SELECT file,date FROM list WHERE hash = ?",(hash,))
for f in dbf:
flist.append([f[0],h[1],h[2],f[1]])
flist.sort(key=lambda file: file[3]) flist.sort(key=lambda file: file[3])
duphash.append((hash, flist)) duphash.append((hash, flist))
duphash.sort(key=lambda file: file[1][0]) duphash.sort(key=lambda file: file[1][0])
@@ -633,11 +753,15 @@ def find_smalls(minsize,sqlfile):
conn=sqlite3.connect(sqlfile) conn=sqlite3.connect(sqlfile)
conn.text_factory=str conn.text_factory=str
db=conn.cursor() db=conn.cursor()
db.execute("SELECT file,width,height FROM list WHERE width < ? OR height < ?",(minsize,minsize)) db.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?",(minsize,minsize))
smalls=[] smalls=[]
flist=[] flist=[]
for row in db: for row in db:
smalls.append(row) fs=hash2file(conn.cursor(), row[0], False)
if fs==None:
continue
for f in fs:
smalls.append([f, row[1], row[2]])
flist.append(('smalls',smalls)) flist.append(('smalls',smalls))
return flist return flist
@@ -667,10 +791,8 @@ def disk_used(options):
def print_structure(files): def print_structure(files):
for hash in files: for hash in files:
#print(hash[0]) #print(hash[0])
i=1 for i,f in enumerate(hash[1]):
for f in hash[1]: print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i+1, 'f':f[0], 'x':f[1], 'y':f[2]})
print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i, 'f':f[0], 'x':f[1], 'y':f[2]})
i+=1
return return
def print_dup_structure(files,opts): def print_dup_structure(files,opts):
@@ -723,6 +845,9 @@ def main():
if options.delete: if options.delete:
print('Deleting entries...') print('Deleting entries...')
delete_nonexisting(options.sqlfile) delete_nonexisting(options.sqlfile)
if options.delete_data:
print('Deleting metadata...')
delete_data(options.sqlfile)
if options.add or options.changed: if options.add or options.changed:
print('Adding entries...') print('Adding entries...')
add_recurse(options) add_recurse(options)
@@ -763,7 +888,11 @@ def main():
if options.diskused: if options.diskused:
disk_used(options) disk_used(options)
#print(files) #print(files)
if options.tag:
if options.startpath==".":
print_tag(options)
else:
add_tag(options)
sys.exit(0) sys.exit(0)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,903 +0,0 @@
#!/usr/bin/python
from __future__ import print_function
import sys
import os
import re
import sqlite3
import subprocess
import hashlib
import traceback
from argparse import ArgumentParser
# Default database file name; used as the default of the -f option.
SQLFILE = 'list_of_images.sqlite'
DESCFILE = 'descriptions.csv'
# Case-insensitive match for the image file names that get listed.
# Raw string so that "\." is a regex escape, not an invalid string escape.
IMGMATCH = re.compile(r'.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$', re.I)
# Folder names to exclude; extended at start-up with every -x argument.
BADDIRS = ['_tn', '_med']
# Default of the -m option (minimum pixel width/height).
MINSIZE = 0
def setup_options():
    ''' Parse the command line and return the options namespace.

    Side effect: every -x value is appended to the module-level BADDIRS.

    After parsing, options.add is adjusted:
      * any query-style option (--dup, --small, --measure, --nearest,
        --similar, -s, --du) flips it, so those modes do not add files
        unless -a is also given;
      * -t always forces it to False.
    '''
    parser = ArgumentParser(description="Maintains the list of images sqlite file")
    parser.add_argument("-a", action="store_false", dest="add", default=True,
                        help="Do not add new files [%(default)s]")
    parser.add_argument("-c", action="store_true", dest="changed", default=False,
                        help="Modify changed files [%(default)s]")
    parser.add_argument("-d", action="store_true", dest="delete", default=False,
                        help="Delete non-existing entries [%(default)s]")
    parser.add_argument("-D", action="store_true", dest="delete_data", default=False,
                        help="Delete unused metadata [%(default)s]")
    parser.add_argument("--du", type=str, action='store', dest="diskused", default=False,
                        help="Print directory sizes. Argument is the path where directories are listed from.")
    # NOTE(review): type=str but the default is the int 1, so the attribute
    # is an int unless the flag is given.  Left as-is because the consumer
    # is not visible here.
    parser.add_argument("--du-depth", type=str, action='store', dest="diskused_depth", default=1,
                        help="Depth of summarization for --du.")
    parser.add_argument("-f", action="store", dest="sqlfile", default=SQLFILE,
                        help="SQL file name to use [%(default)s]")
    parser.add_argument("-l", action="store_true", dest="symlinks", default=False,
                        help="Follow symbolic links [%(default)s]")
    parser.add_argument("-m", type=int, dest="minsize", default=MINSIZE,
                        help="Minimum pixel width/height of stored image [%(default)s]")
    parser.add_argument("-r", action="store_true", dest="random", default=False,
                        help="Create randomized files for landscape and portrait images [%(default)s]")
    parser.add_argument("-s", type=str, dest="search", default=False,
                        help="Search list based on path pattern")
    parser.add_argument("--measure", action="store_true", dest="measure", default=False,
                        help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--nearest", type=str, dest="nearestcolor", default=False,
                        help="Search list for nearest ambient color. format: R,G,B in float 0-1. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.")
    parser.add_argument("--dup", action="store_true", dest="duplicate", default=False,
                        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--del", action="store_true", dest="deleteFiles", default=False,
                        help="Delete files listed with --small. [%(default)s]")
    parser.add_argument("--small", action="store_true", dest="searchsmall", default=False,
                        help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--similar", type=str, dest="similarity", default=None,
                        help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "+
                        "If value is a filename, search similar to that image. "+
                        "Append with ',value' to limit similarity. "+
                        "The output columns: SD SimilarityDiff., CD ColorDiff., "+
                        "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.")
    parser.add_argument("-t", type=str, dest="tag", default=None,
                        help="Give file a tag. If argument is a file name, print the tags of the file.")
    parser.add_argument("--viewer", type=str, dest="viewer", default=None,
                        help="Program to view images, %%f refers to filename(s)."+
                        "If '1', defaults to: 'geeqie -l %%f'")
    parser.add_argument("-x", action="append", dest="exclude", default=[],
                        help="Exclude folder name from the lists. This option may be issued several times.")
    parser.add_argument('startpath', action="store", default='.', nargs='?')

    options = parser.parse_args()
    BADDIRS.extend(options.exclude)

    # --similar is compared against None explicitly so that an empty-string
    # argument still counts as "given".
    query_mode = (
        options.duplicate
        or options.searchsmall
        or options.measure
        or options.nearestcolor
        or options.similarity is not None
        or options.search
        or options.diskused
    )
    if query_mode:
        options.add = not options.add
    if options.tag:
        options.add = False
    return options
def createdb(sqlfile):
    ''' Create the tables of a new image database in sqlfile.

    Three tables are created:
      data -- per-hash image metadata (dimensions, fingerprint, colors, ...)
      list -- one row per file: name, hash, date and size
      tags -- (tag, hash) pairs
    The CREATE statements are not guarded with IF NOT EXISTS, so calling
    this on a database that already has these tables raises
    sqlite3.OperationalError.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,'
                   'description TEXT,portrait NUMERIC, '
                   'width INTEGER,height INTEGER,'
                   'fingerprint TEXT,sharpness NUMERIC,'
                   'R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
        db.execute('CREATE TABLE list (file TEXT,hash TEXT,date INTEGER,size INTEGER)')
        db.execute('CREATE TABLE tags (tag TEXT,hash TEXT)')
        conn.commit()
    finally:
        # Release the database handle even when a CREATE fails.
        conn.close()
    return
def delete_nonexisting(sqlfile):
    ''' Drop rows of `list` whose file is no longer present on disk.

    sqlfile is the path of the SQLite database.  Each removed file name is
    printed.  Only `list` is touched; metadata in `data` is left alone.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        # Read every file name before deleting anything: changing a table
        # while a SELECT on it is still being stepped is not well defined.
        db.execute('SELECT file FROM list')
        listed = [row[0] for row in db.fetchall()]
        for filename in listed:
            if not os.path.exists(filename):
                print('removing.. ' + filename)
                db.execute("DELETE FROM list where file == ?", (filename,))
        conn.commit()
    finally:
        # Always release the database handle.
        conn.close()
    return
def delete_data(sqlfile):
    ''' Remove metadata and tags whose hash no longer appears in `list`.

    sqlfile is the path of the SQLite database.  Rows of `data` and of
    `tags` are deleted when their hash is not referenced by any row of
    `list`.  Each table is committed separately, so a failure on `tags`
    does not undo the cleanup of `data`.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        # One statement per table: the EXCEPT sub-select yields exactly the
        # orphaned hashes, so no Python-side loop is needed.
        db.execute("DELETE FROM data WHERE hash IN "
                   "(SELECT hash FROM data EXCEPT SELECT hash FROM list)")
        conn.commit()
        db.execute("DELETE FROM tags WHERE hash IN "
                   "(SELECT hash FROM tags EXCEPT SELECT hash FROM list)")
        conn.commit()
    finally:
        # Always release the database handle, even if a statement fails.
        conn.close()
    return
def delete_files(files):
    ''' Actually deletes files!

    The candidates are first shown with print_structure() and the user is
    asked to confirm; nothing is removed without a positive answer.
    files is iterated as groups whose element [1] is a sequence of entries,
    each entry holding the file path at index 0.
    '''
    print_structure(files)
    if not confirm(prompt="Sure to delete these files?"):
        return
    print("now delling")
    for group in files:
        for entry in group[1]:
            path = entry[0]
            print(path)
            os.remove(path)
    return
def _add_or_exit(conn, filename, change, minsize):
    ''' Run add_single(); on failure report the file, print the traceback and exit with status 1. '''
    try:
        add_single(conn, filename, change=change, minsize=minsize)
    except Exception:
        verb = 'changing' if change else 'adding'
        print('error ' + verb + ' file: ' + filename)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)


def add_recurse(options):
    ''' Walk options.startpath and bring the database in line with the disk.

    For every file name matching IMGMATCH:
      * not yet listed for its folder and options.add set -> add it;
      * already listed and options.changed set -> re-read it when the
        stored modification time differs from the file's.
    Any failure while adding/changing a file aborts the program with exit
    status 1.  All changes are committed once, after the walk.
    '''
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
            print('Checking ' + path)
            # Slice-assign so that os.walk itself sees the filtered, sorted
            # folder list; rebinding the name would leave its copy untouched.
            # This is correct whether clean_dirs() (defined elsewhere)
            # filters in place or returns a new list.
            dirs[:] = sorted(clean_dirs(dirs))
            if not options.symlinks:
                files = clean_syms(files)
            files.sort()
            db_files = get_folder_contents(db, os.path.realpath(path) + '/')
            for name in files:
                if not IMGMATCH.match(name):
                    continue
                filename = os.path.realpath(os.path.join(path, name))
                if name not in db_files:
                    if options.add:
                        _add_or_exit(conn, filename, False, options.minsize)
                elif options.changed:
                    ftime = int(os.path.getmtime(filename))
                    # A differing mtime is taken as "content changed"; an
                    # unchanged mtime means the entry is left alone.
                    if not ftime_match(db, filename, ftime):
                        _add_or_exit(conn, filename, True, options.minsize)
        conn.commit()
    finally:
        # Release the database handle on every exit path.
        conn.close()
    return
def add_single(conn,filename,change=False,hash=None,minsize=0):
    """Insert or refresh the DB rows describing one image file.

    conn:     open sqlite3 connection (the caller commits).
    filename: path of the image.
    change:   True updates the existing `list` row, False inserts a new one.
    hash:     precomputed hash of the file; computed with get_md5 when None.
    minsize:  images whose width AND height are both below this are skipped.

    A `data` row is inserted for a hash not seen before. When the hash is
    already known and `change` is set, its measurements are reset to NULL.
    """
    dims = get_dims(filename)
    width, height = int(dims[0]), int(dims[1])
    portrait = 1 if width < height else 0
    if width < minsize and height < minsize:
        print(filename+" too small (%s)" % dims)
        return
    db = conn.cursor()
    if hash is None:
        hash = get_md5(filename)
    ftime = int(os.path.getmtime(filename))
    fsize = os.path.getsize(filename)
    shown = {'f': filename, 'x': dims[0], 'y': dims[1]}
    if change:
        db.execute("UPDATE list SET hash=?, date=? ,size=? "
                   "WHERE file=?", (hash, ftime, fsize, filename))
        print("changing: %(f)s (%(x)sx%(y)s)" % shown)
    else:
        db.execute("INSERT INTO list(file,hash,size,date)"
                   "VALUES(?,?,?,?)", (filename, hash, fsize, ftime))
        print("adding: %(f)s (%(x)sx%(y)s)" % shown)
    if not hash_in_data(conn.cursor(), hash):
        db.execute("INSERT INTO data(hash,portrait,width,height) "
                   "VALUES(?,?,?,?)", (hash, portrait, dims[0], dims[1]))
    elif change:
        # Known hash on a changed file: drop the stale measurements.
        db.execute("UPDATE data SET portrait=?, width=? ,height=?, "
                   "fingerprint=NULL, sharpness=NULL, R=NULL, G=NULL, B=NULL, BR=NULL, BG=NULL, BB=NULL "
                   "WHERE hash = ?", (portrait, dims[0], dims[1], hash))
    return
def add_tag(options):
    """Store options.tag for the image at options.startpath.

    The image is looked up by its real path; when it is not in the database
    a message is printed and nothing is written.
    """
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    target = os.path.realpath(options.startpath)
    image_hash = file2hash(conn.cursor(), target)
    if image_hash is None:
        print("Image not found "+target)
        return
    writer = conn.cursor()
    writer.execute("INSERT INTO tags(hash,tag) "
                   "VALUES(?,?)", (image_hash, options.tag))
    conn.commit()
    print(options.startpath+":\""+options.tag+"\"")
def print_tag(options):
    """Print the distinct tags of the image named by options.tag.

    Here options.tag holds a file name. The tags stored for that file's
    hash are printed on one line, separated by commas.
    """
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    target = os.path.realpath(options.tag)
    image_hash = file2hash(conn.cursor(), target)
    if image_hash is None:
        print("Image not found "+target)
        return
    tag_rows = conn.cursor()
    tag_rows.execute("SELECT DISTINCT tag FROM tags WHERE hash = ?", (image_hash,))
    print(",".join([record[0] for record in tag_rows]))
def random_lists(sqlfile):
    """Write shuffled file lists split by image orientation.

    Creates `landscape.list` and `portrait.list` in the current directory.
    The names are first dumped to temporary `*.list.s` files, shuffled with
    the external `sort -R` command, and the temporary files are removed.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    cursor = conn.cursor()

    cursor.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=0')
    landscape_tmp = open('landscape.list.s', 'w')
    landscape_tmp.writelines(record[0]+'\n' for record in cursor)

    cursor.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=1')
    portrait_tmp = open('portrait.list.s', 'w')
    portrait_tmp.writelines(record[0]+'\n' for record in cursor)

    landscape_tmp.close()
    portrait_tmp.close()

    for shuffled, unsorted in (('landscape.list', 'landscape.list.s'),
                               ('portrait.list', 'portrait.list.s')):
        subprocess.call(['sort', '-R', '-o', shuffled, unsorted])
    os.unlink('portrait.list.s')
    os.unlink('landscape.list.s')
    return
def is_listed(db,filename):
    """Return True when `filename` has at least one row in the `list` table.

    db: sqlite3 cursor used to run the query.
    """
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    [(matches,)] = db.fetchall()
    return matches > 0
def get_folder_contents(db,path):
    ''' Return the names of the listed files that sit directly in `path`.

    db:   sqlite3 cursor.
    path: folder prefix; callers pass it with a trailing "/".
    Entries of subfolders (anything still containing a "/" once the prefix
    is removed) are left out.
    '''
    db.execute("SELECT file FROM list where file LIKE ?", (path+'%',))
    remainders = (record[0].replace(path, '', 1) for record in db)
    return [name for name in remainders if '/' not in name]
def ftime_match(db,filename,ftime):
    """Return True when the date stored for `filename` equals `ftime`.

    Raises IndexError when the file has no row in the `list` table.
    """
    db.execute("SELECT date FROM list WHERE file == ?", (filename,))
    stored = db.fetchall()
    first_row = stored[0]
    return first_row[0] == ftime
def hash_match(db,filename,hash):
    """Return True when the hash stored for `filename` equals `hash`.

    Raises IndexError when the file has no row in the `list` table.
    """
    db.execute("SELECT hash FROM list where file == ?", (filename,))
    stored = db.fetchall()
    first_row = stored[0]
    return first_row[0] == hash
def hash2file(db,hash,firstOnly=True):
    """Look up the file name(s) recorded for `hash`.

    Returns None when the hash is not listed, the first file name when
    `firstOnly` is true, otherwise the list of all file names.
    """
    db.execute("SELECT file FROM list where hash == ?", (hash,))
    paths = [record[0] for record in db.fetchall()]
    if not paths:
        return None
    return paths[0] if firstOnly else paths
def file2hash(db,filename):
    """Return the hash recorded for `filename`, or None when it is not listed."""
    db.execute("SELECT hash FROM list where file == ? LIMIT 1", (filename,))
    found = db.fetchall()
    return found[0][0] if found else None
def hash_in_data(db,hash):
    """Return True when the `data` table holds a row for `hash`."""
    db.execute("SELECT hash FROM data where hash == ? LIMIT 1", (hash,))
    return bool(db.fetchall())
def hash_in_list(db,hash):
    """Return True when the `list` table holds a row for `hash`."""
    db.execute("SELECT hash FROM list where hash == ? LIMIT 1", (hash,))
    return bool(db.fetchall())
def get_md5(filename):
    ''' Return hash of the first 15 megabytes of the file

    filename: path of the file to read (opened in binary mode).
    Returns the MD5 digest as a hexadecimal string. The file is closed
    before returning, also when reading fails.
    '''
    with open(filename, 'rb') as handle:
        head = handle.read(1024*1024*15)
    return hashlib.md5(head).hexdigest()
def get_dims(filename):
    """Ask the external `identify` program for the size of an image.

    Only the first frame (`[0]`) is inspected. Returns the output split on
    "x", i.e. a [width, height] list of strings for a readable image.
    """
    command = ['identify', '-format', '%wx%h', filename+'[0]']
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    output, _unused = process.communicate()
    size_text = output.strip()
    return size_text.split('x')
def call_viewer(opts, files):
    """ Runs the viewer program, contains defaults

    opts:  options object; opts.viewer is the command template in which
           %f is replaced by the file names. The special value "1" selects
           the default viewer and is stored back into opts.viewer.
    files: iterable of file names, joined with spaces.

    The command runs through the shell with stderr discarded.
    """
    if opts.viewer=="1":
        opts.viewer="geeqie -l %f"
    # NOTE(review): file names are pasted into the shell command unquoted,
    # so names containing spaces or shell metacharacters will misbehave.
    command=opts.viewer.replace('%f', " ".join(files))
    # The context manager closes the null device once the viewer has exited.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command, stderr=devnull, shell=True)
def append_colors(sqlfile):
    """Measure and store colors for every `data` row that has none yet.

    Rows with R IS NULL are processed. For each one the first file carrying
    that hash is measured with get_colors() and the six color columns are
    updated. Hashes without a listed file are skipped. The work is
    committed every 50 images and once more at the end.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    pending = conn.cursor()
    writer = conn.cursor()
    pending.execute("SELECT hash,R FROM data WHERE R IS NULL")
    processed = 0
    for record in pending:
        image_hash = record[0]
        filename = hash2file(conn.cursor(), image_hash)
        if filename is None:
            continue
        colors = get_colors(filename)
        mean, border = colors[0], colors[1]
        writer.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? "
                       "WHERE hash=?",
                       (mean[0], mean[1], mean[2],
                        border[0], border[1], border[2], image_hash))
        print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {'f': filename, 'r': mean[0],
                                                     'g': mean[1], 'b': mean[2]})
        processed += 1
        if processed % 50 == 0:
            conn.commit()
    conn.commit()
    return
def find_color_nearest(opts):
    """ Find closest matching images to given RGB color

    opts.nearestcolor is a comma separated "R,G,B" string, optionally
    followed by a fourth value giving the number of matches wanted (one
    when omitted). Rows of the data table are ranked by the summed absolute
    difference between the given color and their BR,BG,BB columns.

    The matches are printed as file name, difference and stored color, and
    passed to the viewer when opts.viewer is set. When a candidate row has
    no color measurement the user is told to run --measure. When nothing
    matches a message is printed instead of failing.
    """
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    src=[float(i) for i in opts.nearestcolor.strip().strip('"').split(',')]
    if len(src)==3:
        src.append(1)
    limit=src[3]
    db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
               (src[0],src[1],src[2],limit))
    hits=[]
    for hit in db:
        fs=hash2file(conn.cursor(),hit[0],False)
        if fs is None:
            continue
        if hit[1] is None:
            print("Color information not found. Run again with --measure.")
            return
        for f in fs:
            hits.append((f,hit[1],hit[2],hit[3],hit[4]))
    if not hits:
        # max() below would raise ValueError on an empty list.
        print("No matching images found.")
        return
    file_len=str(max(len(x[0]) for x in hits))
    row_format='{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})'
    for shown,hit in enumerate(hits):
        # A hash shared by several files yields several hits: cap the output.
        if shown>=limit:
            break
        print(row_format.format(*hit))
    if opts.viewer:
        call_viewer(opts, [x[0] for x in hits])
    return
def find_color_nearest_file(opts):
    """ Find closest matching images to given files with RGB color

    opts.nearestcolor is "filename" or "filename,count" (one match when the
    count is omitted). The reference color is the BR,BG,BB value stored for
    the file when it is listed in the database, otherwise the border color
    measured with get_colors().

    The matches are printed as file name, difference and stored color, and
    passed to the viewer when opts.viewer is set. The reference file itself
    is never reported. Missing color measurements produce a message asking
    to run --measure. An empty result prints a message instead of failing.
    """
    parts=opts.nearestcolor.rsplit(",",1)
    # One extra row is requested because the reference file may be among the
    # results and is dropped from them.
    if len(parts)==1:
        thr=2
    else:
        thr=int(parts[1])+1
    target=os.path.realpath(parts[0])
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    if is_listed(db, target):
        file_hash=file2hash(conn.cursor(), target)
        db.execute("SELECT BR,BG,BB FROM data WHERE hash=?",(file_hash,))
        rows=db.fetchall()
        src=rows[-1] if rows else None
    else:
        src=get_colors(target)[1]
    if src is None or None in src:
        print("Color information not found. Run again with --measure.")
        return
    src=[float(i) for i in src]
    db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
               (src[0],src[1],src[2],thr))
    hits=[]
    for hit in db:
        fs=hash2file(conn.cursor(),hit[0],False)
        if fs is None:
            continue
        if hit[1] is None:
            print("Color information not found. Run again with --measure.")
            return
        for f in fs:
            if f==target:
                continue
            hits.append((f,hit[1],hit[2],hit[3],hit[4]))
    if not hits:
        # max() below would raise ValueError on an empty list.
        print("No matching images found.")
        return
    file_len=str(max(len(x[0]) for x in hits))
    row_format='{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})'
    for shown,hit in enumerate(hits):
        if shown>=thr-1:
            break
        print(row_format.format(*hit))
    if opts.viewer:
        call_viewer(opts, [x[0] for x in hits])
    return
def get_colors(filename):
    """Measure the mean color of an image and of its border.

    The external `convert` program first shrinks the first frame of the
    image to 10x10 pixels. Two further `convert` runs then report the mean
    R,G,B of that thumbnail and of its four edge strips appended together.

    Returns (mean, border), each a list of three floats.
    """
    mean_format = '"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"'
    shrink_cmd = ['convert','-define','jpeg:size=64x64',filename+'[0]','-resize','10x10!','TEXT:-']
    whole_cmd = ['convert','-','-format',mean_format,'info:-']
    border_cmd = ['convert','-',
                  '(','+clone','-gravity','North','-crop','10x1+0+0','-write','mpr:top','+delete',')',
                  '(','+clone','-gravity','South','-crop','10x1+0+0','-write','mpr:bot','+delete',')',
                  '(','+clone','-gravity','West','-crop','1x10+0+0','-rotate','90','-write','mpr:lef','+delete',')',
                  '(','+clone','-gravity','East','-crop','1x10+0+0','-rotate','90','-write','mpr:rig','+delete',')',
                  '+delete','mpr:top','mpr:bot','mpr:lef','mpr:rig','+append',
                  '-format',mean_format,'info:-']

    shrinker = subprocess.Popen(shrink_cmd, stdout=subprocess.PIPE)
    thumbnail, _unused = shrinker.communicate()

    # Both measurements read the same thumbnail from stdin.
    reports = []
    for command in (whole_cmd, border_cmd):
        measurer = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
        report, _unused = measurer.communicate(input=thumbnail)
        reports.append(report)

    mean, border = [[float(value) for value in report.strip().strip('"').split(',')]
                    for report in reports]
    return (mean, border)
def get_color_diff(c1,c2):
    """ Return color difference from two RGB triplets

    The difference is the sum of the absolute per-channel differences of
    the first three components.
    """
    return sum(abs(c1[channel] - c2[channel]) for channel in range(3))
def get_ratio_diff(d1,d2):
    """ Return ratio difference from two w,h dimension tuplets

    Each ratio is width/height; the values are converted with float(), so
    numeric strings are accepted.
    """
    first_ratio = float(d1[0]) / float(d1[1])
    second_ratio = float(d2[0]) / float(d2[1])
    return abs(first_ratio - second_ratio)
def append_fingerprints(sqlfile):
    """Compute and store the fingerprint of every `data` row lacking one.

    For each hash with fingerprint IS NULL the first listed file is
    measured with get_fingerprint(). Hashes without a listed file are
    skipped. Progress is printed: the full path for the first file of a
    directory, an indented base name for the following ones. The work is
    committed every 50 images and once more at the end.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    pending = conn.cursor()
    writer = conn.cursor()
    pending.execute("SELECT hash FROM data WHERE fingerprint IS NULL")
    processed = 0
    previous_dir = ""
    for record in pending:
        image_hash = record[0]
        filename = hash2file(conn.cursor(), image_hash)
        if filename is None:
            continue
        writer.execute("UPDATE data SET fingerprint=? "
                       "WHERE hash=?", (get_fingerprint(filename), image_hash))
        current_dir = os.path.dirname(filename)
        if current_dir != previous_dir:
            previous_dir = current_dir
            label = filename
        else:
            label = " "+os.path.basename(filename)
        processed += 1
        print("%(nr)i %(f)s" % {'f': label, 'nr': processed})
        if processed % 50 == 0:
            conn.commit()
    conn.commit()
def get_fingerprint(filename):
    """Return a fingerprint of the image as a decimal string.

    The external `convert` program reduces the first frame of the image to
    a blurred, equalized 16x16 grayscale picture with 1 bit depth and dumps
    it as text. Every output line with at least three comma separated
    fields contributes one bit: 1 when the third field is "255", else 0.
    The bit string is read as a binary number.
    """
    command = ['convert','-define','jpeg:size=256x256',filename+'[0]','-resize','160x160!',
               '-colorspace','Gray','-blur','2x2','-normalize','-equalize','-resize','16x16','-depth','1','TEXT:-']
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    dump, _unused = process.communicate()
    bits = []
    for line in dump.split('\n'):
        fields = line.split(',')
        if len(fields) < 3:
            continue
        bits.append('1' if fields[2] == "255" else '0')
    return str(int(''.join(bits), 2))
def find_fingerprint_similar(opts):
    ''' Find all similar images, nearest match more similar than thr

    opts.similarity is the threshold: two images are similar when their
    fingerprints differ in fewer than that many bits. Only rows of the data
    table with sharpness > 0 take part. Images sharing the same hash are
    never compared with each other.

    For every image that has similar images a table is printed (SD bit
    difference, CD color difference, RD aspect ratio difference, Shp
    sharpness, W x H) and, when opts.viewer is set, the group is shown in
    the viewer. A file already reported as a match does not start a group
    of its own. Exits with status 1 when the database holds no measured
    rows at all.
    '''
    thr=int(opts.similarity)
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db1=conn.cursor()
    db2=conn.cursor()
    query="SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0"
    db1.execute(query)
    hits=[]
    seen=set()   # file names already reported as somebody's match
    i=None
    for i,hit1 in enumerate(db1):
        src_hash=hit1[0]
        cmpf=hash2file(conn.cursor(),src_hash)
        if cmpf is None or cmpf in seen:
            continue
        fp=int(hit1[1])
        dims=hit1[3:5]
        colors=hit1[5:8]
        # Row layout: file, SD, sharpness, width, height, CD, pixels, RD
        group=[ [cmpf,0,hit1[2],int(hit1[3]),int(hit1[4]),0,dims[0]*dims[1],0] ]
        db2.execute(query)
        for hit2 in db2:
            if hit2[0]==src_hash:
                continue
            # Number of differing fingerprint bits.
            similarity=bin(fp^int(hit2[1])).count('1')
            if similarity>=thr:
                continue
            fs=hash2file(conn.cursor(),hit2[0],False)
            if fs is None:
                continue
            measures=[similarity, hit2[2],
                      int(hit2[3]),int(hit2[4]),
                      get_color_diff(hit2[5:8],colors),
                      int(hit2[3])*int(hit2[4]),
                      get_ratio_diff(hit2[3:5],dims)]
            for f in fs:
                # Every file gets its own row object; sharing one list made
                # all files of a hash show the last file name.
                group.append([f]+measures)
                seen.add(f)
        group.sort(key=lambda x: x[1])
        if len(group)>1:
            hits.append(group)
    if i is None:
        print("No measurements found")
        sys.exit(1)
    for src in hits:
        file_len=str(max(len(x[0]) for x in src))
        print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
        for entry in src:
            print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(entry[0],entry[1],
                   "%.2f"%entry[5],"%.2f"%entry[7],
                   "%.1f" % entry[2],entry[3],entry[4]))
        if opts.viewer:
            call_viewer(opts, [x[0] for x in src])
    return
def find_fingerprint_nearest(opts):
    ''' Find nearest match to given file

    opts.similarity is "filename" or "filename,threshold". The fingerprint
    of the file is compared with every row of the data table that has
    sharpness > 0, and the row differing in the fewest bits (and fewer than
    the threshold) is reported next to the file itself. Rows sharing the
    file's hash are ignored. When opts.viewer is set, both images are
    opened in the viewer.

    Exits with status 1 when the query returns no measured rows; prints a
    note on stderr and returns when nothing is below the threshold.
    '''
    cmp=os.path.realpath(opts.similarity.rsplit(",")[0])
    # Without an explicit threshold any difference qualifies.
    # NOTE: sys.maxint exists on Python 2 only.
    thr=sys.maxint
    if len(opts.similarity.rsplit(","))>1:
        thr=int(opts.similarity.rsplit(",",1)[1])
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db1=conn.cursor()
    if is_listed(db1, cmp):
        # Known file: reuse the measurements stored for its hash.
        hash=file2hash(conn.cursor(),cmp)
        db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,))
        for hit1 in db1:
            fp=int(hit1[1])
            sp=hit1[2]
            dims=hit1[3:5]
            colors=hit1[5:8]
    else:
        # Unknown file: measure it now.
        hash=get_md5(cmp)
        fp=int(get_fingerprint(cmp))
        sp=get_sharpness(cmp)
        dims=get_dims(cmp)
        colors=get_colors(cmp)[0]
    db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
    # Best match so far: hash, bit difference, sharpness, width, height,
    # color difference, aspect ratio difference.
    this=['',thr,0,0,0,0,0]
    hit1=None
    for i,hit1 in enumerate(db1):
        if hit1[0] == hash:
            continue
        # Number of differing fingerprint bits.
        similarity=bin(fp^int(hit1[1])).count('1')
        if similarity<this[1]:
            this[0]=hit1[0]
            this[1]=similarity
            this[2]=hit1[2]
            this[3]=int(hit1[3])
            this[4]=int(hit1[4])
            this[5]=get_color_diff(colors, hit1[5:8])
            this[6]=get_ratio_diff(this[3:5], dims)
    # hit1 is still None only when the query returned no rows at all.
    if hit1==None:
        print("No measurements found")
        sys.exit(1)
    # The difference never dropped below the initial threshold.
    if this[1]==thr:
        print("No similarities < "+str(thr)+": "+cmp, file=sys.stderr)
        return
    # Replace the winning hash by the first file name carrying it.
    this[0]=hash2file(conn.cursor(),this[0])
    file_len=str(max(len(cmp), len(this[0])))
    print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
    print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
    print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5],
           "%.2f"%this[6], "%.1f" % this[2],this[3], this[4]))
    if opts.viewer:
        call_viewer(opts, (cmp,this[0]))
def append_sharpness(sqlfile):
    """Compute and store the sharpness of every `data` row lacking one.

    For each hash with sharpness IS NULL the first listed file is measured
    with get_sharpness(). Hashes without a listed file are skipped.
    Progress is printed: the full path for the first file of a directory,
    an indented base name for the following ones, each with the measured
    value. The work is committed every 50 images and once more at the end.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    pending = conn.cursor()
    writer = conn.cursor()
    pending.execute("SELECT hash FROM data WHERE sharpness IS NULL")
    processed = 0
    previous_dir = ""
    for record in pending:
        image_hash = record[0]
        filename = hash2file(conn.cursor(), image_hash)
        if filename is None:
            continue
        sharpness = get_sharpness(filename)
        writer.execute("UPDATE data SET sharpness=? "
                       "WHERE hash=?", (sharpness, image_hash))
        current_dir = os.path.dirname(filename)
        if current_dir != previous_dir:
            previous_dir = current_dir
            label = filename
        else:
            label = " "+os.path.basename(filename)
        processed += 1
        print("%(nr)i %(f)s %(s)f" % {'f': label, 'nr': processed, 's': sharpness})
        if processed % 50 == 0:
            conn.commit()
    conn.commit()
def get_sharpness(filename):
    """ Difference in X, Difference in Y, get smaller diff = smaller sharpness.
    May change if bugs found..

    filename: image to measure; only its first frame is used.
    Returns the value printed by the external `convert` program as a float.
    """
    # Resize to 1024 smaller axis, crop with golden ratio
    # grayscale and equalize histogram. calculate difference between x neighbor
    # and y neighbor. smaller difference = less sharp.
    # The command is written as one string and split on single spaces, so
    # the continuation lines must start in the first column: leading
    # spaces would turn into empty arguments.
    small_args="convert ( -define jpeg:size=2048x2048 -resize 1024^ -gravity center -crop 62%x62%+0+0 -colorspace gray XXX -equalize ) \
( -clone 0 ( -clone 0 -roll +1+0 ) -compose Difference -composite -shave 1x1 ) \
( -clone 0 ( -clone 0 -roll +0+1 ) -compose Difference -composite -shave 1x1 ) \
-delete 0 -compose Darken -composite -format %[fx:mean*1000] info:".split(" ")
    # Index 12 is the XXX placeholder; it is filled in after the split so a
    # file name containing spaces stays a single argument.
    small_args[12]=filename+'[0]'
    p=subprocess.Popen(small_args,stdout=subprocess.PIPE)
    sharpness, err = p.communicate()
    return float(sharpness.strip())
def searchdb(sqlfile,needle):
    """Find listed files whose path contains `needle`.

    Returns a one element list [('search', rows)] where every row is
    [file, width, height], ordered by file name. The dimensions come from
    the `data` row of the file's hash (IndexError when that row is missing).
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    matches = conn.cursor()
    dims_lookup = conn.cursor()
    matches.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file", ('%'+needle+'%',))
    found = []
    for path, image_hash in matches:
        measured = dims_lookup.execute("SELECT hash,width,height FROM data WHERE hash = ?", (image_hash,)).fetchall()
        first = measured[0]
        found.append([path, first[1], first[2]])
    return [('search', found)]
def clean_dirs(dirs):
    """Remove unwanted directory names from `dirs` in place.

    Names listed in BADDIRS and names starting with "." are dropped. The
    same list object is returned, so a caller handing in the list from
    os.walk sees the change as well.
    """
    dirs[:] = [name for name in dirs
               if name not in BADDIRS and not name.startswith(".")]
    return dirs
def clean_syms(files, path=None):
    """Remove symbolic links from `files` in place and return the list.

    files: list of file names, modified in place.
    path:  optional directory the names are relative to. Pass it when the
           names are bare base names (as produced by os.walk); without it
           the names are resolved against the current working directory.
    """
    for name in files[:]:
        candidate = name if path is None else os.path.join(path, name)
        if os.path.islink(candidate):
            files.remove(name)
    return files
def confirm(prompt=None, resp=False):
    """Ask the user a yes/no question and return the answer as a boolean.

    prompt: question text, 'Confirm' when None.
    resp:   value returned when the user just presses ENTER; it also
            decides which letter is shown in brackets as the default.

    Only y, Y, n and N are accepted; anything else repeats the question.
    """
    question = 'Confirm' if prompt is None else prompt
    default_key, other_key = ('y', 'n') if resp else ('n', 'y')
    question = '%s [%s]|%s: ' % (question, default_key, other_key)
    meanings = {'y': True, 'Y': True, 'n': False, 'N': False}
    while True:
        reply = raw_input(question)
        if not reply:
            return resp
        if reply in meanings:
            return meanings[reply]
        print('please enter y or n.')
def find_duplicates(sqlfile,search):
    """Collect the files that share a hash with another listed file.

    search: path fragment limiting which files are counted; '.' means all.

    Returns a list of (hash, entries) pairs, where entries is a list of
    [file, width, height, date] sorted by date. The pairs are sorted by
    their first entry.
    """
    pattern = '%' if search == '.' else '%'+search+'%'
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    groups = conn.cursor()
    dims_lookup = conn.cursor()
    files_lookup = conn.cursor()
    groups.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ", (pattern,))
    duplicates = []
    for group in groups:
        digest = group[0]
        entries = []
        dims_lookup.execute("SELECT hash,width,height FROM data WHERE hash = ?", (digest,))
        for measured in dims_lookup:
            files_lookup.execute("SELECT file,date FROM list WHERE hash = ?", (digest,))
            entries.extend([listed[0], measured[1], measured[2], listed[1]]
                           for listed in files_lookup)
        entries.sort(key=lambda entry: entry[3])
        duplicates.append((digest, entries))
    duplicates.sort(key=lambda pair: pair[1][0])
    return duplicates
def find_smalls(minsize,sqlfile):
    """List the files whose image is narrower OR lower than `minsize`.

    Returns a one element list [('smalls', rows)] where every row is
    [file, width, height]. Hashes without a listed file are left out.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    undersized = conn.cursor()
    undersized.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?", (minsize, minsize))
    found = []
    for image_hash, width, height in undersized:
        names = hash2file(conn.cursor(), image_hash, False)
        if names is None:
            continue
        found.extend([name, width, height] for name in names)
    return [('smalls', found)]
def disk_used(options):
    """Print the summed file sizes below options.diskused, per directory.

    options.diskused:       directory whose contents are summed.
    options.diskused_depth: number of leading path components, relative to
                            that directory, that make up one output entry.

    One line is printed per entry, in order of first appearance: the size
    in bytes, the humanized size and the relative path.
    """
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    # The trailing separator keeps siblings such as "/pics2" out of the
    # results for "/pics".
    root=os.path.realpath(options.diskused).rstrip("/")+"/"
    depth=int(options.diskused_depth)
    db.execute("SELECT size,file FROM list WHERE file LIKE ?",(root+"%",))
    entries=[]
    sizes=[]
    positions={}   # entry -> index into entries/sizes
    for size,filename in db:
        # LIKE is case-insensitive and treats % and _ in the path as
        # wildcards; keep only genuine descendants of root.
        if not filename.startswith(root):
            continue
        start_path="/".join(filename[len(root):].split('/')[0:depth])
        if start_path not in positions:
            positions[start_path]=len(entries)
            entries.append(start_path)
            sizes.append(size)
        else:
            sizes[positions[start_path]]+=size
    for size,entry in zip(sizes,entries):
        print("| ".join([ str(size).ljust(14),
                          humanize_size(size).rjust(8),
                          entry]))
def print_structure(files):
    """Print a numbered line for every file of every group.

    files: list of (label, entries) pairs; every entry starts with
    file name, width, height. The numbering restarts at 1 for each group.
    """
    for group in files:
        for number, entry in enumerate(group[1], 1):
            print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i': number, 'f': entry[0],
                                                   'x': entry[1], 'y': entry[2]})
    return
def print_dup_structure(files,opts):
    """Print one line per duplicate group and optionally show it.

    files: list of (hash, entries) pairs as returned by find_duplicates.
    opts:  options object; when opts.viewer is set, the command is run
           through the shell for each group with %f replaced by the
           group's file names.

    Each line holds the running group number, the number of files and the
    quoted file names.
    """
    for number, group in enumerate(files, 1):
        names = [entry[0] for entry in group[1]]
        quoted = [' "'+name+'"' for name in names]
        print("%(i)d:%(n)d:%(f)s " % {'i': number, 'n': len(quoted), 'f': ",".join(quoted)})
        if opts.viewer:
            subprocess.call(opts.viewer.replace('%f', " ".join(names)), shell=True)
    return
def humanize_size(size,precision=1):
    """Format a byte count with a binary unit suffix.

    size:      number of bytes, or None.
    precision: decimals shown once the value has been scaled; plain byte
               counts are printed without decimals.

    Returns 'nan' for None. Values beyond the largest suffix stay in TB
    instead of running past the end of the suffix list.
    """
    if size is None:
        return 'nan'
    suffixes=['B','KB','MB','GB','TB']
    suffixIndex=0
    defPrecision=0
    while size > 1024 and suffixIndex < len(suffixes)-1:
        suffixIndex += 1
        size = size/1024.0
        defPrecision=precision
    return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
def import_descriptions(options):
    """ Placeholder, not implemented yet.

    Planned behavior: walk through the path from given [startpath] and
    read any DESCFILE, importing the contents in the DB. """
    return None
def export_descriptions(options):
    """ Placeholder, not implemented yet.

    Planned behavior: get unique paths from DB, matching [startpath], and
    write DESCFILE for each file found. Export gets a format argument:
    %wx%h %n %d """
    # Planned format fields:
    #   %w %h %n %d   width, height, basename, description
    #   %R %G %B      red, green, blue
    #   %S %F %D      sharpness, fingerprint, date (formatting?)
    #   %s %H         filesize, hash
    return None
def main():
    """Command line entry point: run every action selected by the options.

    The database file is created first when it does not exist. The actions
    then run in a fixed order (cleanup, adding, search, measuring, lists,
    similarity queries, duplicates, small images, disk usage, tags), and
    the program exits with status 0.
    """
    options=setup_options();
    if not os.path.exists(options.sqlfile):
        createdb(options.sqlfile);
    # Cleanup runs before adding, so stale entries are gone when the
    # folders are scanned.
    if options.delete:
        print('Deleting entries...')
        delete_nonexisting(options.sqlfile)
    if options.delete_data:
        print('Deleting metadata...')
        delete_data(options.sqlfile)
    if options.add or options.changed:
        print('Adding entries...')
        add_recurse(options)
    if options.search:
        print_structure(searchdb(options.sqlfile,options.search))
    if options.measure:
        print('Measure colors...')
        append_colors(options.sqlfile)
        print('Measure fingerprints...')
        append_fingerprints(options.sqlfile)
        print('Measure sharpness...')
        append_sharpness(options.sqlfile)
    if options.random:
        print('Random lists...')
        random_lists(options.sqlfile)
    # An argument naming an existing file selects the "nearest to this
    # file" variant; anything else is treated as a plain value.
    if options.nearestcolor:
        if os.path.exists(options.nearestcolor.rsplit(",")[0]):
            find_color_nearest_file(options)
        else:
            find_color_nearest(options)
    if options.similarity!=None:
        if os.path.exists(options.similarity.rsplit(",")[0]):
            find_fingerprint_nearest(options)
        else:
            find_fingerprint_similar(options)
    if options.duplicate:
        files=find_duplicates(options.sqlfile,options.startpath)
        print_dup_structure(files,options)
    if options.searchsmall:
        files=find_smalls(options.minsize,options.sqlfile)
        if options.deleteFiles:
            # Only ask for deletion when something was found, then drop
            # the deleted files from the database as well.
            if len(files[0][1])>0:
                delete_files(files)
                delete_nonexisting(options.sqlfile)
        else:
            print_structure(files)
    if options.diskused:
        disk_used(options)
        #print(files)
    if options.tag:
        # With the start path left at ".", the -t argument is taken as a
        # file name whose tags are printed; otherwise it is the tag to
        # give to the file at the start path.
        if options.startpath==".":
            print_tag(options)
        else:
            add_tag(options)
    sys.exit(0)
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()