Files
q-tools/image_list.py
2013-09-30 08:32:40 +03:00

411 lines
15 KiB
Python
Executable File

#!/usr/bin/python
import sys
import os
import re
import sqlite3
import subprocess
import hashlib
from argparse import ArgumentParser
# Default name of the SQLite database file (overridden with -f).
SQLFILE = 'list_of_images.sqlite'
# Case-insensitive test for the image file names that get indexed.
# Written as a raw string so that ``\.`` is a regex escape and not an
# (invalid) string escape.
IMGMATCH = re.compile(r'.*\.(jpg|jpeg|png|gif)$', re.I)
# Folder names that are never descended into; -x appends to this list.
BADDIRS = ['_tn', '_med']
# Default value of the -m option.
MINSIZE = 0
def setup_options():
    """Parse the command line and return the options namespace.

    Side effects: every folder name given with -x is appended to the
    module-level BADDIRS list, and the ``add`` flag is inverted when any
    of --dup, --small or --color is present.
    """
    parser = ArgumentParser(
        description="Maintains the list of images sqlite file")
    arg = parser.add_argument

    arg("-a", dest="add", action="store_false", default=True,
        help="Do not add new files [%(default)s]")
    arg("-c", dest="changed", action="store_true", default=False,
        help="Modify changed files [%(default)s]")
    arg("-d", dest="delete", action="store_true", default=False,
        help="Delete non-existing entries [%(default)s]")
    arg("-f", dest="sqlfile", action="store", default=SQLFILE,
        help="SQL file name to use [%(default)s]")
    arg("-l", dest="symlinks", action="store_true", default=False,
        help="Follow symbolic links [%(default)s]")
    arg("-m", dest="minsize", type=int, default=MINSIZE,
        help="Minimum pixel width/height of stored image [%(default)s]")
    arg("-r", dest="random", action="store_true", default=False,
        help="Create randomized files for landscape and portrait images"
             " [%(default)s]")
    arg("-s", dest="search", type=str, default=False,
        help="Search list based on path pattern")
    arg("--color", dest="colors", action="store_true", default=False,
        help="Append list with mean color information This option will"
             " flip the 'Add new files' option. [%(default)s]")
    arg("--nearest", dest="nearestcolor", type=str, default=False,
        help="Search list for nearest mean color. format: R,G,B in float"
             " 0-1. Add fourth value to limit search to number")
    arg("--dup", dest="duplicate", action="store_true", default=False,
        help="Return a list of duplicate files, based on hashes. This"
             " option will flip the 'Add new files' option. [%(default)s]")
    arg("--del", dest="deleteFiles", action="store_true", default=False,
        help="Delete files listed with --small. [%(default)s]")
    arg("--small", dest="searchsmall", action="store_true", default=False,
        help="Return a list of small files, smaller than -m INT. This"
             " option will flip the 'Add new files' option. [%(default)s]")
    arg("-x", dest="exclude", action="append", default=[],
        help="Exclude folder name from the lists. This option may be"
             " issued several times")
    arg('startpath', action="store", default='.', nargs='?')

    options = parser.parse_args()
    BADDIRS.extend(options.exclude)

    # The reporting modes invert whatever -a left in options.add.
    flips_add = options.duplicate or options.searchsmall or options.colors
    if flips_add:
        options.add = not options.add
    return options
def createdb(sqlfile):
    """Create the ``list`` table in the SQLite database *sqlfile*.

    sqlfile: path of the database file; sqlite3 creates the file when it
             does not exist yet.

    Raises sqlite3.OperationalError when the table already exists.
    The connection is always closed before returning.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        conn.execute(
            'CREATE TABLE list ('
            'id INTEGER PRIMARY KEY AUTOINCREMENT,'
            'file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,'
            'width INTEGER,height INTEGER,'
            'R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
        conn.commit()
    finally:
        conn.close()
    return
def delete_nonexisting(sqlfile):
    """Remove database rows whose file no longer exists on disk.

    sqlfile: path of the SQLite database holding the ``list`` table.

    Prints one 'removing.. <path>' line per removed entry, commits once
    at the end and always closes the connection.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute('SELECT file FROM list')
        # Read all paths up front so that no rows are deleted from the
        # table while a SELECT over it is still being stepped through.
        listed = [row[0] for row in db.fetchall()]
        for path in listed:
            if not os.path.exists(path):
                print('removing.. ' + path)
                db.execute("DELETE FROM list where file == ?", (path,))
        conn.commit()
    finally:
        conn.close()
    return
def delete_files(files):
    """Actually deletes files!

    files: list of (label, rows) pairs; the first item of every row is
           the path to remove.

    The listing is printed first and nothing is removed unless the user
    confirms the prompt.
    """
    print_structure(files)
    if not confirm(prompt="Sure to delete these files?"):
        return
    print("now delling")
    for group in files:
        for entry in group[1]:
            target = entry[0]
            print(target)
            os.remove(target)
    return
def add_recurse(options):
    """Walk options.startpath and add new / refresh changed image entries.

    options: namespace from setup_options(); the fields read here are
             sqlfile, startpath, symlinks, add, changed and minsize.

    For every file name matching IMGMATCH:
      * not yet listed for its folder and options.add is set -> inserted
      * already listed and options.changed is set -> updated when the
        stored modification time differs from the file's current one

    Any failure while adding or updating a file is reported and ends the
    program with exit status 1.  Changes are committed after each folder,
    so folders finished before a failure stay stored.
    """
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        for path, dirs, files in os.walk(options.startpath,
                                         followlinks=options.symlinks):
            print('Checking ' + path)
            # clean_dirs() edits the list in place; that is what makes
            # os.walk skip the excluded folders.
            dirs = clean_dirs(dirs)
            if not options.symlinks:
                # os.walk yields bare names, so the link test has to be
                # made against the walked folder, not the working
                # directory.
                files = [name for name in files
                         if not os.path.islink(os.path.join(path, name))]
            files.sort()
            dirs.sort()
            # Set: one O(1) membership test per file instead of a list scan.
            known = set(get_folder_contents(db,
                                            os.path.abspath(path) + '/'))
            for name in files:
                if not IMGMATCH.match(name):
                    continue
                filename = os.path.abspath(os.path.join(path, name))
                if name not in known:
                    if options.add:
                        try:
                            add_single(conn, filename, change=False,
                                       minsize=options.minsize)
                        except Exception as err:
                            print('error adding file: ' + filename
                                  + ' (%s)' % (err,))
                            sys.exit(1)
                elif options.changed:
                    ftime = os.path.getmtime(filename)
                    if not ftime_match(db, filename, ftime):
                        # Modification time differs: refresh the entry.
                        try:
                            add_single(conn, filename, change=True,
                                       minsize=options.minsize)
                        except Exception as err:
                            print('error changing file: ' + filename
                                  + ' (%s)' % (err,))
                            sys.exit(1)
                    # Listed and unchanged: nothing to do.
            conn.commit()
    finally:
        conn.close()
    return
def add_single(conn,filename,change=False,hash=None,minsize=0):
    """Insert or update the database row of one image file.

    conn:     open sqlite3 connection (the caller commits).
    filename: path of the image.
    change:   True updates the existing row, False inserts a new one.
    hash:     precomputed hash; computed with get_md5() when None.
    minsize:  the file is skipped (with a message) when both its width
              and its height are below this value.
    """
    dims = get_dims(filename)
    width = int(dims[0])
    height = int(dims[1])
    portrait = 1 if width < height else 0

    if width < minsize and height < minsize:
        print(filename + " too small (%s)" % dims)
        return

    cursor = conn.cursor()
    if hash is None:
        hash = get_md5(filename)
    mtime = os.path.getmtime(filename)
    shown = {'f': filename, 'x': dims[0], 'y': dims[1]}

    if change:
        cursor.execute(
            "UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=? "
            "WHERE file=?",
            (mtime, portrait, hash, dims[0], dims[1], filename))
        print("changing: %(f)s (%(x)sx%(y)s)" % shown)
    else:
        cursor.execute(
            "INSERT INTO list(file,date,portrait,hash,width,height)"
            "VALUES(?,?,?,?,?,?)",
            (filename, mtime, portrait, hash, dims[0], dims[1]))
        print("adding: %(f)s (%(x)sx%(y)s)" % shown)
    return
def random_lists(sqlfile):
    """Write shuffled path lists for landscape and portrait images.

    sqlfile: path of the SQLite database holding the ``list`` table.

    Produces 'landscape.list' (portrait=0) and 'portrait.list'
    (portrait=1) in the current working directory.  Each list is first
    written to a temporary '<name>.s' file, then shuffled with the
    external ``sort -R`` command, after which the temporary file is
    removed.
    """
    targets = ((0, 'landscape.list'), (1, 'portrait.list'))
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        for flag, name in targets:
            db.execute('SELECT file FROM list WHERE portrait=?', (flag,))
            # "with" guarantees the file is flushed and closed even if
            # writing fails part-way.
            with open(name + '.s', 'w') as out:
                for row in db:
                    out.write(row[0] + '\n')
    finally:
        conn.close()
    for _flag, name in targets:
        subprocess.call(['sort', '-R', '-o', name, name + '.s'])
    for _flag, name in reversed(targets):
        os.unlink(name + '.s')
    return
def is_listed(db,filename):
    """Return True when *filename* has at least one row in ``list``.

    db: sqlite3 cursor on the image database.
    """
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    rows = db.fetchall()
    hits = rows[0][0]
    return hits > 0
def get_folder_contents(db,path):
    """Return the names of listed files located directly in *path*.

    db:   sqlite3 cursor on the image database.
    path: folder prefix; the caller passes it with a trailing '/'.

    Rows are selected with ``LIKE path%``; the first occurrence of
    *path* is cut from each hit and only remainders without a '/'
    (files that are not in a sub-folder) are kept.
    """
    db.execute("SELECT file FROM list where file LIKE ?", (path + '%',))
    remainders = (row[0].replace(path, '', 1) for row in db)
    return [name for name in remainders if '/' not in name]
def ftime_match(db,filename,ftime):
    """Return True when the stored date of *filename* equals *ftime*.

    db: sqlite3 cursor on the image database.

    Raises IndexError when *filename* has no row in the table.
    """
    db.execute("SELECT date FROM list where file == ?", (filename,))
    rows = db.fetchall()
    stored = rows[0][0]
    return stored == ftime
def hash_match(db,filename,hash):
    """Return True when the stored hash of *filename* equals *hash*.

    db: sqlite3 cursor on the image database.

    Raises IndexError when *filename* has no row in the table.
    """
    db.execute("SELECT hash FROM list where file == ?", (filename,))
    rows = db.fetchall()
    stored = rows[0][0]
    return stored == hash
def get_md5(filename):
    ''' Return hash of the first 5 megabytes of the file

    filename: path of the file to read (opened in binary mode).

    Returns the MD5 digest as a hexadecimal string.  The file is closed
    before returning, also when reading fails.
    '''
    with open(filename, 'rb') as handle:
        head = handle.read(1024 * 1024 * 5)
    return hashlib.md5(head).hexdigest()
def get_dims(filename):
    """Return the pixel size of an image as reported by ``identify``.

    Runs the external ``identify`` command on the first frame
    (``filename[0]``) with the output format ``%wx%h`` and splits what it
    prints on 'x'.  The items of the returned list are strings.
    """
    command = ['identify', '-format', '%wx%h', filename + '[0]']
    proc = subprocess.Popen(command, stdout=subprocess.PIPE)
    printed = proc.communicate()[0]
    return printed.strip().split('x')
def append_colors(sqlfile):
    """Fill in the colour columns of every row that has none yet.

    sqlfile: path of the SQLite database holding the ``list`` table.

    Rows with ``R IS NULL`` are processed in file-name order.  For each,
    get_colors() supplies (mean, border) colour triples that are stored
    in R,G,B and BR,BG,BB.  Progress is committed every 50 files and once
    more at the end; the connection is always closed.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT file,R FROM list WHERE R IS NULL ORDER BY file")
        # Collect the work list first: the loop below updates and commits
        # on this same connection, which should not happen while the
        # SELECT cursor is still being iterated.
        pending = [row[0] for row in db.fetchall()]
        for count, filename in enumerate(pending, 1):
            colors = get_colors(filename)
            mean, border = colors[0], colors[1]
            db.execute(
                "UPDATE list SET R=?, G=?, B=?, BR=?, BG=?, BB=? "
                "WHERE file=?",
                (mean[0], mean[1], mean[2],
                 border[0], border[1], border[2], filename))
            print("colors: %(f)s (%(r)s %(g)s %(b)s)" % {
                'f': filename, 'r': mean[0], 'g': mean[1], 'b': mean[2]})
            if count % 50 == 0:
                conn.commit()
        conn.commit()
    finally:
        conn.close()
    return
def find_color_nearest(sqlfile,src):
    """Print the files whose border colour is closest to a given colour.

    sqlfile: path of the SQLite database holding the ``list`` table.
    src:     'R,G,B' or 'R,G,B,N' (optionally wrapped in double quotes);
             the components are parsed as floats and N is the maximum
             number of hits, defaulting to 1.

    Distance is the sum of absolute differences against the BR,BG,BB
    columns.  Rows without colour information are skipped: their
    distance would be NULL, which sorts first and cannot be formatted
    as a number.

    Raises ValueError for non-numeric components and IndexError when
    fewer than three components are given.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        values = [float(i) for i in src.strip().strip('"').split(',')]
        if len(values) == 3:
            values.append(1)
        db.execute(
            "SELECT file, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB "
            "FROM list "
            "WHERE BR IS NOT NULL AND BG IS NOT NULL AND BB IS NOT NULL "
            "ORDER BY K LIMIT ?",
            # LIMIT wants an integer; the components were parsed as floats.
            (values[0], values[1], values[2], int(values[3])))
        for hit in db:
            print("%(f)s : D %(d).2f (RGB %(r).2f,%(g).2f,%(b).2f)" % {
                'f': hit[0], 'd': hit[1],
                'r': hit[2], 'g': hit[3], 'b': hit[4]})
    finally:
        conn.close()
    return
def get_colors(filename):
    """Return (mean, border) colour triples of an image.

    The external ``convert`` command is run three times: once to shrink
    the first frame of *filename* to 10x10 pixels (emitted in TEXT
    format), then twice on that result - for the mean of the whole
    thumbnail and for the mean of its four edge strips appended together.

    Both items of the returned tuple are lists of three floats parsed
    from the ``fx:mean.r/g/b`` values that ``convert`` prints.
    """
    mean_format = '"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"'

    shrink_cmd = ['convert', '-define', 'jpeg:size=64x64',
                  filename + '[0]', '-resize', '10x10!', 'TEXT:-']
    shrink = subprocess.Popen(shrink_cmd, stdout=subprocess.PIPE)
    thumb = shrink.communicate()[0]

    whole_cmd = ['convert', '-', '-format', mean_format, 'info:-']
    whole = subprocess.Popen(whole_cmd, stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE)
    whole_out = whole.communicate(input=thumb)[0]

    top = ['(', '+clone', '-gravity', 'North', '-crop', '10x1+0+0',
           '-write', 'mpr:top', '+delete', ')']
    bottom = ['(', '+clone', '-gravity', 'South', '-crop', '10x1+0+0',
              '-write', 'mpr:bot', '+delete', ')']
    left = ['(', '+clone', '-gravity', 'West', '-crop', '1x10+0+0',
            '-rotate', '90', '-write', 'mpr:lef', '+delete', ')']
    right = ['(', '+clone', '-gravity', 'East', '-crop', '1x10+0+0',
             '-rotate', '90', '-write', 'mpr:rig', '+delete', ')']
    edge_cmd = (['convert', '-'] + top + bottom + left + right
                + ['+delete', 'mpr:top', 'mpr:bot', 'mpr:lef', 'mpr:rig',
                   '+append', '-format', mean_format, 'info:-'])
    edge = subprocess.Popen(edge_cmd, stdout=subprocess.PIPE,
                            stdin=subprocess.PIPE)
    edge_out = edge.communicate(input=thumb)[0]

    mean = [float(part)
            for part in whole_out.strip().strip('"').split(',')]
    border = [float(part)
              for part in edge_out.strip().strip('"').split(',')]
    return (mean, border)
def searchdb(sqlfile,needle):
    """Return the listed files whose path contains *needle*.

    sqlfile: path of the SQLite database holding the ``list`` table.
    needle:  text matched with ``LIKE %needle%`` against the file path.

    Returns ``[('search', rows)]`` where rows is a list of
    (file, width, height, date) tuples ordered by file - the same shape
    print_structure() consumes.  The connection is always closed.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT file,width,height,date FROM list "
                   "WHERE file LIKE ? ORDER BY file",
                   ('%' + needle + '%',))
        results = db.fetchall()
    finally:
        conn.close()
    return [('search', results)]
def clean_dirs(dirs):
    """Drop excluded and hidden folder names from *dirs*, in place.

    A name is dropped when it is in BADDIRS or starts with a dot.  The
    very same list object is returned.
    """
    dirs[:] = [name for name in dirs
               if name not in BADDIRS and not name.startswith(".")]
    return dirs
def clean_syms(files):
    """Drop every entry that is a symbolic link from *files*, in place.

    Each entry is handed to os.path.islink() exactly as given.  The very
    same list object is returned.
    """
    files[:] = [name for name in files if not os.path.islink(name)]
    return files
def confirm(prompt=None, resp=False):
    """Ask the user a yes/no question and return the answer as a bool.

    prompt: question text, 'Confirm' when None.  The default answer is
            shown in brackets, e.g. 'Confirm [n]|y: '.
    resp:   value returned when the user just presses ENTER.

    Only 'y', 'Y', 'n' and 'N' are accepted; anything else prints a hint
    and asks again.
    """
    if prompt is None:
        prompt = 'Confirm'
    default_key, other_key = ('y', 'n') if resp else ('n', 'y')
    prompt = '%s [%s]|%s: ' % (prompt, default_key, other_key)

    meaning = {'y': True, 'Y': True, 'n': False, 'N': False}
    while True:
        ans = raw_input(prompt)
        if not ans:
            return resp
        if ans in meaning:
            return meaning[ans]
        print('please enter y or n.')
def find_duplicates(sqlfile,search):
    """Return groups of listed files that share the same hash.

    sqlfile: path of the SQLite database holding the ``list`` table.
    search:  path fragment; only hashes occurring more than once among
             files matching ``LIKE %search%`` are reported.  '.' means
             every file.

    Returns a list of (hash, rows) pairs.  rows holds
    (file, width, height, date) tuples of all listed files with that
    hash - not only those matching *search* - sorted by date.  The pairs
    are sorted by their first row.  The connection is always closed.
    """
    pattern = '%' if search == '.' else '%' + search + '%'
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? "
                   "group by hash HAVING count(*) > 1 ", (pattern,))
        # Finish reading the hashes before running the per-hash queries.
        digests = [row[0] for row in db.fetchall()]
        duphash = []
        for digest in digests:
            db.execute("SELECT file,width,height,date FROM list "
                       "WHERE hash = ?", (digest,))
            flist = sorted(db.fetchall(), key=lambda row: row[3])
            # A NULL hash groups above but never matches "hash = ?";
            # skip the empty result instead of failing in the sort below.
            if flist:
                duphash.append((digest, flist))
    finally:
        conn.close()
    duphash.sort(key=lambda group: group[1][0])
    return duphash
def find_smalls(minsize,sqlfile):
    """Return the listed files that are narrower or lower than *minsize*.

    minsize: pixel limit; a file is reported when its width OR its height
             is below it.
    sqlfile: path of the SQLite database holding the ``list`` table.

    Returns ``[('smalls', rows)]`` where rows is a list of
    (file, width, height) tuples - the same shape print_structure()
    consumes.  The connection is always closed.
    """
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT file,width,height FROM list "
                   "WHERE width < ? OR height < ?", (minsize, minsize))
        smalls = db.fetchall()
    finally:
        conn.close()
    return [('smalls', smalls)]
def print_structure(files):
    """Print a numbered 'N: (WxH):path ' line for every listed file.

    files: list of (label, rows) pairs; each row starts with
           (path, width, height).  Numbering restarts at 1 for each pair
           and the label itself is not printed.
    """
    for group in files:
        for index, entry in enumerate(group[1], 1):
            print("%(i)d: (%(x)dx%(y)d):%(f)s " % {
                'i': index, 'f': entry[0], 'x': entry[1], 'y': entry[2]})
    return
def print_dup_structure(files):
    """Print one 'N:count: "path", "path" ' line per duplicate group.

    files: list of (hash, rows) pairs as returned by find_duplicates();
           the first item of every row is the file path.  Groups are
           numbered from 1 and the hash itself is not printed.
    """
    for number, group in enumerate(files, 1):
        quoted = [' "' + entry[0] + '"' for entry in group[1]]
        print("%(i)d:%(n)d:%(f)s " % {
            'i': number, 'n': len(quoted), 'f': ",".join(quoted)})
    return
def main():
    """Command line entry point: run the selected actions, then exit 0.

    The database file is created first when it does not exist.  A search
    (-s) or nearest-colour query (--nearest) is answered and the program
    exits right away.  Otherwise the remaining actions run in a fixed
    order: delete entries, add/change entries, colours, random lists,
    duplicates, small files.
    """
    options = setup_options()
    sqlfile = options.sqlfile

    if not os.path.exists(sqlfile):
        createdb(sqlfile)

    # Pure queries: answer and stop.
    if options.search:
        print_structure(searchdb(sqlfile, options.search))
        sys.exit(0)
    if options.nearestcolor:
        find_color_nearest(sqlfile, options.nearestcolor)
        sys.exit(0)

    # Maintenance steps.
    if options.delete:
        print('Deleting entries...')
        delete_nonexisting(sqlfile)
    if options.add or options.changed:
        print('Adding entries...')
        add_recurse(options)
    if options.colors:
        print('Adding colors...')
        append_colors(sqlfile)
    if options.random:
        print('Random lists...')
        random_lists(sqlfile)

    # Reports.
    if options.duplicate:
        files = find_duplicates(sqlfile, options.startpath)
        print_dup_structure(files)
    if options.searchsmall:
        files = find_smalls(options.minsize, sqlfile)
        if not options.deleteFiles:
            print_structure(files)
        elif len(files[0][1]) > 0:
            delete_files(files)
            # Drop the rows of whatever was just removed from disk.
            delete_nonexisting(sqlfile)
    sys.exit(0)
# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()