#!/usr/bin/python
"""Maintain an SQLite index of image files found under a directory tree.

The index lives in a single table called ``list`` with the columns
``file`` (absolute path), ``date`` (modification time), ``portrait``,
``hash`` (MD5 of the first 5 MiB), ``width`` and ``height``.

The code deliberately avoids syntax that is specific to either Python 2 or
Python 3 (no print statements, no f-strings) so it runs on both.
"""
import sys
import os
import re
import sqlite3
import subprocess
import hashlib
from argparse import ArgumentParser
from contextlib import closing

SQLFILE = 'list_of_images.sqlite'
# Case-insensitive match on the file extension; used with .match(), so the
# leading ".*" is required.
IMGMATCH = re.compile(r'.*\.(?:jpe?g|png|gif)$', re.I)
# Directory names that are never descended into.  setup_options() appends
# the values given with -x to this list.
BADDIRS = ['_tn', '_med']
MINSIZE = 0

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def _connect(sqlfile):
    """Open *sqlfile* with the text factory every query in this file uses."""
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    return conn


def setup_options(argv=None):
    """Parse the command line and return the resulting options namespace.

    argv -- list of argument strings; None (the default) means sys.argv[1:].

    Side effect: every -x value is appended to the module-level BADDIRS.
    --dup and --small invert the "add" option, so by default they only
    report and do not scan for new files.
    """
    parser = ArgumentParser(description="Maintains the list of images sqlite file")
    parser.add_argument("-a", action="store_false", dest="add", default=True,
                        help="Do not add new files [%(default)s]")
    parser.add_argument("-c", action="store_true", dest="changed", default=False,
                        help="Modify changed files [%(default)s]")
    parser.add_argument("-d", action="store_true", dest="delete", default=False,
                        help="Delete non-existing entries [%(default)s]")
    parser.add_argument("-f", action="store", dest="sqlfile", default=SQLFILE,
                        help="SQL file name to use [%(default)s]")
    parser.add_argument("-l", action="store_true", dest="symlinks", default=False,
                        help="Follow symbolic links [%(default)s]")
    parser.add_argument("-m", type=int, dest="minsize", default=MINSIZE,
                        help="Minimum pixel width/height of stored image [%(default)s]")
    parser.add_argument("-r", action="store_true", dest="random", default=False,
                        help="Create randomized files for landscape and portrait images [%(default)s]")
    parser.add_argument("-s", type=str, dest="search", default=False,
                        help="Search list based on path pattern")
    parser.add_argument("--dup", action="store_true", dest="duplicate", default=False,
                        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--del", action="store_true", dest="deleteFiles", default=False,
                        help="Delete files listed with --small. [%(default)s]")
    parser.add_argument("--small", action="store_true", dest="searchsmall", default=False,
                        help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("-x", action="append", dest="exclude", default=[],
                        help="Exclude folder name from the lists. This option may be issued several times")
    parser.add_argument('startpath', action="store", default='.', nargs='?')
    options = parser.parse_args(argv)
    BADDIRS.extend(options.exclude)
    if options.duplicate or options.searchsmall:
        options.add = not options.add
    return options


def createdb(sqlfile):
    """Create the ``list`` table in *sqlfile*.

    Raises sqlite3.OperationalError if the table already exists.
    """
    with closing(_connect(sqlfile)) as conn:
        conn.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,'
                     'file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,'
                     'width INTEGER,height INTEGER)')
        conn.commit()


def delete_nonexisting(sqlfile):
    """Remove every row whose ``file`` path no longer exists on disk."""
    with closing(_connect(sqlfile)) as conn:
        # Read the full listing first so that rows are not deleted while a
        # SELECT on the same connection is still being iterated.
        listed = [row[0] for row in conn.execute('SELECT file FROM list')]
        missing = [name for name in listed if not os.path.exists(name)]
        for name in missing:
            print('removing.. ' + name)
        conn.executemany("DELETE FROM list where file == ?",
                         [(name,) for name in missing])
        conn.commit()


def delete_files(files):
    """Delete files from disk after an interactive confirmation.

    files -- list of (label, rows) pairs as returned by find_smalls(),
             searchdb() or find_duplicates(); element 0 of each row is the
             path that gets removed.

    A file that cannot be removed is reported on stderr and skipped so the
    remaining files are still processed.
    """
    print_structure(files)
    if not confirm(prompt="Sure to delete these files?"):
        return
    print("now delling")
    for group in files:
        for row in group[1]:
            print(row[0])
            try:
                os.remove(row[0])
            except OSError as err:
                sys.stderr.write('could not delete %s: %s\n' % (row[0], err))


def add_recurse(options):
    """Walk options.startpath and add or refresh entries for image files.

    Uses options.sqlfile, options.startpath, options.symlinks, options.add,
    options.changed and options.minsize.  Unlisted files are added when
    options.add is set; listed files are re-read when options.changed is
    set and the stored date differs from the file's modification time.
    """
    with closing(_connect(options.sqlfile)) as conn:
        db = conn.cursor()
        for path, dirs, files in os.walk(options.startpath,
                                         followlinks=options.symlinks):
            print('Checking ' + path)
            # clean_dirs() edits the list in place, which is what makes
            # os.walk skip the excluded directories.
            clean_dirs(dirs)
            if not options.symlinks:
                files = clean_syms(files, path)
            for name in sorted(files):
                if not IMGMATCH.match(name):
                    continue
                filename = os.path.abspath(os.path.join(path, name))
                if not is_listed(db, filename):
                    if options.add:
                        add_single(conn, filename, change=False,
                                   minsize=options.minsize)
                elif options.changed:
                    ftime = os.path.getmtime(filename)
                    if not ftime_match(db, filename, ftime):
                        # Modification time differs: refresh the entry.
                        add_single(conn, filename, change=True,
                                   minsize=options.minsize)


def add_single(conn, filename, change=False, hash=None, minsize=0):
    """Insert (or, with change=True, update) the row for *filename*.

    NOTE(review): in the reviewed copy of this file everything after
    ``dims=get_dims(filename)`` / ``if int(dims[0])`` was missing.  The body
    below is a reconstruction from the table schema and from the callers in
    this file; confirm it against the original before relying on it.  In
    particular:
      * the size filter skips a file when width OR height is below minsize,
        mirroring find_smalls() -- the original condition is unknown;
      * ``portrait`` is stored as 1 when height > width -- presumed;
      * files whose dimensions cannot be parsed are skipped.

    conn     -- open sqlite3 connection
    filename -- path of the image
    change   -- update the existing row instead of inserting a new one
    hash     -- precomputed digest; computed with get_md5() when None
    minsize  -- minimum pixel width/height for a file to be stored
    """
    dims = get_dims(filename)
    try:
        width, height = int(dims[0]), int(dims[1])
    except (IndexError, ValueError):
        sys.stderr.write('skipping, cannot read dimensions: %s\n' % filename)
        return
    if width < minsize or height < minsize:
        return
    if hash is None:
        hash = get_md5(filename)
    ftime = os.path.getmtime(filename)
    portrait = int(height > width)
    db = conn.cursor()
    if change:
        db.execute("UPDATE list SET date=?,portrait=?,hash=?,width=?,height=? "
                   "WHERE file=?",
                   (ftime, portrait, hash, width, height, filename))
    else:
        db.execute("INSERT INTO list(file,date,portrait,hash,width,height) "
                   "VALUES(?,?,?,?,?,?)",
                   (filename, ftime, portrait, hash, width, height))
    conn.commit()


def random_lists(sqlfile):
    """Placeholder for the -r feature.

    NOTE(review): main() calls this function, but its definition was not
    present in the reviewed copy of the file and nothing else in the file
    shows what it writes.  Restore the original implementation here.
    """
    raise NotImplementedError('random_lists() must be restored from the original file')


def is_listed(db, filename):
    """Return True when *filename* has at least one row in the list.

    NOTE(review): only the trailing ``0`` of this function survived in the
    reviewed copy; reconstructed from its use in add_recurse().
    """
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    return db.fetchone()[0] > 0


def ftime_match(db, filename, ftime):
    """Return True when the stored date of *filename* equals *ftime*.

    Returns False when the file has no row.
    """
    db.execute("SELECT date FROM list where file == ?", (filename,))
    row = db.fetchone()
    return row is not None and row[0] == ftime


def hash_match(db, filename, hash):
    """Return True when the stored hash of *filename* equals *hash*.

    Returns False when the file has no row.
    """
    db.execute("SELECT hash FROM list where file == ?", (filename,))
    row = db.fetchone()
    return row is not None and row[0] == hash


def get_md5(filename):
    """Return the MD5 hex digest of the first 5 megabytes of the file."""
    with open(filename, 'rb') as handle:
        return hashlib.md5(handle.read(1024 * 1024 * 5)).hexdigest()


def get_dims(filename):
    """Return ImageMagick's ``%wx%h`` output for *filename* split on 'x'.

    Only the first frame (``[0]``) is inspected.  The result is a list of
    strings; when ``identify`` prints nothing the list is [''].
    Raises OSError when the ``identify`` executable cannot be started.
    """
    idargs = ['identify', '-format', '%wx%h', filename + '[0]']
    # universal_newlines makes the output text on Python 3 as well, so the
    # split below works with a str separator.
    proc = subprocess.Popen(idargs, stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _err = proc.communicate()
    return out.strip().split('x')


def searchdb(sqlfile, needle):
    """Return [('search', rows)] for paths containing *needle*.

    Each row is (file, width, height, date), ordered by file.  The needle
    is wrapped in '%' and passed to SQL LIKE.
    """
    with closing(_connect(sqlfile)) as conn:
        rows = conn.execute("SELECT file,width,height,date FROM list "
                            "WHERE file LIKE ? ORDER BY file",
                            ('%' + needle + '%',)).fetchall()
    return [('search', rows)]


def clean_dirs(dirs):
    """Remove the names listed in BADDIRS from *dirs* in place; return it."""
    dirs[:] = [name for name in dirs if name not in BADDIRS]
    return dirs


def clean_syms(files, path=None):
    """Remove symbolic links from *files* in place and return the list.

    files -- file names
    path  -- directory the names belong to; when None the names are
             resolved against the current working directory.
    """
    def _is_link(name):
        full = name if path is None else os.path.join(path, name)
        return os.path.islink(full)

    # Build the kept list first: removing entries while iterating the same
    # list skips the element that follows each removal.
    files[:] = [name for name in files if not _is_link(name)]
    return files


def confirm(prompt=None, resp=False):
    """Prompt for a yes or no response from the user.

    Returns True for yes and False for no.  'resp' is the value returned
    when the user simply presses ENTER.
    """
    if prompt is None:
        prompt = 'Confirm'
    if resp:
        prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
    else:
        prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
    while True:
        ans = _read_line(prompt)
        if not ans:
            return resp
        if ans in ('y', 'Y'):
            return True
        if ans in ('n', 'N'):
            return False
        print('please enter y or n.')


def find_duplicates(sqlfile, search):
    """Return groups of rows that share a hash.

    search -- path fragment that limits which hashes are considered;
              '.' means every path.

    Returns a list of (hash, rows) pairs.  Rows are (file, width, height,
    date) for every file with that hash (not only those matching *search*),
    sorted by date; the groups are sorted by their first row.
    """
    pattern = '%' if search == '.' else '%' + search + '%'
    duphash = []
    with closing(_connect(sqlfile)) as conn:
        digests = [row[0] for row in conn.execute(
            "SELECT hash,count(*) FROM list WHERE file LIKE ? "
            "group by hash HAVING count(*) > 1 ", (pattern,))]
        for digest in digests:
            rows = conn.execute("SELECT file,width,height,date FROM list "
                                "WHERE hash = ?", (digest,)).fetchall()
            rows.sort(key=lambda row: row[3])
            duphash.append((digest, rows))
    duphash.sort(key=lambda group: group[1][0])
    return duphash


def find_smalls(minsize, sqlfile):
    """Return [('smalls', rows)] for images narrower or lower than *minsize*.

    Each row is (file, width, height).
    """
    with closing(_connect(sqlfile)) as conn:
        rows = conn.execute("SELECT file,width,height FROM list "
                            "WHERE width < ? OR height < ?",
                            (minsize, minsize)).fetchall()
    return [('smalls', rows)]


def print_structure(files):
    """Print a numbered "index: (WxH):path" line for every row.

    Numbering restarts at 1 for each (label, rows) group in *files*.
    """
    for group in files:
        for index, row in enumerate(group[1], 1):
            print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i': index, 'f': row[0],
                                                  'x': row[1], 'y': row[2]})


def print_dup_structure(files):
    """Print one "index:count:quoted,paths" line per duplicate group."""
    for index, group in enumerate(files, 1):
        fnames = [' "' + row[0] + '"' for row in group[1]]
        print("%(i)d:%(n)d:%(f)s " % {'i': index, 'n': len(fnames),
                                      'f': ",".join(fnames)})


def main():
    """Run the actions selected on the command line, then exit with 0."""
    options = setup_options()
    if not os.path.exists(options.sqlfile):
        createdb(options.sqlfile)
    if options.search:
        print_structure(searchdb(options.sqlfile, options.search))
        sys.exit(0)
    if options.delete:
        print('Deleting entries...')
        delete_nonexisting(options.sqlfile)
    if options.add or options.changed:
        print('Adding entries...')
        add_recurse(options)
    if options.random:
        print('Random lists...')
        random_lists(options.sqlfile)
    if options.duplicate:
        files = find_duplicates(options.sqlfile, options.startpath)
        print_dup_structure(files)
    if options.searchsmall:
        files = find_smalls(options.minsize, options.sqlfile)
        if options.deleteFiles:
            if files[0][1]:
                delete_files(files)
                # Drop the rows of the files that were just removed.
                delete_nonexisting(options.sqlfile)
        else:
            print_structure(files)
    sys.exit(0)


if __name__ == '__main__':
    main()