From e7f6ee6fc5b738fbd120fe7e450d8a5298bced37 Mon Sep 17 00:00:00 2001
From: q
Date: Tue, 13 Nov 2012 09:02:16 +0200
Subject: [PATCH] adding first files

---
 file_list.py  | 209 +++++++++++++++++++++++++++++++++++
 image_list.py | 296 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 505 insertions(+)
 create mode 100755 file_list.py
 create mode 100755 image_list.py

diff --git a/file_list.py b/file_list.py
new file mode 100755
index 0000000..52021c6
--- /dev/null
+++ b/file_list.py
@@ -0,0 +1,209 @@
+#!/usr/bin/python
+import sys
+import os
+import re
+import sqlite3
+import subprocess
+import hashlib
+import magic
+from argparse import ArgumentParser
+
+SQLFILE='list_of_files.sqlite'
+IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
+BADDIRS=[]
+MINSIZE=0
+MIME=magic.open(magic.MAGIC_NONE)
+#MIME=magic.open(magic.MAGIC_MIME)
+MIME.load()
+
+def setup_options():
+    parser=ArgumentParser(description="Maintains the list of images sqlite file")
+    parser.add_argument("-a",action="store_false",dest="add",default=True,
+        help="Do not add new files [%(default)s]")
+    parser.add_argument("-c",action="store_true",dest="changed",default=False,
+        help="Modify changed files [%(default)s]")
+    parser.add_argument("-d",action="store_true",dest="delete",default=False,
+        help="Delete non-existing entries [%(default)s]")
+    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
+        help="Follow symbolic links [%(default)s]")
+    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
+        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
+    parser.add_argument("-x",action="append",dest="exclude",default=[],
+        help="Exclude folder name from the lists. This option may be issued several times")
+    parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
+        help="SQL file name to use [%(default)s]")
+    parser.add_argument("-s",type=str,dest="search",default=False,
+        help="Search list based on path pattern")
+    parser.add_argument('startpath', action="store",default='.', nargs='?')
+
+    options=parser.parse_args()
+    BADDIRS.extend(options.exclude)
+    if options.duplicate:
+        options.add=not options.add
+    return options
+
+def createdb(fname):
+    conn=sqlite3.connect(fname)
+    db=conn.cursor()
+    conn.text_factory=str
+    db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
+        file TEXT,date INTEGER, hash TEXT,\
+        size INTEGER, mime TEXT)')
+    conn.commit()
+    return
+
+def delete_nonexisting(sqlfile,options):
+    conn=sqlite3.connect(sqlfile)
+    conn.text_factory=str
+    #conn.row_factory=sqlite3.Row
+    db=conn.cursor()
+    dbdel=conn.cursor()
+    db.execute('SELECT file FROM list')
+    for row in db:
+        if os.path.exists(row[0]):
+            delete=False
+            if not options.symlinks:
+                if os.path.islink(row[0]):
+                    delete=True
+        else:
+            delete=True
+        if delete:
+            print('removing.. '+row[0])
+            dbdel.execute("DELETE FROM list where file == ?",(row[0],))
+    conn.commit()
+    return
+
+def add_recurse(options):
+    conn=sqlite3.connect(options.sqlfile)
+    conn.text_factory=str
+    db=conn.cursor()
+    for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
+        dirs=clean_dirs(dirs)
+        if not options.symlinks:
+            files=clean_syms(files,path)
+        for file in files:
+            filename=os.path.abspath(os.path.join(path,file))
+            if file==options.sqlfile:
+                continue
+            if not is_listed(db,filename):
+                if options.add:
+                    add_single(conn,filename,change=False)
+            else:
+                if options.changed:
+                    ftime=os.path.getmtime(filename)
+                    if not ftime_match(db,filename,ftime):
+                        #file content changed
+                        add_single(conn,filename,change=True)
+
+
+    return
+
+def add_single(conn,filename,change=False,hash=None,minsize=0):
+
+    db=conn.cursor()
+    print "%(f)s" % {'f':filename}
+    if hash==None:
+        hash=get_md5(filename)
+    ftime=os.path.getmtime(filename)
+    fsize=os.path.getsize(filename)
+    mime=MIME.file(filename)
+    if change:
+        db.execute("UPDATE list SET date=?, hash=?, size=?, mime=? \
+            WHERE file=?",(ftime,hash,fsize,mime,filename))
+        #print "changing: %(f)s " % {'f':filename}
+    else:
+        db.execute("INSERT INTO list(file,date,hash,size,mime)\
+            VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
+    conn.commit()
+    return
+
+def is_listed(db,filename):
+    db.execute("SELECT COUNT(*) FROM list where file == ?",(filename,))
+    count=db.fetchall()
+    return count[0][0]>0
+
+def ftime_match(db,filename,ftime):
+    db.execute("SELECT date FROM list where file == ?",(filename,))
+    count=db.fetchall()
+    return count[0][0]==ftime
+
+def hash_match(db,filename,hash):
+    db.execute("SELECT hash FROM list where file == ?",(filename,))
+    count=db.fetchall()
+    return count[0][0]==hash
+
+def get_md5(filename):
+    ''' returns content based hash, only first 50Mb is read '''
+    return hashlib.md5(open(filename,'rb').read(1024*1024*50)).hexdigest()
+
+def clean_dirs(dirs):
+    for s in BADDIRS:
+        if s in dirs:
+            dirs.remove(s)
+    return dirs
+
+def clean_syms(files,path):
+    nonsyms=[]
+    for f in files:
+        if not os.path.islink(os.path.join(path,f)):
+            nonsyms.append(f)
+    return nonsyms
+
+def find_duplicates(sqlfile):
+    conn=sqlite3.connect(sqlfile)
+    conn.text_factory=str
+    db=conn.cursor()
+    dbh=conn.cursor()
+    db.execute("SELECT hash,count(*) FROM list group by hash HAVING count(*) > 1 ")
+    duphash=[]
+    for row in db:
+        hash=row[0]
+        dbh.execute("SELECT file,size,date FROM list WHERE hash = ?",(hash,))
+        flist=[]
+        for row in dbh:
+            flist.append(row)
+        flist.sort(key=lambda file: file[0])
+        duphash.append((hash, flist))
+    duphash.sort(key=lambda file: file[1][0])
+    return duphash
+
+def searchdb(sqlfile,needle):
+    conn=sqlite3.connect(sqlfile)
+    conn.text_factory=str
+    db=conn.cursor()
+    dbh=conn.cursor()
+    db.execute("SELECT file FROM list WHERE file LIKE ? ORDER BY file",('%'+needle+'%',))
+    for row in db:
+        print(row[0])
+
+def print_structure(files):
+    for hash in files:
+        #print(hash[0])
+        i=1
+        for f in hash[1]:
+            print "%(i)d: %(x)d:%(f)s " % {'i':i, 'f':f[0], 'x':f[1]}
+            i+=1
+    return
+
+def main():
+    options=setup_options();
+
+    if not os.path.exists(options.sqlfile):
+        createdb(options.sqlfile);
+    if options.search:
+        searchdb(options.sqlfile,options.search)
+        sys.exit(0)
+    if options.delete:
+        print('Deleting entries...')
+        delete_nonexisting(options.sqlfile,options)
+    if options.add or options.changed:
+        print('Adding '+options.startpath+' entries...')
+        add_recurse(options)
+    if options.duplicate:
+        files=find_duplicates(options.sqlfile)
+        print_structure(files)
+
+    sys.exit(0)
+
+main()
+
diff --git a/image_list.py b/image_list.py
new file mode 100755
index 0000000..b48d798
--- /dev/null
+++ b/image_list.py
@@ -0,0 +1,296 @@
+#!/usr/bin/python
+import sys
+import os
+import re
+import sqlite3
+import subprocess
+import hashlib
+from argparse import ArgumentParser
+
+SQLFILE='list_of_images.sqlite'
+IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$',re.I)
+BADDIRS=['_tn','_med']
+MINSIZE=0
+
+def setup_options():
+    parser=ArgumentParser(description="Maintains the list of images sqlite file")
+    parser.add_argument("-a",action="store_false",dest="add",default=True,
+        help="Do not add new files [%(default)s]")
+    parser.add_argument("-c",action="store_true",dest="changed",default=False,
+        help="Modify changed files [%(default)s]")
+    parser.add_argument("-d",action="store_true",dest="delete",default=False,
+        help="Delete non-existing entries [%(default)s]")
+    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
+        help="Follow symbolic links [%(default)s]")
+    parser.add_argument("-m",type=int,dest="minsize",default=MINSIZE,
+        help="Minimum pixel width/height of stored image [%(default)s]")
+    parser.add_argument("-r",action="store_true",dest="random",default=False,
+        help="Create randomized files for landscape and portrait images [%(default)s]")
+    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
+        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
+    parser.add_argument("--del",action="store_true",dest="deleteFiles",default=False,
+        help="Delete files listed with --small. [%(default)s]")
+    parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
+        help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
+    parser.add_argument("-x",action="append",dest="exclude",default=[],
+        help="Exclude folder name from the lists. This option may be issued several times")
+    options=parser.parse_args()
+    BADDIRS.extend(options.exclude)
+    if options.duplicate or options.searchsmall:
+        options.add=not options.add
+    return options
+
+def createdb():
+    conn=sqlite3.connect(SQLFILE)
+    db=conn.cursor()
+    conn.text_factory=str
+    db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
+        file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\
+        width INTEGER,height INTEGER)')
+    conn.commit()
+    return
+
+def delete_nonexisting():
+    conn=sqlite3.connect(SQLFILE)
+    conn.text_factory=str
+    #conn.row_factory=sqlite3.Row
+    db=conn.cursor()
+    dbdel=conn.cursor()
+    db.execute('SELECT file FROM list')
+    for row in db:
+        if not os.path.exists(row[0]):
+            print('removing.. '+row[0])
+            dbdel.execute("DELETE FROM list where file == ?",(row[0],))
+    conn.commit()
+    return
+
+def delete_files(files):
+    ''' Actually deletes files! '''
+    print_structure(files)
+
+    doit=confirm(prompt="Sure to delete these files?")
+    if doit:
+        print("now delling")
+        for hash in files:
+            for f in hash[1]:
+                print f[0]
+                os.remove(f[0])
+    return
+
+def add_recurse(options):
+    conn=sqlite3.connect(SQLFILE)
+    conn.text_factory=str
+    db=conn.cursor()
+    for path,dirs,files in os.walk('.',followlinks=options.symlinks):
+        dirs=clean_dirs(dirs)
+        if not options.symlinks:
+            files=clean_syms(files)
+        for file in files:
+            if IMGMATCH.match(file):
+                filename=os.path.abspath(os.path.join(path,file))
+                if not is_listed(db,filename):
+                    if options.add:
+                        add_single(conn,filename,change=False,minsize=options.minsize)
+                else:
+                    if options.changed:
+                        ftime=os.path.getmtime(filename)
+                        #hash=get_md5(filename)
+                        #if not hash_match(db,filename,hash):
+                        if not ftime_match(db,filename,ftime):
+                            #file content changed
+                            add_single(conn,filename,change=True,minsize=options.minsize)
+                        # if file mentioned, and hash same, no need to change entry
+
+    return
+
+def add_single(conn,filename,change=False,hash=None,minsize=0):
+    dims=get_dims(filename)
+    if int(dims[0])0
+
+def ftime_match(db,filename,ftime):
+    db.execute("SELECT date FROM list where file == ?",(filename,))
+    count=db.fetchall()
+    return count[0][0]==ftime
+
+def hash_match(db,filename,hash):
+    db.execute("SELECT hash FROM list where file == ?",(filename,))
+    count=db.fetchall()
+    return count[0][0]==hash
+
+def get_md5(filename):
+    ''' Return hash of the first 5 megabytes of the file '''
+    return hashlib.md5(open(filename,'rb').read(1024*1024*5)).hexdigest()
+
+def get_dims(filename):
+    idargs=['identify','-format','%wx%h',filename+'[0]']
+    p=subprocess.Popen(idargs,stdout=subprocess.PIPE)
+    out, err = p.communicate()
+    return (out.strip().split('x'))
+
+def clean_dirs(dirs):
+    for s in BADDIRS:
+        if s in dirs:
+            dirs.remove(s)
+    return dirs
+
+def clean_syms(files):
+    for f in files:
+        if os.path.islink(f):
+            files.remove(f)
+    return files
+
+def confirm(prompt=None, resp=False):
+    """prompts for yes or no response from the user. Returns True for yes and
+    False for no.
+    'resp' should be set to the default value assumed by the caller when
+    user simply types ENTER.
+    """
+
+    if prompt is None:
+        prompt = 'Confirm'
+    if resp:
+        prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
+    else:
+        prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
+
+    while True:
+        ans = raw_input(prompt)
+        if not ans:
+            return resp
+        if ans not in ['y', 'Y', 'n', 'N']:
+            print 'please enter y or n.'
+            continue
+        if ans == 'y' or ans == 'Y':
+            return True
+        if ans == 'n' or ans == 'N':
+            return False
+
+def find_duplicates():
+    conn=sqlite3.connect(SQLFILE)
+    conn.text_factory=str
+    db=conn.cursor()
+    dbh=conn.cursor()
+    db.execute("SELECT hash,count(*) FROM list group by hash HAVING count(*) > 1 ")
+    duphash=[]
+    for row in db:
+        hash=row[0]
+        dbh.execute("SELECT file,width,height,date FROM list WHERE hash = ?",(hash,))
+        flist=[]
+        for row in dbh:
+            flist.append(row)
+        flist.sort(key=lambda file: file[3])
+        duphash.append((hash, flist))
+    duphash.sort(key=lambda file: file[1][0])
+    return duphash
+
+def find_smalls(minsize):
+    conn=sqlite3.connect(SQLFILE)
+    conn.text_factory=str
+    db=conn.cursor()
+    db.execute("SELECT file,width,height FROM list WHERE width < ? OR height < ?",(minsize,minsize))
+    smalls=[]
+    flist=[]
+    for row in db:
+        smalls.append(row)
+    flist.append(('smalls',smalls))
+    return flist
+
+def print_structure(files):
+    for hash in files:
+        #print(hash[0])
+        i=1
+        for f in hash[1]:
+            print "%(i)d: (%(x)dx%(y)d):%(f)s " % {'i':i, 'f':f[0], 'x':f[1], 'y':f[2]}
+            i+=1
+    return
+
+def print_dup_structure(files):
+    i=1
+    for hash in files:
+        #print(hash[0])
+        fnames=[]
+        for f in hash[1]:
+            fnames.append(' "'+f[0]+'"')
+        print "%(i)d:%(n)d:%(f)s " % {'i':i, 'n':len(fnames), 'f':",".join(fnames)}
+        i+=1
+    return
+
+def main():
+    options=setup_options();
+    if not os.path.exists(SQLFILE):
+        createdb();
+    if options.delete:
+        print('Deleting entries...')
+        delete_nonexisting()
+    if options.add or options.changed:
+        print('Adding entries...')
+        add_recurse(options)
+    if options.random:
+        print('Random lists...')
+        random_lists()
+    if options.duplicate:
+        files=find_duplicates()
+        print_dup_structure(files)
+    if options.searchsmall:
+        files=find_smalls(options.minsize)
+        if options.deleteFiles:
+            if len(files[0][1])>0:
+                delete_files(files)
+                delete_nonexisting()
+        else:
+            print_structure(files)
+        #print(files)
+
+
+
+    sys.exit(0)
+
+main()
+
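
Note on the truncated hunk: the add_single()/is_listed() section of image_list.py above breaks off after "if int(dims[0])", and the random_lists() helper called from main() for the -r option is not visible in the hunk either. The sketch below is a hypothetical reconstruction, not the author's original lines: it is modeled on file_list.py's add_single() and on the columns declared in createdb(), and the minsize test, the portrait flag and the column order are assumptions.

    # Hypothetical reconstruction of the truncated hunk -- the minsize check,
    # the portrait flag and the column order are assumptions, not taken from
    # the patch.
    def add_single(conn,filename,change=False,hash=None,minsize=0):
        dims=get_dims(filename)
        # assumed: skip images whose width or height is below the -m threshold
        if int(dims[0])<minsize or int(dims[1])<minsize:
            return
        db=conn.cursor()
        print "%(f)s" % {'f':filename}
        if hash==None:
            hash=get_md5(filename)
        ftime=os.path.getmtime(filename)
        # assumed: flag an image as portrait when it is taller than it is wide
        portrait=0
        if int(dims[0])<int(dims[1]):
            portrait=1
        if change:
            db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=?, height=? WHERE file=?",
                (ftime,portrait,hash,int(dims[0]),int(dims[1]),filename))
        else:
            db.execute("INSERT INTO list(file,date,portrait,hash,width,height) VALUES(?,?,?,?,?,?)",
                (filename,ftime,portrait,hash,int(dims[0]),int(dims[1])))
        conn.commit()
        return

    def is_listed(db,filename):
        db.execute("SELECT COUNT(*) FROM list where file == ?",(filename,))
        count=db.fetchall()
        return count[0][0]>0

Typical invocations, inferred from setup_options() and main() rather than quoted from the patch: "./image_list.py -m 800 --small" lists stored images whose width or height is below 800 pixels (adding --del removes them after a confirmation prompt), and "./file_list.py --dup" prints the files that share an MD5 hash.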