diff --git a/file_list.py b/file_list.py index 7d6aa02..87c33c7 100755 --- a/file_list.py +++ b/file_list.py @@ -24,6 +24,10 @@ def setup_options(): help="Modify changed files [%(default)s]") parser.add_argument("-d",action="store_true",dest="delete",default=False, help="Delete non-existing entries [%(default)s]") + parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, + help="Print directory sizes. Argument is the path where directories are listed from.") + parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, + help="Depth of summarization for --du.") parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False, help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False, @@ -81,6 +85,29 @@ def delete_nonexisting(sqlfile,options): conn.commit() return +def disk_used(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + db=conn.cursor() + db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?', + (os.path.realpath(options.diskused)+"/", + os.path.realpath(options.diskused)+"%", + )) + entries=[] + sizes=[] + for row in db: + start_path=row[1].split('/') + start_path="/".join(start_path[0:int(options.diskused_depth)]) + if start_path not in entries: + entries.append(start_path) + sizes.append(row[0]) + else: + sizes[ entries.index(start_path) ]+=row[0] + for entry in zip(sizes,entries): + print("| ".join([ str(entry[0]).ljust(14), + humanize_size(entry[0]).rjust(8), + entry[1]])) + def has_changes(options): conn=sqlite3.connect(options.sqlfile) conn.text_factory=str @@ -103,11 +130,11 @@ def has_changes_deleted(db): def has_changes_additions(db,options): for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks): dirs=clean_dirs(dirs) - db_files=get_folder_contents(db,os.path.abspath(path)+'/') + db_files=get_folder_contents(db,os.path.realpath(path)+'/') if not options.symlinks: files=clean_syms(files,path) for file in files: - filename=os.path.abspath(os.path.join(path,file)) + filename=os.path.realpath(os.path.join(path,file)) if file==options.sqlfile: continue #if not is_listed(db,filename): @@ -132,11 +159,11 @@ def add_recurse(options): dirs=clean_dirs(dirs) dirs.sort() files.sort() - db_files=get_folder_contents(db,os.path.abspath(path)+'/') + db_files=get_folder_contents(db,os.path.realpath(path)+'/') if not options.symlinks: files=clean_syms(files,path) for file in files: - filename=os.path.abspath(os.path.join(path,file)) + filename=os.path.realpath(os.path.join(path,file)) if file==options.sqlfile: continue #if not is_listed(db,filename): @@ -201,6 +228,18 @@ def hash_match(db,filename,hash): count=db.fetchall() return count[0][0]==hash +def humanize_size(size,precision=1): + if size==None: + return 'nan' + suffixes=['B','KB','MB','GB','TB'] + suffixIndex = 0 + defPrecision=0 + while size > 1024: + suffixIndex += 1 #increment the index of the suffix + size = size/1024.0 #apply the division + defPrecision=precision + return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) + def get_md5(filename): ''' returns content based hash, only first 50Mb is read ''' return hashlib.md5(open(filename,'rb').read(1024*1024*50)).hexdigest() @@ -290,6 +329,9 @@ def main(): if options.match: matchdb(options.sqlfile,options.match,options.search) sys.exit(0) + if options.diskused: + disk_used(options) + sys.exit(0) if options.delete: print('Deleting entries...') delete_nonexisting(options.sqlfile,options) diff --git a/image_list.py b/image_list.py index 249a0fc..72dc31d 100755 --- a/image_list.py +++ b/image_list.py @@ -21,6 +21,10 @@ def setup_options(): help="Modify changed files [%(default)s]") parser.add_argument("-d",action="store_true",dest="delete",default=False, help="Delete non-existing entries [%(default)s]") + parser.add_argument("--du",type=str,action='store',dest="diskused",default=False, + help="Print directory sizes. Argument is the path where directories are listed from.") + parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, + help="Depth of summarization for --du.") parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, help="SQL file name to use [%(default)s]") parser.add_argument("-l",action="store_true",dest="symlinks",default=False, @@ -56,7 +60,7 @@ def setup_options(): options=parser.parse_args() BADDIRS.extend(options.exclude) - if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search: + if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search or options.diskused: options.add=not options.add return options @@ -66,7 +70,8 @@ def createdb(sqlfile): conn.text_factory=str db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\ file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\ - width INTEGER,height INTEGER,fingerprint TEXT,sharpness NUMERIC,\ + width INTEGER,height INTEGER,size INTEGER,\ + fingerprint TEXT,sharpness NUMERIC,\ R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)') conn.commit() return @@ -109,10 +114,10 @@ def add_recurse(options): files=clean_syms(files) files.sort() dirs.sort() - db_files=get_folder_contents(db,os.path.abspath(path)+'/') + db_files=get_folder_contents(db,os.path.realpath(path)+'/') for file in files: if IMGMATCH.match(file): - filename=os.path.abspath(os.path.join(path,file)) + filename=os.path.realpath(os.path.join(path,file)) #if not is_listed(db,filename): if file not in db_files: if options.add: @@ -150,13 +155,14 @@ def add_single(conn,filename,change=False,hash=None,minsize=0): if hash==None: hash=get_md5(filename) ftime=os.path.getmtime(filename) + fsize=os.path.getsize(filename) if change: - db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=? \ - WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],filename)) + db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=?, \ + size=? WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],fsize,filename)) print("changing: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) else: - db.execute("INSERT INTO list(file,date,portrait,hash,width,height)\ - VALUES(?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1])) + db.execute("INSERT INTO list(file,date,portrait,hash,width,height,size)\ + VALUES(?,?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1],fsize)) print("adding: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]}) return @@ -452,7 +458,7 @@ def find_fingerprint_similar(opts): def find_fingerprint_nearest(opts): ''' Find nearest match to given file ''' - cmp=os.path.abspath(opts.similarity.rsplit(",")[0]) + cmp=os.path.realpath(opts.similarity.rsplit(",")[0]) thr=sys.maxint if len(opts.similarity.rsplit(","))>1: thr=int(opts.similarity.rsplit(",",1)[1]) @@ -628,6 +634,29 @@ def find_smalls(minsize,sqlfile): flist.append(('smalls',smalls)) return flist +def disk_used(options): + conn=sqlite3.connect(options.sqlfile) + conn.text_factory=str + db=conn.cursor() + db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?', + (os.path.realpath(options.diskused)+"/", + os.path.realpath(options.diskused)+"%", + )) + entries=[] + sizes=[] + for row in db: + start_path=row[1].split('/') + start_path="/".join(start_path[0:int(options.diskused_depth)]) + if start_path not in entries: + entries.append(start_path) + sizes.append(row[0]) + else: + sizes[ entries.index(start_path) ]+=row[0] + for entry in zip(sizes,entries): + print("| ".join([ str(entry[0]).ljust(14), + humanize_size(entry[0]).rjust(8), + entry[1]])) + def print_structure(files): for hash in files: #print(hash[0]) @@ -651,6 +680,18 @@ def print_dup_structure(files,opts): i+=1 return +def humanize_size(size,precision=1): + if size==None: + return 'nan' + suffixes=['B','KB','MB','GB','TB'] + suffixIndex = 0 + defPrecision=0 + while size > 1024: + suffixIndex += 1 + size = size/1024.0 + defPrecision=precision + return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) + def main(): options=setup_options(); if not os.path.exists(options.sqlfile): @@ -695,6 +736,8 @@ def main(): delete_nonexisting(options.sqlfile) else: print_structure(files) + if options.diskused: + disk_used(options) #print(files) sys.exit(0)