file size measurements

ville rantanen
2014-07-23 10:23:32 +03:00
parent 4ba197146b
commit 1b74b01aae
2 changed files with 98 additions and 13 deletions

View File

@@ -24,6 +24,10 @@ def setup_options():
help="Modify changed files [%(default)s]") help="Modify changed files [%(default)s]")
parser.add_argument("-d",action="store_true",dest="delete",default=False, parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]") help="Delete non-existing entries [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
help="Depth of summarization for --du.")
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False, parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False, parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
@@ -81,6 +85,29 @@ def delete_nonexisting(sqlfile,options):
     conn.commit()
     return
 
+def disk_used(options):
+    conn=sqlite3.connect(options.sqlfile)
+    conn.text_factory=str
+    db=conn.cursor()
+    db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
+               (os.path.realpath(options.diskused)+"/",
+                os.path.realpath(options.diskused)+"%",
+               ))
+    entries=[]
+    sizes=[]
+    for row in db:
+        start_path=row[1].split('/')
+        start_path="/".join(start_path[0:int(options.diskused_depth)])
+        if start_path not in entries:
+            entries.append(start_path)
+            sizes.append(row[0])
+        else:
+            sizes[ entries.index(start_path) ]+=row[0]
+    for entry in zip(sizes,entries):
+        print("| ".join([ str(entry[0]).ljust(14),
+                          humanize_size(entry[0]).rjust(8),
+                          entry[1]]))
+
 def has_changes(options):
     conn=sqlite3.connect(options.sqlfile)
     conn.text_factory=str
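
The new disk_used() selects the stored size together with the path relative to the --du argument, then sums the sizes per prefix of --du-depth path components. A minimal standalone sketch of just that grouping step, with made-up rows and a dict instead of the parallel entries/sizes lists used above (illustrative only, not part of the commit):

    # Hypothetical (size, relative path) rows, as the SELECT above would yield them
    rows = [(2048, "photos/2014/a.jpg"),
            (4096, "photos/2014/b.jpg"),
            (1024, "scans/c.png")]
    depth = 1                                    # corresponds to --du-depth
    totals = {}
    for size, path in rows:
        key = "/".join(path.split("/")[:depth])  # first <depth> path components
        totals[key] = totals.get(key, 0) + size
    print(totals)                                # {'photos': 6144, 'scans': 1024}
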
@@ -103,11 +130,11 @@ def has_changes_deleted(db):
 def has_changes_additions(db,options):
     for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
         dirs=clean_dirs(dirs)
-        db_files=get_folder_contents(db,os.path.abspath(path)+'/')
+        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
         if not options.symlinks:
             files=clean_syms(files,path)
         for file in files:
-            filename=os.path.abspath(os.path.join(path,file))
+            filename=os.path.realpath(os.path.join(path,file))
             if file==options.sqlfile:
                 continue
             #if not is_listed(db,filename):
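
Several hunks in this commit swap os.path.abspath for os.path.realpath: abspath only normalizes the path string against the current directory, while realpath additionally resolves symbolic links, so a file reached through a symlinked directory ends up under one canonical key in the database. A short illustrative comparison (the paths and outputs are placeholders, not taken from the repository):

    import os

    # Hypothetical layout: ./link_to_photos is a symlink to /real/photos
    print(os.path.abspath("link_to_photos/a.jpg"))   # e.g. /home/user/link_to_photos/a.jpg
    print(os.path.realpath("link_to_photos/a.jpg"))  # e.g. /real/photos/a.jpg
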
@@ -132,11 +159,11 @@ def add_recurse(options):
         dirs=clean_dirs(dirs)
         dirs.sort()
         files.sort()
-        db_files=get_folder_contents(db,os.path.abspath(path)+'/')
+        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
         if not options.symlinks:
             files=clean_syms(files,path)
         for file in files:
-            filename=os.path.abspath(os.path.join(path,file))
+            filename=os.path.realpath(os.path.join(path,file))
             if file==options.sqlfile:
                 continue
             #if not is_listed(db,filename):
@@ -201,6 +228,18 @@ def hash_match(db,filename,hash):
     count=db.fetchall()
     return count[0][0]==hash
 
+def humanize_size(size,precision=1):
+    if size==None:
+        return 'nan'
+    suffixes=['B','KB','MB','GB','TB']
+    suffixIndex = 0
+    defPrecision=0
+    while size > 1024:
+        suffixIndex += 1 #increment the index of the suffix
+        size = size/1024.0 #apply the division
+        defPrecision=precision
+    return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
+
 def get_md5(filename):
     ''' returns content based hash, only first 50Mb is read '''
     return hashlib.md5(open(filename,'rb').read(1024*1024*50)).hexdigest()
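
humanize_size() repeatedly divides by 1024 and picks the matching suffix; precision stays 0 for plain byte counts, switches to the requested precision once at least one division happens, and None (rows without a stored size) is rendered as 'nan'. A few expected values, worked out by hand and assuming the function above is in scope (illustrative only):

    print(humanize_size(512))        # -> 512B
    print(humanize_size(1536))       # -> 1.5KB
    print(humanize_size(5*1024**2))  # -> 5.0MB
    print(humanize_size(None))       # -> nan
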
@@ -290,6 +329,9 @@ def main():
     if options.match:
         matchdb(options.sqlfile,options.match,options.search)
         sys.exit(0)
+    if options.diskused:
+        disk_used(options)
+        sys.exit(0)
     if options.delete:
         print('Deleting entries...')
         delete_nonexisting(options.sqlfile,options)
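
With this hook in main(), --du prints one row per grouped directory: the raw byte count left-justified to 14 characters, the human-readable size right-justified to 8, and the relative path, joined with "| ". A mock-up of a single row, with a made-up size and path and assuming humanize_size from the same file is in scope:

    size, path = 6144, "photos"    # placeholder values
    print("| ".join([str(size).ljust(14), humanize_size(size).rjust(8), path]))
    # -> 6144          |    6.0KB| photos
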

View File

@@ -21,6 +21,10 @@ def setup_options():
help="Modify changed files [%(default)s]") help="Modify changed files [%(default)s]")
parser.add_argument("-d",action="store_true",dest="delete",default=False, parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]") help="Delete non-existing entries [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
help="Depth of summarization for --du.")
parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
help="SQL file name to use [%(default)s]") help="SQL file name to use [%(default)s]")
parser.add_argument("-l",action="store_true",dest="symlinks",default=False, parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
@@ -56,7 +60,7 @@ def setup_options():
     options=parser.parse_args()
     BADDIRS.extend(options.exclude)
-    if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search:
+    if options.duplicate or options.searchsmall or options.measure or options.nearestcolor or options.similarity!=None or options.search or options.diskused:
         options.add=not options.add
     return options
@@ -66,7 +70,8 @@ def createdb(sqlfile):
     conn.text_factory=str
     db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
         file TEXT,date INTEGER,portrait NUMERIC, hash TEXT,\
-        width INTEGER,height INTEGER,fingerprint TEXT,sharpness NUMERIC,\
+        width INTEGER,height INTEGER,size INTEGER,\
+        fingerprint TEXT,sharpness NUMERIC,\
         R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
     conn.commit()
     return
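
A database created before this commit has no size column at all, so --du would fail on it. One way to upgrade such a file in place is SQLite's ALTER TABLE ... ADD COLUMN; a hedged sketch, not part of the commit, with "files.db" as a placeholder name. Already-listed files keep a NULL size until they are re-scanned, which humanize_size() prints as 'nan':

    import sqlite3

    conn = sqlite3.connect("files.db")
    cols = [row[1] for row in conn.execute("PRAGMA table_info(list)")]
    if "size" not in cols:
        # existing rows get NULL until their files are re-scanned
        conn.execute("ALTER TABLE list ADD COLUMN size INTEGER")
        conn.commit()
    conn.close()
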
@@ -109,10 +114,10 @@ def add_recurse(options):
         files=clean_syms(files)
         files.sort()
         dirs.sort()
-        db_files=get_folder_contents(db,os.path.abspath(path)+'/')
+        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
         for file in files:
             if IMGMATCH.match(file):
-                filename=os.path.abspath(os.path.join(path,file))
+                filename=os.path.realpath(os.path.join(path,file))
                 #if not is_listed(db,filename):
                 if file not in db_files:
                     if options.add:
@@ -150,13 +155,14 @@ def add_single(conn,filename,change=False,hash=None,minsize=0):
     if hash==None:
         hash=get_md5(filename)
     ftime=os.path.getmtime(filename)
+    fsize=os.path.getsize(filename)
     if change:
-        db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=? \
-            WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],filename))
+        db.execute("UPDATE list SET date=?, portrait=?, hash=?, width=? ,height=?, \
+            size=? WHERE file=?",(ftime,portrait,hash,dims[0],dims[1],fsize,filename))
         print("changing: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]})
     else:
-        db.execute("INSERT INTO list(file,date,portrait,hash,width,height)\
-            VALUES(?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1]))
+        db.execute("INSERT INTO list(file,date,portrait,hash,width,height,size)\
+            VALUES(?,?,?,?,?,?,?)",(filename,ftime,portrait,hash,dims[0],dims[1],fsize))
         print("adding: %(f)s (%(x)sx%(y)s)" % {'f':filename, 'x':dims[0], 'y':dims[1]})
     return
@@ -452,7 +458,7 @@ def find_fingerprint_similar(opts):
 def find_fingerprint_nearest(opts):
     ''' Find nearest match to given file '''
-    cmp=os.path.abspath(opts.similarity.rsplit(",")[0])
+    cmp=os.path.realpath(opts.similarity.rsplit(",")[0])
     thr=sys.maxint
     if len(opts.similarity.rsplit(","))>1:
         thr=int(opts.similarity.rsplit(",",1)[1])
@@ -628,6 +634,29 @@ def find_smalls(minsize,sqlfile):
     flist.append(('smalls',smalls))
     return flist
 
+def disk_used(options):
+    conn=sqlite3.connect(options.sqlfile)
+    conn.text_factory=str
+    db=conn.cursor()
+    db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
+               (os.path.realpath(options.diskused)+"/",
+                os.path.realpath(options.diskused)+"%",
+               ))
+    entries=[]
+    sizes=[]
+    for row in db:
+        start_path=row[1].split('/')
+        start_path="/".join(start_path[0:int(options.diskused_depth)])
+        if start_path not in entries:
+            entries.append(start_path)
+            sizes.append(row[0])
+        else:
+            sizes[ entries.index(start_path) ]+=row[0]
+    for entry in zip(sizes,entries):
+        print("| ".join([ str(entry[0]).ljust(14),
+                          humanize_size(entry[0]).rjust(8),
+                          entry[1]]))
+
 def print_structure(files):
     for hash in files:
         #print(hash[0])
@@ -651,6 +680,18 @@ def print_dup_structure(files,opts):
         i+=1
     return
 
+def humanize_size(size,precision=1):
+    if size==None:
+        return 'nan'
+    suffixes=['B','KB','MB','GB','TB']
+    suffixIndex = 0
+    defPrecision=0
+    while size > 1024:
+        suffixIndex += 1
+        size = size/1024.0
+        defPrecision=precision
+    return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
+
 def main():
     options=setup_options();
     if not os.path.exists(options.sqlfile):
@@ -695,6 +736,8 @@ def main():
             delete_nonexisting(options.sqlfile)
         else:
             print_structure(files)
+    if options.diskused:
+        disk_used(options)
     #print(files)
     sys.exit(0)