file size measurements

This commit is contained in:
ville rantanen
2014-07-23 10:23:32 +03:00
parent 4ba197146b
commit 1b74b01aae
2 changed files with 98 additions and 13 deletions

View File

@@ -24,6 +24,10 @@ def setup_options():
help="Modify changed files [%(default)s]")
parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
help="Depth of summarization for --du.")
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
@@ -81,6 +85,29 @@ def delete_nonexisting(sqlfile,options):
conn.commit()
return
def disk_used(options):
    """Print per-directory disk usage summarized from the database.

    Selects every file stored under ``options.diskused``, strips that
    directory prefix from the stored paths, and aggregates file sizes at
    ``options.diskused_depth`` path components.  Prints one line per
    summary directory: raw byte count, human-readable size, relative path.
    """
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    base=os.path.realpath(options.diskused)
    # NOTE(review): LIKE treats % and _ in the path as wildcards — confirm
    # catalogued paths never contain them, or add an ESCAPE clause.
    db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
               (base+"/",
                base+"%",
               ))
    depth=int(options.diskused_depth)
    totals={}   # summary path -> accumulated size in bytes (O(1) lookup
                # replaces the original O(n^2) entries.index() scan per row)
    order=[]    # first-seen order of summary paths, to keep output order stable
    for size,rel_path in db:
        key="/".join(rel_path.split('/')[0:depth])
        if key not in totals:
            totals[key]=0
            order.append(key)
        # A NULL size column arrives as None; count it as 0 instead of
        # crashing on the addition.
        totals[key]+=size or 0
    for key in order:
        print("| ".join([ str(totals[key]).ljust(14),
                          humanize_size(totals[key]).rjust(8),
                          key]))
def has_changes(options):
conn=sqlite3.connect(options.sqlfile)
conn.text_factory=str
@@ -103,11 +130,11 @@ def has_changes_deleted(db):
def has_changes_additions(db,options):
for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
dirs=clean_dirs(dirs)
db_files=get_folder_contents(db,os.path.abspath(path)+'/')
db_files=get_folder_contents(db,os.path.realpath(path)+'/')
if not options.symlinks:
files=clean_syms(files,path)
for file in files:
filename=os.path.abspath(os.path.join(path,file))
filename=os.path.realpath(os.path.join(path,file))
if file==options.sqlfile:
continue
#if not is_listed(db,filename):
@@ -132,11 +159,11 @@ def add_recurse(options):
dirs=clean_dirs(dirs)
dirs.sort()
files.sort()
db_files=get_folder_contents(db,os.path.abspath(path)+'/')
db_files=get_folder_contents(db,os.path.realpath(path)+'/')
if not options.symlinks:
files=clean_syms(files,path)
for file in files:
filename=os.path.abspath(os.path.join(path,file))
filename=os.path.realpath(os.path.join(path,file))
if file==options.sqlfile:
continue
#if not is_listed(db,filename):
@@ -201,6 +228,18 @@ def hash_match(db,filename,hash):
count=db.fetchall()
return count[0][0]==hash
def humanize_size(size,precision=1):
    """Return *size* in bytes as a short human-readable string.

    Plain byte counts are printed without decimals ("512B"); anything
    scaled to KB or beyond gets *precision* decimal places ("2.0KB").
    Returns 'nan' when size is None (e.g. a NULL column from the DB).
    """
    if size is None:   # fix: identity check instead of == for None
        return 'nan'
    suffixes=['B','KB','MB','GB','TB']
    suffix_index=0
    decimals=0         # raw byte counts are shown as integers
    # Clamp at the last suffix so values >= 1024 TB no longer raise
    # IndexError; they are simply reported in TB.
    while size > 1024 and suffix_index < len(suffixes)-1:
        suffix_index += 1          # advance to the next unit
        size = size/1024.0         # scale down by one unit
        decimals=precision         # scaled values get decimal places
    return "%.*f%s"%(decimals,size,suffixes[suffix_index])
def get_md5(filename):
    """Return a content-based MD5 hex digest of *filename*.

    Only the first 50 MB of the file are read, so very large files hash
    quickly at the cost of ignoring trailing content.
    """
    # Close the file deterministically instead of leaking the handle
    # until the garbage collector runs.
    with open(filename,'rb') as f:
        return hashlib.md5(f.read(1024*1024*50)).hexdigest()
@@ -290,6 +329,9 @@ def main():
if options.match:
matchdb(options.sqlfile,options.match,options.search)
sys.exit(0)
if options.diskused:
disk_used(options)
sys.exit(0)
if options.delete:
print('Deleting entries...')
delete_nonexisting(options.sqlfile,options)