full file read for file list

This commit is contained in:
ville rantanen
2015-01-19 15:52:23 +02:00
parent 8954f87cd0
commit 3e449e042b

View File

@@ -28,24 +28,26 @@ def setup_options():
help="Print directory sizes. Argument is the path where directories are listed from.") help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1, parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
help="Depth of summarization for --du.") help="Depth of summarization for --du.")
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
help="Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False, parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False, parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False, parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.") help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
help="Follow symbolic links [%(default)s]")
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times")
parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE, parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
help="SQL file name to use [%(default)s]") help="SQL file name to use [%(default)s]")
parser.add_argument("-s",type=str,action='append',dest="search",default=[], parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
help="Search list based on path pattern") help="Follow symbolic links [%(default)s]")
parser.add_argument("--match",type=str,dest="match",default=False, parser.add_argument("--match",type=str,dest="match",default=False,
help="Search for closest match from basenames, can be helped with adding -s") help="Search for closest match from basenames, can be helped with adding -s")
parser.add_argument("-s",type=str,action='append',dest="search",default=[],
help="Search list based on path pattern")
parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times")
parser.add_argument('startpath', action="store",default='.', nargs='?') parser.add_argument('startpath', action="store",default='.', nargs='?')
options=parser.parse_args() options=parser.parse_args()
@@ -169,24 +171,24 @@ def add_recurse(options):
#if not is_listed(db,filename): #if not is_listed(db,filename):
if file not in db_files: if file not in db_files:
if options.add: if options.add:
add_single(conn,filename,change=False) add_single(conn,filename,change=False,fullfile=options.fullfile)
else: else:
if options.changed: if options.changed:
ftime=os.path.getmtime(filename) ftime=os.path.getmtime(filename)
if not ftime_match(db,filename,ftime): if not ftime_match(db,filename,ftime):
#file content changed #file content changed
add_single(conn,filename,change=True) add_single(conn,filename,change=True,fullfile=options.fullfile)
conn.commit() conn.commit()
return return
def add_single(conn,filename,change=False,hash=None,minsize=0): def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
print "%(f)s" % {'f':filename} print "%(f)s" % {'f':filename}
db=conn.cursor() db=conn.cursor()
try: try:
if hash==None: if hash==None:
hash=get_md5(filename) hash=get_md5(filename,fullfile)
ftime=os.path.getmtime(filename) ftime=os.path.getmtime(filename)
fsize=os.path.getsize(filename) fsize=os.path.getsize(filename)
mime=MIME.file(filename.encode('UTF-8')) mime=MIME.file(filename.encode('UTF-8'))
@@ -250,8 +252,15 @@ def humanize_size(size,precision=1):
defPrecision=precision defPrecision=precision
return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex]) return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
def get_md5(filename,fullfile=False):
    ''' Return the hex MD5 digest of the contents of a file.

    filename: path of the file to hash.
    fullfile: when True the whole file is hashed; when False (default)
              only the first 50Mb are hashed.

    The file is read in 1Mb chunks in both modes, so memory use stays
    bounded, and the handle is always closed by the with-statement.
    Errors raised by open()/read() propagate to the caller.
    '''
    block_size=2**20
    # None means "no limit"; otherwise the number of bytes still to be hashed
    remaining=None if fullfile else 1024*1024*50
    md5=hashlib.md5()
    with open(filename,'rb') as f:
        while remaining is None or remaining>0:
            if remaining is None:
                chunk=f.read(block_size)
            else:
                # never read past the 50Mb budget
                chunk=f.read(min(block_size,remaining))
            if not chunk:
                # end of file reached
                break
            md5.update(chunk)
            if remaining is not None:
                remaining-=len(chunk)
    return md5.hexdigest()
def clean_dirs(dirs): def clean_dirs(dirs):