full file read for file list

file_list.py (37 changed lines)
@@ -28,24 +28,26 @@ def setup_options():
                         help="Print directory sizes. Argument is the path where directories are listed from.")
     parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
                         help="Depth of summarization for --du.")
+    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
+                        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
+    parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
+                        help="Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
     parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
                         help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
     parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
                         help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
     parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False,
                         help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
-    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
-                        help="Follow symbolic links [%(default)s]")
-    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
-                        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
-    parser.add_argument("-x",action="append",dest="exclude",default=[],
-                        help="Exclude folder name from the lists. This option may be issued several times")
     parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
                         help="SQL file name to use [%(default)s]")
-    parser.add_argument("-s",type=str,action='append',dest="search",default=[],
-                        help="Search list based on path pattern")
+    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
+                        help="Follow symbolic links [%(default)s]")
     parser.add_argument("--match",type=str,dest="match",default=False,
                         help="Search for closest match from basenames, can be helped with adding -s")
+    parser.add_argument("-s",type=str,action='append',dest="search",default=[],
+                        help="Search list based on path pattern")
+    parser.add_argument("-x",action="append",dest="exclude",default=[],
+                        help="Exclude folder name from the lists. This option may be issued several times")
     parser.add_argument('startpath', action="store",default='.', nargs='?')
 
     options=parser.parse_args()
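
The new --full switch is a plain store_true flag: options.fullfile stays False unless whole-file hashing is requested explicitly. A minimal sketch of how the flag is parsed, assuming only this one option (the option name, dest and help text come from the diff; the rest is illustrative):

import argparse

# Minimal sketch of the new flag added in setup_options() above.
parser = argparse.ArgumentParser()
parser.add_argument("--full", action="store_true", dest="fullfile", default=False,
                    help="Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")

options = parser.parse_args(["--full"])   # e.g. `file_list.py --full <startpath>`
print(options.fullfile)                   # True; without --full it remains False
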
@@ -169,24 +171,24 @@ def add_recurse(options):
             #if not is_listed(db,filename):
             if file not in db_files:
                 if options.add:
-                    add_single(conn,filename,change=False)
+                    add_single(conn,filename,change=False,fullfile=options.fullfile)
             else:
                 if options.changed:
                     ftime=os.path.getmtime(filename)
                     if not ftime_match(db,filename,ftime):
                         #file content changed
-                        add_single(conn,filename,change=True)
+                        add_single(conn,filename,change=True,fullfile=options.fullfile)
     conn.commit()
 
     return
 
-def add_single(conn,filename,change=False,hash=None,minsize=0):
+def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
 
     print "%(f)s" % {'f':filename}
     db=conn.cursor()
     try:
         if hash==None:
-            hash=get_md5(filename)
+            hash=get_md5(filename,fullfile)
         ftime=os.path.getmtime(filename)
         fsize=os.path.getsize(filename)
         mime=MIME.file(filename.encode('UTF-8'))
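
With the flag in place, add_recurse() forwards options.fullfile into add_single(), which passes it on to get_md5(). A condensed, illustrative sketch of that call chain (function names and signatures follow the diff; the bodies are stubs, not the real implementations):

def get_md5(filename, fullfile=False):
    # stub: stands in for the real hashing shown in the next hunk
    return "md5 of %s (%s)" % (filename, "whole file" if fullfile else "first 50Mb")

def add_single(conn, filename, change=False, hash=None, minsize=0, fullfile=False):
    if hash is None:
        hash = get_md5(filename, fullfile)   # new keyword threaded through
    # ... the real function also records mtime, size and mime type in the DB ...
    return hash

class Options(object):   # stand-in for the argparse namespace
    fullfile = True

print(add_single(None, "example.txt", fullfile=Options.fullfile))
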
@@ -250,8 +252,15 @@ def humanize_size(size,precision=1):
         defPrecision=precision
     return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
 
-def get_md5(filename):
-    ''' returns content based hash, only first 50Mb is read '''
+def get_md5(filename,fullfile=False):
+    ''' returns content based hash, only first 50Mb is read, unless user wants the whole file '''
+    if fullfile:
+        block_size=2**20
+        md5 = hashlib.md5()
+        with open(filename,'rb') as f:
+            for chunk in iter(lambda: f.read(block_size), b''):
+                md5.update(chunk)
+        return md5.hexdigest()
     return hashlib.md5(open(filename,'rb').read(1024*1024*50)).hexdigest()
 
 def clean_dirs(dirs):
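
The hashing change itself: when fullfile is set, the file is read in 1 MiB blocks, so even large files can be hashed completely without being loaded into memory at once; otherwise the old behaviour (hash only the first 50 MB in a single read) is kept. A self-contained sketch of the same idea, using a hypothetical md5_of() helper; the block size and the 50 MB cut-off mirror the patch, everything else is illustrative:

import hashlib

def md5_of(path, full=False, block_size=2**20, head_bytes=50*1024*1024):
    # full=True: hash the whole file in 1 MiB chunks (constant memory use)
    # full=False: hash only the first 50 MB, as the original get_md5() did
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        if full:
            for chunk in iter(lambda: f.read(block_size), b''):
                md5.update(chunk)
        else:
            md5.update(f.read(head_bytes))
    return md5.hexdigest()

if __name__ == '__main__':
    import sys
    print(md5_of(sys.argv[1], full='--full' in sys.argv[2:]))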