ordering methods for duplicates
This commit is contained in:
@@ -37,6 +37,10 @@ def setup_options():
|
||||
help="Depth of summarization for --du.")
|
||||
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
|
||||
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
|
||||
parser.add_argument("--dup-order",action="store",dest="duplicate_order",default='path',
|
||||
help = "Order duplicates by a method. (length = path str length)",
|
||||
choices = ('age','length','file','path')
|
||||
)
|
||||
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
|
||||
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
|
||||
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
|
||||
@@ -67,6 +71,7 @@ def setup_options():
|
||||
options.sqlpath=os.path.dirname(os.path.realpath(options.sqlfile))
|
||||
return options
|
||||
|
||||
|
||||
def add_recurse(options):
|
||||
conn=sqlite3.connect(options.sqlfile)
|
||||
conn.text_factory=str
|
||||
@@ -101,6 +106,7 @@ def add_recurse(options):
|
||||
sys.stdout.write("\n")
|
||||
return
|
||||
|
||||
|
||||
def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
|
||||
try:
|
||||
fsize=os.path.getsize(filename)
|
||||
@@ -130,6 +136,7 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
|
||||
sys.stdout.write('\r')
|
||||
return
|
||||
|
||||
|
||||
def checkdb(options):
|
||||
needle=options.search
|
||||
if len(needle)==0:
|
||||
@@ -192,6 +199,7 @@ def clean_dirs(dirs):
|
||||
dirs.remove(s)
|
||||
return dirs
|
||||
|
||||
|
||||
def clean_syms(files,path):
|
||||
nonsyms=[]
|
||||
for f in files:
|
||||
@@ -220,6 +228,7 @@ def createdb(options):
|
||||
conn.commit()
|
||||
return
|
||||
|
||||
|
||||
def delete_nonexisting(sqlfile,options):
|
||||
conn=sqlite3.connect(sqlfile)
|
||||
conn.text_factory=str
|
||||
@@ -240,6 +249,7 @@ def delete_nonexisting(sqlfile,options):
|
||||
conn.commit()
|
||||
return
|
||||
|
||||
|
||||
def disk_used(options):
|
||||
conn=sqlite3.connect(options.sqlfile)
|
||||
conn.text_factory=str
|
||||
@@ -266,13 +276,15 @@ def disk_used(options):
|
||||
humanize_size(entry[0]).rjust(8),
|
||||
entry[1]]))
|
||||
|
||||
|
||||
def filename_join(path, name, options):
    """Return the resolved filename for *name* located in *path*.

    The two parts are joined and passed through ``os.path.realpath``.
    When ``options.relative`` is truthy, the resolved path is returned
    relative to ``options.sqlpath``; otherwise it is returned unchanged.
    """
    resolved = os.path.realpath(os.path.join(path, name))
    if not options.relative:
        return resolved
    return os.path.relpath(resolved, options.sqlpath)
|
||||
|
||||
def find_duplicates(sqlfile):
|
||||
|
||||
def find_duplicates(sqlfile, order):
|
||||
conn=sqlite3.connect(sqlfile)
|
||||
conn.text_factory=str
|
||||
db=conn.cursor()
|
||||
@@ -285,7 +297,7 @@ def find_duplicates(sqlfile):
|
||||
flist=[]
|
||||
for row in dbh:
|
||||
flist.append(row)
|
||||
flist.sort(key=lambda file: file[0])
|
||||
sort_by_method(flist, order)
|
||||
duphash.append((hash, flist))
|
||||
duphash.sort(key=lambda file: file[1][0])
|
||||
return duphash
|
||||
@@ -317,6 +329,7 @@ def get_folder_contents(db,path):
|
||||
files.append(base)
|
||||
return files
|
||||
|
||||
|
||||
def get_md5(filename,fullfile=False):
|
||||
''' returns content based hash, only first 50Mb is read, unless user wants the whole file '''
|
||||
fsize=os.path.getsize(filename)
|
||||
@@ -360,6 +373,7 @@ def has_changes_deleted(db,exit=True):
|
||||
deleted.append(row[0])
|
||||
return deleted
|
||||
|
||||
|
||||
def has_changes_additions(db,options,exit=True):
|
||||
added=[]
|
||||
changed=[]
|
||||
@@ -402,6 +416,7 @@ def humanize_date(date):
|
||||
return ''
|
||||
return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
|
||||
def humanize_size(size,precision=1):
|
||||
if size==None:
|
||||
return 'nan'
|
||||
@@ -414,11 +429,13 @@ def humanize_size(size,precision=1):
|
||||
defPrecision=precision
|
||||
return "%.*f%s"%(defPrecision,size,suffixes[suffixIndex])
|
||||
|
||||
|
||||
def is_listed(db, filename):
    """Tell whether *filename* has at least one row in the ``list`` table.

    *db* is used like a cursor: the count query is executed on it and the
    result is read back with ``fetchall``.
    """
    query = "SELECT COUNT(*) FROM list where file == ?"
    db.execute(query, (filename,))
    first_row = db.fetchall()[0]
    return first_row[0] > 0
|
||||
|
||||
|
||||
def matchdb(sqlfile,needle,helper):
|
||||
needle=needle.lower()
|
||||
import difflib as dl
|
||||
@@ -441,20 +458,27 @@ def matchdb(sqlfile,needle,helper):
|
||||
best_match=row[0]
|
||||
print(best_match)
|
||||
|
||||
def print_duplicates(files):
    """Print each group of duplicates as numbered, pipe-separated rows.

    *files* is iterated as a sequence whose items carry their entries at
    index 1 (index 0 is not printed). For every entry, ``entry[0]`` is
    printed as-is, ``entry[1]`` goes through ``humanize_size`` and
    ``entry[2]`` through ``humanize_date``. Numbering restarts at 1 for
    each group.
    """
    row_format = "%(i)d|%(s)s|%(d)s|%(f)s "
    for group in files:
        for position, entry in enumerate(group[1], 1):
            print(row_format % {
                'i': position,
                'f': entry[0],
                'd': humanize_date(entry[2]),
                's': humanize_size(entry[1]),
            })
    return
|
||||
|
||||
|
||||
def print_stderr(s):
    """Write *s* and a trailing newline to standard error, then flush."""
    err = sys.stderr
    for chunk in (s, "\n"):
        err.write(chunk)
    err.flush()
|
||||
|
||||
|
||||
def searchdb(sqlfile,needle):
|
||||
needle=['%'+i+'%' for i in needle]
|
||||
like_query=' OR '.join(['file LIKE ?' for i in needle])
|
||||
@@ -465,6 +489,18 @@ def searchdb(sqlfile,needle):
|
||||
for row in db:
|
||||
print(row[0])
|
||||
|
||||
|
||||
def sort_by_method(flist, order):
    """Sort *flist* in place using the ordering named by *order*.

    Supported orders and the key each one sorts on:
      'path'   - ``entry[0]``
      'file'   - ``os.path.basename(entry[0])``
      'age'    - ``entry[2]``
      'length' - ``len(entry[0])``

    Any other value leaves the list as it is. Nothing is returned.
    """
    methods = (
        ('path', lambda entry: entry[0]),
        ('file', lambda entry: os.path.basename(entry[0])),
        ('age', lambda entry: entry[2]),
        ('length', lambda entry: len(entry[0])),
    )
    for method_name, sort_key in methods:
        if order == method_name:
            flist.sort(key=sort_key)
|
||||
|
||||
|
||||
def stored_options(options):
|
||||
try:
|
||||
conn=sqlite3.connect(options.sqlfile)
|
||||
@@ -483,6 +519,7 @@ def stored_options(options):
|
||||
|
||||
return options
|
||||
|
||||
|
||||
def main():
|
||||
options=setup_options();
|
||||
|
||||
@@ -513,8 +550,8 @@ def main():
|
||||
print('Adding '+options.startpath+' entries...')
|
||||
add_recurse(options)
|
||||
if options.duplicate:
|
||||
files=find_duplicates(options.sqlfile)
|
||||
print_structure(files)
|
||||
files=find_duplicates(options.sqlfile, options.duplicate_order)
|
||||
print_duplicates(files)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user