#!/bin/bash
# Mount an archive (iso/tar/zip/rar/... — whatever archivemount handles)
# under ~/mnt/am, or unmount previously mounted archives with -u.

_helpexit() {
    # BUG FIX: usage text said "arhchive"
    printf "Usage: %s [-u] archive.file

    archive is iso/tar/zip/rar whatever archivemount can handle
    -u will unmount all FUSE.archivemount paths if no path given!

" "$( basename "$0" )"
    echo "Current mounts:"
    # one awk pass instead of cat | grep | awk
    awk '$3 == "fuse.archivemount" { print $1 "\t" $2 }' /proc/mounts
    exit 1
}

for (( i=1; i<=$#; i++ )); do
    [[ ${!i} = "-h" || ${!i} = "--help" ]] && _helpexit
done

unmount=false
for (( i=1; i<=$#; i++ )); do
    [[ ${!i} = "-u" ]] && { unmount=true; continue; }
    if [[ -z "$archive" ]]; then
        archive="${!i}"
    fi
done

mkdir -p ~/mnt/am
# whitespace in the archive name would make an awkward mountpoint
valid_name=$( basename "$archive" | sed -e 's/\s/_/g' )
mountpath=~/mnt/am/"$valid_name"

if [[ "$unmount" = true ]]; then
    cd ~/mnt || exit 1
    if [[ -n "$archive" ]]; then
        echo "Unmounting $mountpath"
        fusermount -u -z "$mountpath"
        rmdir --ignore-fail-on-non-empty "$mountpath" &>/dev/null
    else
        # no path given: unmount every archivemount FUSE mount
        awk '$3 == "fuse.archivemount" { print $2 }' /proc/mounts | while read -r dir; do
            echo "Unmounting $dir"
            fusermount -u -z "$dir"
            rmdir --ignore-fail-on-non-empty "$dir" &>/dev/null
        done
    fi
    exit
fi

if [[ -z "$archive" ]]; then
    echo "No archive given"
    _helpexit
fi

# If the mountpoint exists and is on a different device than ~/mnt/am,
# something is already mounted on it.
if [[ -d "$mountpath" ]]; then
    device1=$( stat -c "%d" "$mountpath" )
    device2=$( stat -c "%d" ~/mnt/am )
else
    device1=valid
    device2=valid
fi

if [[ $device1 = $device2 ]]; then
    echo "Mounting $archive in ~/mnt/am/$valid_name"
    mkdir -p "$mountpath"

    # read-only, interruptible mount owned by the current user;
    # BUG FIX: test the command directly instead of checking $? afterwards
    if ! archivemount \
        -o readonly \
        -o intr \
        -o uid="$( id -u )" \
        -o gid="$( id -g )" \
        "$archive" "$mountpath"; then
        rmdir "$mountpath"
    fi
else
    # BUG FIX: message previously pointed at ~/mnt/NAME, the real path is ~/mnt/am/NAME
    echo "~/mnt/am/$valid_name is already mounted"
fi
def setup_options():
    """Parse the command line and return the populated options namespace."""
    p = ArgumentParser(description="Maintains the list of images sqlite file")
    p.add_argument("-a", dest="add", action="store_false", default=True,
                   help="Do not add new files [%(default)s]")
    p.add_argument("-c", dest="changed", action="store_true", default=False,
                   help="Modify changed files [%(default)s]")
    p.add_argument("--check", dest="check", action="store_true", default=False,
                   help="Check md5sums of files. Limit check with -s.")
    p.add_argument("-d", dest="delete", action="store_true", default=False,
                   help="Delete non-existing entries [%(default)s]")
    p.add_argument("--du", dest="diskused", type=str, action="store", default=False,
                   help="Print directory sizes. Argument is the path where directories are listed from.")
    p.add_argument("--du-depth", dest="diskused_depth", type=str, action="store", default=1,
                   help="Depth of summarization for --du.")
    p.add_argument("--dup", dest="duplicate", action="store_true", default=False,
                   help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
    p.add_argument("--dup-order", dest="duplicate_order", action="store", default="path",
                   choices=("age", "length", "file", "path"),
                   help="Order duplicates by a method. (length = path str length)")
    p.add_argument("--haschanges", dest="haschanges", action="store_true", default=False,
                   help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
    p.add_argument("--hasdeletions", dest="hasdeletions", action="store_true", default=False,
                   help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
    p.add_argument("--hasadditions", dest="hasadditions", action="store_true", default=False,
                   help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
    p.add_argument("-f", dest="sqlfile", action="store", default=SQLFILE,
                   help="SQL file name to use [%(default)s]")
    p.add_argument("-l", dest="symlinks", action="store_true", default=False,
                   help="Follow symbolic links [%(default)s]")
    p.add_argument("--match", dest="match", type=str, default=False,
                   help="Search for closest match from basenames, can be helped with adding -s")
    p.add_argument("-s", dest="search", type=str, action="append", default=[],
                   help="Search list based on path pattern")
    p.add_argument("-x", dest="exclude", action="append", default=[],
                   help="Exclude folder name from the lists. This option may be issued several times")
    p.add_argument("--full", dest="fullfile", action="store_true", default=False,
                   help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
    p.add_argument("--relative", dest="relative", action="store_true", default=False,
                   help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.")
    p.add_argument("startpath", action="store", default=".", nargs="?")

    options = p.parse_args()
    BADDIRS.extend(options.exclude)
    if options.duplicate:
        # --dup flips the "add new files" behaviour
        options.add = not options.add
    options.sqlpath = os.path.dirname(os.path.realpath(options.sqlfile))
    return options
def add_single(conn, filename, change=False, hash=None, minsize=0, fullfile=False):
    """Insert one file's metadata row, or update it when change=True.

    Stores path, mtime, md5 hash and size in the ``list`` table.  A
    pre-computed *hash* may be passed to avoid re-reading the file.
    Unreadable files (bad symlinks etc.) are reported and skipped.
    """
    try:
        fsize = os.path.getsize(filename)
        hsize = humanize_size(fsize)
    except IOError:
        hsize = ""
    print("\r%s (%s)" % (filename, hsize))
    db = conn.cursor()
    try:
        if hash is None:
            hash = get_md5(filename, fullfile)
        ftime = os.path.getmtime(filename)
    except IOError:
        print("File '%s' not found. Bad link?" % (filename,))
        return
    if change:
        # BUG FIX: statement previously read "size=?, WHERE file=?" — the
        # trailing comma is invalid SQL, so every -c update raised
        # sqlite3.OperationalError.
        db.execute(
            "UPDATE list SET date=?, hash=?, size=? WHERE file=?",
            (ftime, hash, fsize, filename),
        )
    else:
        db.execute(
            "INSERT INTO list(file,date,hash,size) VALUES(?,?,?,?)",
            (filename, ftime, hash, fsize),
        )
    sys.stdout.write("\r")
    return
def checkdb(options):
    """Verify stored md5 checksums against the files on disk.

    Walks every row matching the -s patterns (or all rows), recomputes
    each file's hash and reports OK / differing / missing files, plus
    files present on disk but absent from the database.
    """
    terms = options.search
    if len(terms) == 0:
        terms.append("%")
    terms = ["%" + t + "%" for t in terms]
    like_clause = " OR ".join("file LIKE ?" for _ in terms)
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    cur.execute(
        "SELECT file,hash,size,date FROM list WHERE " + like_clause + " ORDER BY file",
        terms,
    )
    missing = []
    differing = []
    OK_count = 0
    for row in cur:
        sys.stdout.write("\r%s" % (row[0],))
        if not os.path.exists(row[0]):
            status = "Not-found"
            missing.append(row)
        elif row[1] != get_md5(row[0], options.fullfile):
            status = "Checksum-difference"
            differing.append(row)
        else:
            status = "OK"
            OK_count += 1
        sys.stdout.write("\r%s %s\n" % (row[0], status))
    if differing:
        print_stderr("----\nDiffering files:")
        pad = str(max(len(r[0]) for r in differing))
        for r in differing:
            print(("%-" + pad + "s (%s %7s => %s %7s)") % (
                r[0],
                humanize_date(r[3]),
                humanize_size(r[2]),
                humanize_date(os.path.getmtime(r[0])),
                humanize_size(os.path.getsize(r[0])),
            ))
    if missing:
        print("----\nMissing files:")
        pad = str(max(len(r[0]) for r in missing))
        for r in missing:
            print(("%-" + pad + "s (%s %7s)") % (r[0], humanize_date(r[3]), humanize_size(r[2])))
    (added, changed) = has_changes_additions(cur, options, False)
    if added:
        print("----\nAdded files:")
        # NOTE(review): added holds plain strings, so len(x[0]) measures the
        # first character (always 1) — preserved as-is; looks like a latent bug.
        pad = str(max(len(x[0]) for x in added))
        for f in added:
            print(("%-" + pad + "s (%s %7s)") % (
                f,
                humanize_date(os.path.getmtime(f)),
                humanize_size(os.path.getsize(f)),
            ))
    print("----\nFile check summary:")
    print("Database modified: %s" % (humanize_date(os.path.getmtime(options.sqlfile)),))
    print("Checksum matches : %d" % (OK_count,))
    print("Checksum mismatch: %d" % (len(differing),))
    print("Files missing : %d" % (len(missing),))
    print("Files added : %d" % (len(added),))
def delete_nonexisting(sqlfile, options):
    """Drop database rows whose files are gone from disk.

    A row is also dropped when it points at a symlink while symlink
    following (-l) is disabled.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    reader = conn.cursor()
    writer = conn.cursor()
    reader.execute("SELECT file FROM list")
    for (path,) in reader:
        if os.path.exists(path):
            # existing file: purge only when it is an unwanted symlink
            drop = (not options.symlinks) and os.path.islink(path)
        else:
            drop = True
        if drop:
            print("removing.. " + path)
            writer.execute("DELETE FROM list where file == ?", (path,))
    conn.commit()
    return
def disk_used(options):
    """Print per-directory disk usage summarised from the database.

    Sizes are aggregated up to --du-depth path components below the
    --du base directory.
    """
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    base = filename_join(options.diskused, "", options) + "/"
    if base == "./":
        base = ""
    cur.execute(
        'SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
        (base, base + "%"),
    )
    depth = int(options.diskused_depth)
    names = []
    totals = []
    for size, relpath in cur:
        # key is the path truncated to the requested depth
        key = "/".join(relpath.split("/")[0:depth])
        if key in names:
            totals[names.index(key)] += size
        else:
            names.append(key)
            totals.append(size)
    for total, name in zip(totals, names):
        print("| ".join([str(total).ljust(14), humanize_size(total).rjust(8), name]))
def get_folder_contents(db, path):
    """Return base names of files stored directly under *path*.

    :param db: sqlite3 cursor over the ``list`` table
    :param path: folder prefix as stored in the database, ending in "/",
        or "./" for the database root
    :returns: list of file names without a remaining "/" component,
        i.e. only this folder's direct children
    """
    files = []
    if path == "./":
        # root folder: rows whose stored path has no directory part
        db.execute("SELECT file FROM list where file NOT LIKE ?", ("%/%",))
        path = ""
    else:
        db.execute("SELECT file FROM list where file LIKE ?", (path + "%",))
    for row in db:
        try:
            base = row[0].replace(path, "", 1)
        except UnicodeDecodeError:
            # BUG FIX: the old fallback called bytes.replace() with str
            # arguments, which can only raise TypeError.  Re-decode with
            # replacement characters so one undecodable name cannot abort
            # the whole run.
            print(row[0] + " is giving me trouble.")
            base = (
                row[0]
                .encode("utf-8", "surrogateescape")
                .decode("utf-8", "replace")
                .replace(path, "", 1)
            )
        if base.find("/") == -1:
            files.append(base)
    return files
def get_md5(filename, fullfile=False):
    """Return the md5 hex digest of *filename*'s content.

    Only the first DEFAULT_CHUNK (50Mb) bytes are hashed unless
    *fullfile* is true, in which case the whole file is streamed in
    16Mb blocks with a progress animation on stderr.
    """
    fsize = os.path.getsize(filename)
    if fullfile and fsize > DEFAULT_CHUNK:
        anim_i = 0
        anim_len = len(ANIM)
        block_size = 2**24
        percents_per_block = int(100 / (float(fsize) / block_size))
        md5 = hashlib.md5()
        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(block_size), b""):
                sys.stderr.write(
                    "\r %s (%02d%%)"
                    % (ANIM[anim_i % anim_len], int(anim_i * percents_per_block))
                )
                sys.stderr.flush()
                anim_i += 1
                md5.update(chunk)
        sys.stderr.write("\r ")
        return md5.hexdigest()
    # BUG FIX: the short path leaked an open file handle; a context manager
    # releases the descriptor immediately instead of at GC time.
    with open(filename, "rb") as f:
        return hashlib.md5(f.read(DEFAULT_CHUNK)).hexdigest()
def has_changes_additions(db, options, exit=True):
    """Detect files on disk that are new or modified relative to the DB.

    With exit=True the first hit prints "True" and terminates with exit
    code 1; otherwise the (added, changed) filename lists are returned.
    """
    added = []
    changed = []
    for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
        dirs = clean_dirs(dirs)
        known = get_folder_contents(db, filename_join(path, "", options) + "/")
        if not options.symlinks:
            files = clean_syms(files, path)
        for name in files:
            full = filename_join(path, name, options)
            if name == options.sqlfile:
                continue
            if name not in known:
                # on disk but not in the database
                if exit:
                    print("True")
                    sys.exit(1)
                added.append(full)
            elif options.changed and not ftime_match(db, full, os.path.getmtime(full)):
                # mtime differs from the stored one -> content changed
                if exit:
                    print("True")
                    sys.exit(1)
                changed.append(full)
    return (added, changed)
def humanize_size(size, precision=1):
    """Return *size* in bytes as a human readable string, e.g. "2.0KB".

    None yields "nan".  Values of at most 1024 bytes are printed without
    decimals; larger values get *precision* decimals.
    """
    if size is None:
        return "nan"
    suffixes = ["B", "KB", "MB", "GB", "TB"]
    suffixIndex = 0
    defPrecision = 0
    # BUG FIX: bound the index so sizes beyond 1024 TB no longer raise
    # IndexError — they are reported in TB instead.
    while size > 1024 and suffixIndex < len(suffixes) - 1:
        suffixIndex += 1  # move to the next unit
        size = float(size / 1024.0)  # scale down by one unit
        defPrecision = precision
    return "%.*f%s" % (defPrecision, size, suffixes[suffixIndex])
def print_duplicates(files):
    """Print duplicate groups as "index|size|date|path " lines.

    The index restarts at 1 for every hash group.
    """
    for group in files:
        for rank, entry in enumerate(group[1], start=1):
            print(
                "%(i)d|%(s)s|%(d)s|%(f)s "
                % {
                    "i": rank,
                    "f": entry[0],
                    "d": humanize_date(entry[2]),
                    "s": humanize_size(entry[1]),
                }
            )
    return
def sort_by_method(flist, order):
    """Sort a list of (path, size, date) tuples in place by *order*.

    Unknown orders leave the list untouched.
    """
    key_funcs = {
        "path": lambda entry: entry[0],
        "file": lambda entry: os.path.basename(entry[0]),
        "age": lambda entry: entry[2],
        "length": lambda entry: len(entry[0]),
    }
    key = key_funcs.get(order)
    if key is not None:
        flist.sort(key=key)
matchdb(options.sqlfile,options.match,options.search) + matchdb(options.sqlfile, options.match, options.search) sys.exit(0) if options.diskused: disk_used(options) sys.exit(0) if options.delete: - print('Deleting entries...') - delete_nonexisting(options.sqlfile,options) + print("Deleting entries...") + delete_nonexisting(options.sqlfile, options) if options.add or options.changed: - print('Adding '+options.startpath+' entries...') + print("Adding " + options.startpath + " entries...") add_recurse(options) if options.duplicate: - files=find_duplicates(options.sqlfile, options.duplicate_order) + files = find_duplicates(options.sqlfile, options.duplicate_order) print_duplicates(files) sys.exit(0) -main() +main()