Removed MIME detection, fewer dependencies. Fixed bug in reading stored options.

2023-01-01 21:46:18 +02:00
parent 31660eff18
commit 887be6b750
1 changed files with 463 additions and 314 deletions
--- a/files/file_list.py
+++ b/files/file_list.py
@@ -8,63 +8,157 @@ import re
 import sqlite3
 import subprocess
 import hashlib
-import magic
+
 # import magic
 from argparse import ArgumentParser
 import configparser
 import io
 import datetime
-SQLFILE='list_of_files.sqlite'
+SQLFILE = "list_of_files.sqlite"
-IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
+IMGMATCH = re.compile(".*\.jpg$|.*\.jpeg$|.*\.png$", re.I)
 BADDIRS = []
 MINSIZE = 0
-MIME=magic.open(magic.MAGIC_NONE)
+# MIME=magic.open(magic.MAGIC_NONE)
-#MIME=magic.open(magic.MAGIC_MIME)
+##MIME=magic.open(magic.MAGIC_MIME)
-MIME.load()
+# MIME.load()
-ANIM=['.','·',"'","'",'·','.','_']
+ANIM = [".", "·", "'", "'", "·", ".", "_"]
 DEFAULT_CHUNK = 1024 * 1024 * 50
 def setup_options():
    parser = ArgumentParser(description="Maintains the list of images sqlite file")
-    parser.add_argument("-a",action="store_false",dest="add",default=True,
+    parser.add_argument(
-                      help="Do not add new files [%(default)s]")
+        "-a",
-    parser.add_argument("-c",action="store_true",dest="changed",default=False,
+        action="store_false",
-                     help="Modify changed files [%(default)s]")
+        dest="add",
-    parser.add_argument("--check",action="store_true",dest="check",default=False,
+        default=True,
-                     help="Check md5sums of files. Limit check with -s.")
+        help="Do not add new files [%(default)s]",
    parser.add_argument("-d",action="store_true",dest="delete",default=False,
                     help="Delete non-existing entries [%(default)s]")
    parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
                      help="Print directory sizes. Argument is the path where directories are listed from.")
    parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
                      help="Depth of summarization for --du.")
    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
                     help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--dup-order",action="store",dest="duplicate_order",default='path',
        help = "Order duplicates by a method. (length = path str length)",
        choices = ('age','length','file','path')
    )
-    parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
+    parser.add_argument(
-                      help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
+        "-c",
-    parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
+        action="store_true",
-                      help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
+        dest="changed",
-    parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False,
+        default=False,
-                      help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
+        help="Modify changed files [%(default)s]",
-    parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
+    )
-                      help="SQL file name to use [%(default)s]")
+    parser.add_argument(
-    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
+        "--check",
-                     help="Follow symbolic links [%(default)s]")
+        action="store_true",
-    parser.add_argument("--match",type=str,dest="match",default=False,
+        dest="check",
-                      help="Search for closest match from basenames, can be helped with adding -s")
+        default=False,
-    parser.add_argument("-s",type=str,action='append',dest="search",default=[],
+        help="Check md5sums of files. Limit check with -s.",
-                      help="Search list based on path pattern")
+    )
-    parser.add_argument("-x",action="append",dest="exclude",default=[],
+    parser.add_argument(
-                     help="Exclude folder name from the lists. This option may be issued several times")
+        "-d",
-    parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
+        action="store_true",
-                     help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
+        dest="delete",
-    parser.add_argument("--relative",action="store_true",dest="relative",default=False,
+        default=False,
-                     help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.")
+        help="Delete non-existing entries [%(default)s]",
-    parser.add_argument('startpath', action="store",default='.', nargs='?')
+    )
    parser.add_argument(
        "--du",
        type=str,
        action="store",
        dest="diskused",
        default=False,
        help="Print directory sizes. Argument is the path where directories are listed from.",
    )
    parser.add_argument(
        "--du-depth",
        type=str,
        action="store",
        dest="diskused_depth",
        default=1,
        help="Depth of summarization for --du.",
    )
    parser.add_argument(
        "--dup",
        action="store_true",
        dest="duplicate",
        default=False,
        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]",
    )
    parser.add_argument(
        "--dup-order",
        action="store",
        dest="duplicate_order",
        default="path",
        help="Order duplicates by a method. (length = path str length)",
        choices=("age", "length", "file", "path"),
    )
    parser.add_argument(
        "--haschanges",
        action="store_true",
        dest="haschanges",
        default=False,
        help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
    )
    parser.add_argument(
        "--hasdeletions",
        action="store_true",
        dest="hasdeletions",
        default=False,
        help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
    )
    parser.add_argument(
        "--hasadditions",
        action="store_true",
        dest="hasadditions",
        default=False,
        help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
    )
    parser.add_argument(
        "-f",
        action="store",
        dest="sqlfile",
        default=SQLFILE,
        help="SQL file name to use [%(default)s]",
    )
    parser.add_argument(
        "-l",
        action="store_true",
        dest="symlinks",
        default=False,
        help="Follow symbolic links [%(default)s]",
    )
    parser.add_argument(
        "--match",
        type=str,
        dest="match",
        default=False,
        help="Search for closest match from basenames, can be helped with adding -s",
    )
    parser.add_argument(
        "-s",
        type=str,
        action="append",
        dest="search",
        default=[],
        help="Search list based on path pattern",
    )
    parser.add_argument(
        "-x",
        action="append",
        dest="exclude",
        default=[],
        help="Exclude folder name from the lists. This option may be issued several times",
    )
    parser.add_argument(
        "--full",
        action="store_true",
        dest="fullfile",
        default=False,
        help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]",
    )
    parser.add_argument(
        "--relative",
        action="store_true",
        dest="relative",
        default=False,
        help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.",
    )
    parser.add_argument("startpath", action="store", default=".", nargs="?")
    options = parser.parse_args()
    BADDIRS.extend(options.exclude)
@@ -81,7 +175,12 @@ def add_recurse(options):
    db = conn.cursor()
    prev_path_len = 0
    for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
-        sys.stdout.write(("\r%s%s"%(filename_join(path,".",options),(prev_path_len-len(path))*' ')))
+        sys.stdout.write(
            (
                "\r%s%s"
                % (filename_join(path, ".", options), (prev_path_len - len(path)) * " ")
            )
        )
        prev_path_len = len(path)
        dirs = clean_dirs(dirs)
        dirs.sort()
@@ -104,7 +203,9 @@ def add_recurse(options):
                    ftime = os.path.getmtime(filename)
                    if not ftime_match(db, filename, ftime):
                        # file content changed
-                        add_single(conn,filename,change=True,fullfile=options.fullfile)
+                        add_single(
                            conn, filename, change=True, fullfile=options.fullfile
                        )
        conn.commit()
    sys.stdout.write("\n")
    return
@@ -122,71 +223,92 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
        if hash == None:
            hash = get_md5(filename, fullfile)
        ftime = os.path.getmtime(filename)
        mime=MIME.file(str(filename.encode('UTF-8')))
    except IOError:
        print("File '%s' not found. Bad link?" % (filename,))
        return
    except (UnicodeDecodeError, TypeError):
        mime="NA"
    if change:
-        db.execute("UPDATE list SET date=?, hash=?, size=?, mime=? \
+        db.execute(
-               WHERE file=?",(ftime,hash,fsize,mime,filename))
+            "UPDATE list SET date=?, hash=?, size=?, \
               WHERE file=?",
            (ftime, hash, fsize, filename),
        )
        # print "changing: %(f)s " % {'f':filename}
    else:
-        db.execute("INSERT INTO list(file,date,hash,size,mime)\
+        db.execute(
-               VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
+            "INSERT INTO list(file,date,hash,size)\
-    sys.stdout.write('\r')
+               VALUES(?,?,?,?)",
            (filename, ftime, hash, fsize),
        )
    sys.stdout.write("\r")
    return
 def checkdb(options):
    needle = options.search
    if len(needle) == 0:
-        needle.append('%')
+        needle.append("%")
-    needle=['%'+i+'%' for i in needle]
+    needle = ["%" + i + "%" for i in needle]
-    like_query=' OR '.join(['file LIKE ?' for i in needle])
+    like_query = " OR ".join(["file LIKE ?" for i in needle])
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    db = conn.cursor()
-    db.execute("SELECT file,hash,size,date FROM list WHERE "+like_query+" ORDER BY file",needle)
+    db.execute(
        "SELECT file,hash,size,date FROM list WHERE " + like_query + " ORDER BY file",
        needle,
    )
    missing = []
    differing = []
    OK_count = 0
    for row in db:
-        status='OK'
+        status = "OK"
        sys.stdout.write("\r%s" % (row[0],))
        if os.path.exists(row[0]):
            md5f = get_md5(row[0], options.fullfile)
            if row[1] != md5f:
-                status='Checksum-difference'
+                status = "Checksum-difference"
                differing.append(row)
        else:
-            status='Not-found'
+            status = "Not-found"
            missing.append(row)
        sys.stdout.write("\r%s %s\n" % (row[0], status))
-        if status=='OK':
+        if status == "OK":
            OK_count += 1
    if len(differing) > 0:
        print_stderr("----\nDiffering files:")
        pad = str(max([len(x[0]) for x in differing]))
        for f in differing:
-            print(("%-"+pad+"s (%s %7s => %s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]),
+            print(
                ("%-" + pad + "s (%s %7s => %s %7s)")
                % (
                    f[0],
                    humanize_date(f[3]),
                    humanize_size(f[2]),
                    humanize_date(os.path.getmtime(f[0])),
-                                                humanize_size(os.path.getsize(f[0]))))
+                    humanize_size(os.path.getsize(f[0])),
                )
            )
    if len(missing) > 0:
        print("----\nMissing files:")
        pad = str(max([len(x[0]) for x in missing]))
        for f in missing:
-            print(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
+            print(
                ("%-" + pad + "s (%s %7s)")
                % (f[0], humanize_date(f[3]), humanize_size(f[2]))
            )
    (added, changed) = has_changes_additions(db, options, False)
    if len(added) > 0:
        print("----\nAdded files:")
        pad = str(max([len(x[0]) for x in added]))
        for f in added:
-            print(("%-"+pad+"s (%s %7s)")%(f,
+            print(
                ("%-" + pad + "s (%s %7s)")
                % (
                    f,
                    humanize_date(os.path.getmtime(f)),
-                                                humanize_size(os.path.getsize(f))))
+                    humanize_size(os.path.getsize(f)),
                )
            )
    print("----\nFile check summary:")
    print("Database modified: %s" % (humanize_date(os.path.getmtime(options.sqlfile)),))
@@ -210,15 +332,20 @@ def clean_syms(files,path):
            nonsyms.append(f)
    return nonsyms
 def createdb(options):
    conn = sqlite3.connect(options.sqlfile)
    db = conn.cursor()
    conn.text_factory = str
-    db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
+    db.execute(
        "CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
               file TEXT,date INTEGER, hash TEXT,\
-               size INTEGER, mime TEXT)')
+               size INTEGER, mime TEXT)"
-    db.execute('CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
+    )
-               object TEXT)')
+    db.execute(
        "CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
               object TEXT)"
    )
    conn.commit()
    config = configparser.RawConfigParser()
@@ -237,7 +364,7 @@ def delete_nonexisting(sqlfile,options):
    conn.text_factory = str
    db = conn.cursor()
    dbdel = conn.cursor()
-    db.execute('SELECT file FROM list')
+    db.execute("SELECT file FROM list")
    for row in db:
        if os.path.exists(row[0]):
            delete = False
@@ -247,7 +374,7 @@ def delete_nonexisting(sqlfile,options):
        else:
            delete = True
        if delete:
-            print('removing.. '+row[0])
+            print("removing.. " + row[0])
            dbdel.execute("DELETE FROM list where file == ?", (row[0],))
    conn.commit()
    return
@@ -260,14 +387,17 @@ def disk_used(options):
    checkpath = filename_join(options.diskused, "", options) + "/"
    if checkpath == "./":
        checkpath = ""
-    db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
+    db.execute(
-                     (checkpath,
+        'SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
        (
            checkpath,
            checkpath + "%",
-                     ))
+        ),
    )
    entries = []
    sizes = []
    for row in db:
-        start_path=row[1].split('/')
+        start_path = row[1].split("/")
        start_path = "/".join(start_path[0 : int(options.diskused_depth)])
        if start_path not in entries:
            entries.append(start_path)
@@ -275,9 +405,11 @@ def disk_used(options):
        else:
            sizes[entries.index(start_path)] += row[0]
    for entry in zip(sizes, entries):
-        print("| ".join([ str(entry[0]).ljust(14),
+        print(
-                         humanize_size(entry[0]).rjust(8),
+            "| ".join(
-                         entry[1]]))
+                [str(entry[0]).ljust(14), humanize_size(entry[0]).rjust(8), entry[1]]
            )
        )
 def filename_join(path, name, options):
@@ -292,7 +424,9 @@ def find_duplicates(sqlfile, order):
    conn.text_factory = str
    db = conn.cursor()
    dbh = conn.cursor()
-    db.execute("SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 ")
+    db.execute(
        "SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 "
    )
    duphash = []
    for row in db:
        hash = row[0]
@@ -305,36 +439,38 @@ def find_duplicates(sqlfile, order):
    duphash.sort(key=lambda file: file[1][0])
    return duphash
 def ftime_match(db, filename, ftime):
    db.execute("SELECT date FROM list where file == ?", (filename,))
    count = db.fetchall()
    return count[0][0] == ftime
 def get_folder_contents(db, path):
-    ''' return the contents of the folder '''
+    """return the contents of the folder"""
    files = []
    if path == "./":
-        db.execute("SELECT file FROM list where file NOT LIKE ?",('%/%',))
+        db.execute("SELECT file FROM list where file NOT LIKE ?", ("%/%",))
        path = ""
    else:
-        db.execute("SELECT file FROM list where file LIKE ?",(path+'%',))
+        db.execute("SELECT file FROM list where file LIKE ?", (path + "%",))
    for row in db:
        try:
-            base=row[0].replace(path,'',1)
+            base = row[0].replace(path, "", 1)
        except UnicodeDecodeError:
            print(row[0] + " is giving me trouble.")
            try:
-                base=row[0].encode('utf-8').replace(path,'',1)
+                base = row[0].encode("utf-8").replace(path, "", 1)
            except UnicodeDecodeError:
                print(row[0] + " is still giving me trouble.")
                sys.exit(1)
-        if base.find('/')==-1:
+        if base.find("/") == -1:
            files.append(base)
    return files
 def get_md5(filename, fullfile=False):
-    ''' returns content based hash, only first 50Mb is read, unless user wants the whole file '''
+    """returns content based hash, only first 50Mb is read, unless user wants the whole file"""
    fsize = os.path.getsize(filename)
    if fullfile and fsize > DEFAULT_CHUNK:
        anim_i = 0
@@ -342,15 +478,18 @@ def get_md5(filename,fullfile=False):
        block_size = 2**24
        percents_per_block = int(100 / (float(fsize) / block_size))
        md5 = hashlib.md5()
-        with open(filename,'rb') as f:
+        with open(filename, "rb") as f:
-            for chunk in iter(lambda: f.read(block_size), b''):
+            for chunk in iter(lambda: f.read(block_size), b""):
-                sys.stderr.write('\r %s (%02d%%)'%(ANIM[anim_i%anim_len],int(anim_i*percents_per_block)))
+                sys.stderr.write(
                    "\r %s (%02d%%)"
                    % (ANIM[anim_i % anim_len], int(anim_i * percents_per_block))
                )
                sys.stderr.flush()
                anim_i += 1
                md5.update(chunk)
-        sys.stderr.write('\r        ')
+        sys.stderr.write("\r        ")
        return md5.hexdigest()
-    return hashlib.md5(open(filename,'rb').read(DEFAULT_CHUNK)).hexdigest()
+    return hashlib.md5(open(filename, "rb").read(DEFAULT_CHUNK)).hexdigest()
 def has_changes(options):
@@ -364,13 +503,14 @@ def has_changes(options):
    if options.hasadditions or options.haschanges:
        has_changes_additions(db, options)
 def has_changes_deleted(db, exit=True):
-    db.execute('SELECT file FROM list')
+    db.execute("SELECT file FROM list")
    deleted = []
    for row in db:
        if not os.path.exists(row[0]):
            if exit:
-                print('True')
+                print("True")
                sys.exit(1)
            else:
                deleted.append(row[0])
@@ -392,7 +532,7 @@ def has_changes_additions(db,options,exit=True):
            # if not is_listed(db,filename):
            if file not in db_files:
                if exit:
-                    print('True')
+                    print("True")
                    sys.exit(1)
                else:
                    added.append(filename)
@@ -402,28 +542,30 @@ def has_changes_additions(db,options,exit=True):
                    if not ftime_match(db, filename, ftime):
                        # file content changed
                        if exit:
-                            print('True')
+                            print("True")
                            sys.exit(1)
                        else:
                            changed.append(filename)
    return (added, changed)
 # ~ def hash_match(db,filename,hash):
 # ~ db.execute("SELECT hash FROM list where file == ?",(filename,))
 # ~ count=db.fetchall()
 # ~ return count[0][0]==hash
 def humanize_date(date):
    if date == None:
-        return ''
+        return ""
-    return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
+    return datetime.datetime.fromtimestamp(int(date)).strftime("%Y-%m-%d %H:%M:%S")
 def humanize_size(size, precision=1):
    if size == None:
-        return 'nan'
+        return "nan"
-    suffixes=['B','KB','MB','GB','TB']
+    suffixes = ["B", "KB", "MB", "GB", "TB"]
    suffixIndex = 0
    defPrecision = 0
    while size > 1024:
@@ -442,13 +584,16 @@ def is_listed(db,filename):
 def matchdb(sqlfile, needle, helper):
    needle = needle.lower()
    import difflib as dl
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    db = conn.cursor()
    if len(helper) > 0:
-        helper=['%'+i+'%' for i in helper]
+        helper = ["%" + i + "%" for i in helper]
-        like_query=' OR '.join(['file LIKE ?' for i in helper])
+        like_query = " OR ".join(["file LIKE ?" for i in helper])
-        db.execute("SELECT file FROM list WHERE "+like_query+" ORDER BY date DESC",helper)
+        db.execute(
            "SELECT file FROM list WHERE " + like_query + " ORDER BY date DESC", helper
        )
    else:
        db.execute("SELECT file FROM list ORDER BY date DESC")
    ratio = 0
@@ -461,17 +606,21 @@ def matchdb(sqlfile,needle,helper):
            best_match = row[0]
    print(best_match)
 def print_duplicates(files):
    for hash in files:
        # print(hash[0])
        i = 1
        for f in hash[1]:
-            print("%(i)d|%(s)s|%(d)s|%(f)s " % {
+            print(
-                'i':i,
+                "%(i)d|%(s)s|%(d)s|%(f)s "
-                'f':f[0],
+                % {
-                'd': humanize_date(f[2]),
+                    "i": i,
-                's': humanize_size(f[1])
+                    "f": f[0],
-            })
+                    "d": humanize_date(f[2]),
                    "s": humanize_size(f[1]),
                }
            )
            i += 1
    return
@@ -483,8 +632,8 @@ def print_stderr(s):
 def searchdb(sqlfile, needle):
-    needle=['%'+i+'%' for i in needle]
+    needle = ["%" + i + "%" for i in needle]
-    like_query=' OR '.join(['file LIKE ?' for i in needle])
+    like_query = " OR ".join(["file LIKE ?" for i in needle])
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    db = conn.cursor()
@@ -494,13 +643,13 @@ def searchdb(sqlfile,needle):
 def sort_by_method(flist, order):
-    if order == 'path':
+    if order == "path":
        flist.sort(key=lambda file: file[0])
-    if order == 'file':
+    if order == "file":
        flist.sort(key=lambda file: os.path.basename(file[0]))
-    if order == 'age':
+    if order == "age":
        flist.sort(key=lambda file: file[2])
-    if order == 'length':
+    if order == "length":
        flist.sort(key=lambda file: len(file[0]))
@@ -512,22 +661,22 @@ def stored_options(options):
        db.execute("SELECT object FROM config")
        store = ""
        for row in db:
-            store+=row[0]+'\n'
+            store += row[0] + "\n"
        config = configparser.RawConfigParser()
-        config.readfp(io.BytesIO(store))
+        config.read_file(io.StringIO(store))
        options.relative = config.getboolean("General", "Relative")
        options.fullfile = config.getboolean("General", "FullFile")
-    except:
+    except Exception as e:
        pass
    return options
 def main():
-    options=setup_options();
+    options = setup_options()
    if not os.path.exists(options.sqlfile):
-        createdb(options);
+        createdb(options)
    options = stored_options(options)
    if options.relative:
        os.chdir(options.sqlpath)
@@ -547,10 +696,10 @@ def main():
        disk_used(options)
        sys.exit(0)
    if options.delete:
-        print('Deleting entries...')
+        print("Deleting entries...")
        delete_nonexisting(options.sqlfile, options)
    if options.add or options.changed:
-        print('Adding '+options.startpath+' entries...')
+        print("Adding " + options.startpath + " entries...")
        add_recurse(options)
    if options.duplicate:
        files = find_duplicates(options.sqlfile, options.duplicate_order)
@@ -558,5 +707,5 @@ def main():
    sys.exit(0)
 main()
 main()