Merge branch 'master' of bitbucket.org:MoonQ/q-tools

Ville Rantanen
2023-01-02 08:37:59 +02:00
3 changed files with 541 additions and 314 deletions

bin/archivefs-mount Symbolic link

@@ -0,0 +1 @@
../files/archivefs-mount

files/archivefs-mount Executable file

@@ -0,0 +1,77 @@
#!/bin/bash
_helpexit() {
printf "Usage: %s [-u] archive.file
arhchive is iso/tar/zip/rar whatever archivemount can handle
-u will unmount all FUSE.archivemount paths if no path given!
" "$( basename $0 )"
echo Current mounts:
cat /proc/mounts | grep fuse.archivemount | awk '{ print $1 "\t" $2 }'
exit 1
}
for (( i=1; i<=$#; i++ )); do
[[ ${!i} = "-h" ]] && _helpexit
[[ ${!i} = "--help" ]] && _helpexit
done
unmount=false
for (( i=1; i<=$#; i++ )); do
[[ ${!i} = "-u" ]] && { unmount=true; continue; }
if [[ -z "$archive" ]]; then
archive="${!i}"
fi
done
mkdir -p ~/mnt/am
valid_name=$( basename "$archive" | sed -e 's/\s/_/g' )
mountpath=~/mnt/am/"$valid_name"
if [[ "$unmount" = true ]]; then
cd ~/mnt
if [[ -n "$archive" ]]; then
echo Unmounting "$mountpath"
fusermount -u -z "$mountpath"
rmdir --ignore-fail-on-non-empty "$mountpath" &>/dev/null
else
# no path, unmount all
awk '/fuse\.archivemount/ { print $2 }' /proc/mounts | while read -r dir; do
echo Unmounting "$dir"
fusermount -u -z "$dir"
rmdir --ignore-fail-on-non-empty "$dir" &>/dev/null
done
fi
exit
fi
if [[ -z "$archive" ]]; then
echo "No archive given"
_helpexit
fi
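# stat %d prints the device number: anything FUSE-mounted on $mountpath
# sits on a different device than ~/mnt/am, so equal device numbers mean
# the mountpoint is still free and we can mount onto it.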
if [[ -d "$mountpath" ]]; then
device1=$( stat -c "%d" "$mountpath" )
device2=$( stat -c "%d" ~/mnt/am )
else
device1=valid
device2=valid
fi
if [[ $device1 = $device2 ]]; then
echo "Mounting $archive in ~/mnt/am/$valid_name"
mkdir -p "$mountpath"
archivemount \
-o readonly \
-o intr \
-o uid=$( id -u ) \
-o gid=$( id -g ) \
"$archive" "$mountpath"
if [[ $? -gt 0 ]]; then
rmdir "$mountpath"
fi
else
echo "~/mnt/$valid_name is already mounted"
fi
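
A typical session with this helper, assuming bin/ is on PATH (archive name illustrative):

archivefs-mount photos-2022.zip       # mount read-only under ~/mnt/am/photos-2022.zip
ls ~/mnt/am/photos-2022.zip           # browse the archive like a directory
archivefs-mount -u photos-2022.zip    # unmount just this archive
archivefs-mount -u                    # unmount every fuse.archivemount mount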


@@ -8,63 +8,157 @@ import re
import sqlite3
import subprocess
import hashlib
import magic
# import magic
from argparse import ArgumentParser
import configparser
import io
import datetime
SQLFILE='list_of_files.sqlite'
IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
SQLFILE = "list_of_files.sqlite"
IMGMATCH = re.compile(r".*\.jpg$|.*\.jpeg$|.*\.png$", re.I)
BADDIRS = []
MINSIZE = 0
MIME=magic.open(magic.MAGIC_NONE)
#MIME=magic.open(magic.MAGIC_MIME)
MIME.load()
ANIM=['.','·',"'","'",'·','.','_']
# MIME=magic.open(magic.MAGIC_NONE)
##MIME=magic.open(magic.MAGIC_MIME)
# MIME.load()
ANIM = [".", "·", "'", "'", "·", ".", "_"]
DEFAULT_CHUNK = 1024 * 1024 * 50
def setup_options():
parser = ArgumentParser(description="Maintains the list of images sqlite file")
parser.add_argument("-a",action="store_false",dest="add",default=True,
help="Do not add new files [%(default)s]")
parser.add_argument("-c",action="store_true",dest="changed",default=False,
help="Modify changed files [%(default)s]")
parser.add_argument("--check",action="store_true",dest="check",default=False,
help="Check md5sums of files. Limit check with -s.")
parser.add_argument("-d",action="store_true",dest="delete",default=False,
help="Delete non-existing entries [%(default)s]")
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
help="Print directory sizes. Argument is the path where directories are listed from.")
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
help="Depth of summarization for --du.")
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
parser.add_argument("--dup-order",action="store",dest="duplicate_order",default='path',
help = "Order duplicates by a method. (length = path str length)",
choices = ('age','length','file','path'))
parser.add_argument(
"-a",
action="store_false",
dest="add",
default=True,
help="Do not add new files [%(default)s]",
)
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
help="SQL file name to use [%(default)s]")
parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
help="Follow symbolic links [%(default)s]")
parser.add_argument("--match",type=str,dest="match",default=False,
help="Search for closest match from basenames, can be helped with adding -s")
parser.add_argument("-s",type=str,action='append',dest="search",default=[],
help="Search list based on path pattern")
parser.add_argument("-x",action="append",dest="exclude",default=[],
help="Exclude folder name from the lists. This option may be issued several times")
parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
parser.add_argument("--relative",action="store_true",dest="relative",default=False,
help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.")
parser.add_argument('startpath', action="store",default='.', nargs='?')
parser.add_argument(
"-c",
action="store_true",
dest="changed",
default=False,
help="Modify changed files [%(default)s]",
)
parser.add_argument(
"--check",
action="store_true",
dest="check",
default=False,
help="Check md5sums of files. Limit check with -s.",
)
parser.add_argument(
"-d",
action="store_true",
dest="delete",
default=False,
help="Delete non-existing entries [%(default)s]",
)
parser.add_argument(
"--du",
type=str,
action="store",
dest="diskused",
default=False,
help="Print directory sizes. Argument is the path where directories are listed from.",
)
parser.add_argument(
"--du-depth",
type=str,
action="store",
dest="diskused_depth",
default=1,
help="Depth of summarization for --du.",
)
parser.add_argument(
"--dup",
action="store_true",
dest="duplicate",
default=False,
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]",
)
parser.add_argument(
"--dup-order",
action="store",
dest="duplicate_order",
default="path",
help="Order duplicates by a method. (length = path str length)",
choices=("age", "length", "file", "path"),
)
parser.add_argument(
"--haschanges",
action="store_true",
dest="haschanges",
default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
)
parser.add_argument(
"--hasdeletions",
action="store_true",
dest="hasdeletions",
default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
)
parser.add_argument(
"--hasadditions",
action="store_true",
dest="hasadditions",
default=False,
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
)
parser.add_argument(
"-f",
action="store",
dest="sqlfile",
default=SQLFILE,
help="SQL file name to use [%(default)s]",
)
parser.add_argument(
"-l",
action="store_true",
dest="symlinks",
default=False,
help="Follow symbolic links [%(default)s]",
)
parser.add_argument(
"--match",
type=str,
dest="match",
default=False,
help="Search for closest match from basenames, can be helped with adding -s",
)
parser.add_argument(
"-s",
type=str,
action="append",
dest="search",
default=[],
help="Search list based on path pattern",
)
parser.add_argument(
"-x",
action="append",
dest="exclude",
default=[],
help="Exclude folder name from the lists. This option may be issued several times",
)
parser.add_argument(
"--full",
action="store_true",
dest="fullfile",
default=False,
help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]",
)
parser.add_argument(
"--relative",
action="store_true",
dest="relative",
default=False,
help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.",
)
parser.add_argument("startpath", action="store", default=".", nargs="?")
options = parser.parse_args()
BADDIRS.extend(options.exclude)
@@ -81,7 +175,12 @@ def add_recurse(options):
db = conn.cursor()
prev_path_len = 0
for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
sys.stdout.write(("\r%s%s"%(filename_join(path,".",options),(prev_path_len-len(path))*' ')))
sys.stdout.write(
(
"\r%s%s"
% (filename_join(path, ".", options), (prev_path_len - len(path)) * " ")
)
)
prev_path_len = len(path)
dirs = clean_dirs(dirs)
dirs.sort()
@@ -104,7 +203,9 @@ def add_recurse(options):
ftime = os.path.getmtime(filename)
if not ftime_match(db, filename, ftime):
# file content changed
add_single(conn,filename,change=True,fullfile=options.fullfile)
add_single(
conn, filename, change=True, fullfile=options.fullfile
)
conn.commit()
sys.stdout.write("\n")
return
@@ -122,71 +223,92 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
if hash is None:
hash = get_md5(filename, fullfile)
ftime = os.path.getmtime(filename)
mime=MIME.file(str(filename.encode('UTF-8')))
except IOError:
print("File '%s' not found. Bad link?" % (filename,))
return
except (UnicodeDecodeError, TypeError):
mime="NA"
if change:
db.execute("UPDATE list SET date=?, hash=?, size=?, mime=? \
WHERE file=?",(ftime,hash,fsize,mime,filename))
db.execute(
"UPDATE list SET date=?, hash=?, size=?, \
WHERE file=?",
(ftime, hash, fsize, filename),
)
# print "changing: %(f)s " % {'f':filename}
else:
db.execute("INSERT INTO list(file,date,hash,size,mime)\
VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
sys.stdout.write('\r')
db.execute(
"INSERT INTO list(file,date,hash,size)\
VALUES(?,?,?,?)",
(filename, ftime, hash, fsize),
)
sys.stdout.write("\r")
return
def checkdb(options):
needle = options.search
if len(needle) == 0:
needle.append('%')
needle=['%'+i+'%' for i in needle]
like_query=' OR '.join(['file LIKE ?' for i in needle])
needle.append("%")
needle = ["%" + i + "%" for i in needle]
like_query = " OR ".join(["file LIKE ?" for i in needle])
conn = sqlite3.connect(options.sqlfile)
conn.text_factory = str
db = conn.cursor()
db.execute("SELECT file,hash,size,date FROM list WHERE "+like_query+" ORDER BY file",needle)
db.execute(
"SELECT file,hash,size,date FROM list WHERE " + like_query + " ORDER BY file",
needle,
)
missing = []
differing = []
OK_count = 0
for row in db:
status='OK'
status = "OK"
sys.stdout.write("\r%s" % (row[0],))
if os.path.exists(row[0]):
md5f = get_md5(row[0], options.fullfile)
if row[1] != md5f:
status='Checksum-difference'
status = "Checksum-difference"
differing.append(row)
else:
status='Not-found'
status = "Not-found"
missing.append(row)
sys.stdout.write("\r%s %s\n" % (row[0], status))
if status=='OK':
if status == "OK":
OK_count += 1
if len(differing) > 0:
print_stderr("----\nDiffering files:")
pad = str(max([len(x[0]) for x in differing]))
for f in differing:
print(("%-"+pad+"s (%s %7s => %s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]),
print(
("%-" + pad + "s (%s %7s => %s %7s)")
% (
f[0],
humanize_date(f[3]),
humanize_size(f[2]),
humanize_date(os.path.getmtime(f[0])),
humanize_size(os.path.getsize(f[0]))))
humanize_size(os.path.getsize(f[0])),
)
)
if len(missing) > 0:
print("----\nMissing files:")
pad = str(max([len(x[0]) for x in missing]))
for f in missing:
print(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
print(
("%-" + pad + "s (%s %7s)")
% (f[0], humanize_date(f[3]), humanize_size(f[2]))
)
(added, changed) = has_changes_additions(db, options, False)
if len(added) > 0:
print("----\nAdded files:")
pad = str(max([len(x[0]) for x in added]))
for f in added:
print(("%-"+pad+"s (%s %7s)")%(f,
print(
("%-" + pad + "s (%s %7s)")
% (
f,
humanize_date(os.path.getmtime(f)),
humanize_size(os.path.getsize(f))))
humanize_size(os.path.getsize(f)),
)
)
print("----\nFile check summary:")
print("Database modified: %s" % (humanize_date(os.path.getmtime(options.sqlfile)),))
@@ -210,15 +332,20 @@ def clean_syms(files,path):
nonsyms.append(f)
return nonsyms
def createdb(options):
conn = sqlite3.connect(options.sqlfile)
db = conn.cursor()
conn.text_factory = str
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
db.execute(
"CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
file TEXT,date INTEGER, hash TEXT,\
size INTEGER, mime TEXT)')
db.execute('CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
object TEXT)')
size INTEGER, mime TEXT)"
)
db.execute(
"CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
object TEXT)"
)
conn.commit()
config = configparser.RawConfigParser()
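
Since the index is a plain SQLite file, it can also be inspected outside the tool; a quick sanity check from the shell, using the table and columns created above:

sqlite3 list_of_files.sqlite 'SELECT file, size, date FROM list ORDER BY size DESC LIMIT 10;'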
@@ -237,7 +364,7 @@ def delete_nonexisting(sqlfile,options):
conn.text_factory = str
db = conn.cursor()
dbdel = conn.cursor()
db.execute('SELECT file FROM list')
db.execute("SELECT file FROM list")
for row in db:
if os.path.exists(row[0]):
delete = False
@@ -247,7 +374,7 @@ def delete_nonexisting(sqlfile,options):
else:
delete = True
if delete:
print('removing.. '+row[0])
print("removing.. " + row[0])
dbdel.execute("DELETE FROM list where file == ?", (row[0],))
conn.commit()
return
@@ -260,14 +387,17 @@ def disk_used(options):
checkpath = filename_join(options.diskused, "", options) + "/"
if checkpath == "./":
checkpath = ""
db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
(checkpath,
db.execute(
'SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
(
checkpath,
checkpath + "%",
))
),
)
entries = []
sizes = []
for row in db:
start_path=row[1].split('/')
start_path = row[1].split("/")
start_path = "/".join(start_path[0 : int(options.diskused_depth)])
if start_path not in entries:
entries.append(start_path)
@@ -275,9 +405,11 @@ def disk_used(options):
else:
sizes[entries.index(start_path)] += row[0]
for entry in zip(sizes, entries):
print("| ".join([ str(entry[0]).ljust(14),
humanize_size(entry[0]).rjust(8),
entry[1]]))
print(
"| ".join(
[str(entry[0]).ljust(14), humanize_size(entry[0]).rjust(8), entry[1]]
)
)
def filename_join(path, name, options):
@@ -292,7 +424,9 @@ def find_duplicates(sqlfile, order):
conn.text_factory = str
db = conn.cursor()
dbh = conn.cursor()
db.execute("SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 ")
db.execute(
"SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 "
)
duphash = []
for row in db:
hash = row[0]
@@ -305,36 +439,38 @@ def find_duplicates(sqlfile, order):
duphash.sort(key=lambda file: file[1][0])
return duphash
def ftime_match(db, filename, ftime):
db.execute("SELECT date FROM list where file == ?", (filename,))
count = db.fetchall()
return count[0][0] == ftime
def get_folder_contents(db, path):
''' return the contents of the folder '''
"""return the contents of the folder"""
files = []
if path == "./":
db.execute("SELECT file FROM list where file NOT LIKE ?",('%/%',))
db.execute("SELECT file FROM list where file NOT LIKE ?", ("%/%",))
path = ""
else:
db.execute("SELECT file FROM list where file LIKE ?",(path+'%',))
db.execute("SELECT file FROM list where file LIKE ?", (path + "%",))
for row in db:
try:
base=row[0].replace(path,'',1)
base = row[0].replace(path, "", 1)
except UnicodeDecodeError:
print(row[0] + " is giving me trouble.")
try:
base=row[0].encode('utf-8').replace(path,'',1)
base = row[0].encode("utf-8").replace(path, "", 1)
except UnicodeDecodeError:
print(row[0] + " is still giving me trouble.")
sys.exit(1)
if base.find('/')==-1:
if base.find("/") == -1:
files.append(base)
return files
def get_md5(filename, fullfile=False):
''' returns content based hash, only first 50Mb is read, unless user wants the whole file '''
"""returns content based hash, only first 50Mb is read, unless user wants the whole file"""
fsize = os.path.getsize(filename)
if fullfile and fsize > DEFAULT_CHUNK:
anim_i = 0
@@ -342,15 +478,18 @@ def get_md5(filename,fullfile=False):
block_size = 2**24
percents_per_block = int(100 / (float(fsize) / block_size))
md5 = hashlib.md5()
with open(filename,'rb') as f:
for chunk in iter(lambda: f.read(block_size), b''):
sys.stderr.write('\r %s (%02d%%)'%(ANIM[anim_i%anim_len],int(anim_i*percents_per_block)))
with open(filename, "rb") as f:
for chunk in iter(lambda: f.read(block_size), b""):
sys.stderr.write(
"\r %s (%02d%%)"
% (ANIM[anim_i % anim_len], int(anim_i * percents_per_block))
)
sys.stderr.flush()
anim_i += 1
md5.update(chunk)
sys.stderr.write('\r ')
sys.stderr.write("\r ")
return md5.hexdigest()
return hashlib.md5(open(filename,'rb').read(DEFAULT_CHUNK)).hexdigest()
return hashlib.md5(open(filename, "rb").read(DEFAULT_CHUNK)).hexdigest()
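
By default only the first DEFAULT_CHUNK bytes (50 MiB) feed the digest, so a stored hash can be reproduced by hand; a rough shell equivalent (path illustrative):

head -c 52428800 some/large.file | md5sum    # 52428800 = 1024*1024*50 = DEFAULT_CHUNK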
def has_changes(options):
@@ -364,13 +503,14 @@ def has_changes(options):
if options.hasadditions or options.haschanges:
has_changes_additions(db, options)
def has_changes_deleted(db, exit=True):
db.execute('SELECT file FROM list')
db.execute("SELECT file FROM list")
deleted = []
for row in db:
if not os.path.exists(row[0]):
if exit:
print('True')
print("True")
sys.exit(1)
else:
deleted.append(row[0])
@@ -392,7 +532,7 @@ def has_changes_additions(db,options,exit=True):
# if not is_listed(db,filename):
if file not in db_files:
if exit:
print('True')
print("True")
sys.exit(1)
else:
added.append(filename)
@@ -402,28 +542,30 @@ def has_changes_additions(db,options,exit=True):
if not ftime_match(db, filename, ftime):
# file content changed
if exit:
print('True')
print("True")
sys.exit(1)
else:
changed.append(filename)
return (added, changed)
# ~ def hash_match(db,filename,hash):
# ~ db.execute("SELECT hash FROM list where file == ?",(filename,))
# ~ count=db.fetchall()
# ~ return count[0][0]==hash
def humanize_date(date):
if date is None:
return ''
return datetime.datetime.fromtimestamp(int(date)).strftime('%Y-%m-%d %H:%M:%S')
return ""
return datetime.datetime.fromtimestamp(int(date)).strftime("%Y-%m-%d %H:%M:%S")
def humanize_size(size, precision=1):
if size is None:
return 'nan'
suffixes=['B','KB','MB','GB','TB']
return "nan"
suffixes = ["B", "KB", "MB", "GB", "TB"]
suffixIndex = 0
defPrecision = 0
while size > 1024:
@@ -442,13 +584,16 @@ def is_listed(db,filename):
def matchdb(sqlfile, needle, helper):
needle = needle.lower()
import difflib as dl
conn = sqlite3.connect(sqlfile)
conn.text_factory = str
db = conn.cursor()
if len(helper) > 0:
helper=['%'+i+'%' for i in helper]
like_query=' OR '.join(['file LIKE ?' for i in helper])
db.execute("SELECT file FROM list WHERE "+like_query+" ORDER BY date DESC",helper)
helper = ["%" + i + "%" for i in helper]
like_query = " OR ".join(["file LIKE ?" for i in helper])
db.execute(
"SELECT file FROM list WHERE " + like_query + " ORDER BY date DESC", helper
)
else:
db.execute("SELECT file FROM list ORDER BY date DESC")
ratio = 0
@@ -461,17 +606,21 @@ def matchdb(sqlfile,needle,helper):
best_match = row[0]
print(best_match)
def print_duplicates(files):
for hash in files:
# print(hash[0])
i = 1
for f in hash[1]:
print("%(i)d|%(s)s|%(d)s|%(f)s " % {
'i':i,
'f':f[0],
'd': humanize_date(f[2]),
's': humanize_size(f[1])
})
print(
"%(i)d|%(s)s|%(d)s|%(f)s "
% {
"i": i,
"f": f[0],
"d": humanize_date(f[2]),
"s": humanize_size(f[1]),
}
)
i += 1
return
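
Each copy in a duplicate group prints as one pipe-separated record, copy index first; illustrative output (values invented):

1|12.3MB|2021-03-14 09:26:53|photos/trip/IMG_1001.jpg
2|12.3MB|2021-03-14 09:26:53|backup/old/IMG_1001.jpg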
@@ -483,8 +632,8 @@ def print_stderr(s):
def searchdb(sqlfile, needle):
needle=['%'+i+'%' for i in needle]
like_query=' OR '.join(['file LIKE ?' for i in needle])
needle = ["%" + i + "%" for i in needle]
like_query = " OR ".join(["file LIKE ?" for i in needle])
conn = sqlite3.connect(sqlfile)
conn.text_factory = str
db = conn.cursor()
@@ -494,13 +643,13 @@ def searchdb(sqlfile,needle):
def sort_by_method(flist, order):
if order == 'path':
if order == "path":
flist.sort(key=lambda file: file[0])
if order == 'file':
if order == "file":
flist.sort(key=lambda file: os.path.basename(file[0]))
if order == 'age':
if order == "age":
flist.sort(key=lambda file: file[2])
if order == 'length':
if order == "length":
flist.sort(key=lambda file: len(file[0]))
@@ -512,22 +661,22 @@ def stored_options(options):
db.execute("SELECT object FROM config")
store = ""
for row in db:
store+=row[0]+'\n'
store += row[0] + "\n"
config = configparser.RawConfigParser()
config.readfp(io.BytesIO(store))
config.read_file(io.StringIO(store))
options.relative = config.getboolean("General", "Relative")
options.fullfile = config.getboolean("General", "FullFile")
except:
except Exception:
pass
return options
def main():
options=setup_options();
options = setup_options()
if not os.path.exists(options.sqlfile):
createdb(options);
createdb(options)
options = stored_options(options)
if options.relative:
os.chdir(options.sqlpath)
@@ -547,10 +696,10 @@ def main():
disk_used(options)
sys.exit(0)
if options.delete:
print('Deleting entries...')
print("Deleting entries...")
delete_nonexisting(options.sqlfile, options)
if options.add or options.changed:
print('Adding '+options.startpath+' entries...')
print("Adding " + options.startpath + " entries...")
add_recurse(options)
if options.duplicate:
files = find_duplicates(options.sqlfile, options.duplicate_order)
@@ -558,5 +707,5 @@ def main():
sys.exit(0)
main()
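
Taken together, a typical round trip with the indexer looks like this, assuming the script is installed on PATH as list-of-files (the name is hypothetical; this diff does not show the file name):

list-of-files .                       # index the tree into list_of_files.sqlite
list-of-files --haschanges            # print True and exit 1 if the DB is stale
list-of-files --check -s photos       # re-verify checksums for paths matching "photos"
list-of-files --dup --dup-order age   # list duplicate files, oldest first
list-of-files --du . --du-depth 2     # per-directory size summary, two levels deep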