Removed MIME type detection to reduce dependencies. Fixed a bug in options reading.
This commit is contained in:
@@ -8,63 +8,157 @@ import re
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import subprocess
|
import subprocess
|
||||||
import hashlib
|
import hashlib
|
||||||
import magic
|
|
||||||
|
# import magic
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import configparser
|
import configparser
|
||||||
import io
|
import io
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
# Default database file name; used as the default value of the -f option.
SQLFILE = "list_of_files.sqlite"
# Case-insensitive matcher for names ending in .jpg, .jpeg or .png.
# Raw string: "\." in a normal string literal is an invalid escape sequence.
IMGMATCH = re.compile(r".*\.jpg$|.*\.jpeg$|.*\.png$", re.I)
# Folder names to skip; setup_options() extends this with the -x arguments.
BADDIRS = []
MINSIZE = 0
# NOTE: MIME type detection through libmagic (`magic`) was removed to drop
# the dependency, so there is no module-level MIME object any more.
# Frames of the progress animation that get_md5() writes to stderr.
ANIM = [".", "·", "'", "'", "·", ".", "_"]
# Number of bytes (50 MiB) hashed by get_md5() unless the full file is requested.
DEFAULT_CHUNK = 1024 * 1024 * 50
|
||||||
|
|
||||||
|
|
||||||
def setup_options():
|
def setup_options():
|
||||||
parser = ArgumentParser(description="Maintains the list of images sqlite file")
|
parser = ArgumentParser(description="Maintains the list of images sqlite file")
|
||||||
parser.add_argument("-a",action="store_false",dest="add",default=True,
|
parser.add_argument(
|
||||||
help="Do not add new files [%(default)s]")
|
"-a",
|
||||||
parser.add_argument("-c",action="store_true",dest="changed",default=False,
|
action="store_false",
|
||||||
help="Modify changed files [%(default)s]")
|
dest="add",
|
||||||
parser.add_argument("--check",action="store_true",dest="check",default=False,
|
default=True,
|
||||||
help="Check md5sums of files. Limit check with -s.")
|
help="Do not add new files [%(default)s]",
|
||||||
parser.add_argument("-d",action="store_true",dest="delete",default=False,
|
|
||||||
help="Delete non-existing entries [%(default)s]")
|
|
||||||
parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
|
|
||||||
help="Print directory sizes. Argument is the path where directories are listed from.")
|
|
||||||
parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
|
|
||||||
help="Depth of summarization for --du.")
|
|
||||||
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
|
|
||||||
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
|
|
||||||
parser.add_argument("--dup-order",action="store",dest="duplicate_order",default='path',
|
|
||||||
help = "Order duplicates by a method. (length = path str length)",
|
|
||||||
choices = ('age','length','file','path')
|
|
||||||
)
|
)
|
||||||
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
|
parser.add_argument(
|
||||||
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
|
"-c",
|
||||||
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
|
action="store_true",
|
||||||
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
|
dest="changed",
|
||||||
parser.add_argument("--hasadditions",action="store_true",dest="hasadditions",default=False,
|
default=False,
|
||||||
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
|
help="Modify changed files [%(default)s]",
|
||||||
parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
|
)
|
||||||
help="SQL file name to use [%(default)s]")
|
parser.add_argument(
|
||||||
parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
|
"--check",
|
||||||
help="Follow symbolic links [%(default)s]")
|
action="store_true",
|
||||||
parser.add_argument("--match",type=str,dest="match",default=False,
|
dest="check",
|
||||||
help="Search for closest match from basenames, can be helped with adding -s")
|
default=False,
|
||||||
parser.add_argument("-s",type=str,action='append',dest="search",default=[],
|
help="Check md5sums of files. Limit check with -s.",
|
||||||
help="Search list based on path pattern")
|
)
|
||||||
parser.add_argument("-x",action="append",dest="exclude",default=[],
|
parser.add_argument(
|
||||||
help="Exclude folder name from the lists. This option may be issued several times")
|
"-d",
|
||||||
parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
|
action="store_true",
|
||||||
help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
|
dest="delete",
|
||||||
parser.add_argument("--relative",action="store_true",dest="relative",default=False,
|
default=False,
|
||||||
help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.")
|
help="Delete non-existing entries [%(default)s]",
|
||||||
parser.add_argument('startpath', action="store",default='.', nargs='?')
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--du",
|
||||||
|
type=str,
|
||||||
|
action="store",
|
||||||
|
dest="diskused",
|
||||||
|
default=False,
|
||||||
|
help="Print directory sizes. Argument is the path where directories are listed from.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--du-depth",
|
||||||
|
type=str,
|
||||||
|
action="store",
|
||||||
|
dest="diskused_depth",
|
||||||
|
default=1,
|
||||||
|
help="Depth of summarization for --du.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dup",
|
||||||
|
action="store_true",
|
||||||
|
dest="duplicate",
|
||||||
|
default=False,
|
||||||
|
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dup-order",
|
||||||
|
action="store",
|
||||||
|
dest="duplicate_order",
|
||||||
|
default="path",
|
||||||
|
help="Order duplicates by a method. (length = path str length)",
|
||||||
|
choices=("age", "length", "file", "path"),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--haschanges",
|
||||||
|
action="store_true",
|
||||||
|
dest="haschanges",
|
||||||
|
default=False,
|
||||||
|
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--hasdeletions",
|
||||||
|
action="store_true",
|
||||||
|
dest="hasdeletions",
|
||||||
|
default=False,
|
||||||
|
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--hasadditions",
|
||||||
|
action="store_true",
|
||||||
|
dest="hasadditions",
|
||||||
|
default=False,
|
||||||
|
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-f",
|
||||||
|
action="store",
|
||||||
|
dest="sqlfile",
|
||||||
|
default=SQLFILE,
|
||||||
|
help="SQL file name to use [%(default)s]",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-l",
|
||||||
|
action="store_true",
|
||||||
|
dest="symlinks",
|
||||||
|
default=False,
|
||||||
|
help="Follow symbolic links [%(default)s]",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--match",
|
||||||
|
type=str,
|
||||||
|
dest="match",
|
||||||
|
default=False,
|
||||||
|
help="Search for closest match from basenames, can be helped with adding -s",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-s",
|
||||||
|
type=str,
|
||||||
|
action="append",
|
||||||
|
dest="search",
|
||||||
|
default=[],
|
||||||
|
help="Search list based on path pattern",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-x",
|
||||||
|
action="append",
|
||||||
|
dest="exclude",
|
||||||
|
default=[],
|
||||||
|
help="Exclude folder name from the lists. This option may be issued several times",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--full",
|
||||||
|
action="store_true",
|
||||||
|
dest="fullfile",
|
||||||
|
default=False,
|
||||||
|
help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--relative",
|
||||||
|
action="store_true",
|
||||||
|
dest="relative",
|
||||||
|
default=False,
|
||||||
|
help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.",
|
||||||
|
)
|
||||||
|
parser.add_argument("startpath", action="store", default=".", nargs="?")
|
||||||
|
|
||||||
options = parser.parse_args()
|
options = parser.parse_args()
|
||||||
BADDIRS.extend(options.exclude)
|
BADDIRS.extend(options.exclude)
|
||||||
@@ -81,7 +175,12 @@ def add_recurse(options):
|
|||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
prev_path_len = 0
|
prev_path_len = 0
|
||||||
for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
|
for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
|
||||||
sys.stdout.write(("\r%s%s"%(filename_join(path,".",options),(prev_path_len-len(path))*' ')))
|
sys.stdout.write(
|
||||||
|
(
|
||||||
|
"\r%s%s"
|
||||||
|
% (filename_join(path, ".", options), (prev_path_len - len(path)) * " ")
|
||||||
|
)
|
||||||
|
)
|
||||||
prev_path_len = len(path)
|
prev_path_len = len(path)
|
||||||
dirs = clean_dirs(dirs)
|
dirs = clean_dirs(dirs)
|
||||||
dirs.sort()
|
dirs.sort()
|
||||||
@@ -104,7 +203,9 @@ def add_recurse(options):
|
|||||||
ftime = os.path.getmtime(filename)
|
ftime = os.path.getmtime(filename)
|
||||||
if not ftime_match(db, filename, ftime):
|
if not ftime_match(db, filename, ftime):
|
||||||
# file content changed
|
# file content changed
|
||||||
add_single(conn,filename,change=True,fullfile=options.fullfile)
|
add_single(
|
||||||
|
conn, filename, change=True, fullfile=options.fullfile
|
||||||
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
sys.stdout.write("\n")
|
sys.stdout.write("\n")
|
||||||
return
|
return
|
||||||
@@ -122,71 +223,92 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
|
|||||||
if hash == None:
|
if hash == None:
|
||||||
hash = get_md5(filename, fullfile)
|
hash = get_md5(filename, fullfile)
|
||||||
ftime = os.path.getmtime(filename)
|
ftime = os.path.getmtime(filename)
|
||||||
mime=MIME.file(str(filename.encode('UTF-8')))
|
|
||||||
except IOError:
|
except IOError:
|
||||||
print("File '%s' not found. Bad link?" % (filename,))
|
print("File '%s' not found. Bad link?" % (filename,))
|
||||||
return
|
return
|
||||||
except (UnicodeDecodeError, TypeError):
|
|
||||||
mime="NA"
|
|
||||||
|
|
||||||
# Store the file's metadata: update the existing row when the file is
# already listed (change=True), otherwise insert a new row.
# The `mime` column still exists in the table schema but is no longer
# written, so it stays NULL for new rows.
if change:
    # No comma may precede WHERE: "size=?, WHERE" is an SQL syntax error
    # and made every update of a changed file fail.
    db.execute(
        "UPDATE list SET date=?, hash=?, size=? WHERE file=?",
        (ftime, hash, fsize, filename),
    )
    # print "changing: %(f)s " % {'f':filename}
else:
    db.execute(
        "INSERT INTO list(file,date,hash,size) VALUES(?,?,?,?)",
        (filename, ftime, hash, fsize),
    )
|
||||||
|
sys.stdout.write("\r")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def checkdb(options):
|
def checkdb(options):
|
||||||
needle = options.search
|
needle = options.search
|
||||||
if len(needle) == 0:
|
if len(needle) == 0:
|
||||||
needle.append('%')
|
needle.append("%")
|
||||||
needle=['%'+i+'%' for i in needle]
|
needle = ["%" + i + "%" for i in needle]
|
||||||
like_query=' OR '.join(['file LIKE ?' for i in needle])
|
like_query = " OR ".join(["file LIKE ?" for i in needle])
|
||||||
conn = sqlite3.connect(options.sqlfile)
|
conn = sqlite3.connect(options.sqlfile)
|
||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
db.execute("SELECT file,hash,size,date FROM list WHERE "+like_query+" ORDER BY file",needle)
|
db.execute(
|
||||||
|
"SELECT file,hash,size,date FROM list WHERE " + like_query + " ORDER BY file",
|
||||||
|
needle,
|
||||||
|
)
|
||||||
missing = []
|
missing = []
|
||||||
differing = []
|
differing = []
|
||||||
OK_count = 0
|
OK_count = 0
|
||||||
for row in db:
|
for row in db:
|
||||||
status='OK'
|
status = "OK"
|
||||||
sys.stdout.write("\r%s" % (row[0],))
|
sys.stdout.write("\r%s" % (row[0],))
|
||||||
if os.path.exists(row[0]):
|
if os.path.exists(row[0]):
|
||||||
md5f = get_md5(row[0], options.fullfile)
|
md5f = get_md5(row[0], options.fullfile)
|
||||||
if row[1] != md5f:
|
if row[1] != md5f:
|
||||||
status='Checksum-difference'
|
status = "Checksum-difference"
|
||||||
differing.append(row)
|
differing.append(row)
|
||||||
else:
|
else:
|
||||||
status='Not-found'
|
status = "Not-found"
|
||||||
missing.append(row)
|
missing.append(row)
|
||||||
sys.stdout.write("\r%s %s\n" % (row[0], status))
|
sys.stdout.write("\r%s %s\n" % (row[0], status))
|
||||||
if status=='OK':
|
if status == "OK":
|
||||||
OK_count += 1
|
OK_count += 1
|
||||||
if len(differing) > 0:
|
if len(differing) > 0:
|
||||||
print_stderr("----\nDiffering files:")
|
print_stderr("----\nDiffering files:")
|
||||||
pad = str(max([len(x[0]) for x in differing]))
|
pad = str(max([len(x[0]) for x in differing]))
|
||||||
for f in differing:
|
for f in differing:
|
||||||
print(("%-"+pad+"s (%s %7s => %s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2]),
|
print(
|
||||||
|
("%-" + pad + "s (%s %7s => %s %7s)")
|
||||||
|
% (
|
||||||
|
f[0],
|
||||||
|
humanize_date(f[3]),
|
||||||
|
humanize_size(f[2]),
|
||||||
humanize_date(os.path.getmtime(f[0])),
|
humanize_date(os.path.getmtime(f[0])),
|
||||||
humanize_size(os.path.getsize(f[0]))))
|
humanize_size(os.path.getsize(f[0])),
|
||||||
|
)
|
||||||
|
)
|
||||||
if len(missing) > 0:
|
if len(missing) > 0:
|
||||||
print("----\nMissing files:")
|
print("----\nMissing files:")
|
||||||
pad = str(max([len(x[0]) for x in missing]))
|
pad = str(max([len(x[0]) for x in missing]))
|
||||||
for f in missing:
|
for f in missing:
|
||||||
print(("%-"+pad+"s (%s %7s)")%(f[0],humanize_date(f[3]),humanize_size(f[2])))
|
print(
|
||||||
|
("%-" + pad + "s (%s %7s)")
|
||||||
|
% (f[0], humanize_date(f[3]), humanize_size(f[2]))
|
||||||
|
)
|
||||||
(added, changed) = has_changes_additions(db, options, False)
|
(added, changed) = has_changes_additions(db, options, False)
|
||||||
if len(added) > 0:
|
if len(added) > 0:
|
||||||
print("----\nAdded files:")
|
print("----\nAdded files:")
|
||||||
pad = str(max([len(x[0]) for x in added]))
|
pad = str(max([len(x[0]) for x in added]))
|
||||||
for f in added:
|
for f in added:
|
||||||
print(("%-"+pad+"s (%s %7s)")%(f,
|
print(
|
||||||
|
("%-" + pad + "s (%s %7s)")
|
||||||
|
% (
|
||||||
|
f,
|
||||||
humanize_date(os.path.getmtime(f)),
|
humanize_date(os.path.getmtime(f)),
|
||||||
humanize_size(os.path.getsize(f))))
|
humanize_size(os.path.getsize(f)),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
print("----\nFile check summary:")
|
print("----\nFile check summary:")
|
||||||
print("Database modified: %s" % (humanize_date(os.path.getmtime(options.sqlfile)),))
|
print("Database modified: %s" % (humanize_date(os.path.getmtime(options.sqlfile)),))
|
||||||
@@ -210,15 +332,20 @@ def clean_syms(files,path):
|
|||||||
nonsyms.append(f)
|
nonsyms.append(f)
|
||||||
return nonsyms
|
return nonsyms
|
||||||
|
|
||||||
|
|
||||||
def createdb(options):
|
def createdb(options):
|
||||||
conn = sqlite3.connect(options.sqlfile)
|
conn = sqlite3.connect(options.sqlfile)
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
db.execute(
|
||||||
|
"CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
||||||
file TEXT,date INTEGER, hash TEXT,\
|
file TEXT,date INTEGER, hash TEXT,\
|
||||||
size INTEGER, mime TEXT)')
|
size INTEGER, mime TEXT)"
|
||||||
db.execute('CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
)
|
||||||
object TEXT)')
|
db.execute(
|
||||||
|
"CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT,\
|
||||||
|
object TEXT)"
|
||||||
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
config = configparser.RawConfigParser()
|
config = configparser.RawConfigParser()
|
||||||
@@ -237,7 +364,7 @@ def delete_nonexisting(sqlfile,options):
|
|||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
dbdel = conn.cursor()
|
dbdel = conn.cursor()
|
||||||
db.execute('SELECT file FROM list')
|
db.execute("SELECT file FROM list")
|
||||||
for row in db:
|
for row in db:
|
||||||
if os.path.exists(row[0]):
|
if os.path.exists(row[0]):
|
||||||
delete = False
|
delete = False
|
||||||
@@ -247,7 +374,7 @@ def delete_nonexisting(sqlfile,options):
|
|||||||
else:
|
else:
|
||||||
delete = True
|
delete = True
|
||||||
if delete:
|
if delete:
|
||||||
print('removing.. '+row[0])
|
print("removing.. " + row[0])
|
||||||
dbdel.execute("DELETE FROM list where file == ?", (row[0],))
|
dbdel.execute("DELETE FROM list where file == ?", (row[0],))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
return
|
return
|
||||||
@@ -260,14 +387,17 @@ def disk_used(options):
|
|||||||
checkpath = filename_join(options.diskused, "", options) + "/"
|
checkpath = filename_join(options.diskused, "", options) + "/"
|
||||||
if checkpath == "./":
|
if checkpath == "./":
|
||||||
checkpath = ""
|
checkpath = ""
|
||||||
db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
|
db.execute(
|
||||||
(checkpath,
|
'SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
|
||||||
|
(
|
||||||
|
checkpath,
|
||||||
checkpath + "%",
|
checkpath + "%",
|
||||||
))
|
),
|
||||||
|
)
|
||||||
entries = []
|
entries = []
|
||||||
sizes = []
|
sizes = []
|
||||||
for row in db:
|
for row in db:
|
||||||
start_path=row[1].split('/')
|
start_path = row[1].split("/")
|
||||||
start_path = "/".join(start_path[0 : int(options.diskused_depth)])
|
start_path = "/".join(start_path[0 : int(options.diskused_depth)])
|
||||||
if start_path not in entries:
|
if start_path not in entries:
|
||||||
entries.append(start_path)
|
entries.append(start_path)
|
||||||
@@ -275,9 +405,11 @@ def disk_used(options):
|
|||||||
else:
|
else:
|
||||||
sizes[entries.index(start_path)] += row[0]
|
sizes[entries.index(start_path)] += row[0]
|
||||||
for entry in zip(sizes, entries):
|
for entry in zip(sizes, entries):
|
||||||
print("| ".join([ str(entry[0]).ljust(14),
|
print(
|
||||||
humanize_size(entry[0]).rjust(8),
|
"| ".join(
|
||||||
entry[1]]))
|
[str(entry[0]).ljust(14), humanize_size(entry[0]).rjust(8), entry[1]]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def filename_join(path, name, options):
|
def filename_join(path, name, options):
|
||||||
@@ -292,7 +424,9 @@ def find_duplicates(sqlfile, order):
|
|||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
dbh = conn.cursor()
|
dbh = conn.cursor()
|
||||||
db.execute("SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 ")
|
db.execute(
|
||||||
|
"SELECT hash,count(*) FROM list WHERE size > 0 GROUP BY hash HAVING count(*) > 1 "
|
||||||
|
)
|
||||||
duphash = []
|
duphash = []
|
||||||
for row in db:
|
for row in db:
|
||||||
hash = row[0]
|
hash = row[0]
|
||||||
@@ -305,36 +439,38 @@ def find_duplicates(sqlfile, order):
|
|||||||
duphash.sort(key=lambda file: file[1][0])
|
duphash.sort(key=lambda file: file[1][0])
|
||||||
return duphash
|
return duphash
|
||||||
|
|
||||||
|
|
||||||
def ftime_match(db, filename, ftime):
    """Tell whether the stored modification time of a file equals ``ftime``.

    Args:
        db: Database cursor on a database containing the ``list`` table.
        filename: Value of the ``file`` column to look up.
        ftime: Modification time to compare against the stored ``date``.

    Returns:
        True when a row for ``filename`` exists and its ``date`` equals
        ``ftime``; False otherwise, including when the file is not listed
        (previously that case raised IndexError).
    """
    db.execute("SELECT date FROM list where file == ?", (filename,))
    row = db.fetchone()
    if row is None:
        # No entry for this file: there is nothing to match against.
        return False
    return row[0] == ftime
|
||||||
|
|
||||||
|
|
||||||
def get_folder_contents(db, path):
    """Return the names of the entries stored directly inside a folder.

    Args:
        db: Database cursor on a database containing the ``list`` table.
        path: Folder prefix as stored in the ``file`` column. The special
            value ``"./"`` selects entries whose name contains no ``/``.
            Any other value is used as a plain string prefix
            (presumably ending in ``/`` -- verify against the callers).

    Returns:
        List of names relative to ``path`` that contain no further ``/``,
        i.e. entries of the folder itself and not of its sub-folders.
    """
    if path == "./":
        db.execute("SELECT file FROM list where file NOT LIKE ?", ("%/%",))
        path = ""
    else:
        db.execute("SELECT file FROM list where file LIKE ?", (path + "%",))

    prefix_len = len(path)
    files = []
    for row in db:
        stored = row[0]
        # LIKE is case-insensitive for ASCII and treats "_" and "%" in the
        # path as wildcards, so confirm the exact prefix before stripping it.
        if not stored.startswith(path):
            continue
        base = stored[prefix_len:]
        if "/" not in base:
            files.append(base)
    return files
|
||||||
|
|
||||||
|
|
||||||
def get_md5(filename, fullfile=False):
|
def get_md5(filename, fullfile=False):
|
||||||
''' returns content based hash, only first 50Mb is read, unless user wants the whole file '''
|
"""returns content based hash, only first 50Mb is read, unless user wants the whole file"""
|
||||||
fsize = os.path.getsize(filename)
|
fsize = os.path.getsize(filename)
|
||||||
if fullfile and fsize > DEFAULT_CHUNK:
|
if fullfile and fsize > DEFAULT_CHUNK:
|
||||||
anim_i = 0
|
anim_i = 0
|
||||||
@@ -342,15 +478,18 @@ def get_md5(filename,fullfile=False):
|
|||||||
block_size = 2**24
|
block_size = 2**24
|
||||||
percents_per_block = int(100 / (float(fsize) / block_size))
|
percents_per_block = int(100 / (float(fsize) / block_size))
|
||||||
md5 = hashlib.md5()
|
md5 = hashlib.md5()
|
||||||
with open(filename,'rb') as f:
|
with open(filename, "rb") as f:
|
||||||
for chunk in iter(lambda: f.read(block_size), b''):
|
for chunk in iter(lambda: f.read(block_size), b""):
|
||||||
sys.stderr.write('\r %s (%02d%%)'%(ANIM[anim_i%anim_len],int(anim_i*percents_per_block)))
|
sys.stderr.write(
|
||||||
|
"\r %s (%02d%%)"
|
||||||
|
% (ANIM[anim_i % anim_len], int(anim_i * percents_per_block))
|
||||||
|
)
|
||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
anim_i += 1
|
anim_i += 1
|
||||||
md5.update(chunk)
|
md5.update(chunk)
|
||||||
sys.stderr.write('\r ')
|
sys.stderr.write("\r ")
|
||||||
return md5.hexdigest()
|
return md5.hexdigest()
|
||||||
return hashlib.md5(open(filename,'rb').read(DEFAULT_CHUNK)).hexdigest()
|
return hashlib.md5(open(filename, "rb").read(DEFAULT_CHUNK)).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def has_changes(options):
|
def has_changes(options):
|
||||||
@@ -364,13 +503,14 @@ def has_changes(options):
|
|||||||
if options.hasadditions or options.haschanges:
|
if options.hasadditions or options.haschanges:
|
||||||
has_changes_additions(db, options)
|
has_changes_additions(db, options)
|
||||||
|
|
||||||
|
|
||||||
def has_changes_deleted(db, exit=True):
|
def has_changes_deleted(db, exit=True):
|
||||||
db.execute('SELECT file FROM list')
|
db.execute("SELECT file FROM list")
|
||||||
deleted = []
|
deleted = []
|
||||||
for row in db:
|
for row in db:
|
||||||
if not os.path.exists(row[0]):
|
if not os.path.exists(row[0]):
|
||||||
if exit:
|
if exit:
|
||||||
print('True')
|
print("True")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
deleted.append(row[0])
|
deleted.append(row[0])
|
||||||
@@ -392,7 +532,7 @@ def has_changes_additions(db,options,exit=True):
|
|||||||
# if not is_listed(db,filename):
|
# if not is_listed(db,filename):
|
||||||
if file not in db_files:
|
if file not in db_files:
|
||||||
if exit:
|
if exit:
|
||||||
print('True')
|
print("True")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
added.append(filename)
|
added.append(filename)
|
||||||
@@ -402,28 +542,30 @@ def has_changes_additions(db,options,exit=True):
|
|||||||
if not ftime_match(db, filename, ftime):
|
if not ftime_match(db, filename, ftime):
|
||||||
# file content changed
|
# file content changed
|
||||||
if exit:
|
if exit:
|
||||||
print('True')
|
print("True")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
changed.append(filename)
|
changed.append(filename)
|
||||||
|
|
||||||
return (added, changed)
|
return (added, changed)
|
||||||
|
|
||||||
|
|
||||||
# ~ def hash_match(db,filename,hash):
|
# ~ def hash_match(db,filename,hash):
|
||||||
# ~ db.execute("SELECT hash FROM list where file == ?",(filename,))
|
# ~ db.execute("SELECT hash FROM list where file == ?",(filename,))
|
||||||
# ~ count=db.fetchall()
|
# ~ count=db.fetchall()
|
||||||
# ~ return count[0][0]==hash
|
# ~ return count[0][0]==hash
|
||||||
|
|
||||||
|
|
||||||
def humanize_date(date):
    """Format a timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Args:
        date: Seconds since the epoch (anything ``int()`` accepts), or None.

    Returns:
        The formatted string, or an empty string when ``date`` is None.
    """
    if date is None:
        return ""
    # Fractions of a second are dropped before conversion.
    return datetime.datetime.fromtimestamp(int(date)).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
|
||||||
def humanize_size(size, precision=1):
|
def humanize_size(size, precision=1):
|
||||||
if size == None:
|
if size == None:
|
||||||
return 'nan'
|
return "nan"
|
||||||
suffixes=['B','KB','MB','GB','TB']
|
suffixes = ["B", "KB", "MB", "GB", "TB"]
|
||||||
suffixIndex = 0
|
suffixIndex = 0
|
||||||
defPrecision = 0
|
defPrecision = 0
|
||||||
while size > 1024:
|
while size > 1024:
|
||||||
@@ -442,13 +584,16 @@ def is_listed(db,filename):
|
|||||||
def matchdb(sqlfile, needle, helper):
|
def matchdb(sqlfile, needle, helper):
|
||||||
needle = needle.lower()
|
needle = needle.lower()
|
||||||
import difflib as dl
|
import difflib as dl
|
||||||
|
|
||||||
conn = sqlite3.connect(sqlfile)
|
conn = sqlite3.connect(sqlfile)
|
||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
if len(helper) > 0:
|
if len(helper) > 0:
|
||||||
helper=['%'+i+'%' for i in helper]
|
helper = ["%" + i + "%" for i in helper]
|
||||||
like_query=' OR '.join(['file LIKE ?' for i in helper])
|
like_query = " OR ".join(["file LIKE ?" for i in helper])
|
||||||
db.execute("SELECT file FROM list WHERE "+like_query+" ORDER BY date DESC",helper)
|
db.execute(
|
||||||
|
"SELECT file FROM list WHERE " + like_query + " ORDER BY date DESC", helper
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
db.execute("SELECT file FROM list ORDER BY date DESC")
|
db.execute("SELECT file FROM list ORDER BY date DESC")
|
||||||
ratio = 0
|
ratio = 0
|
||||||
@@ -461,17 +606,21 @@ def matchdb(sqlfile,needle,helper):
|
|||||||
best_match = row[0]
|
best_match = row[0]
|
||||||
print(best_match)
|
print(best_match)
|
||||||
|
|
||||||
|
|
||||||
def print_duplicates(files):
    """Print every group of duplicate files, one file per line.

    Each line has the form ``index|size|date|path `` where ``index``
    restarts at 1 for every group.

    Args:
        files: Iterable of groups. Element ``[1]`` of each group is the
            list of files; each file is indexed as ``[0]`` path,
            ``[1]`` size and ``[2]`` date.
    """
    for group in files:
        # print(group[0])
        for i, f in enumerate(group[1], start=1):
            print(
                "%(i)d|%(s)s|%(d)s|%(f)s "
                % {
                    "i": i,
                    "f": f[0],
                    "d": humanize_date(f[2]),
                    "s": humanize_size(f[1]),
                }
            )
    return
|
||||||
|
|
||||||
@@ -483,8 +632,8 @@ def print_stderr(s):
|
|||||||
|
|
||||||
|
|
||||||
def searchdb(sqlfile, needle):
|
def searchdb(sqlfile, needle):
|
||||||
needle=['%'+i+'%' for i in needle]
|
needle = ["%" + i + "%" for i in needle]
|
||||||
like_query=' OR '.join(['file LIKE ?' for i in needle])
|
like_query = " OR ".join(["file LIKE ?" for i in needle])
|
||||||
conn = sqlite3.connect(sqlfile)
|
conn = sqlite3.connect(sqlfile)
|
||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
db = conn.cursor()
|
db = conn.cursor()
|
||||||
@@ -494,13 +643,13 @@ def searchdb(sqlfile,needle):
|
|||||||
|
|
||||||
|
|
||||||
def sort_by_method(flist, order):
    """Sort ``flist`` in place according to the requested method.

    Args:
        flist: List of file entries; ``[0]`` is the path and ``[2]`` the
            value used for the "age" ordering.
        order: One of "path", "file" (base name), "age" or "length"
            (length of the path string). Any other value leaves the
            list untouched.
    """
    strategies = (
        ("path", lambda entry: entry[0]),
        ("file", lambda entry: os.path.basename(entry[0])),
        ("age", lambda entry: entry[2]),
        ("length", lambda entry: len(entry[0])),
    )
    for label, key_func in strategies:
        if order == label:
            flist.sort(key=key_func)
|
||||||
|
|
||||||
|
|
||||||
@@ -512,22 +661,22 @@ def stored_options(options):
|
|||||||
db.execute("SELECT object FROM config")
|
db.execute("SELECT object FROM config")
|
||||||
store = ""
|
store = ""
|
||||||
for row in db:
|
for row in db:
|
||||||
store+=row[0]+'\n'
|
store += row[0] + "\n"
|
||||||
config = configparser.RawConfigParser()
|
config = configparser.RawConfigParser()
|
||||||
config.readfp(io.BytesIO(store))
|
config.read_file(io.StringIO(store))
|
||||||
options.relative = config.getboolean("General", "Relative")
|
options.relative = config.getboolean("General", "Relative")
|
||||||
options.fullfile = config.getboolean("General", "FullFile")
|
options.fullfile = config.getboolean("General", "FullFile")
|
||||||
except:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return options
|
return options
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
options=setup_options();
|
options = setup_options()
|
||||||
|
|
||||||
if not os.path.exists(options.sqlfile):
|
if not os.path.exists(options.sqlfile):
|
||||||
createdb(options);
|
createdb(options)
|
||||||
options = stored_options(options)
|
options = stored_options(options)
|
||||||
if options.relative:
|
if options.relative:
|
||||||
os.chdir(options.sqlpath)
|
os.chdir(options.sqlpath)
|
||||||
@@ -547,10 +696,10 @@ def main():
|
|||||||
disk_used(options)
|
disk_used(options)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
if options.delete:
|
if options.delete:
|
||||||
print('Deleting entries...')
|
print("Deleting entries...")
|
||||||
delete_nonexisting(options.sqlfile, options)
|
delete_nonexisting(options.sqlfile, options)
|
||||||
if options.add or options.changed:
|
if options.add or options.changed:
|
||||||
print('Adding '+options.startpath+' entries...')
|
print("Adding " + options.startpath + " entries...")
|
||||||
add_recurse(options)
|
add_recurse(options)
|
||||||
if options.duplicate:
|
if options.duplicate:
|
||||||
files = find_duplicates(options.sqlfile, options.duplicate_order)
|
files = find_duplicates(options.sqlfile, options.duplicate_order)
|
||||||
@@ -558,5 +707,5 @@ def main():
|
|||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
main()
|
|
||||||
|
|
||||||
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user