filelist with config, and relative path support
This commit is contained in:
@@ -7,6 +7,7 @@ import subprocess
|
||||
import hashlib
|
||||
import magic
|
||||
from argparse import ArgumentParser
|
||||
import ConfigParser,StringIO,io
|
||||
|
||||
SQLFILE='list_of_files.sqlite'
|
||||
IMGMATCH=re.compile('.*\.jpg$|.*\.jpeg$|.*\.png$',re.I)
|
||||
@@ -32,8 +33,6 @@ def setup_options():
|
||||
help="Depth of summarization for --du.")
|
||||
parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
|
||||
help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
|
||||
parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
|
||||
help="Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
|
||||
parser.add_argument("--haschanges",action="store_true",dest="haschanges",default=False,
|
||||
help="Do not change anything, return True and exit code 1 if DB needs update. Exit code 0 if all intact.")
|
||||
parser.add_argument("--hasdeletions",action="store_true",dest="hasdeletions",default=False,
|
||||
@@ -50,6 +49,10 @@ def setup_options():
|
||||
help="Search list based on path pattern")
|
||||
parser.add_argument("-x",action="append",dest="exclude",default=[],
|
||||
help="Exclude folder name from the lists. This option may be issued several times")
|
||||
parser.add_argument("--full",action="store_true",dest="fullfile",default=False,
|
||||
help="ONLY FOR NEW DB CREATION. Use full files to calculate md5 checksum. Defaults to first 50Mb. [%(default)s]")
|
||||
parser.add_argument("--relative",action="store_true",dest="relative",default=False,
|
||||
help="ONLY FOR NEW DB CREATION. Store filenames relative to database file.")
|
||||
parser.add_argument('startpath', action="store",default='.', nargs='?')
|
||||
|
||||
options=parser.parse_args()
|
||||
@@ -57,104 +60,9 @@ def setup_options():
|
||||
if options.duplicate:
|
||||
options.add=not options.add
|
||||
options.startpath=unicode(options.startpath, "UTF-8")
|
||||
options.sqlpath=os.path.dirname(os.path.realpath(options.sqlfile))
|
||||
return options
|
||||
|
||||
def createdb(fname):
    ''' Create the SQLite database file with an empty `list` table.

    fname: path of the database file, passed to sqlite3.connect().
    The table has the columns id, file, date, hash, size and mime.
    Raises sqlite3.OperationalError if the `list` table already exists.
    '''
    conn = sqlite3.connect(fname)
    try:
        conn.text_factory = str
        conn.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT, '
                     'file TEXT, date INTEGER, hash TEXT, '
                     'size INTEGER, mime TEXT)')
        conn.commit()
    finally:
        # Release the database handle even when table creation fails.
        conn.close()
    return
|
||||
|
||||
def delete_nonexisting(sqlfile, options):
    ''' Drop database rows whose file can no longer be found.

    sqlfile: path of the SQLite database holding the `list` table.
    options: object with a boolean `symlinks` attribute. When it is
             false, rows pointing at symbolic links are removed too.

    A row is removed when its path does not exist, or when it is a
    symlink and symlinks are not being followed. Each removed path is
    printed with a 'removing.. ' prefix.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        # Read all paths up front so that no row is deleted from under
        # a SELECT that is still being iterated on the same connection.
        stored = [row[0] for row in conn.execute('SELECT file FROM list')]
        for path in stored:
            stale = not os.path.exists(path)
            if not stale and not options.symlinks:
                stale = os.path.islink(path)
            if stale:
                print('removing.. ' + path)
                conn.execute("DELETE FROM list where file == ?", (path,))
        conn.commit()
    finally:
        conn.close()
    return
|
||||
|
||||
def disk_used(options):
    ''' Print summed file sizes below options.diskused, grouped by folder.

    options.sqlfile:        path of the SQLite database.
    options.diskused:       folder to summarize; resolved with realpath.
    options.diskused_depth: number of leading path components (relative
                            to the folder) that make up one group.

    One line is printed per group, in first-seen order: the byte count,
    the humanized size and the group path, separated by '| '.
    '''
    root = os.path.realpath(options.diskused)
    depth = int(options.diskused_depth)
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
                   (root + "/",
                    root + "%",
                    ))
        entries = []
        sizes = []
        # Maps a group path to its position in entries/sizes, so each
        # row is a dictionary lookup instead of a list scan.
        position = {}
        for size, relative in db:
            start_path = "/".join(relative.split('/')[0:depth])
            if start_path in position:
                sizes[position[start_path]] += size
            else:
                position[start_path] = len(entries)
                entries.append(start_path)
                sizes.append(size)
    finally:
        conn.close()
    for size, entry in zip(sizes, entries):
        print("| ".join([str(size).ljust(14),
                         humanize_size(size).rjust(8),
                         entry]))
|
||||
|
||||
def has_changes(options):
    ''' Run the change checks selected on the command line.

    Opens options.sqlfile and hands a cursor to the deletion and/or
    addition check. options.haschanges selects both checks and also
    sets options.changed to True.
    '''
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    cursor = conn.cursor()
    check_everything = options.haschanges
    if check_everything:
        options.changed = True
    if check_everything or options.hasdeletions:
        has_changes_deleted(cursor)
    if check_everything or options.hasadditions:
        has_changes_additions(cursor, options)
|
||||
|
||||
def has_changes_deleted(db):
    ''' Exit with status 1 if any listed file is missing from disk.

    db: cursor on a database that has the `list` table.
    Prints 'True' before exiting; returns None when every path exists.
    '''
    db.execute('SELECT file FROM list')
    # any() stops at the first missing path, like an early exit would.
    if any(not os.path.exists(entry[0]) for entry in db):
        print('True')
        sys.exit(1)
    return
|
||||
|
||||
def has_changes_additions(db, options):
    ''' Exit with status 1 if the tree holds files the database lacks.

    Walks options.startpath (following links when options.symlinks is
    set) and compares each folder's file names with the names stored
    for that folder. When options.changed is set, a stored file whose
    modification time differs from the database also counts as a change.
    Prints 'True' before exiting; returns None when nothing differs.
    '''
    walker = os.walk(options.startpath, followlinks=options.symlinks)
    for path, dirs, files in walker:
        dirs = clean_dirs(dirs)
        known = get_folder_contents(db, os.path.realpath(path) + '/')
        candidates = files if options.symlinks else clean_syms(files, path)
        for name in candidates:
            if name == options.sqlfile:
                # Skip the database file itself.
                continue
            if name not in known:
                print('True')
                sys.exit(1)
            if not options.changed:
                continue
            full_name = os.path.realpath(os.path.join(path, name))
            if not ftime_match(db, full_name, os.path.getmtime(full_name)):
                # Modification time differs from the stored one.
                print('True')
                sys.exit(1)
    return
|
||||
|
||||
def add_recurse(options):
|
||||
conn=sqlite3.connect(options.sqlfile)
|
||||
conn.text_factory=str
|
||||
@@ -163,11 +71,11 @@ def add_recurse(options):
|
||||
dirs=clean_dirs(dirs)
|
||||
dirs.sort()
|
||||
files.sort()
|
||||
db_files=get_folder_contents(db,os.path.realpath(path)+'/')
|
||||
db_files=get_folder_contents(db,filename_join(path,"",options)+"/")
|
||||
if not options.symlinks:
|
||||
files=clean_syms(files,path)
|
||||
for file in files:
|
||||
filename=os.path.realpath(os.path.join(path,file))
|
||||
filename=filename_join(path,file,options)
|
||||
if file==options.sqlfile:
|
||||
continue
|
||||
if not os.path.isfile(filename):
|
||||
@@ -211,61 +119,24 @@ def add_single(conn,filename,change=False,hash=None,minsize=0,fullfile=False):
|
||||
VALUES(?,?,?,?,?)",(filename,ftime,hash,fsize,mime))
|
||||
return
|
||||
|
||||
def is_listed(db, filename):
    ''' Return True when filename has at least one row in `list`. '''
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    matches = db.fetchone()[0]
    return matches > 0
|
||||
|
||||
def get_folder_contents(db,path):
|
||||
''' return the contents of the folder '''
|
||||
files=[]
|
||||
db.execute("SELECT file FROM list where file LIKE ?",(path+'%',))
|
||||
def checkdb(sqlfile,fullFile,needle):
|
||||
if len(needle)==0:
|
||||
needle.append('%')
|
||||
needle=['%'+i+'%' for i in needle]
|
||||
like_query=' OR '.join(['file LIKE ?' for i in needle])
|
||||
conn=sqlite3.connect(sqlfile)
|
||||
conn.text_factory=str
|
||||
db=conn.cursor()
|
||||
db.execute("SELECT file,hash FROM list WHERE "+like_query+" ORDER BY file",needle)
|
||||
for row in db:
|
||||
try:
|
||||
base=row[0].decode('utf-8').replace(path,'',1)
|
||||
except UnicodeDecodeError:
|
||||
print(row[0]+" is giving me trouble.")
|
||||
try:
|
||||
base=row[0].encode('utf-8').replace(path,'',1)
|
||||
except UnicodeDecodeError:
|
||||
print(row[0]+" is still giving me trouble.")
|
||||
sys.exit(1)
|
||||
if base.find('/')==-1:
|
||||
files.append(base)
|
||||
return files
|
||||
|
||||
def ftime_match(db, filename, ftime):
    ''' Return True when the stored date of filename equals ftime.

    db: cursor on a database that has the `list` table.
    A file without a row counts as a mismatch (False) instead of
    raising IndexError.
    '''
    db.execute("SELECT date FROM list where file == ?", (filename,))
    rows = db.fetchall()
    if not rows:
        return False
    return rows[0][0] == ftime
|
||||
|
||||
def hash_match(db, filename, hash):
    ''' Return True when the stored hash of filename equals hash.

    db: cursor on a database that has the `list` table.
    A file without a row counts as a mismatch (False) instead of
    raising IndexError.
    '''
    db.execute("SELECT hash FROM list where file == ?", (filename,))
    rows = db.fetchall()
    if not rows:
        return False
    return rows[0][0] == hash
|
||||
|
||||
def humanize_size(size, precision=1):
    ''' Format a byte count with a B/KB/MB/GB/TB suffix.

    size:      number of bytes, or None (rendered as 'nan').
    precision: decimals shown once the value has been scaled at least
               once; plain byte counts are shown without decimals.
    Values beyond the TB range stay in TB instead of running off the
    end of the suffix list.
    '''
    if size is None:
        return 'nan'
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
    suffixIndex = 0
    defPrecision = 0
    last = len(suffixes) - 1
    while size > 1024 and suffixIndex < last:
        suffixIndex += 1     # move to the next larger suffix
        size = size / 1024.0  # scale the value to that suffix
        defPrecision = precision
    return "%.*f%s" % (defPrecision, size, suffixes[suffixIndex])
|
||||
|
||||
def get_md5(filename, fullfile=False):
    ''' returns content based hash, only first 50Mb is read, unless user wants the whole file

    filename: path of the file to hash.
    fullfile: when true, the complete file is hashed.
    The file is read in 1 MiB blocks and always closed again.
    '''
    block_size = 2 ** 20
    # None means "no limit"; otherwise the number of bytes still to hash.
    remaining = None if fullfile else 50 * block_size
    md5 = hashlib.md5()
    with open(filename, 'rb') as f:
        while remaining is None or remaining > 0:
            wanted = block_size if remaining is None else min(block_size, remaining)
            chunk = f.read(wanted)
            if not chunk:
                break
            md5.update(chunk)
            if remaining is not None:
                remaining -= len(chunk)
    return md5.hexdigest()
|
||||
status='OK'
|
||||
if os.path.exists(row[0]):
|
||||
md5f=get_md5(row[0],fullFile)
|
||||
if row[1]!=md5f:
|
||||
status='Checksum-difference'
|
||||
else:
|
||||
status='Not-found'
|
||||
print("%s %s"%(row[0],status))
|
||||
|
||||
def clean_dirs(dirs):
|
||||
for s in dirs[:]:
|
||||
@@ -280,6 +151,79 @@ def clean_syms(files,path):
|
||||
nonsyms.append(f)
|
||||
return nonsyms
|
||||
|
||||
def createdb(options):
    ''' Create the database with its `list` and `config` tables.

    options.sqlfile:  path of the database file to create.
    options.relative: stored as "Relative" in the [General] section.
    options.fullfile: stored as "FullFile" in the [General] section.

    The settings are serialized with ConfigParser and saved as a single
    text row in the `config` table.
    '''
    config = ConfigParser.RawConfigParser()
    config.add_section("General")
    config.set("General", "Relative", str(options.relative))
    config.set("General", "FullFile", str(options.fullfile))
    store = StringIO.StringIO()
    config.write(store)

    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute('CREATE TABLE list (id INTEGER PRIMARY KEY AUTOINCREMENT, '
                   'file TEXT, date INTEGER, hash TEXT, '
                   'size INTEGER, mime TEXT)')
        db.execute('CREATE TABLE config (id INTEGER PRIMARY KEY AUTOINCREMENT, '
                   'object TEXT)')
        conn.commit()
        db.execute("INSERT INTO config (object) values (?)", (store.getvalue(),))
        conn.commit()
    finally:
        # Release the database handle even when a statement fails.
        conn.close()
    return
|
||||
|
||||
def delete_nonexisting(sqlfile, options):
    ''' Drop database rows whose file can no longer be found.

    sqlfile: path of the SQLite database holding the `list` table.
    options: object with a boolean `symlinks` attribute. When it is
             false, rows pointing at symbolic links are removed too.

    A row is removed when its path does not exist, or when it is a
    symlink and symlinks are not being followed. Each removed path is
    printed with a 'removing.. ' prefix.
    '''
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        # Read all paths up front so that no row is deleted from under
        # a SELECT that is still being iterated on the same connection.
        stored = [row[0] for row in conn.execute('SELECT file FROM list')]
        for path in stored:
            stale = not os.path.exists(path)
            if not stale and not options.symlinks:
                stale = os.path.islink(path)
            if stale:
                print('removing.. ' + path)
                conn.execute("DELETE FROM list where file == ?", (path,))
        conn.commit()
    finally:
        conn.close()
    return
|
||||
|
||||
def disk_used(options):
    ''' Print summed file sizes below options.diskused, grouped by folder.

    options.sqlfile:        path of the SQLite database.
    options.diskused:       folder to summarize; converted with
                            filename_join so it matches the stored paths.
    options.diskused_depth: number of leading path components (relative
                            to the folder) that make up one group.

    One line is printed per group, in first-seen order: the byte count,
    the humanized size and the group path, separated by '| '.
    '''
    checkpath = filename_join(options.diskused, "", options) + "/"
    if checkpath == "./":
        # The database folder itself: stored relative paths carry no
        # prefix, so match every row.
        checkpath = ""
    depth = int(options.diskused_depth)
    conn = sqlite3.connect(options.sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
                   (checkpath,
                    checkpath + "%",
                    ))
        entries = []
        sizes = []
        # Maps a group path to its position in entries/sizes, so each
        # row is a dictionary lookup instead of a list scan.
        position = {}
        for size, relative in db:
            start_path = "/".join(relative.split('/')[0:depth])
            if start_path in position:
                sizes[position[start_path]] += size
            else:
                position[start_path] = len(entries)
                entries.append(start_path)
                sizes.append(size)
    finally:
        conn.close()
    for size, entry in zip(sizes, entries):
        print("| ".join([str(size).ljust(14),
                         humanize_size(size).rjust(8),
                         entry]))
|
||||
|
||||
def filename_join(path, name, options):
    ''' Join path and name into the form used for stored file names.

    The joined path is resolved with os.path.realpath. When
    options.relative is set, it is then expressed relative to
    options.sqlpath; otherwise the resolved absolute path is returned.
    '''
    resolved = os.path.realpath(os.path.join(path, name))
    if not options.relative:
        return resolved
    return os.path.relpath(resolved, options.sqlpath)
|
||||
|
||||
def find_duplicates(sqlfile):
|
||||
conn=sqlite3.connect(sqlfile)
|
||||
conn.text_factory=str
|
||||
@@ -298,34 +242,110 @@ def find_duplicates(sqlfile):
|
||||
duphash.sort(key=lambda file: file[1][0])
|
||||
return duphash
|
||||
|
||||
def searchdb(sqlfile, needle):
    ''' Print every stored file name that contains one of the needles.

    sqlfile: path of the SQLite database holding the `list` table.
    needle:  list of substrings; a file is printed when it matches any
             of them (SQL LIKE, so % and _ act as wildcards).
    Matches are printed one per line, ordered by file name. An empty
    needle list prints nothing instead of building invalid SQL.
    '''
    if not needle:
        return
    patterns = ['%' + i + '%' for i in needle]
    like_query = ' OR '.join(['file LIKE ?' for i in patterns])
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT file FROM list WHERE " + like_query + " ORDER BY file", patterns)
        for row in db:
            print(row[0])
    finally:
        conn.close()
|
||||
def ftime_match(db, filename, ftime):
    ''' Return True when the stored date of filename equals ftime.

    db: cursor on a database that has the `list` table.
    A file without a row counts as a mismatch (False) instead of
    raising IndexError.
    '''
    db.execute("SELECT date FROM list where file == ?", (filename,))
    rows = db.fetchall()
    if not rows:
        return False
    return rows[0][0] == ftime
|
||||
|
||||
def checkdb(sqlfile,fullFile,needle):
|
||||
if len(needle)==0:
|
||||
needle.append('%')
|
||||
needle=['%'+i+'%' for i in needle]
|
||||
like_query=' OR '.join(['file LIKE ?' for i in needle])
|
||||
conn=sqlite3.connect(sqlfile)
|
||||
def get_folder_contents(db, path):
    ''' return the names of the files stored directly inside a folder

    db:   cursor on a database that has the `list` table.
    path: folder prefix ending in '/'. The value "./" selects stored
          names that contain no '/' at all.
    Names that still contain a '/' after the prefix is removed are
    left out. A stored name that cannot be converted is reported and
    the program exits with status 1.
    '''
    if path == "./":
        db.execute("SELECT file FROM list where file NOT LIKE ?", ('%/%',))
        path = ""
    else:
        db.execute("SELECT file FROM list where file LIKE ?", (path + '%',))

    def strip_folder(stored):
        # Remove the first occurrence of the folder prefix from a name.
        try:
            return stored.decode('utf-8').replace(path, '', 1)
        except UnicodeDecodeError:
            print(stored + " is giving me trouble.")
        try:
            return stored.encode('utf-8').replace(path, '', 1)
        except UnicodeDecodeError:
            print(stored + " is still giving me trouble.")
            sys.exit(1)

    files = []
    for row in db:
        base = strip_folder(row[0])
        if '/' not in base:
            files.append(base)
    return files
|
||||
|
||||
|
||||
def get_md5(filename, fullfile=False):
    ''' returns content based hash, only first 50Mb is read, unless user wants the whole file

    filename: path of the file to hash.
    fullfile: when true, the complete file is hashed.
    The file is read in 1 MiB blocks and always closed again.
    '''
    block_size = 2 ** 20
    # None means "no limit"; otherwise the number of bytes still to hash.
    remaining = None if fullfile else 50 * block_size
    md5 = hashlib.md5()
    with open(filename, 'rb') as f:
        while remaining is None or remaining > 0:
            wanted = block_size if remaining is None else min(block_size, remaining)
            chunk = f.read(wanted)
            if not chunk:
                break
            md5.update(chunk)
            if remaining is not None:
                remaining -= len(chunk)
    return md5.hexdigest()
|
||||
|
||||
|
||||
def has_changes(options):
    ''' Run the change checks selected on the command line.

    Opens options.sqlfile and hands a cursor to the deletion and/or
    addition check. options.haschanges selects both checks and also
    sets options.changed to True.
    '''
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    db = conn.cursor()
    if options.haschanges:
        options.changed = True
    if options.hasdeletions or options.haschanges:
        has_changes_deleted(db)
    if options.hasadditions or options.haschanges:
        has_changes_additions(db, options)
|
||||
|
||||
def has_changes_deleted(db):
    ''' Exit with status 1 if any listed file is missing from disk.

    db: cursor on a database that has the `list` table.
    Prints 'True' before exiting; returns None when every path exists.
    '''
    db.execute('SELECT file FROM list')
    for row in db:
        if not os.path.exists(row[0]):
            print('True')
            sys.exit(1)
    return
|
||||
|
||||
def has_changes_additions(db, options):
    ''' Exit with status 1 if the tree holds files the database lacks.

    Walks options.startpath (following links when options.symlinks is
    set) and compares each folder's file names with the names stored
    for that folder. Paths are built with filename_join so they match
    the stored form. When options.changed is set, a stored file whose
    modification time differs from the database also counts as a change.
    Prints 'True' before exiting; returns None when nothing differs.
    '''
    for path, dirs, files in os.walk(options.startpath, followlinks=options.symlinks):
        dirs = clean_dirs(dirs)
        db_files = get_folder_contents(db, filename_join(path, "", options) + "/")
        if not options.symlinks:
            files = clean_syms(files, path)
        for file in files:
            if file == options.sqlfile:
                # Skip the database file itself.
                continue
            if file not in db_files:
                print('True')
                sys.exit(1)
            if options.changed:
                filename = filename_join(path, file, options)
                ftime = os.path.getmtime(filename)
                if not ftime_match(db, filename, ftime):
                    # Modification time differs from the stored one.
                    print('True')
                    sys.exit(1)
    return
|
||||
|
||||
#~ def hash_match(db,filename,hash):
|
||||
#~ db.execute("SELECT hash FROM list where file == ?",(filename,))
|
||||
#~ count=db.fetchall()
|
||||
#~ return count[0][0]==hash
|
||||
|
||||
def humanize_size(size, precision=1):
    ''' Format a byte count with a B/KB/MB/GB/TB suffix.

    size:      number of bytes, or None (rendered as 'nan').
    precision: decimals shown once the value has been scaled at least
               once; plain byte counts are shown without decimals.
    Values beyond the TB range stay in TB instead of running off the
    end of the suffix list.
    '''
    if size is None:
        return 'nan'
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
    suffixIndex = 0
    defPrecision = 0
    last = len(suffixes) - 1
    while size > 1024 and suffixIndex < last:
        suffixIndex += 1     # move to the next larger suffix
        size = size / 1024.0  # scale the value to that suffix
        defPrecision = precision
    return "%.*f%s" % (defPrecision, size, suffixes[suffixIndex])
|
||||
|
||||
def is_listed(db, filename):
    ''' Return True when filename has at least one row in `list`. '''
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    matches = db.fetchone()[0]
    return matches > 0
|
||||
|
||||
def matchdb(sqlfile,needle,helper):
|
||||
needle=needle.lower()
|
||||
@@ -358,11 +378,42 @@ def print_structure(files):
|
||||
i+=1
|
||||
return
|
||||
|
||||
def searchdb(sqlfile, needle):
    ''' Print every stored file name that contains one of the needles.

    sqlfile: path of the SQLite database holding the `list` table.
    needle:  list of substrings; a file is printed when it matches any
             of them (SQL LIKE, so % and _ act as wildcards).
    Matches are printed one per line, ordered by file name. An empty
    needle list prints nothing instead of building invalid SQL.
    '''
    if not needle:
        return
    patterns = ['%' + i + '%' for i in needle]
    like_query = ' OR '.join(['file LIKE ?' for i in patterns])
    conn = sqlite3.connect(sqlfile)
    try:
        conn.text_factory = str
        db = conn.cursor()
        db.execute("SELECT file FROM list WHERE " + like_query + " ORDER BY file", patterns)
        for row in db:
            print(row[0])
    finally:
        conn.close()
|
||||
|
||||
def stored_options(options):
    ''' Override options with the settings saved in the database.

    Reads the text rows of the `config` table in options.sqlfile,
    parses them with ConfigParser and copies the boolean "Relative" and
    "FullFile" values of the [General] section onto options.relative
    and options.fullfile.

    This is best effort: when the database cannot be read or the stored
    configuration is missing or malformed, options is returned
    unchanged. Both values are parsed before either is applied, so a
    half-readable configuration never changes only one of them.
    '''
    try:
        conn = sqlite3.connect(options.sqlfile)
        try:
            conn.text_factory = str
            rows = conn.execute("SELECT object FROM config").fetchall()
        finally:
            conn.close()
        store = "".join([row[0] + '\n' for row in rows if row[0] is not None])
        config = ConfigParser.RawConfigParser()
        config.readfp(io.BytesIO(store))
        relative = config.getboolean("General", "Relative")
        fullfile = config.getboolean("General", "FullFile")
    except (sqlite3.Error, ConfigParser.Error, ValueError):
        # No usable stored configuration: keep the command line values.
        return options
    options.relative = relative
    options.fullfile = fullfile
    return options
|
||||
|
||||
def main():
|
||||
options=setup_options();
|
||||
|
||||
if not os.path.exists(options.sqlfile):
|
||||
createdb(options.sqlfile);
|
||||
createdb(options);
|
||||
options=stored_options(options)
|
||||
if options.relative:
|
||||
os.chdir(options.sqlpath)
|
||||
if options.haschanges or options.hasadditions or options.hasdeletions:
|
||||
has_changes(options)
|
||||
sys.exit(0)
|
||||
|
||||
Reference in New Issue
Block a user