927 lines
29 KiB
Python
927 lines
29 KiB
Python
import os
|
|
import re
|
|
import sys
|
|
import traceback
|
|
from argparse import ArgumentParser
|
|
from datetime import datetime
|
|
|
|
import tabulate
|
|
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
|
|
from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension
|
|
from tqdm import tqdm
|
|
|
|
# Version string; interpolated into the argument parser description.
__version__ = "0.0.8"
# Default database file name for the -f option.
SQLFILE = "image-list.sqlite"
# Directory names skipped by clean_dirs(); setup_options() extends this list
# with the values given through -x.
BADDIRS = ["_tn", "_med", ".tn", ".med"]
# NOTE(review): not referenced anywhere in the visible source.
MINSIZE = 0
|
|
|
|
|
|
class ImageList:
|
|
def __init__(self, opts):
    """Keep the parsed options and open the database they point at.

    Args:
        opts: Parsed command-line options; ``opts.sqlfile`` is read here.
    """
    sql_path = opts.sqlfile
    self.options = opts
    self.db = DB(sql_path)
    self.db_writer = DBCachedWriter(self.db)
    # Directory holding the database file, with symlinks resolved.
    self.root_path = os.path.dirname(os.path.realpath(sql_path))
    # Column titles shared by all similarity reports.
    self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H")
|
|
|
|
def recursive_add(self):
    """Walk the start path and add new (or update changed) image files.

    The tree is walked twice: a first pass only counts directories so the
    progress bar has a total, the second pass compares every image file
    with the database rows of its folder. New files are inserted unless
    ``options.no_add`` is set; files already listed are re-checked only when
    ``options.changed`` is set.
    """
    start = os.path.realpath(self.options.startpath)
    follow = self.options.symlinks

    dir_count = 0
    for _path, dirs, _files in os.walk(start, followlinks=follow):
        clean_dirs(dirs)
        dir_count += 1

    image_count = 0
    # The context manager closes the bar even if a file raises mid-walk.
    with tqdm(
        total=dir_count,
        desc="Directories",
        position=0,
        delay=1,
        leave=False,
    ) as progress:
        for path, dirs, files in os.walk(start, followlinks=follow):
            progress.update()
            progress.write(self.db.file2relative(path))
            clean_dirs(dirs)
            files = [os.path.join(path, f) for f in files]
            if not follow:
                # Filter BEFORE realpath(): a resolved path is never a link,
                # so filtering afterwards would keep every symlinked file.
                files = clean_syms(files)
            files = sorted(os.path.realpath(f) for f in files)
            dirs.sort()
            db_files = self.db.get_folder_contents(path + "/")
            for file in tqdm(files, desc="Files", delay=1, position=1, leave=False):
                if not is_image_extension(file):
                    continue
                image = ImageMeasure(file)
                if file in db_files:
                    if self.options.changed:
                        # The hash check only runs when the time check passes.
                        has_changed = self.db.is_time_mismatch(image) or self.db.is_hash_mismatch(image)
                        if has_changed:
                            image_count += 1
                            self.add_single(image, change=True)
                elif not self.options.no_add:
                    image_count += 1
                    self.add_single(image, change=False)

    self.db_writer.commit()
    if image_count > 0:
        print(f"Added/changed {image_count} images")
|
|
|
|
def add_single(self, image, change=False):
    """Queue one image row for insertion or update through the cached writer.

    Args:
        image: Object providing ``get_hash()``, ``get_time()``,
            ``get_size()`` and a ``filename`` attribute.
        change: When true the existing ``list`` row for the file is updated,
            otherwise a new row is inserted.

    If the write fails, the message and traceback are printed to stdout and
    the process exits with status 1.
    """
    if change:
        query = "UPDATE list SET hash=?, date=? ,size=? WHERE file=?"
        error_msg = f"error changing image: {image.filename}"
    else:
        query = "INSERT INTO list(hash,date,size,file) VALUES (?,?,?,?)"
        error_msg = f"error adding image: {image.filename}"

    try:
        self.db_writer.execute(
            query,
            (
                image.get_hash(),
                image.get_time(),
                image.get_size(),
                image.filename,
            ),
        )
    except Exception:
        # Exception (not a bare except) lets Ctrl-C and SystemExit propagate.
        print(error_msg)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
|
|
|
|
def base_add(self):
    """Create a ``data`` row for every listed hash that does not have one.

    Each hash is handled once, using the first file that carries it. Broken
    images get a row with only ``broken`` set; readable images also store
    portrait flag, width, height and description.
    """
    missing_base = (
        self.db.cursor()
        .execute(
            """
            SELECT list.hash, list.file
            FROM list
            LEFT JOIN data ON data.hash = list.hash
            WHERE data.hash IS NULL AND data.broken IS NULL
            """
        )
        .fetchall()
    )
    if not missing_base:
        return

    seen_hash = set()
    for file_hash, filename in tqdm(missing_base, desc="Base info", delay=1):
        if file_hash in seen_hash:
            continue
        seen_hash.add(file_hash)
        if filename is None:
            continue
        image = ImageMeasure(filename)
        if image.is_broken():
            self.db_writer.execute(
                """INSERT INTO data(hash,broken)
                VALUES(?,?)""",
                (file_hash, True),
            )
        else:
            self.db_writer.execute(
                """INSERT INTO data(hash,portrait,width,height,description,broken)
                VALUES(?,?,?,?,?,?)""",
                (
                    file_hash,
                    image.get_portrait(),
                    image.get_width(),
                    image.get_height(),
                    image.get_description(),
                    False,
                ),
            )
    self.db_writer.commit()
|
|
|
|
def delete_missing(self):
    """Delete ``list`` rows whose file no longer exists on disk."""
    listed = self.db.cursor().execute("SELECT file FROM list").fetchall()
    to_delete = [
        record[0]
        for record in tqdm(listed, delay=1, desc="Clean files")
        if not os.path.exists(record[0])
    ]

    cursor = self.db.cursor()
    for gone in tqdm(to_delete, desc="Cleaning", delay=1):
        cursor.execute("DELETE FROM list where file == ?", (gone,))
    self.db.conn.commit()
    if to_delete:
        print(f"Cleaned {len(to_delete)} images")
|
|
|
|
def clean_data(self):
    """Remove ``data`` and ``tags`` rows that no listed file refers to.

    ``data`` rows are also removed when their ``broken`` column is NULL.
    A summary line is printed for each table that had rows removed.
    """
    # NOTE(review): source indentation was lost; commit and print are taken
    # to sit inside the "anything to delete" branch - confirm.
    stale_data = [
        record[0]
        for record in self.db.cursor().execute(
            """SELECT data.hash FROM data LEFT JOIN list ON list.hash = data.hash WHERE list.hash IS NULL OR data.broken IS NULL"""
        )
    ]
    if stale_data:
        self.db.cursor().executemany(
            "DELETE FROM data where hash = ?",
            [(file_hash,) for file_hash in stale_data],
        )
        self.db.conn.commit()
        print(f"Cleaned {len(stale_data)} metadata")

    stale_tags = [
        record[0]
        for record in self.db.cursor().execute(
            """SELECT tags.hash FROM tags LEFT JOIN list ON list.hash = tags.hash WHERE list.hash IS NULL"""
        )
    ]
    if stale_tags:
        self.db.cursor().executemany(
            "DELETE FROM tags where hash = ?",
            [(file_hash,) for file_hash in stale_tags],
        )
        self.db.conn.commit()
        print(f"Cleaned {len(stale_tags)} tags")
|
|
|
|
def measure(self):
    """Fill in missing perceptual hash, sharpness and colour values.

    Selects ``data`` rows that lack ``p_hash``, ``sharpness`` or ``R`` and
    are not flagged broken, measures the first listed file of each hash and
    writes the results back. Images that turn out to be broken only get
    their ``broken`` column updated.
    """
    missing_measurements = (
        self.db.cursor()
        .execute(
            """
            SELECT
                list.file,
                data.hash,
                data.p_hash,
                data.sharpness,
                data.R,
                data.G,
                data.B,
                data.BR,
                data.BG,
                data.BB
            FROM data
            LEFT JOIN list ON data.hash = list.hash
            WHERE
            (
                data.p_hash IS NULL
                OR data.sharpness IS NULL
                OR data.R IS NULL
            )
            AND
            (
                data.broken IS FALSE OR data.broken IS NULL
            )
            """
        )
        .fetchall()
    )
    if not missing_measurements:
        return

    duplicates = set()
    for row in tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01):
        filename = row[0]
        if filename is None:
            continue
        if row[1] in duplicates:
            continue
        duplicates.add(row[1])

        # Reset per row so the handler below never sees the previous image.
        image = None
        try:
            image = ImageMeasure(filename)
            # Seed the object with the stored values (hash first).
            (
                image.hash,
                image.p_hash,
                image.sharpness,
                image.colors["R"],
                image.colors["G"],
                image.colors["B"],
                image.colors["BR"],
                image.colors["BG"],
                image.colors["BB"],
            ) = row[1:]
            # Calculate if required
            image.get_p_hash()
            image.sharpness = image.get_sharpness()
            image.colors.update(image.get_colors())
            if image.broken:
                print("image broke")
                raise ImageBrokenError()
        except ImageBrokenError:
            # If the constructor itself raised there is no image object;
            # fall back to the row's hash and mark the entry broken.
            self.db_writer.execute(
                """UPDATE data SET broken = ?
                WHERE hash = ?
                """,
                (
                    True if image is None else image.broken,
                    row[1],
                ),
            )
            continue

        self.db_writer.execute(
            """UPDATE data SET
                p_hash = ?,
                sharpness = ?,
                R = ?,
                G = ?,
                B = ?,
                BR = ?,
                BG = ?,
                BB = ?,
                broken = ?
            WHERE hash = ?
            """,
            (
                image.p_hash,
                image.sharpness,
                image.colors["R"],
                image.colors["G"],
                image.colors["B"],
                image.colors["BR"],
                image.colors["BG"],
                image.colors["BB"],
                image.broken,
                image.hash,
            ),
        )

    self.db_writer.commit()
|
|
|
|
def disk_used(self):
    """Print the summed file sizes below ``options.path``, grouped by folder.

    Paths are cut to ``options.diskused_depth`` components relative to the
    search path (unlimited when the option is ``None``); sizes of all files
    sharing the same cut path are added together.
    """
    if self.options.diskused_depth is None:
        self.options.diskused_depth = 9999999999
    depth = int(self.options.diskused_depth)

    searchpath = os.path.realpath(self.options.path)
    prefix = searchpath if searchpath.endswith("/") else searchpath + "/"
    # LIKE is only a coarse pre-filter (it is case-insensitive and treats
    # "_" and "%" as wildcards); the exact prefix test is done below.
    result = self.db.cursor().execute(
        "SELECT size, file FROM list WHERE file LIKE ?",
        (prefix + "%",),
    )

    totals = {}  # cut path -> summed size, in first-seen order
    for size, file in result:
        if not file.startswith(prefix):
            continue
        # Strip only the leading prefix, not later occurrences of it.
        relative = file[len(prefix):]
        start_path = "/".join(relative.split("/")[0:depth])
        if len(start_path) != len(relative):
            # The path was cut, so the entry stands for a directory.
            start_path += "/"
        totals[start_path] = totals.get(start_path, 0) + size

    table = Tabulate(("Size[b]", "Size", "Path"))
    for entry, size in totals.items():
        table.append((size, humanize_size(size), entry))
    table.print()
|
|
|
|
def broken(self):
    """Print a ``#File`` heading followed by every file flagged as broken."""
    query = """
        SELECT
            file FROM files
        WHERE broken IS TRUE
        """
    # Run the query before printing so a failing query prints no heading.
    hits = self.db.cursor().execute(query)
    print("#File")
    for (path,) in hits:
        print(path)
|
|
|
|
def db_print(self):
    """Print every row of ``files`` as a table headed by its column names."""
    query = """
        SELECT * FROM files
        """
    listing = self.db.cursor().execute(query)
    column_names = [column[0] for column in listing.description]
    table = Tabulate(column_names)
    for record in listing:
        table.append(record)
    table.print()
|
|
|
|
def duplicates(self):
    """Print files whose content hash occurs more than once in ``list``.

    Within each hash group the row numbered 0 is marked ``==`` and the
    others ``>N``.
    """
    query = """
        WITH
        duplicates AS (SELECT hash FROM list GROUP BY hash HAVING count(hash) > 1),
        f AS (SELECT
            list.hash,list.file FROM list
            LEFT JOIN duplicates ON (list.hash = duplicates.hash)
            WHERE duplicates.hash IS NOT NULL
            ORDER BY file
        )
        SELECT
            CAST((row_number() OVER (PARTITION BY f.hash))-1 AS TEXT) AS row,
            RELATIVE(file)
        FROM f
        """
    table = Tabulate(["#", "File"])
    for index, path in self.db.cursor().execute(query):
        if index == "0":
            marker = "=="
        else:
            marker = f">{index}"
        table.append([marker, path])
    table.print()
|
|
|
|
def nearestcolor(self):
    """Find closest matching images to given RGB color"""
    request = self.options.nearestcolor

    try:
        src = [int(part) for part in request.strip().strip('"').split(",")]
    except ValueError:
        # Not a list of integers: read the value as "file[,hits]".
        fields = request.strip().split(",")
        limit = 1 if len(fields) == 1 else int(fields[1])
        colors = ImageMeasure(fields[0]).get_colors()
        f = os.path.realpath(fields[0])
        src = (colors["R"], colors["G"], colors["B"], limit)
    else:
        # Plain "R,G,B": default to a single hit.
        if len(src) == 3:
            src.append(1)
        f = ""

    self.db.conn.create_function("SQUARE", 1, sqlite_square)
    self.db.conn.create_function("SQRT", 1, sqlite_sqrt)
    query = """
        WITH distances AS (
            SELECT
                hash,
                ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,
                BR,BG,BB
            FROM data
            WHERE BR IS NOT NULL
            ORDER BY distance
            LIMIT ?
        )
        SELECT
            RELATIVE(list.file),
            distances.distance,
            distances.BR,
            distances.BG,
            distances.BB
        FROM list
        LEFT JOIN
            distances ON (distances.hash = list.hash)
        WHERE distances.hash IS NOT NULL AND list.file != ?
        ORDER BY distances.distance
        """
    result = self.db.cursor().execute(query, (src[0], src[1], src[2], src[3], f))

    table = Tabulate(("Path", "Dist", "BR", "BG", "BB"))
    for path, distance, red, green, blue in result:
        table.append((path, str(distance), str(int(red)), str(int(green)), str(int(blue))))
    table.print()
|
|
|
|
def similarity(self):
    """Print similarity reports selected by the ``search`` options.

    ``options.similarity`` is either a threshold (compare all stored images
    with each other) or ``file[,threshold]`` (compare stored images with the
    given file); the threshold defaults to 20. ``options.visual_duplicate``
    additionally prints groups of images sharing the same perceptual hash.
    """

    def print_visually_similar(file, thr):
        """Print stored images within ``thr`` perceptual distance of ``file``."""
        cmp_image = ImageMeasure(file)
        cmp_image.set_all()
        compare_list = self.db.cursor().execute(
            """SELECT
                RELATIVE(file),width,height,sharpness,
                PDISTANCE(p_hash, ?) AS p_dist,
                COLORDIFF(R,G,B,?,?,?) AS c_diff,
                SHAPEDIFF(width,height,?,?) AS s_diff
            FROM files
            WHERE p_hash IS NOT NULL AND
                sharpness > 0 AND
                hash != ? AND
                p_dist <= ?
            ORDER BY p_dist, file""",
            (
                cmp_image.p_hash,
                cmp_image.colors["R"],
                cmp_image.colors["G"],
                cmp_image.colors["B"],
                cmp_image.width,
                cmp_image.height,
                cmp_image.hash,
                thr,
            ),
        )
        table = Tabulate(self.similarity_header)
        # First row is the reference image itself, with zero differences.
        table.append(
            (
                "==",
                self.db.file2relative(cmp_image.filename),
                0,
                0,
                0,
                cmp_image.sharpness,
                cmp_image.width,
                cmp_image.height,
            )
        )
        for counter, row in enumerate(compare_list):
            f2, w2, h2, s2, pdist, cdiff, sdiff = row
            table.append((f">{counter+1}", f2, pdist, cdiff, sdiff, s2, w2, h2))

        table.print()

    def get_visual_duplicates():
        """Return rows of files whose perceptual hash occurs more than once."""
        return self.db.cursor().execute(
            """
            WITH
            duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1)
            SELECT
                RELATIVE(files.file) AS file,
                files.width,
                files.height,
                files.sharpness,
                files.R,
                files.G,
                files.B,
                files.p_hash
            FROM files
            WHERE p_hash IN ( SELECT p_hash FROM duplicates )
            ORDER BY p_hash, files.size DESC
            """
        )

    def print_visual_duplicates():
        """Print groups of files with identical perceptual hashes."""
        fblock = None
        counter = 0
        table = Tabulate(self.similarity_header)
        for row in get_visual_duplicates():
            f, w, h, s, r, g, b, p_hash = row
            if fblock != p_hash:
                # A new hash starts a group; its first row is the reference.
                fblock = p_hash
                counter = 0
                table.append(("==", f, 0, 0, 0, s, w, h))
                image1 = ImageMeasure(f)
                image1.width = w
                image1.height = h
                image1.sharpness = s
                image1.colors.update({"B": b, "G": g, "R": r})
                continue
            counter += 1
            image2 = ImageMeasure(f)
            image2.width = w
            image2.height = h
            image2.sharpness = s
            image2.colors.update({"B": b, "G": g, "R": r})
            cdiff = image1.color_difference(image2)
            sdiff = image1.shape_difference(image2)
            table.append((f">{counter}", f, 0, cdiff, sdiff, s, w, h))
        table.print()

    def print_self_similarity(thr):
        """Print pairs of stored images within ``thr`` perceptual distance."""
        fingerprint_list = self.db.cursor().execute(
            """
            WITH disttab AS (
                WITH
                t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ),
                t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL )
                SELECT
                    RELATIVE(t1.file) AS file1,
                    t1.width AS width1,
                    t1.height AS height1,
                    t1.sharpness AS sharpness1,
                    RELATIVE(t2.file) AS file2,
                    t2.width AS width2,
                    t2.height AS height2,
                    t2.sharpness AS sharpness2,
                    PDISTANCE(t1.p_hash,t2.p_hash) AS p_distance,
                    t1.R AS t1r, t1.G AS t1g, t1.B AS t1b,
                    t2.R AS t2r, t2.G AS t2g, t2.B AS t2b
                FROM t1 INNER JOIN t2
                    ON t1.file < t2.file
                WHERE p_distance <= ?
                ORDER BY t1.file, p_distance, t2.file
            )
            SELECT
                file1,width1,height1,sharpness1,
                file2,width2,height2,sharpness2,
                p_distance,
                ROUND(SQRT((t1r-t2r)*(t1r-t2r)+(t1g-t2g)*(t1g-t2g)+(t1b-t2b)*(t1b-t2b)),1) AS c_diff,
                ROUND(ABS((CAST(width1 AS FLOAT) / CAST(height1 AS FLOAT)) - (CAST(width2 AS FLOAT) / CAST(height2 AS FLOAT))), 4) AS s_diff
            FROM disttab
            """,
            (thr,),
        )
        f1block = None
        counter = 0
        table = Tabulate(self.similarity_header)
        for row in fingerprint_list:
            f1, w1, h1, s1, f2, w2, h2, s2, pdist, cdiff, sdiff = row
            if f1block != f1:
                f1block = f1
                counter = 0
                table.append(("==", f1, 0, 0, 0, s1, w1, h1))
            counter += 1
            table.append((f">{counter}", f2, pdist, cdiff, sdiff, s2, w2, h2))
        table.print()

    if self.options.similarity:
        thr = 20
        file = None
        try:
            thr = int(self.options.similarity)
        except ValueError:
            # Not a bare number: the value is "file" or "file,threshold".
            parts = self.options.similarity.split(",")
            file = parts[0]
            if len(parts) > 1:
                thr = int(parts[1])

        if file is None:
            # Similarity inside the dataset
            print_self_similarity(thr)
        else:
            # Read single image, and find similarity to that
            print_visually_similar(file, thr)

    if self.options.visual_duplicate:
        print_visual_duplicates()
|
|
|
|
def tag_manage(self):
    """Add and delete tags for ``options.file``, then print its tags.

    Tags are stored against the file's hash. The resulting tag list is
    printed as one comma-separated line.

    Raises:
        Exception: If the file is not in the database.
    """
    fname = os.path.realpath(self.options.file)
    file_hash = self.db.file2hash(fname)
    if file_hash is None:
        raise Exception(f"{fname} not in database")

    new_tags = self.options.add_tag
    old_tags = self.options.delete_tag
    for tag in new_tags:
        self.db.cursor().execute(
            "INSERT INTO tags(hash,tag) VALUES (?,?)",
            (file_hash, tag),
        )
    for tag in old_tags:
        self.db.cursor().execute(
            "DELETE FROM tags WHERE hash = ? AND tag = ?",
            (file_hash, tag),
        )
    # Commit only when something was actually changed.
    if len(new_tags) + len(old_tags) > 0:
        self.db.conn.commit()

    current = self.db.cursor().execute(
        """
        SELECT
            tags.tag
        FROM tags
        WHERE tags.hash = ?
        """,
        (file_hash,),
    )
    print(",".join([record[0] for record in current]))
|
|
|
|
|
|
class Tabulate:
    """Collect rows under a header and print them as a pipe-separated table."""

    def __init__(self, header):
        """Start an empty table with the given column titles."""
        self.rows = []
        self.header = header

    def append(self, row):
        """Add one row to the end of the table."""
        self.rows.append(row)

    def print(self):
        """Print the header and all rows through ``tabulate``.

        The "github" table format is replaced with one that has no rule
        lines and no padding, only ``|`` separators.
        """
        pipe_row = tabulate.DataRow("|", "|", "|")
        bare_format = tabulate.TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=pipe_row,
            datarow=pipe_row,
            padding=0,
            with_header_hide=["lineabove"],
        )
        tabulate._table_formats["github"] = bare_format

        rendered = tabulate.tabulate(self.rows, headers=self.header, tablefmt="github", showindex=False)
        print(rendered)
|
|
|
|
|
|
def clean_dirs(dirs):
    """Remove in place, because os.walk uses the same variable"""
    # Slice assignment keeps the same list object that os.walk holds.
    dirs[:] = [
        name
        for name in dirs
        if not ((name in BADDIRS) or name.startswith("."))
    ]
|
|
|
|
|
|
def clean_syms(files):
    """Return the given paths without those that are symbolic links."""
    kept = []
    for candidate in files:
        if os.path.islink(candidate):
            continue
        kept.append(candidate)
    return kept
|
|
|
|
|
|
def humanize_size(size, precision=1):
    """Format a byte count with a binary (1024-based) unit suffix.

    Args:
        size: Number of bytes, or ``None``.
        precision: Decimals shown once the value has been scaled; plain
            byte values are always shown without decimals.

    Returns:
        ``"nan"`` for ``None``, otherwise a string such as ``"512B"`` or
        ``"1.5KB"``. Values beyond the largest suffix stay in ``EB``.
    """
    if size is None:
        return "nan"
    suffixes = ("B", "KB", "MB", "GB", "TB", "PB", "EB")
    index = 0
    # ">=" so that exactly 1024 units roll over to the next suffix; the
    # index bound keeps huge values from running past the suffix list.
    while size >= 1024 and index < len(suffixes) - 1:
        size = size / 1024.0
        index += 1
    digits = precision if index else 0
    return "%.*f%s" % (digits, size, suffixes[index])
|
|
|
|
|
|
def humanize_date(date):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Args:
        date: Timestamp accepted by ``int()``, or ``None``.

    Returns:
        The formatted string, or ``""`` when ``date`` is ``None``.
    """
    if date is None:
        return ""
    return datetime.fromtimestamp(int(date)).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
def setup_options(argv=None):
    """Build the command-line parser and return the parsed options.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` of the chosen sub-command.

    The ``help`` command, or no command at all, prints usage and exits with
    status 0. For the ``db`` command every ``-x`` value is appended to the
    module-level ``BADDIRS`` list.
    """
    parser = ArgumentParser(description=f"Maintains a list of images sqlite file (v{__version__})")
    parser.add_argument(
        "-f",
        action="store",
        dest="sqlfile",
        default=SQLFILE,
        help="SQL file name to use [%(default)s]",
    )
    subparsers = parser.add_subparsers(title="Command", dest="command")

    # Registered for its side effect only; the returned parser is not needed.
    subparsers.add_parser("help", help="Help on all commands")
    db = subparsers.add_parser("db", help="Update database")
    search = subparsers.add_parser("search", help="Search similarity")
    du = subparsers.add_parser("du", help="Disk usage")
    tag = subparsers.add_parser("tag", help="Tag manager")

    db.add_argument(
        "--no-add",
        "-a",
        action="store_true",
        dest="no_add",
        default=False,
        help="Do not add new files [%(default)s]",
    )
    db.add_argument(
        "--measure",
        "-m",
        action="store_true",
        dest="measure",
        default=False,
        help="Measure various statistics for similarity/color searches. [%(default)s]",
    )
    db.add_argument(
        "--changed",
        "-c",
        action="store_true",
        dest="changed",
        default=False,
        help="Search for changed files and update their entries [%(default)s]",
    )
    db.add_argument(
        "--no-delete",
        "-d",
        action="store_true",
        dest="no_delete",
        default=False,
        help="Do not delete non-existing entries [%(default)s]",
    )
    db.add_argument(
        "--no-delete-data",
        "-D",
        action="store_true",
        dest="no_delete_data",
        default=False,
        help="Do not delete unused metadata [%(default)s]",
    )
    db.add_argument(
        "-x",
        action="append",
        dest="exclude",
        default=[],
        help="Exclude folder name. This option may be issued several times.",
    )
    db.add_argument(
        "-l",
        action="store_true",
        dest="symlinks",
        default=False,
        help="Follow symbolic links [%(default)s]",
    )
    db.add_argument(
        "--print",
        action="store_true",
        dest="print",
        default=False,
        help="Print the whole database [%(default)s]",
    )
    db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")

    du.add_argument(
        "-d",
        type=int,
        action="store",
        dest="diskused_depth",
        default=None,
        help="Depth of summarization for du.",
    )
    du.add_argument(
        type=str,
        action="store",
        dest="path",
        default=".",
        help="Print directory sizes. Argument is the path where directories are listed from.",
        nargs="?",
    )

    search.add_argument(
        "--broken",
        action="store_true",
        dest="broken",
        default=False,
        help="Return a list of broken files [%(default)s]",
    )
    search.add_argument(
        "--dup",
        action="store_true",
        dest="duplicate",
        default=False,
        help="Return a list of duplicate files, based on file hashes. [%(default)s]",
    )
    search.add_argument(
        "--visdup",
        action="store_true",
        dest="visual_duplicate",
        default=False,
        help="Return a list of visually exact duplicate files, based on perceptual hashes. [%(default)s]",
    )
    search.add_argument(
        "--color",
        type=str,
        dest="nearestcolor",
        default=False,
        help="Search list for nearest ambient color. format: R,G,B in uint8. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.",
    )
    search.add_argument(
        "--similar",
        type=str,
        dest="similarity",
        default=None,
        help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "
        + "If value is a filename, search similar to that image. "
        + "Append with ',value' to limit similarity. default to 20. "
        + "The output columns: PD PerceptualDiff., CD ColorDiff., "
        + "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.",
    )

    tag.add_argument(
        "-t",
        action="append",
        dest="add_tag",
        default=[],
        help="Give file a tag.",
    )
    tag.add_argument(
        "-d",
        action="append",
        dest="delete_tag",
        default=[],
        help="Delete a tag.",
    )
    tag.add_argument(
        type=str,
        dest="file",
        default=None,
        help="File name for tagging.",
    )

    options = parser.parse_args(argv)

    if options.command == "help":
        # Print the top-level help followed by the help of every command.
        parser.print_help()
        for name, sub in (("db", db), ("search", search), ("du", du), ("tag", tag)):
            print(f"\n====\nCommand: {name}")
            sub.print_help()
        sys.exit(0)

    if options.command is None:
        parser.print_help()
        sys.exit(0)

    if options.command == "db":
        BADDIRS.extend(options.exclude)

    return options
|
|
|
|
|
|
def main():
    """Parse the command line and run the selected sub-command."""
    options = setup_options()
    il = ImageList(options)

    if options.command == "db":
        if not options.no_add:
            il.recursive_add()
            il.base_add()
        if not options.no_delete:
            il.delete_missing()
        if not options.no_delete_data:
            il.clean_data()
        if options.measure:
            il.base_add()
            il.measure()
        if options.print:
            il.db_print()
    elif options.command == "du":
        il.disk_used()
    elif options.command == "search":
        if options.duplicate:
            il.duplicates()
        # similarity() handles both --visdup and --similar in one call, so
        # it must run only once or every report would be printed twice.
        if options.visual_duplicate:
            il.similarity()
        if options.nearestcolor:
            il.nearestcolor()
        if options.similarity and not options.visual_duplicate:
            il.similarity()
        if options.broken:
            il.broken()
    elif options.command == "tag":
        il.tag_manage()
    # NOTE(review): source indentation was lost; the trailing empty print is
    # taken to run for every command rather than only for "tag" - confirm.
    print("")
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|