switch to using p_hash
This commit is contained in:
@@ -5,11 +5,12 @@ import traceback
|
||||
from argparse import ArgumentParser
|
||||
from datetime import datetime
|
||||
|
||||
import tabulate
|
||||
from imagelist2.db import DB, sqlite_sqrt, sqlite_square
|
||||
from imagelist2.image import ImageMeasure, is_image_extension
|
||||
from tqdm import tqdm
|
||||
|
||||
__version__ = "0.0.3"
|
||||
__version__ = "0.0.4"
|
||||
SQLFILE = "image-list.sqlite"
|
||||
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
|
||||
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
||||
@@ -22,6 +23,7 @@ class ImageList:
|
||||
self.options = opts
|
||||
self.db = DB(self.options.sqlfile)
|
||||
self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile))
|
||||
self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H")
|
||||
|
||||
def recursive_add(self):
|
||||
|
||||
@@ -126,13 +128,7 @@ class ImageList:
|
||||
cursor.execute(
|
||||
"""INSERT INTO data(hash,portrait,width,height,description)
|
||||
VALUES(?,?,?,?,?)""",
|
||||
(
|
||||
row[0],
|
||||
image.get_portrait(),
|
||||
image.get_width(),
|
||||
image.get_height(),
|
||||
image.get_description()
|
||||
),
|
||||
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
|
||||
)
|
||||
if i % 50 == 0:
|
||||
self.db.conn.commit()
|
||||
@@ -194,6 +190,8 @@ class ImageList:
|
||||
list.file,
|
||||
data.hash,
|
||||
data.fingerprint,
|
||||
data.w_hash,
|
||||
data.p_hash,
|
||||
data.sharpness,
|
||||
data.R,
|
||||
data.G,
|
||||
@@ -223,21 +221,25 @@ class ImageList:
|
||||
image = ImageMeasure(filename)
|
||||
image.hash = row[1]
|
||||
image.fingerprint = row[2]
|
||||
image.sharpness = row[3]
|
||||
image.colors["R"] = row[4]
|
||||
image.colors["G"] = row[5]
|
||||
image.colors["B"] = row[6]
|
||||
image.colors["BR"] = row[7]
|
||||
image.colors["BG"] = row[8]
|
||||
image.colors["BB"] = row[9]
|
||||
image.w_hash = row[3]
|
||||
image.p_hash = row[4]
|
||||
image.sharpness = row[5]
|
||||
image.colors["R"] = row[6]
|
||||
image.colors["G"] = row[7]
|
||||
image.colors["B"] = row[8]
|
||||
image.colors["BR"] = row[9]
|
||||
image.colors["BG"] = row[10]
|
||||
image.colors["BB"] = row[11]
|
||||
# Calculate if required
|
||||
image.fingerprint = image.get_fingerprint()
|
||||
image.get_fingerprint()
|
||||
image.sharpness = image.get_sharpness()
|
||||
image.colors.update(image.get_colors())
|
||||
|
||||
cursor.execute(
|
||||
"""UPDATE data SET
|
||||
fingerprint = ?,
|
||||
w_hash = ?,
|
||||
p_hash = ?,
|
||||
sharpness = ?,
|
||||
R = ?,
|
||||
G = ?,
|
||||
@@ -249,6 +251,8 @@ class ImageList:
|
||||
""",
|
||||
(
|
||||
image.fingerprint,
|
||||
image.w_hash,
|
||||
image.p_hash,
|
||||
image.sharpness,
|
||||
image.colors["R"],
|
||||
image.colors["G"],
|
||||
@@ -291,8 +295,10 @@ class ImageList:
|
||||
sizes.append(row[0])
|
||||
else:
|
||||
sizes[entries.index(start_path)] += row[0]
|
||||
table = Tabulate(("Size[b]", "Size", "Path"))
|
||||
for entry in zip(sizes, entries):
|
||||
print("| ".join([str(entry[0]).ljust(14), humanize_size(entry[0]).rjust(8), entry[1]]))
|
||||
table.append((entry[0], humanize_size(entry[0]), entry[1]))
|
||||
table.print()
|
||||
|
||||
def duplicates(self):
|
||||
result = self.db.cursor().execute(
|
||||
@@ -311,9 +317,11 @@ class ImageList:
|
||||
FROM f
|
||||
""",
|
||||
)
|
||||
table = Tabulate(["#", "File"])
|
||||
for row in result:
|
||||
c = "=" if row[0] == "0" else ">"
|
||||
print(c + "|".join(row))
|
||||
c = "==" if row[0] == "0" else f">{row[0]}"
|
||||
table.append([c, row[1]])
|
||||
table.print()
|
||||
|
||||
def nearestcolor(self):
|
||||
"""Find closest matching images to given RGB color"""
|
||||
@@ -356,21 +364,19 @@ class ImageList:
|
||||
""",
|
||||
(src[0], src[1], src[2], src[3], f),
|
||||
)
|
||||
|
||||
print("|".join(("Path", "Dist", "BR", "BG", "BB")))
|
||||
table = Tabulate(("Path", "Dist", "BR", "BG", "BB"))
|
||||
for hit in result:
|
||||
p, d, r, g, b = hit
|
||||
print(
|
||||
"|".join(
|
||||
(
|
||||
p,
|
||||
str(d),
|
||||
str(int(r)),
|
||||
str(int(g)),
|
||||
str(int(b)),
|
||||
)
|
||||
table.append(
|
||||
(
|
||||
p,
|
||||
str(d),
|
||||
str(int(r)),
|
||||
str(int(g)),
|
||||
str(int(b)),
|
||||
)
|
||||
)
|
||||
table.print()
|
||||
|
||||
def similarity(self):
|
||||
|
||||
@@ -378,79 +384,159 @@ class ImageList:
|
||||
image = ImageMeasure(None)
|
||||
image.hash = row[0]
|
||||
image.fingerprint = row[1]
|
||||
image.sharpness = row[2]
|
||||
image.width = row[3]
|
||||
image.height = row[4]
|
||||
image.colors["R"] = row[5]
|
||||
image.colors["G"] = row[6]
|
||||
image.colors["B"] = row[7]
|
||||
image.w_hash = row[2]
|
||||
image.p_hash = row[3]
|
||||
image.sharpness = row[4]
|
||||
image.width = row[5]
|
||||
image.height = row[6]
|
||||
image.colors["R"] = row[7]
|
||||
image.colors["G"] = row[8]
|
||||
image.colors["B"] = row[9]
|
||||
return image
|
||||
|
||||
def get_matching(cmp_image):
|
||||
|
||||
def print_visually_similar(file, thr):
|
||||
cmp_image = ImageMeasure(file)
|
||||
cmp_image.set_all()
|
||||
cmp_image.filename = cmp_image.filename
|
||||
compare_list = self.db.cursor().execute(
|
||||
"""SELECT hash,fingerprint,sharpness,width,height,R,G,B
|
||||
FROM data
|
||||
WHERE fingerprint IS NOT NULL AND sharpness > 0 AND hash != ?""",
|
||||
(cmp_image.hash,),
|
||||
"""SELECT
|
||||
RELATIVE(file),width,height,sharpness,
|
||||
PDISTANCE(p_hash, ?) AS p_dist,
|
||||
COLORDIFF(R,G,B,?,?,?) AS c_diff,
|
||||
SHAPEDIFF(width,height,?,?) AS s_diff
|
||||
FROM files
|
||||
WHERE p_hash IS NOT NULL AND
|
||||
sharpness > 0 AND
|
||||
hash != ? AND
|
||||
p_dist <= ?
|
||||
ORDER BY p_dist, file""",
|
||||
(
|
||||
cmp_image.p_hash,
|
||||
cmp_image.colors["R"],
|
||||
cmp_image.colors["G"],
|
||||
cmp_image.colors["B"],
|
||||
cmp_image.width,
|
||||
cmp_image.height,
|
||||
cmp_image.hash,
|
||||
thr,
|
||||
),
|
||||
)
|
||||
match_list = []
|
||||
for row2 in compare_list:
|
||||
other_image = set_image(row2)
|
||||
similarity = cmp_image.similarity_difference(other_image)
|
||||
if similarity <= thr:
|
||||
other_image.similarity["distance"] = similarity
|
||||
other_image.similarity["color"] = cmp_image.color_difference(other_image)
|
||||
other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
|
||||
other_image.filename = self.db.hash2file(other_image.hash)
|
||||
match_list.append(other_image)
|
||||
return match_list
|
||||
table = Tabulate(self.similarity_header)
|
||||
table.append(
|
||||
(
|
||||
"==",
|
||||
self.db.file2relative(cmp_image.filename),
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
cmp_image.sharpness,
|
||||
cmp_image.width,
|
||||
cmp_image.height,
|
||||
)
|
||||
)
|
||||
for counter, row in enumerate(compare_list):
|
||||
f2, w2, h2, s2, pdist, cdiff, sdiff = row
|
||||
table.append((f">{counter+1}", f2, pdist, cdiff, sdiff, s2, w2, h2))
|
||||
|
||||
table.print()
|
||||
|
||||
def get_visual_duplicates():
|
||||
|
||||
def get_duplicates():
|
||||
return self.db.cursor().execute(
|
||||
"""
|
||||
WITH
|
||||
duplicates AS (SELECT fingerprint FROM data GROUP BY fingerprint HAVING count(fingerprint) > 1 AND sharpness > 0),
|
||||
duphash AS (
|
||||
SELECT duplicates.fingerprint, data.hash, data.sharpness, data.width, data.height, data.R, data.G, data.B
|
||||
FROM duplicates
|
||||
LEFT JOIN data ON (duplicates.fingerprint = data.fingerprint)
|
||||
),
|
||||
f AS (SELECT
|
||||
duphash.fingerprint, duphash.hash,list.file,
|
||||
duphash.sharpness,
|
||||
duphash.width, duphash.height,
|
||||
duphash.R, duphash.G, duphash.B
|
||||
FROM duphash
|
||||
LEFT JOIN list ON (list.hash = duphash.hash)
|
||||
WHERE list.file IS NOT NULL
|
||||
ORDER BY list.file
|
||||
WITH
|
||||
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1)
|
||||
SELECT
|
||||
RELATIVE(files.file) AS file,
|
||||
files.width,
|
||||
files.height,
|
||||
files.sharpness,
|
||||
files.R,
|
||||
files.G,
|
||||
files.B,
|
||||
files.p_hash
|
||||
FROM files
|
||||
WHERE p_hash IN ( SELECT p_hash FROM duplicates )
|
||||
ORDER BY p_hash, files.size DESC
|
||||
"""
|
||||
)
|
||||
|
||||
def print_visual_duplicates():
    """Print a table of images grouped by identical ``p_hash``.

    Rows come from ``get_visual_duplicates()``. The first row of each run of
    equal ``p_hash`` values is printed as the ``==`` reference row; each
    following row of the run is printed as ``>N`` with its color and shape
    difference measured against that reference.

    NOTE(review): grouping relies on rows with the same ``p_hash`` arriving
    consecutively -- confirm against the query's ORDER BY.
    """
    table = Tabulate(self.similarity_header)
    group_hash = None
    position = 0
    for path, width, height, sharpness, red, green, blue, p_hash in get_visual_duplicates():
        # Build the measurement object from the stored values for this row.
        current = ImageMeasure(path)
        current.width = width
        current.height = height
        current.sharpness = sharpness
        current.colors.update({"B": blue, "G": green, "R": red})

        if group_hash != p_hash:
            # A new p_hash value starts a new group; this row is its reference.
            group_hash = p_hash
            position = 0
            reference = current
            table.append(("==", path, 0, 0, 0, sharpness, width, height))
        else:
            position += 1
            color_delta = reference.color_difference(current)
            shape_delta = reference.shape_difference(current)
            table.append((f">{position}", path, 0, color_delta, shape_delta, sharpness, width, height))
    table.print()
|
||||
|
||||
def print_self_similarity(thr):
|
||||
fingerprint_list = self.db.cursor().execute(
|
||||
"""
|
||||
WITH disttab AS (
|
||||
WITH
|
||||
t1 AS ( SELECT * FROM files ),
|
||||
t2 AS ( SELECT * FROM files )
|
||||
SELECT
|
||||
RELATIVE(t1.file) AS file1,
|
||||
t1.width AS width1,
|
||||
t1.height AS height1,
|
||||
t1.sharpness AS sharpness1,
|
||||
RELATIVE(t2.file) AS file2,
|
||||
t2.width AS width2,
|
||||
t2.height AS height2,
|
||||
t2.sharpness AS sharpness2,
|
||||
PDISTANCE(t1.p_hash,t2.p_hash) AS p_distance,
|
||||
COLORDIFF(t1.R,t1.G,t1.B,t2.R,t2.G,t2.B) AS c_diff,
|
||||
SHAPEDIFF(t1.width,t1.height,t2.width,t2.height) AS s_diff
|
||||
FROM t1 INNER JOIN t2
|
||||
ON t1.file < t2.file
|
||||
WHERE p_distance <= ?
|
||||
ORDER BY t1.file, p_distance, t2.file
|
||||
)
|
||||
SELECT
|
||||
CAST((row_number() OVER (PARTITION BY f.fingerprint))-1 AS TEXT) AS row,
|
||||
file,
|
||||
hash,
|
||||
fingerprint,
|
||||
sharpness,width,height,R,G,B
|
||||
FROM f
|
||||
""",
|
||||
SELECT * FROM disttab
|
||||
""",
|
||||
(thr,),
|
||||
)
|
||||
f1block = None
|
||||
counter = 0
|
||||
table = Tabulate(self.similarity_header)
|
||||
for row in fingerprint_list:
|
||||
f1, w1, h1, s1, f2, w2, h2, s2, pdist, cdiff, sdiff = row
|
||||
if f1block != f1:
|
||||
f1block = f1
|
||||
counter = 0
|
||||
table.append(("==", f1, 0, 0, 0, s1, w1, h1))
|
||||
counter += 1
|
||||
table.append((f">{counter}", f2, pdist, cdiff, sdiff, s2, w2, h2))
|
||||
table.print()
|
||||
|
||||
def print_matching(match_list, cmp_image):
    """Print ``cmp_image`` followed by its matches, nearest first.

    Nothing is printed when ``match_list`` is empty. Otherwise ``match_list``
    is sorted in place by ``similarity["distance"]``, ``cmp_image`` is printed
    as row ``=0`` and the matches as rows ``>1``, ``>2``, ...
    """
    if not len(match_list) > 0:
        return
    # In-place sort: the caller's list is reordered as well.
    match_list.sort(key=lambda candidate: candidate.similarity["distance"])
    print_similarity_row(cmp_image, "=", 0)
    for rank, candidate in enumerate(match_list, start=1):
        print_similarity_row(candidate, ">", rank)
||||
def print_similarity_block(rows):
    """Print each row of ``rows`` as one pipe-separated line.

    Every row is an 8-item sequence ordered as prefix, file, width, height,
    sharpness, p-distance, color diff, shape diff. The printed column order
    is prefix, file, p-distance, color diff, shape diff, sharpness, width,
    height. An empty ``rows`` prints nothing.
    """
    if len(rows) == 0:
        return
    for prefix, path, width, height, sharpness, p_dist, c_diff, s_diff in rows:
        print(prefix, path, p_dist, c_diff, s_diff, sharpness, width, height, sep="|")
|
||||
|
||||
def print_similarity_row(img, c, index):
    """Print one pipe-separated result line for ``img``.

    ``img.filename`` is iterated as a collection of paths; each is passed
    through ``self.db.file2relative`` and the results are joined with ", ".
    The line starts with the marker ``c`` immediately followed by ``index``,
    then the file names, the ``distance``/``color``/``aspect`` entries of
    ``img.similarity``, and finally sharpness, width and height.
    """
    relative_names = ", ".join(self.db.file2relative(name) for name in img.filename)
    scores = img.similarity
    line = (
        f"{c}{index}|{relative_names}"
        f"|{scores['distance']}|{scores['color']}|{scores['aspect']}"
        f"|{img.sharpness}|{img.width}|{img.height}"
    )
    print(line)
|
||||
|
||||
print("|".join(("#", "File", "SD", "CD", "RD", "Shp", "W", "H")))
|
||||
if self.options.similarity:
|
||||
thr = 20
|
||||
try:
|
||||
thr = int(self.options.similarity)
|
||||
file = None
|
||||
@@ -463,48 +549,14 @@ class ImageList:
|
||||
file = file[0]
|
||||
|
||||
if file is None:
|
||||
# Measure similarity on all files
|
||||
fingerprint_list = self.db.cursor().execute(
|
||||
"""
|
||||
SELECT hash,fingerprint,sharpness,width,height,R,G,B
|
||||
FROM data
|
||||
WHERE fingerprint IS NOT NULL
|
||||
AND sharpness > 0"""
|
||||
)
|
||||
checked = set()
|
||||
for i, row in enumerate(fingerprint_list):
|
||||
if row[0] in checked:
|
||||
continue
|
||||
cmp_image = set_image(row)
|
||||
cmp_image.filename = self.db.hash2file(cmp_image.hash)
|
||||
match_list = get_matching(cmp_image)
|
||||
for m in match_list:
|
||||
checked.add(m.hash)
|
||||
print_matching(match_list, cmp_image)
|
||||
# Similarity inside the dataset
|
||||
print_self_similarity(thr)
|
||||
else:
|
||||
# Read single image, and find similarity to that
|
||||
cmp_image = ImageMeasure(file)
|
||||
cmp_image.set_all()
|
||||
cmp_image.filename = [cmp_image.filename]
|
||||
match_list = get_matching(cmp_image)
|
||||
print_matching(match_list, cmp_image)
|
||||
print_visually_similar(file, thr)
|
||||
|
||||
if self.options.visual_duplicate:
|
||||
match_list = []
|
||||
for row in get_duplicates():
|
||||
if row[0] == "0":
|
||||
if len(match_list) > 0:
|
||||
print_matching(match_list, cmp_image)
|
||||
cmp_image = set_image(row[2:])
|
||||
cmp_image.filename = [row[1]]
|
||||
match_list = []
|
||||
else:
|
||||
other_image = set_image(row[2:])
|
||||
other_image.filename = [row[1]]
|
||||
other_image.similarity["color"] = cmp_image.color_difference(other_image)
|
||||
other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
|
||||
match_list.append(other_image)
|
||||
print_matching(match_list, cmp_image)
|
||||
print_visual_duplicates()
|
||||
|
||||
def tag_manage(self):
|
||||
|
||||
@@ -538,6 +590,30 @@ class ImageList:
|
||||
print(",".join([x[0] for x in tags]))
|
||||
|
||||
|
||||
class Tabulate:
    """Collect rows under a fixed header and print them as a compact table.

    Output is rendered by the ``tabulate`` package with ``|`` as the column
    separator, no padding and no rule lines.
    """

    def __init__(self, header):
        """Store the column titles and start with an empty row list.

        header: sequence of column titles, passed to ``tabulate`` as
            ``headers`` when printing.
        """
        self.header = header
        self.rows = []

    def append(self, row):
        """Add one row to the table.

        row: sequence of cell values; stored as given.
        """
        self.rows.append(row)

    def print(self):
        """Render the header and all collected rows and write them to stdout."""
        # The format object is handed straight to tabulate() instead of being
        # stored in the private ``tabulate._table_formats`` registry under the
        # name "github". That registry is process-wide, so overwriting the
        # entry replaced the library's built-in "github" format for every
        # other caller as a side effect of printing a table here.
        compact_format = tabulate.TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=tabulate.DataRow("|", "|", "|"),
            datarow=tabulate.DataRow("|", "|", "|"),
            padding=0,
            with_header_hide=["lineabove"],
        )

        print(tabulate.tabulate(self.rows, headers=self.header, tablefmt=compact_format, showindex=False))
|
||||
|
||||
|
||||
def clean_dirs(dirs):
|
||||
"""Remove in place, because os.walk uses the same variable"""
|
||||
remove = []
|
||||
@@ -694,7 +770,7 @@ def setup_options():
|
||||
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "
|
||||
+ "If value is a filename, search similar to that image. "
|
||||
+ "Append with ',value' to limit similarity. default to 20."
|
||||
+ "The output columns: SD SimilarityDiff., CD ColorDiff., "
|
||||
+ "The output columns: PD PerceptualDiff., CD ColorDiff., "
|
||||
+ "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.",
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user