switch to using p_hash
This commit is contained in:
@@ -5,11 +5,12 @@ import traceback
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
import tabulate
|
||||||
from imagelist2.db import DB, sqlite_sqrt, sqlite_square
|
from imagelist2.db import DB, sqlite_sqrt, sqlite_square
|
||||||
from imagelist2.image import ImageMeasure, is_image_extension
|
from imagelist2.image import ImageMeasure, is_image_extension
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
__version__ = "0.0.3"
|
__version__ = "0.0.4"
|
||||||
SQLFILE = "image-list.sqlite"
|
SQLFILE = "image-list.sqlite"
|
||||||
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
|
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
|
||||||
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
||||||
@@ -22,6 +23,7 @@ class ImageList:
|
|||||||
self.options = opts
|
self.options = opts
|
||||||
self.db = DB(self.options.sqlfile)
|
self.db = DB(self.options.sqlfile)
|
||||||
self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile))
|
self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile))
|
||||||
|
self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H")
|
||||||
|
|
||||||
def recursive_add(self):
|
def recursive_add(self):
|
||||||
|
|
||||||
@@ -126,13 +128,7 @@ class ImageList:
|
|||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""INSERT INTO data(hash,portrait,width,height,description)
|
"""INSERT INTO data(hash,portrait,width,height,description)
|
||||||
VALUES(?,?,?,?,?)""",
|
VALUES(?,?,?,?,?)""",
|
||||||
(
|
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
|
||||||
row[0],
|
|
||||||
image.get_portrait(),
|
|
||||||
image.get_width(),
|
|
||||||
image.get_height(),
|
|
||||||
image.get_description()
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
if i % 50 == 0:
|
if i % 50 == 0:
|
||||||
self.db.conn.commit()
|
self.db.conn.commit()
|
||||||
@@ -194,6 +190,8 @@ class ImageList:
|
|||||||
list.file,
|
list.file,
|
||||||
data.hash,
|
data.hash,
|
||||||
data.fingerprint,
|
data.fingerprint,
|
||||||
|
data.w_hash,
|
||||||
|
data.p_hash,
|
||||||
data.sharpness,
|
data.sharpness,
|
||||||
data.R,
|
data.R,
|
||||||
data.G,
|
data.G,
|
||||||
@@ -223,21 +221,25 @@ class ImageList:
|
|||||||
image = ImageMeasure(filename)
|
image = ImageMeasure(filename)
|
||||||
image.hash = row[1]
|
image.hash = row[1]
|
||||||
image.fingerprint = row[2]
|
image.fingerprint = row[2]
|
||||||
image.sharpness = row[3]
|
image.w_hash = row[3]
|
||||||
image.colors["R"] = row[4]
|
image.p_hash = row[4]
|
||||||
image.colors["G"] = row[5]
|
image.sharpness = row[5]
|
||||||
image.colors["B"] = row[6]
|
image.colors["R"] = row[6]
|
||||||
image.colors["BR"] = row[7]
|
image.colors["G"] = row[7]
|
||||||
image.colors["BG"] = row[8]
|
image.colors["B"] = row[8]
|
||||||
image.colors["BB"] = row[9]
|
image.colors["BR"] = row[9]
|
||||||
|
image.colors["BG"] = row[10]
|
||||||
|
image.colors["BB"] = row[11]
|
||||||
# Calculate if required
|
# Calculate if required
|
||||||
image.fingerprint = image.get_fingerprint()
|
image.get_fingerprint()
|
||||||
image.sharpness = image.get_sharpness()
|
image.sharpness = image.get_sharpness()
|
||||||
image.colors.update(image.get_colors())
|
image.colors.update(image.get_colors())
|
||||||
|
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""UPDATE data SET
|
"""UPDATE data SET
|
||||||
fingerprint = ?,
|
fingerprint = ?,
|
||||||
|
w_hash = ?,
|
||||||
|
p_hash = ?,
|
||||||
sharpness = ?,
|
sharpness = ?,
|
||||||
R = ?,
|
R = ?,
|
||||||
G = ?,
|
G = ?,
|
||||||
@@ -249,6 +251,8 @@ class ImageList:
|
|||||||
""",
|
""",
|
||||||
(
|
(
|
||||||
image.fingerprint,
|
image.fingerprint,
|
||||||
|
image.w_hash,
|
||||||
|
image.p_hash,
|
||||||
image.sharpness,
|
image.sharpness,
|
||||||
image.colors["R"],
|
image.colors["R"],
|
||||||
image.colors["G"],
|
image.colors["G"],
|
||||||
@@ -291,8 +295,10 @@ class ImageList:
|
|||||||
sizes.append(row[0])
|
sizes.append(row[0])
|
||||||
else:
|
else:
|
||||||
sizes[entries.index(start_path)] += row[0]
|
sizes[entries.index(start_path)] += row[0]
|
||||||
|
table = Tabulate(("Size[b]", "Size", "Path"))
|
||||||
for entry in zip(sizes, entries):
|
for entry in zip(sizes, entries):
|
||||||
print("| ".join([str(entry[0]).ljust(14), humanize_size(entry[0]).rjust(8), entry[1]]))
|
table.append((entry[0], humanize_size(entry[0]), entry[1]))
|
||||||
|
table.print()
|
||||||
|
|
||||||
def duplicates(self):
|
def duplicates(self):
|
||||||
result = self.db.cursor().execute(
|
result = self.db.cursor().execute(
|
||||||
@@ -311,9 +317,11 @@ class ImageList:
|
|||||||
FROM f
|
FROM f
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
table = Tabulate(["#", "File"])
|
||||||
for row in result:
|
for row in result:
|
||||||
c = "=" if row[0] == "0" else ">"
|
c = "==" if row[0] == "0" else f">{row[0]}"
|
||||||
print(c + "|".join(row))
|
table.append([c, row[1]])
|
||||||
|
table.print()
|
||||||
|
|
||||||
def nearestcolor(self):
|
def nearestcolor(self):
|
||||||
"""Find closest matching images to given RGB color"""
|
"""Find closest matching images to given RGB color"""
|
||||||
@@ -356,21 +364,19 @@ class ImageList:
|
|||||||
""",
|
""",
|
||||||
(src[0], src[1], src[2], src[3], f),
|
(src[0], src[1], src[2], src[3], f),
|
||||||
)
|
)
|
||||||
|
table = Tabulate(("Path", "Dist", "BR", "BG", "BB"))
|
||||||
print("|".join(("Path", "Dist", "BR", "BG", "BB")))
|
|
||||||
for hit in result:
|
for hit in result:
|
||||||
p, d, r, g, b = hit
|
p, d, r, g, b = hit
|
||||||
print(
|
table.append(
|
||||||
"|".join(
|
(
|
||||||
(
|
p,
|
||||||
p,
|
str(d),
|
||||||
str(d),
|
str(int(r)),
|
||||||
str(int(r)),
|
str(int(g)),
|
||||||
str(int(g)),
|
str(int(b)),
|
||||||
str(int(b)),
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
table.print()
|
||||||
|
|
||||||
def similarity(self):
|
def similarity(self):
|
||||||
|
|
||||||
@@ -378,79 +384,159 @@ class ImageList:
|
|||||||
image = ImageMeasure(None)
|
image = ImageMeasure(None)
|
||||||
image.hash = row[0]
|
image.hash = row[0]
|
||||||
image.fingerprint = row[1]
|
image.fingerprint = row[1]
|
||||||
image.sharpness = row[2]
|
image.w_hash = row[2]
|
||||||
image.width = row[3]
|
image.p_hash = row[3]
|
||||||
image.height = row[4]
|
image.sharpness = row[4]
|
||||||
image.colors["R"] = row[5]
|
image.width = row[5]
|
||||||
image.colors["G"] = row[6]
|
image.height = row[6]
|
||||||
image.colors["B"] = row[7]
|
image.colors["R"] = row[7]
|
||||||
|
image.colors["G"] = row[8]
|
||||||
|
image.colors["B"] = row[9]
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def get_matching(cmp_image):
|
def print_visually_similar(file, thr):
|
||||||
|
cmp_image = ImageMeasure(file)
|
||||||
|
cmp_image.set_all()
|
||||||
|
cmp_image.filename = cmp_image.filename
|
||||||
compare_list = self.db.cursor().execute(
|
compare_list = self.db.cursor().execute(
|
||||||
"""SELECT hash,fingerprint,sharpness,width,height,R,G,B
|
"""SELECT
|
||||||
FROM data
|
RELATIVE(file),width,height,sharpness,
|
||||||
WHERE fingerprint IS NOT NULL AND sharpness > 0 AND hash != ?""",
|
PDISTANCE(p_hash, ?) AS p_dist,
|
||||||
(cmp_image.hash,),
|
COLORDIFF(R,G,B,?,?,?) AS c_diff,
|
||||||
|
SHAPEDIFF(width,height,?,?) AS s_diff
|
||||||
|
FROM files
|
||||||
|
WHERE p_hash IS NOT NULL AND
|
||||||
|
sharpness > 0 AND
|
||||||
|
hash != ? AND
|
||||||
|
p_dist <= ?
|
||||||
|
ORDER BY p_dist, file""",
|
||||||
|
(
|
||||||
|
cmp_image.p_hash,
|
||||||
|
cmp_image.colors["R"],
|
||||||
|
cmp_image.colors["G"],
|
||||||
|
cmp_image.colors["B"],
|
||||||
|
cmp_image.width,
|
||||||
|
cmp_image.height,
|
||||||
|
cmp_image.hash,
|
||||||
|
thr,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
match_list = []
|
table = Tabulate(self.similarity_header)
|
||||||
for row2 in compare_list:
|
table.append(
|
||||||
other_image = set_image(row2)
|
(
|
||||||
similarity = cmp_image.similarity_difference(other_image)
|
"==",
|
||||||
if similarity <= thr:
|
self.db.file2relative(cmp_image.filename),
|
||||||
other_image.similarity["distance"] = similarity
|
0,
|
||||||
other_image.similarity["color"] = cmp_image.color_difference(other_image)
|
0,
|
||||||
other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
|
0,
|
||||||
other_image.filename = self.db.hash2file(other_image.hash)
|
cmp_image.sharpness,
|
||||||
match_list.append(other_image)
|
cmp_image.width,
|
||||||
return match_list
|
cmp_image.height,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for counter, row in enumerate(compare_list):
|
||||||
|
f2, w2, h2, s2, pdist, cdiff, sdiff = row
|
||||||
|
table.append((f">{counter+1}", f2, pdist, cdiff, sdiff, s2, w2, h2))
|
||||||
|
|
||||||
|
table.print()
|
||||||
|
|
||||||
|
def get_visual_duplicates():
|
||||||
|
|
||||||
def get_duplicates():
|
|
||||||
return self.db.cursor().execute(
|
return self.db.cursor().execute(
|
||||||
"""
|
"""
|
||||||
WITH
|
WITH
|
||||||
duplicates AS (SELECT fingerprint FROM data GROUP BY fingerprint HAVING count(fingerprint) > 1 AND sharpness > 0),
|
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1)
|
||||||
duphash AS (
|
SELECT
|
||||||
SELECT duplicates.fingerprint, data.hash, data.sharpness, data.width, data.height, data.R, data.G, data.B
|
RELATIVE(files.file) AS file,
|
||||||
FROM duplicates
|
files.width,
|
||||||
LEFT JOIN data ON (duplicates.fingerprint = data.fingerprint)
|
files.height,
|
||||||
),
|
files.sharpness,
|
||||||
f AS (SELECT
|
files.R,
|
||||||
duphash.fingerprint, duphash.hash,list.file,
|
files.G,
|
||||||
duphash.sharpness,
|
files.B,
|
||||||
duphash.width, duphash.height,
|
files.p_hash
|
||||||
duphash.R, duphash.G, duphash.B
|
FROM files
|
||||||
FROM duphash
|
WHERE p_hash IN ( SELECT p_hash FROM duplicates )
|
||||||
LEFT JOIN list ON (list.hash = duphash.hash)
|
ORDER BY p_hash, files.size DESC
|
||||||
WHERE list.file IS NOT NULL
|
"""
|
||||||
ORDER BY list.file
|
)
|
||||||
|
|
||||||
|
def print_visual_duplicates():
|
||||||
|
fblock = None
|
||||||
|
counter = 0
|
||||||
|
table = Tabulate(self.similarity_header)
|
||||||
|
for row in get_visual_duplicates():
|
||||||
|
f, w, h, s, r, g, b, p_hash = row
|
||||||
|
if fblock != p_hash:
|
||||||
|
fblock = p_hash
|
||||||
|
counter = 0
|
||||||
|
table.append(("==", f, 0, 0, 0, s, w, h))
|
||||||
|
image1 = ImageMeasure(f)
|
||||||
|
image1.width = w
|
||||||
|
image1.height = h
|
||||||
|
image1.sharpness = s
|
||||||
|
image1.colors.update({"B": b, "G": g, "R": r})
|
||||||
|
continue
|
||||||
|
counter += 1
|
||||||
|
image2 = ImageMeasure(f)
|
||||||
|
image2.width = w
|
||||||
|
image2.height = h
|
||||||
|
image2.sharpness = s
|
||||||
|
image2.colors.update({"B": b, "G": g, "R": r})
|
||||||
|
cdiff = image1.color_difference(image2)
|
||||||
|
sdiff = image1.shape_difference(image2)
|
||||||
|
table.append((f">{counter}", f, 0, cdiff, sdiff, s, w, h))
|
||||||
|
table.print()
|
||||||
|
|
||||||
|
def print_self_similarity(thr):
|
||||||
|
fingerprint_list = self.db.cursor().execute(
|
||||||
|
"""
|
||||||
|
WITH disttab AS (
|
||||||
|
WITH
|
||||||
|
t1 AS ( SELECT * FROM files ),
|
||||||
|
t2 AS ( SELECT * FROM files )
|
||||||
|
SELECT
|
||||||
|
RELATIVE(t1.file) AS file1,
|
||||||
|
t1.width AS width1,
|
||||||
|
t1.height AS height1,
|
||||||
|
t1.sharpness AS sharpness1,
|
||||||
|
RELATIVE(t2.file) AS file2,
|
||||||
|
t2.width AS width2,
|
||||||
|
t2.height AS height2,
|
||||||
|
t2.sharpness AS sharpness2,
|
||||||
|
PDISTANCE(t1.p_hash,t2.p_hash) AS p_distance,
|
||||||
|
COLORDIFF(t1.R,t1.G,t1.B,t2.R,t2.G,t2.B) AS c_diff,
|
||||||
|
SHAPEDIFF(t1.width,t1.height,t2.width,t2.height) AS s_diff
|
||||||
|
FROM t1 INNER JOIN t2
|
||||||
|
ON t1.file < t2.file
|
||||||
|
WHERE p_distance <= ?
|
||||||
|
ORDER BY t1.file, p_distance, t2.file
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT * FROM disttab
|
||||||
CAST((row_number() OVER (PARTITION BY f.fingerprint))-1 AS TEXT) AS row,
|
""",
|
||||||
file,
|
(thr,),
|
||||||
hash,
|
|
||||||
fingerprint,
|
|
||||||
sharpness,width,height,R,G,B
|
|
||||||
FROM f
|
|
||||||
""",
|
|
||||||
)
|
)
|
||||||
|
f1block = None
|
||||||
|
counter = 0
|
||||||
|
table = Tabulate(self.similarity_header)
|
||||||
|
for row in fingerprint_list:
|
||||||
|
f1, w1, h1, s1, f2, w2, h2, s2, pdist, cdiff, sdiff = row
|
||||||
|
if f1block != f1:
|
||||||
|
f1block = f1
|
||||||
|
counter = 0
|
||||||
|
table.append(("==", f1, 0, 0, 0, s1, w1, h1))
|
||||||
|
counter += 1
|
||||||
|
table.append((f">{counter}", f2, pdist, cdiff, sdiff, s2, w2, h2))
|
||||||
|
table.print()
|
||||||
|
|
||||||
def print_matching(match_list, cmp_image):
|
def print_similarity_block(rows):
|
||||||
if len(match_list) > 0:
|
if len(rows) > 0:
|
||||||
match_list.sort(key=lambda i: i.similarity["distance"])
|
for row in rows:
|
||||||
print_similarity_row(cmp_image, "=", 0)
|
pre, f, w, h, s, pdist, cdiff, sdiff = row
|
||||||
for i, img in enumerate(match_list):
|
print(f"{pre}|{f}|{pdist}|{cdiff}|{sdiff}|{s}|{w}|{h}")
|
||||||
print_similarity_row(img, ">", i + 1)
|
|
||||||
|
|
||||||
def print_similarity_row(img, c, index):
|
|
||||||
fnames = ", ".join([self.db.file2relative(f) for f in img.filename])
|
|
||||||
print(
|
|
||||||
f"{c}{index}|{fnames}|{img.similarity['distance']}|{img.similarity['color']}|{img.similarity['aspect']}|{img.sharpness}|{img.width}|{img.height}"
|
|
||||||
)
|
|
||||||
|
|
||||||
print("|".join(("#", "File", "SD", "CD", "RD", "Shp", "W", "H")))
|
|
||||||
if self.options.similarity:
|
if self.options.similarity:
|
||||||
|
thr = 20
|
||||||
try:
|
try:
|
||||||
thr = int(self.options.similarity)
|
thr = int(self.options.similarity)
|
||||||
file = None
|
file = None
|
||||||
@@ -463,48 +549,14 @@ class ImageList:
|
|||||||
file = file[0]
|
file = file[0]
|
||||||
|
|
||||||
if file is None:
|
if file is None:
|
||||||
# Measure similarity on all files
|
# Similarity inside the dataset
|
||||||
fingerprint_list = self.db.cursor().execute(
|
print_self_similarity(thr)
|
||||||
"""
|
|
||||||
SELECT hash,fingerprint,sharpness,width,height,R,G,B
|
|
||||||
FROM data
|
|
||||||
WHERE fingerprint IS NOT NULL
|
|
||||||
AND sharpness > 0"""
|
|
||||||
)
|
|
||||||
checked = set()
|
|
||||||
for i, row in enumerate(fingerprint_list):
|
|
||||||
if row[0] in checked:
|
|
||||||
continue
|
|
||||||
cmp_image = set_image(row)
|
|
||||||
cmp_image.filename = self.db.hash2file(cmp_image.hash)
|
|
||||||
match_list = get_matching(cmp_image)
|
|
||||||
for m in match_list:
|
|
||||||
checked.add(m.hash)
|
|
||||||
print_matching(match_list, cmp_image)
|
|
||||||
else:
|
else:
|
||||||
# Read single image, and find similarty to that
|
# Read single image, and find similarty to that
|
||||||
cmp_image = ImageMeasure(file)
|
print_visually_similar(file, thr)
|
||||||
cmp_image.set_all()
|
|
||||||
cmp_image.filename = [cmp_image.filename]
|
|
||||||
match_list = get_matching(cmp_image)
|
|
||||||
print_matching(match_list, cmp_image)
|
|
||||||
|
|
||||||
if self.options.visual_duplicate:
|
if self.options.visual_duplicate:
|
||||||
match_list = []
|
print_visual_duplicates()
|
||||||
for row in get_duplicates():
|
|
||||||
if row[0] == "0":
|
|
||||||
if len(match_list) > 0:
|
|
||||||
print_matching(match_list, cmp_image)
|
|
||||||
cmp_image = set_image(row[2:])
|
|
||||||
cmp_image.filename = [row[1]]
|
|
||||||
match_list = []
|
|
||||||
else:
|
|
||||||
other_image = set_image(row[2:])
|
|
||||||
other_image.filename = [row[1]]
|
|
||||||
other_image.similarity["color"] = cmp_image.color_difference(other_image)
|
|
||||||
other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
|
|
||||||
match_list.append(other_image)
|
|
||||||
print_matching(match_list, cmp_image)
|
|
||||||
|
|
||||||
def tag_manage(self):
|
def tag_manage(self):
|
||||||
|
|
||||||
@@ -538,6 +590,30 @@ class ImageList:
|
|||||||
print(",".join([x[0] for x in tags]))
|
print(",".join([x[0] for x in tags]))
|
||||||
|
|
||||||
|
|
||||||
|
class Tabulate:
    """Collect rows under a fixed header and render them as a compact table.

    The output uses ``|`` as the column separator, no horizontal rule lines
    and no extra cell padding (see ``_table_format``).
    """

    def __init__(self, header):
        """Store the column titles and start with an empty row list.

        Args:
            header: Sequence of column titles, passed to ``tabulate`` as
                ``headers``.
        """
        self.header = header
        self.rows = []

    def append(self, row):
        """Add one row (a sequence of cell values) to the table."""
        self.rows.append(row)

    @staticmethod
    def _table_format():
        """Build the pipe-separated, rule-less ``tabulate.TableFormat``.

        Built lazily on each call so that the ``tabulate`` module is only
        touched when a table is actually rendered.
        """
        return tabulate.TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=tabulate.DataRow("|", "|", "|"),
            datarow=tabulate.DataRow("|", "|", "|"),
            padding=0,
            with_header_hide=["lineabove"],
        )

    def render(self):
        """Return the table as a string without printing it."""
        # The TableFormat instance is handed to tabulate directly instead of
        # being written into the library's private ``_table_formats``
        # registry: overwriting the "github" entry there would change that
        # format for every other user of tabulate in the process.
        return tabulate.tabulate(
            self.rows,
            headers=self.header,
            tablefmt=self._table_format(),
            showindex=False,
        )

    def print(self):
        """Print the rendered table to standard output."""
        print(self.render())
|
||||||
|
|
||||||
|
|
||||||
def clean_dirs(dirs):
|
def clean_dirs(dirs):
|
||||||
"""Remove in place, because os.walk uses the same variable"""
|
"""Remove in place, because os.walk uses the same variable"""
|
||||||
remove = []
|
remove = []
|
||||||
@@ -694,7 +770,7 @@ def setup_options():
|
|||||||
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "
|
help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "
|
||||||
+ "If value is a filename, search similar to that image. "
|
+ "If value is a filename, search similar to that image. "
|
||||||
+ "Append with ',value' to limit similarity. default to 20."
|
+ "Append with ',value' to limit similarity. default to 20."
|
||||||
+ "The output columns: SD SimilarityDiff., CD ColorDiff., "
|
+ "The output columns: PD PerceptualDiff., CD ColorDiff., "
|
||||||
+ "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.",
|
+ "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,16 @@
|
|||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import sys
|
||||||
from math import sqrt as sqlite_sqrt
|
from math import sqrt as sqlite_sqrt
|
||||||
|
|
||||||
|
import sqlite_vec
|
||||||
|
|
||||||
|
from .image import (
|
||||||
|
calculate_color_difference,
|
||||||
|
calculate_phash_distance,
|
||||||
|
calculate_shape_difference,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DB:
|
class DB:
|
||||||
def __init__(self, sqlfile):
|
def __init__(self, sqlfile):
|
||||||
@@ -26,6 +35,8 @@ class DB:
|
|||||||
width INTEGER,
|
width INTEGER,
|
||||||
height INTEGER,
|
height INTEGER,
|
||||||
fingerprint TEXT,
|
fingerprint TEXT,
|
||||||
|
p_hash TEXT,
|
||||||
|
w_hash TEXT,
|
||||||
sharpness NUMERIC,
|
sharpness NUMERIC,
|
||||||
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
|
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
|
||||||
)"""
|
)"""
|
||||||
@@ -48,6 +59,12 @@ class DB:
|
|||||||
conn = sqlite3.connect(self.sqlfile)
|
conn = sqlite3.connect(self.sqlfile)
|
||||||
conn.text_factory = str
|
conn.text_factory = str
|
||||||
conn.create_function("RELATIVE", 1, self.file2relative)
|
conn.create_function("RELATIVE", 1, self.file2relative)
|
||||||
|
conn.create_function("PDISTANCE", 2, calculate_phash_distance)
|
||||||
|
conn.create_function("COLORDIFF", 6, calculate_color_difference)
|
||||||
|
conn.create_function("SHAPEDIFF", 4, calculate_shape_difference)
|
||||||
|
conn.enable_load_extension(True)
|
||||||
|
sqlite_vec.load(conn)
|
||||||
|
conn.enable_load_extension(False)
|
||||||
self.conn = conn
|
self.conn = conn
|
||||||
|
|
||||||
return conn
|
return conn
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ class ImageMeasure:
|
|||||||
self.height = None
|
self.height = None
|
||||||
self.portrait = None
|
self.portrait = None
|
||||||
self.fingerprint = None
|
self.fingerprint = None
|
||||||
|
self.w_hash = None
|
||||||
|
self.p_hash = None
|
||||||
self.sharpness = None
|
self.sharpness = None
|
||||||
self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")}
|
self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")}
|
||||||
self.similarity = {"distance": 0, "color": 0, "aspect": 0}
|
self.similarity = {"distance": 0, "color": 0, "aspect": 0}
|
||||||
@@ -116,7 +118,6 @@ class ImageMeasure:
|
|||||||
self.description = read_image_comment(self.filename)
|
self.description = read_image_comment(self.filename)
|
||||||
return self.description
|
return self.description
|
||||||
|
|
||||||
|
|
||||||
def get_image(self, image_type="numpy"):
|
def get_image(self, image_type="numpy"):
|
||||||
|
|
||||||
if self.image is None:
|
if self.image is None:
|
||||||
@@ -136,11 +137,22 @@ class ImageMeasure:
|
|||||||
def get_fingerprint(self):
|
def get_fingerprint(self):
|
||||||
|
|
||||||
if self.fingerprint is None:
|
if self.fingerprint is None:
|
||||||
# self.fingerprint = str(imagehash.phash(self.get_image("PIL"), hash_size=8))
|
self.get_w_hash()
|
||||||
|
self.get_p_hash()
|
||||||
self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8))
|
self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8))
|
||||||
|
|
||||||
return self.fingerprint
|
return self.fingerprint
|
||||||
|
|
||||||
|
def get_w_hash(self):
|
||||||
|
if self.w_hash is None:
|
||||||
|
self.w_hash = str(imagehash.whash(self.get_image("PIL"), hash_size=8))
|
||||||
|
return self.w_hash
|
||||||
|
|
||||||
|
def get_p_hash(self):
|
||||||
|
if self.p_hash is None:
|
||||||
|
self.p_hash = str(imagehash.phash(self.get_image("PIL"), hash_size=8))
|
||||||
|
return self.p_hash
|
||||||
|
|
||||||
def get_sharpness(self):
|
def get_sharpness(self):
|
||||||
|
|
||||||
if self.sharpness is None:
|
if self.sharpness is None:
|
||||||
@@ -180,34 +192,54 @@ class ImageMeasure:
|
|||||||
return self.colors
|
return self.colors
|
||||||
|
|
||||||
def similarity_difference(self, other):
|
def similarity_difference(self, other):
|
||||||
|
try:
|
||||||
|
other_phash = imagehash.hex_to_hash(other.get_p_hash())
|
||||||
|
this_phash = imagehash.hex_to_hash(self.get_p_hash())
|
||||||
|
|
||||||
other_phash = imagehash.hex_to_hash(other.get_fingerprint())
|
return other_phash - this_phash
|
||||||
this_phash = imagehash.hex_to_hash(self.get_fingerprint())
|
except Exception as e:
|
||||||
return other_phash - this_phash
|
print(e, file=sys.stderr)
|
||||||
|
return 255
|
||||||
|
|
||||||
def color_difference(self, other):
|
def color_difference(self, other):
|
||||||
|
|
||||||
other_color = other.get_colors()
|
other_color = other.get_colors()
|
||||||
this_color = self.get_colors()
|
this_color = self.get_colors()
|
||||||
diff = round(
|
|
||||||
np.sqrt(
|
return calculate_color_difference(
|
||||||
np.square(other_color["R"] - this_color["R"])
|
this_color["R"], this_color["G"], this_color["B"], other_color["R"], other_color["G"], other_color["B"]
|
||||||
+ np.square(other_color["G"] - this_color["G"])
|
|
||||||
+ np.square(other_color["B"] - this_color["B"])
|
|
||||||
),
|
|
||||||
1,
|
|
||||||
)
|
)
|
||||||
return diff
|
|
||||||
|
|
||||||
def shape_difference(self, other):
|
def shape_difference(self, other):
|
||||||
|
return calculate_shape_difference(self.width, self.height, other.width, other.height)
|
||||||
return round(abs(float(other.width) / float(other.height) - float(self.width) / float(self.height)), 4)
|
|
||||||
|
|
||||||
|
|
||||||
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
|
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
|
||||||
JPEG_EXTENSIONS = (".jpg", ".jpeg")
|
JPEG_EXTENSIONS = (".jpg", ".jpeg")
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_color_difference(r1, g1, b1, r2, g2, b2):
    """Return the Euclidean distance between two RGB triplets, rounded to 0.1.

    Args:
        r1, g1, b1: Channel values of the first color.
        r2, g2, b2: Channel values of the second color.

    Returns:
        The rounded distance, or ``None`` when any channel is ``None``.
    """
    # A missing channel (e.g. a NULL column handed in by SQLite) has no
    # meaningful distance; returning None avoids a TypeError from the
    # subtraction below.
    if any(channel is None for channel in (r1, g1, b1, r2, g2, b2)):
        return None

    diff = round(
        np.sqrt(np.square(r1 - r2) + np.square(g1 - g2) + np.square(b1 - b2)),
        1,
    )
    return diff
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_shape_difference(w1, h1, w2, h2):
    """Return the absolute difference of two aspect ratios, rounded to 4 places.

    Args:
        w1, h1: Width and height of the first shape.
        w2, h2: Width and height of the second shape.

    Returns:
        ``abs(w1 / h1 - w2 / h2)`` rounded to four decimals, or ``None``
        when a dimension is ``None`` or a height is zero.
    """
    try:
        return round(abs(float(w1) / float(h1) - float(w2) / float(h2)), 4)
    except (TypeError, ZeroDivisionError):
        # None dimension (TypeError) or zero height (ZeroDivisionError):
        # the ratio is undefined, so report "no value" instead of raising.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_phash_distance(h1, h2):
    """Return the distance between two hex-encoded image hashes.

    Args:
        h1: Hex string of the first hash.
        h2: Hex string of the second hash.

    Returns:
        The difference of the two decoded hashes as an ``int``, or the
        sentinel ``255`` when either hash is missing or cannot be compared.
    """
    # A missing hash is an expected condition (not every row has one), so
    # answer with the sentinel directly instead of logging an error for
    # every compared pair.
    if h1 is None or h2 is None:
        return 255

    try:
        return int(imagehash.hex_to_hash(h1) - imagehash.hex_to_hash(h2))
    except Exception as e:
        # Deliberately broad: a malformed stored hash must not abort the
        # caller; report it on stderr and fall back to the sentinel.
        print(e, file=sys.stderr)
        print((h1, h2), file=sys.stderr)

        return 255
|
||||||
|
|
||||||
|
|
||||||
def is_image_extension(f):
|
def is_image_extension(f):
|
||||||
return os.path.splitext(f.lower())[1] in EXTENSIONS
|
return os.path.splitext(f.lower())[1] in EXTENSIONS
|
||||||
|
|
||||||
@@ -234,7 +266,8 @@ def read_image_size(fname):
|
|||||||
im = Image.open(fname)
|
im = Image.open(fname)
|
||||||
return im.width, im.height
|
return im.width, im.height
|
||||||
|
|
||||||
|
|
||||||
def read_image_comment(fname):
|
def read_image_comment(fname):
|
||||||
"""Just reading the comment with PIL"""
|
"""Just reading the comment with PIL"""
|
||||||
im = Image.open(fname)
|
im = Image.open(fname)
|
||||||
return im.info.get('comment','')
|
return im.info.get("comment", "")
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ classifiers = [
|
|||||||
"Programming Language :: Python :: Implementation :: CPython",
|
"Programming Language :: Python :: Implementation :: CPython",
|
||||||
"Programming Language :: Python :: Implementation :: PyPy",
|
"Programming Language :: Python :: Implementation :: PyPy",
|
||||||
]
|
]
|
||||||
dependencies = ["PyTurboJPEG", "Pillow", "ImageHash", "numpy", "tqdm"]
|
dependencies = ["PyTurboJPEG", "Pillow", "ImageHash", "numpy", "tqdm", "sqlite-vec", "tabulate"]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
image-list = "imagelist2:main"
|
image-list = "imagelist2:main"
|
||||||
|
|||||||
@@ -22,5 +22,5 @@ setup(
|
|||||||
"image-list = imagelist2:main",
|
"image-list = imagelist2:main",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
install_requires=["PyTurboJPEG", "Pillow", "ImageHash", "numpy", "tqdm"],
|
install_requires=["PyTurboJPEG", "Pillow", "ImageHash", "numpy", "tqdm", "sqlite-vec", "tabulate"],
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user