handle broken images

This commit is contained in:
Q
2025-06-04 17:22:39 +03:00
parent 4cf6b754b1
commit 3380f865f8
4 changed files with 163 additions and 46 deletions

View File

@@ -32,7 +32,7 @@ PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate
test: test-db test-du test-dup test-tag ## Test test: test-db test-du test-dup test-tag ## Test
test-db: test-db:
set -e set -ex
. useve-runner . useve-runner
useve imagelist2 useve imagelist2
echo ================================= echo =================================
@@ -58,6 +58,7 @@ test-db:
image-list db -x imagelist2 image-list db -x imagelist2
eval ${PRINT_TABLE} eval ${PRINT_TABLE}
rm folder1/black.png rm folder1/black.png
dd if=folder1/wizard.jpg of=folder1/wizard.half.jpg count=1 bs=1024
image-list db -x imagelist2 image-list db -x imagelist2
eval ${PRINT_TABLE} eval ${PRINT_TABLE}
mogrify -rotate 90 folder1/cyan.png mogrify -rotate 90 folder1/cyan.png
@@ -102,6 +103,8 @@ test-dup:
image-list search --similar 30 image-list search --similar 30
echo ========== Similar by file ====================== echo ========== Similar by file ======================
image-list search --similar folder1/wizard.jpg image-list search --similar folder1/wizard.jpg
echo ========== Broken files ======================
image-list search --broken
test-tag: test-tag:
set -e set -e

View File

@@ -7,12 +7,11 @@ from datetime import datetime
import tabulate import tabulate
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
from imagelist2.image import ImageMeasure, is_image_extension from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension
from tqdm import tqdm from tqdm import tqdm
__version__ = "0.0.6" __version__ = "0.0.7"
SQLFILE = "image-list.sqlite" SQLFILE = "image-list.sqlite"
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
BADDIRS = ["_tn", "_med", ".tn", ".med"] BADDIRS = ["_tn", "_med", ".tn", ".med"]
MINSIZE = 0 MINSIZE = 0
@@ -110,7 +109,7 @@ class ImageList:
SELECT list.hash, list.file SELECT list.hash, list.file
FROM list FROM list
LEFT JOIN data ON data.hash = list.hash LEFT JOIN data ON data.hash = list.hash
WHERE data.hash IS NULL WHERE data.hash IS NULL AND data.broken IS NULL
""" """
) )
.fetchall() .fetchall()
@@ -126,10 +125,24 @@ class ImageList:
if filename == None: if filename == None:
continue continue
image = ImageMeasure(filename) image = ImageMeasure(filename)
if image.is_broken():
self.db_writer.execute( self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description) """INSERT INTO data(hash,broken)
VALUES(?,?,?,?,?)""", VALUES(?,?)""",
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()), (row[0], True),
)
else:
self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description,broken)
VALUES(?,?,?,?,?,?)""",
(
row[0],
image.get_portrait(),
image.get_width(),
image.get_height(),
image.get_description(),
False,
),
) )
self.db_writer.commit() self.db_writer.commit()
return return
@@ -198,9 +211,14 @@ class ImageList:
data.BB data.BB
FROM data FROM data
LEFT JOIN list ON data.hash = list.hash LEFT JOIN list ON data.hash = list.hash
WHERE data.p_hash IS NULL WHERE
(
data.p_hash IS NULL
OR data.sharpness IS NULL OR data.sharpness IS NULL
OR data.R IS NULL OR data.R IS NULL
)
AND
data.broken IS FALSE
""" """
) )
.fetchall() .fetchall()
@@ -214,6 +232,7 @@ class ImageList:
if row[1] in duplicates: if row[1] in duplicates:
continue continue
duplicates.add(row[1]) duplicates.add(row[1])
try:
image = ImageMeasure(filename) image = ImageMeasure(filename)
( (
image.hash, image.hash,
@@ -230,6 +249,20 @@ class ImageList:
image.get_p_hash() image.get_p_hash()
image.sharpness = image.get_sharpness() image.sharpness = image.get_sharpness()
image.colors.update(image.get_colors()) image.colors.update(image.get_colors())
if image.broken:
print("image broke")
raise ImageBrokenError()
except ImageBrokenError:
self.db_writer.execute(
"""UPDATE data SET broken = ?
WHERE hash = ?
""",
(
image.broken,
image.hash,
),
)
continue
self.db_writer.execute( self.db_writer.execute(
"""UPDATE data SET """UPDATE data SET
@@ -290,6 +323,29 @@ class ImageList:
table.append((entry[0], humanize_size(entry[0]), entry[1])) table.append((entry[0], humanize_size(entry[0]), entry[1]))
table.print() table.print()
def broken(self):
    """Print the files that the database marks as broken.

    Writes a ``#File`` header line to stdout, then the first column of
    each row returned by the query, one per line.
    """
    query = (
        "\n"
        "SELECT\n"
        "file FROM files\n"
        "WHERE broken IS TRUE\n"
    )
    rows = self.db.cursor().execute(query)
    print("#File")
    for (path, *_rest) in rows:
        print(path)
def db_print(self):
    """Print every row of ``files`` as a table.

    The header is built from the first field of each entry in the
    cursor's ``description`` (the column name, per the DB-API).
    """
    cursor = self.db.cursor().execute("\nSELECT * FROM files\n")
    headers = [column[0] for column in cursor.description]
    table = Tabulate(headers)
    for record in cursor:
        table.append(record)
    table.print()
def duplicates(self): def duplicates(self):
result = self.db.cursor().execute( result = self.db.cursor().execute(
""" """
@@ -338,7 +394,14 @@ class ImageList:
result = self.db.cursor().execute( result = self.db.cursor().execute(
""" """
WITH distances AS ( WITH distances AS (
SELECT hash, ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,BR,BG,BB FROM data ORDER BY distance LIMIT ? SELECT
hash,
ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,
BR,BG,BB
FROM data
WHERE BR IS NOT NULL
ORDER BY distance
LIMIT ?
) )
SELECT SELECT
RELATIVE(list.file), RELATIVE(list.file),
@@ -421,7 +484,7 @@ class ImageList:
return self.db.cursor().execute( return self.db.cursor().execute(
""" """
WITH WITH
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1) duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1)
SELECT SELECT
RELATIVE(files.file) AS file, RELATIVE(files.file) AS file,
files.width, files.width,
@@ -469,8 +532,8 @@ class ImageList:
""" """
WITH disttab AS ( WITH disttab AS (
WITH WITH
t1 AS ( SELECT * FROM files ), t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ),
t2 AS ( SELECT * FROM files ) t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL )
SELECT SELECT
RELATIVE(t1.file) AS file1, RELATIVE(t1.file) AS file1,
t1.width AS width1, t1.width AS width1,
@@ -700,6 +763,13 @@ def setup_options():
default=False, default=False,
help="Follow symbolic links [%(default)s]", help="Follow symbolic links [%(default)s]",
) )
db.add_argument(
"--print",
action="store_true",
dest="print",
default=False,
help="Print the whole database [%(default)s]",
)
db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.") db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")
du.add_argument( du.add_argument(
@@ -720,6 +790,14 @@ def setup_options():
nargs="?", nargs="?",
) )
search.add_argument(
"--broken",
action="store_true",
dest="broken",
default=False,
help="Return a list of broken files [%(default)s]",
)
search.add_argument( search.add_argument(
"--dup", "--dup",
action="store_true", action="store_true",
@@ -820,6 +898,8 @@ def main():
if options.measure: if options.measure:
il.base_add() il.base_add()
il.measure() il.measure()
if options.print:
il.db_print()
if options.command == "du": if options.command == "du":
il.disk_used() il.disk_used()
if options.command == "search": if options.command == "search":
@@ -831,6 +911,8 @@ def main():
il.nearestcolor() il.nearestcolor()
if options.similarity: if options.similarity:
il.similarity() il.similarity()
if options.broken:
il.broken()
if options.command == "tag": if options.command == "tag":
il.tag_manage() il.tag_manage()
print("") print("")

View File

@@ -38,7 +38,8 @@ class DB:
height INTEGER, height INTEGER,
p_hash TEXT, p_hash TEXT,
sharpness NUMERIC, sharpness NUMERIC,
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL,
broken BOOLEAN
)""" )"""
) )
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)") db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")

View File

@@ -26,6 +26,7 @@ Border[:, 9] = True
class ImageMeasure: class ImageMeasure:
def __init__(self, filename): def __init__(self, filename):
self.filename = filename self.filename = filename
self.broken = None
self.hash = None self.hash = None
self.time = None self.time = None
self.size = None self.size = None
@@ -53,6 +54,8 @@ class ImageMeasure:
def set_all(self): def set_all(self):
self.set_filename_absolute() self.set_filename_absolute()
if self.is_broken():
raise ImageBrokenError()
self.get_hash() self.get_hash()
self.get_time() self.get_time()
self.get_size() self.get_size()
@@ -64,6 +67,15 @@ class ImageMeasure:
def set_filename_absolute(self): def set_filename_absolute(self):
self.filename = os.path.realpath(self.filename) self.filename = os.path.realpath(self.filename)
def is_broken(self):
    """Return the cached broken flag, probing the file on first use.

    ``self.broken`` being ``None`` means the file has not been checked yet.
    In that case ``read_image_size`` is called on ``self.filename``; any
    exception it raises marks the image as broken, otherwise it is marked
    as not broken. Later calls return the stored value without re-reading.
    """
    if self.broken is not None:
        return self.broken
    try:
        read_image_size(self.filename)
    except Exception:
        # Best-effort probe: whatever went wrong, the file is unusable.
        self.broken = True
    else:
        self.broken = False
    return self.broken
def get_hash(self): def get_hash(self):
"""Return hash of the file""" """Return hash of the file"""
if self.hash is None: if self.hash is None:
@@ -120,7 +132,14 @@ class ImageMeasure:
def get_image(self, image_type="numpy"): def get_image(self, image_type="numpy"):
if self.image is None: if self.image is None:
try:
self.image, self.image_type = read_image(self.filename) self.image, self.image_type = read_image(self.filename)
except Exception as e:
print(self.filename, file=sys.stderr)
print(e, file=sys.stderr)
self.broken = True
raise ImageBrokenError()
if self.image_type == "numpy": if self.image_type == "numpy":
if len(self.image.shape) > 2: if len(self.image.shape) > 2:
# BGR -> RGB # BGR -> RGB
@@ -153,6 +172,7 @@ class ImageMeasure:
4, 4,
) )
except Exception: except Exception:
self.broken = True
self.sharpness = 0 self.sharpness = 0
return self.sharpness return self.sharpness
@@ -163,6 +183,7 @@ class ImageMeasure:
return int(np.mean(im[Border])) return int(np.mean(im[Border]))
if self.colors["R"] is None: if self.colors["R"] is None:
try:
im = self.get_image("PIL").convert("RGB") im = self.get_image("PIL").convert("RGB")
th = im.copy() th = im.copy()
th.thumbnail((1, 1), resample=Image.BILINEAR) th.thumbnail((1, 1), resample=Image.BILINEAR)
@@ -174,6 +195,11 @@ class ImageMeasure:
self.colors["BR"] = get_border(im[:, :, 0]) self.colors["BR"] = get_border(im[:, :, 0])
self.colors["BG"] = get_border(im[:, :, 1]) self.colors["BG"] = get_border(im[:, :, 1])
self.colors["BB"] = get_border(im[:, :, 2]) self.colors["BB"] = get_border(im[:, :, 2])
except Exception as e:
print(self.filename, file=sys.stderr)
print(e, file=sys.stderr)
self.broken = True
return self.colors
return self.colors return self.colors
def similarity_difference(self, other): def similarity_difference(self, other):
@@ -199,6 +225,11 @@ class ImageMeasure:
return calculate_shape_difference(self.width, self.height, other.width, other.height) return calculate_shape_difference(self.width, self.height, other.width, other.height)
class ImageBrokenError(Exception):
    """Raised when an image file cannot be read.

    The human-readable message is available both as ``msg`` and through
    ``str(err)`` / ``err.args``.
    """

    def __init__(self):
        self.msg = "Image Broken: Can not read image"
        # Forward the message to Exception so that str(err), tracebacks and
        # print(err) show it; without this the error renders as an empty string.
        super().__init__(self.msg)
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff") EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
JPEG_EXTENSIONS = (".jpg", ".jpeg") JPEG_EXTENSIONS = (".jpg", ".jpeg")