handle broken images

This commit is contained in:
Q
2025-06-04 17:22:39 +03:00
parent 4cf6b754b1
commit 3380f865f8
4 changed files with 163 additions and 46 deletions

View File

@@ -32,7 +32,7 @@ PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate
test: test-db test-du test-dup test-tag ## Test
test-db:
set -e
set -ex
. useve-runner
useve imagelist2
echo =================================
@@ -58,6 +58,7 @@ test-db:
image-list db -x imagelist2
eval ${PRINT_TABLE}
rm folder1/black.png
dd if=folder1/wizard.jpg of=folder1/wizard.half.jpg count=1 bs=1024
image-list db -x imagelist2
eval ${PRINT_TABLE}
mogrify -rotate 90 folder1/cyan.png
@@ -102,6 +103,8 @@ test-dup:
image-list search --similar 30
echo ========== Similar by file ======================
image-list search --similar folder1/wizard.jpg
echo ========== Broken files ======================
image-list search --broken
test-tag:
set -e

View File

@@ -7,12 +7,11 @@ from datetime import datetime
import tabulate
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
from imagelist2.image import ImageMeasure, is_image_extension
from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension
from tqdm import tqdm
__version__ = "0.0.6"
__version__ = "0.0.7"
SQLFILE = "image-list.sqlite"
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
BADDIRS = ["_tn", "_med", ".tn", ".med"]
MINSIZE = 0
@@ -110,7 +109,7 @@ class ImageList:
SELECT list.hash, list.file
FROM list
LEFT JOIN data ON data.hash = list.hash
WHERE data.hash IS NULL
WHERE data.hash IS NULL AND data.broken IS NULL
"""
)
.fetchall()
@@ -126,10 +125,24 @@ class ImageList:
if filename == None:
continue
image = ImageMeasure(filename)
if image.is_broken():
self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description)
VALUES(?,?,?,?,?)""",
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
"""INSERT INTO data(hash,broken)
VALUES(?,?)""",
(row[0], True),
)
else:
self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description,broken)
VALUES(?,?,?,?,?,?)""",
(
row[0],
image.get_portrait(),
image.get_width(),
image.get_height(),
image.get_description(),
False,
),
)
self.db_writer.commit()
return
@@ -198,9 +211,14 @@ class ImageList:
data.BB
FROM data
LEFT JOIN list ON data.hash = list.hash
WHERE data.p_hash IS NULL
WHERE
(
data.p_hash IS NULL
OR data.sharpness IS NULL
OR data.R IS NULL
)
AND
data.broken IS FALSE
"""
)
.fetchall()
@@ -214,6 +232,7 @@ class ImageList:
if row[1] in duplicates:
continue
duplicates.add(row[1])
try:
image = ImageMeasure(filename)
(
image.hash,
@@ -230,6 +249,20 @@ class ImageList:
image.get_p_hash()
image.sharpness = image.get_sharpness()
image.colors.update(image.get_colors())
if image.broken:
print("image broke")
raise ImageBrokenError()
except ImageBrokenError:
self.db_writer.execute(
"""UPDATE data SET broken = ?
WHERE hash = ?
""",
(
image.broken,
image.hash,
),
)
continue
self.db_writer.execute(
"""UPDATE data SET
@@ -290,6 +323,29 @@ class ImageList:
table.append((entry[0], humanize_size(entry[0]), entry[1]))
table.print()
def broken(self):
    """Print the path of every file flagged as broken.

    Queries ``files`` through ``self.db`` for rows whose ``broken`` column
    is true, then writes a ``#File`` header line followed by the ``file``
    value of each matching row, one per line.
    """
    cursor = self.db.cursor()
    # Run the query first so a failing statement raises before any output.
    flagged = cursor.execute(
        """
        SELECT
            file FROM files
        WHERE broken IS TRUE
        """
    )
    print("#File")
    for record in flagged:
        path = record[0]
        print(path)
def db_print(self):
    """Print every row of ``files`` as a table.

    The header row is built from the column names reported by the cursor
    description; each fetched row is appended to a ``Tabulate`` instance,
    which is then printed.
    """
    cursor = self.db.cursor().execute(
        """
        SELECT * FROM files
        """
    )
    # The first field of each description entry is the column name.
    headers = []
    for column in cursor.description:
        headers.append(column[0])
    table = Tabulate(headers)
    for record in cursor:
        table.append(record)
    table.print()
def duplicates(self):
result = self.db.cursor().execute(
"""
@@ -338,7 +394,14 @@ class ImageList:
result = self.db.cursor().execute(
"""
WITH distances AS (
SELECT hash, ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,BR,BG,BB FROM data ORDER BY distance LIMIT ?
SELECT
hash,
ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,
BR,BG,BB
FROM data
WHERE BR IS NOT NULL
ORDER BY distance
LIMIT ?
)
SELECT
RELATIVE(list.file),
@@ -421,7 +484,7 @@ class ImageList:
return self.db.cursor().execute(
"""
WITH
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1)
duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1)
SELECT
RELATIVE(files.file) AS file,
files.width,
@@ -469,8 +532,8 @@ class ImageList:
"""
WITH disttab AS (
WITH
t1 AS ( SELECT * FROM files ),
t2 AS ( SELECT * FROM files )
t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ),
t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL )
SELECT
RELATIVE(t1.file) AS file1,
t1.width AS width1,
@@ -700,6 +763,13 @@ def setup_options():
default=False,
help="Follow symbolic links [%(default)s]",
)
db.add_argument(
"--print",
action="store_true",
dest="print",
default=False,
help="Print the whole database [%(default)s]",
)
db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")
du.add_argument(
@@ -720,6 +790,14 @@ def setup_options():
nargs="?",
)
search.add_argument(
"--broken",
action="store_true",
dest="broken",
default=False,
help="Return a list of broken files [%(default)s]",
)
search.add_argument(
"--dup",
action="store_true",
@@ -820,6 +898,8 @@ def main():
if options.measure:
il.base_add()
il.measure()
if options.print:
il.db_print()
if options.command == "du":
il.disk_used()
if options.command == "search":
@@ -831,6 +911,8 @@ def main():
il.nearestcolor()
if options.similarity:
il.similarity()
if options.broken:
il.broken()
if options.command == "tag":
il.tag_manage()
print("")

View File

@@ -38,7 +38,8 @@ class DB:
height INTEGER,
p_hash TEXT,
sharpness NUMERIC,
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL,
broken BOOLEAN
)"""
)
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")

View File

@@ -26,6 +26,7 @@ Border[:, 9] = True
class ImageMeasure:
def __init__(self, filename):
self.filename = filename
self.broken = None
self.hash = None
self.time = None
self.size = None
@@ -53,6 +54,8 @@ class ImageMeasure:
def set_all(self):
self.set_filename_absolute()
if self.is_broken():
raise ImageBrokenError()
self.get_hash()
self.get_time()
self.get_size()
@@ -64,6 +67,15 @@ class ImageMeasure:
def set_filename_absolute(self):
    """Replace ``self.filename`` with the path returned by ``os.path.realpath``."""
    resolved = os.path.realpath(self.filename)
    self.filename = resolved
def is_broken(self):
    """Return whether the image file is flagged as unreadable.

    The verdict is cached in ``self.broken``: while it is ``None`` the file
    is probed once with ``read_image_size``; any exception from that probe
    marks the image as broken, a clean return marks it as not broken.
    Later calls return the cached value without probing again.
    """
    if self.broken is not None:
        return self.broken
    try:
        read_image_size(self.filename)
    except Exception:
        self.broken = True
    else:
        self.broken = False
    return self.broken
def get_hash(self):
"""Return hash of the file"""
if self.hash is None:
@@ -120,7 +132,14 @@ class ImageMeasure:
def get_image(self, image_type="numpy"):
if self.image is None:
try:
self.image, self.image_type = read_image(self.filename)
except Exception as e:
print(self.filename, file=sys.stderr)
print(e, file=sys.stderr)
self.broken = True
raise ImageBrokenError()
if self.image_type == "numpy":
if len(self.image.shape) > 2:
# BGR -> RGB
@@ -153,6 +172,7 @@ class ImageMeasure:
4,
)
except Exception:
self.broken = True
self.sharpness = 0
return self.sharpness
@@ -163,6 +183,7 @@ class ImageMeasure:
return int(np.mean(im[Border]))
if self.colors["R"] is None:
try:
im = self.get_image("PIL").convert("RGB")
th = im.copy()
th.thumbnail((1, 1), resample=Image.BILINEAR)
@@ -174,6 +195,11 @@ class ImageMeasure:
self.colors["BR"] = get_border(im[:, :, 0])
self.colors["BG"] = get_border(im[:, :, 1])
self.colors["BB"] = get_border(im[:, :, 2])
except Exception as e:
print(self.filename, file=sys.stderr)
print(e, file=sys.stderr)
self.broken = True
return self.colors
return self.colors
def similarity_difference(self, other):
@@ -199,6 +225,11 @@ class ImageMeasure:
return calculate_shape_difference(self.width, self.height, other.width, other.height)
class ImageBrokenError(Exception):
    """Raised when an image cannot be read.

    Attributes:
        msg: Fixed human-readable description of the failure.
    """

    def __init__(self):
        self.msg = "Image Broken: Can not read image"
        # Forward the message to Exception so str(exc), exc.args and
        # tracebacks show it instead of an empty string.
        super().__init__(self.msg)
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
JPEG_EXTENSIONS = (".jpg", ".jpeg")