handle broken images
This commit is contained in:
@@ -32,7 +32,7 @@ PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate
|
||||
test: test-db test-du test-dup test-tag ## Test
|
||||
|
||||
test-db:
|
||||
set -e
|
||||
set -ex
|
||||
. useve-runner
|
||||
useve imagelist2
|
||||
echo =================================
|
||||
@@ -58,6 +58,7 @@ test-db:
|
||||
image-list db -x imagelist2
|
||||
eval ${PRINT_TABLE}
|
||||
rm folder1/black.png
|
||||
dd if=folder1/wizard.jpg of=folder1/wizard.half.jpg count=1 bs=1024
|
||||
image-list db -x imagelist2
|
||||
eval ${PRINT_TABLE}
|
||||
mogrify -rotate 90 folder1/cyan.png
|
||||
@@ -102,6 +103,8 @@ test-dup:
|
||||
image-list search --similar 30
|
||||
echo ========== Similar by file ======================
|
||||
image-list search --similar folder1/wizard.jpg
|
||||
echo ========== Broken files ======================
|
||||
image-list search --broken
|
||||
|
||||
test-tag:
|
||||
set -e
|
||||
|
||||
@@ -7,12 +7,11 @@ from datetime import datetime
|
||||
|
||||
import tabulate
|
||||
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
|
||||
from imagelist2.image import ImageMeasure, is_image_extension
|
||||
from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension
|
||||
from tqdm import tqdm
|
||||
|
||||
__version__ = "0.0.6"
|
||||
__version__ = "0.0.7"
|
||||
SQLFILE = "image-list.sqlite"
|
||||
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
|
||||
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
||||
MINSIZE = 0
|
||||
|
||||
@@ -110,7 +109,7 @@ class ImageList:
|
||||
SELECT list.hash, list.file
|
||||
FROM list
|
||||
LEFT JOIN data ON data.hash = list.hash
|
||||
WHERE data.hash IS NULL
|
||||
WHERE data.hash IS NULL AND data.broken IS NULL
|
||||
"""
|
||||
)
|
||||
.fetchall()
|
||||
@@ -126,10 +125,24 @@ class ImageList:
|
||||
if filename == None:
|
||||
continue
|
||||
image = ImageMeasure(filename)
|
||||
if image.is_broken():
|
||||
self.db_writer.execute(
|
||||
"""INSERT INTO data(hash,portrait,width,height,description)
|
||||
VALUES(?,?,?,?,?)""",
|
||||
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
|
||||
"""INSERT INTO data(hash,broken)
|
||||
VALUES(?,?)""",
|
||||
(row[0], True),
|
||||
)
|
||||
else:
|
||||
self.db_writer.execute(
|
||||
"""INSERT INTO data(hash,portrait,width,height,description,broken)
|
||||
VALUES(?,?,?,?,?,?)""",
|
||||
(
|
||||
row[0],
|
||||
image.get_portrait(),
|
||||
image.get_width(),
|
||||
image.get_height(),
|
||||
image.get_description(),
|
||||
False,
|
||||
),
|
||||
)
|
||||
self.db_writer.commit()
|
||||
return
|
||||
@@ -198,9 +211,14 @@ class ImageList:
|
||||
data.BB
|
||||
FROM data
|
||||
LEFT JOIN list ON data.hash = list.hash
|
||||
WHERE data.p_hash IS NULL
|
||||
WHERE
|
||||
(
|
||||
data.p_hash IS NULL
|
||||
OR data.sharpness IS NULL
|
||||
OR data.R IS NULL
|
||||
)
|
||||
AND
|
||||
data.broken IS FALSE
|
||||
"""
|
||||
)
|
||||
.fetchall()
|
||||
@@ -214,6 +232,7 @@ class ImageList:
|
||||
if row[1] in duplicates:
|
||||
continue
|
||||
duplicates.add(row[1])
|
||||
try:
|
||||
image = ImageMeasure(filename)
|
||||
(
|
||||
image.hash,
|
||||
@@ -230,6 +249,20 @@ class ImageList:
|
||||
image.get_p_hash()
|
||||
image.sharpness = image.get_sharpness()
|
||||
image.colors.update(image.get_colors())
|
||||
if image.broken:
|
||||
print("image broke")
|
||||
raise ImageBrokenError()
|
||||
except ImageBrokenError:
|
||||
self.db_writer.execute(
|
||||
"""UPDATE data SET broken = ?
|
||||
WHERE hash = ?
|
||||
""",
|
||||
(
|
||||
image.broken,
|
||||
image.hash,
|
||||
),
|
||||
)
|
||||
continue
|
||||
|
||||
self.db_writer.execute(
|
||||
"""UPDATE data SET
|
||||
@@ -290,6 +323,29 @@ class ImageList:
|
||||
table.append((entry[0], humanize_size(entry[0]), entry[1]))
|
||||
table.print()
|
||||
|
||||
def broken(self):
    """Print the path of every file flagged as broken.

    Queries the ``files`` relation for rows whose ``broken`` column is
    true, then writes a ``#File`` header line followed by the ``file``
    value of each matching row, one per line, to standard output.
    """
    query = """
        SELECT
            file FROM files
        WHERE broken IS TRUE
        """
    # Run the query before printing the header so that a failing query
    # produces no partial output.
    rows = self.db.cursor().execute(query)
    print("#File")
    for (path,) in rows:
        print(path)
|
||||
|
||||
def db_print(self):
    """Print the full contents of the ``files`` relation as a table.

    Column headers are taken from the cursor description of the query;
    every returned row is appended to a ``Tabulate`` instance, which is
    then printed.
    """
    query = """
        SELECT * FROM files
        """
    cursor = self.db.cursor().execute(query)
    # The first item of each description entry is the column name.
    headers = [column[0] for column in cursor.description]
    table = Tabulate(headers)
    for record in cursor:
        table.append(record)
    table.print()
|
||||
|
||||
def duplicates(self):
|
||||
result = self.db.cursor().execute(
|
||||
"""
|
||||
@@ -338,7 +394,14 @@ class ImageList:
|
||||
result = self.db.cursor().execute(
|
||||
"""
|
||||
WITH distances AS (
|
||||
SELECT hash, ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,BR,BG,BB FROM data ORDER BY distance LIMIT ?
|
||||
SELECT
|
||||
hash,
|
||||
ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,
|
||||
BR,BG,BB
|
||||
FROM data
|
||||
WHERE BR IS NOT NULL
|
||||
ORDER BY distance
|
||||
LIMIT ?
|
||||
)
|
||||
SELECT
|
||||
RELATIVE(list.file),
|
||||
@@ -421,7 +484,7 @@ class ImageList:
|
||||
return self.db.cursor().execute(
|
||||
"""
|
||||
WITH
|
||||
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1)
|
||||
duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1)
|
||||
SELECT
|
||||
RELATIVE(files.file) AS file,
|
||||
files.width,
|
||||
@@ -469,8 +532,8 @@ class ImageList:
|
||||
"""
|
||||
WITH disttab AS (
|
||||
WITH
|
||||
t1 AS ( SELECT * FROM files ),
|
||||
t2 AS ( SELECT * FROM files )
|
||||
t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ),
|
||||
t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL )
|
||||
SELECT
|
||||
RELATIVE(t1.file) AS file1,
|
||||
t1.width AS width1,
|
||||
@@ -700,6 +763,13 @@ def setup_options():
|
||||
default=False,
|
||||
help="Follow symbolic links [%(default)s]",
|
||||
)
|
||||
db.add_argument(
|
||||
"--print",
|
||||
action="store_true",
|
||||
dest="print",
|
||||
default=False,
|
||||
help="Print the whole database [%(default)s]",
|
||||
)
|
||||
db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")
|
||||
|
||||
du.add_argument(
|
||||
@@ -720,6 +790,14 @@ def setup_options():
|
||||
nargs="?",
|
||||
)
|
||||
|
||||
search.add_argument(
|
||||
"--broken",
|
||||
action="store_true",
|
||||
dest="broken",
|
||||
default=False,
|
||||
help="Return a list of broken files [%(default)s]",
|
||||
)
|
||||
|
||||
search.add_argument(
|
||||
"--dup",
|
||||
action="store_true",
|
||||
@@ -820,6 +898,8 @@ def main():
|
||||
if options.measure:
|
||||
il.base_add()
|
||||
il.measure()
|
||||
if options.print:
|
||||
il.db_print()
|
||||
if options.command == "du":
|
||||
il.disk_used()
|
||||
if options.command == "search":
|
||||
@@ -831,6 +911,8 @@ def main():
|
||||
il.nearestcolor()
|
||||
if options.similarity:
|
||||
il.similarity()
|
||||
if options.broken:
|
||||
il.broken()
|
||||
if options.command == "tag":
|
||||
il.tag_manage()
|
||||
print("")
|
||||
|
||||
@@ -38,7 +38,8 @@ class DB:
|
||||
height INTEGER,
|
||||
p_hash TEXT,
|
||||
sharpness NUMERIC,
|
||||
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
|
||||
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL,
|
||||
broken BOOLEAN
|
||||
)"""
|
||||
)
|
||||
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
|
||||
|
||||
@@ -26,6 +26,7 @@ Border[:, 9] = True
|
||||
class ImageMeasure:
|
||||
def __init__(self, filename):
|
||||
self.filename = filename
|
||||
self.broken = None
|
||||
self.hash = None
|
||||
self.time = None
|
||||
self.size = None
|
||||
@@ -53,6 +54,8 @@ class ImageMeasure:
|
||||
|
||||
def set_all(self):
|
||||
self.set_filename_absolute()
|
||||
if self.is_broken():
|
||||
raise ImageBrokenError()
|
||||
self.get_hash()
|
||||
self.get_time()
|
||||
self.get_size()
|
||||
@@ -64,6 +67,15 @@ class ImageMeasure:
|
||||
def set_filename_absolute(self):
    """Replace ``self.filename`` with its canonical path.

    Uses ``os.path.realpath``, so symbolic links are resolved and a
    relative name becomes absolute.
    """
    resolved = os.path.realpath(self.filename)
    self.filename = resolved
|
||||
|
||||
def is_broken(self):
    """Report whether the image file fails to open, probing at most once.

    On the first call ``read_image_size`` is attempted on
    ``self.filename``; any exception marks the image as broken. The
    verdict is stored in ``self.broken`` and returned directly by later
    calls without touching the file again.

    Returns:
        The value of ``self.broken`` (``True`` when the probe failed).
    """
    if self.broken is not None:
        # Already decided, either by an earlier probe or by another method.
        return self.broken
    try:
        read_image_size(self.filename)
    except Exception:
        # Any failure while probing counts as an unreadable image.
        self.broken = True
    else:
        self.broken = False
    return self.broken
|
||||
|
||||
def get_hash(self):
|
||||
"""Return hash of the file"""
|
||||
if self.hash is None:
|
||||
@@ -120,7 +132,14 @@ class ImageMeasure:
|
||||
def get_image(self, image_type="numpy"):
|
||||
|
||||
if self.image is None:
|
||||
try:
|
||||
self.image, self.image_type = read_image(self.filename)
|
||||
except Exception as e:
|
||||
print(self.filename, file=sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
self.broken = True
|
||||
raise ImageBrokenError()
|
||||
|
||||
if self.image_type == "numpy":
|
||||
if len(self.image.shape) > 2:
|
||||
# BGR -> RGB
|
||||
@@ -153,6 +172,7 @@ class ImageMeasure:
|
||||
4,
|
||||
)
|
||||
except Exception:
|
||||
self.broken = True
|
||||
self.sharpness = 0
|
||||
|
||||
return self.sharpness
|
||||
@@ -163,6 +183,7 @@ class ImageMeasure:
|
||||
return int(np.mean(im[Border]))
|
||||
|
||||
if self.colors["R"] is None:
|
||||
try:
|
||||
im = self.get_image("PIL").convert("RGB")
|
||||
th = im.copy()
|
||||
th.thumbnail((1, 1), resample=Image.BILINEAR)
|
||||
@@ -174,6 +195,11 @@ class ImageMeasure:
|
||||
self.colors["BR"] = get_border(im[:, :, 0])
|
||||
self.colors["BG"] = get_border(im[:, :, 1])
|
||||
self.colors["BB"] = get_border(im[:, :, 2])
|
||||
except Exception as e:
|
||||
print(self.filename, file=sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
self.broken = True
|
||||
return self.colors
|
||||
return self.colors
|
||||
|
||||
def similarity_difference(self, other):
|
||||
@@ -199,6 +225,11 @@ class ImageMeasure:
|
||||
return calculate_shape_difference(self.width, self.height, other.width, other.height)
|
||||
|
||||
|
||||
class ImageBrokenError(Exception):
    """Raised when an image file cannot be read.

    The message is kept on the ``msg`` attribute and is also forwarded to
    ``Exception`` so that ``str(err)``, ``err.args`` and tracebacks show
    it instead of an empty string.

    Args:
        msg: Human-readable description; defaults to the generic
            "can not read image" text.
    """

    def __init__(self, msg="Image Broken: Can not read image"):
        # Forwarding to the base class populates ``args``; without it the
        # exception prints as an empty message.
        super().__init__(msg)
        self.msg = msg
|
||||
|
||||
|
||||
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
|
||||
JPEG_EXTENSIONS = (".jpg", ".jpeg")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user