handle broken images
This commit is contained in:
@@ -32,7 +32,7 @@ PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate
|
|||||||
test: test-db test-du test-dup test-tag ## Test
|
test: test-db test-du test-dup test-tag ## Test
|
||||||
|
|
||||||
test-db:
|
test-db:
|
||||||
set -e
|
set -ex
|
||||||
. useve-runner
|
. useve-runner
|
||||||
useve imagelist2
|
useve imagelist2
|
||||||
echo =================================
|
echo =================================
|
||||||
@@ -58,6 +58,7 @@ test-db:
|
|||||||
image-list db -x imagelist2
|
image-list db -x imagelist2
|
||||||
eval ${PRINT_TABLE}
|
eval ${PRINT_TABLE}
|
||||||
rm folder1/black.png
|
rm folder1/black.png
|
||||||
|
dd if=folder1/wizard.jpg of=folder1/wizard.half.jpg count=1 bs=1024
|
||||||
image-list db -x imagelist2
|
image-list db -x imagelist2
|
||||||
eval ${PRINT_TABLE}
|
eval ${PRINT_TABLE}
|
||||||
mogrify -rotate 90 folder1/cyan.png
|
mogrify -rotate 90 folder1/cyan.png
|
||||||
@@ -102,6 +103,8 @@ test-dup:
|
|||||||
image-list search --similar 30
|
image-list search --similar 30
|
||||||
echo ========== Similar by file ======================
|
echo ========== Similar by file ======================
|
||||||
image-list search --similar folder1/wizard.jpg
|
image-list search --similar folder1/wizard.jpg
|
||||||
|
echo ========== Broken files ======================
|
||||||
|
image-list search --broken
|
||||||
|
|
||||||
test-tag:
|
test-tag:
|
||||||
set -e
|
set -e
|
||||||
|
|||||||
@@ -7,12 +7,11 @@ from datetime import datetime
|
|||||||
|
|
||||||
import tabulate
|
import tabulate
|
||||||
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
|
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
|
||||||
from imagelist2.image import ImageMeasure, is_image_extension
|
from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
__version__ = "0.0.6"
|
__version__ = "0.0.7"
|
||||||
SQLFILE = "image-list.sqlite"
|
SQLFILE = "image-list.sqlite"
|
||||||
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
|
|
||||||
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
BADDIRS = ["_tn", "_med", ".tn", ".med"]
|
||||||
MINSIZE = 0
|
MINSIZE = 0
|
||||||
|
|
||||||
@@ -110,7 +109,7 @@ class ImageList:
|
|||||||
SELECT list.hash, list.file
|
SELECT list.hash, list.file
|
||||||
FROM list
|
FROM list
|
||||||
LEFT JOIN data ON data.hash = list.hash
|
LEFT JOIN data ON data.hash = list.hash
|
||||||
WHERE data.hash IS NULL
|
WHERE data.hash IS NULL AND data.broken IS NULL
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
.fetchall()
|
.fetchall()
|
||||||
@@ -126,11 +125,25 @@ class ImageList:
|
|||||||
if filename == None:
|
if filename == None:
|
||||||
continue
|
continue
|
||||||
image = ImageMeasure(filename)
|
image = ImageMeasure(filename)
|
||||||
self.db_writer.execute(
|
if image.is_broken():
|
||||||
"""INSERT INTO data(hash,portrait,width,height,description)
|
self.db_writer.execute(
|
||||||
VALUES(?,?,?,?,?)""",
|
"""INSERT INTO data(hash,broken)
|
||||||
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
|
VALUES(?,?)""",
|
||||||
)
|
(row[0], True),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.db_writer.execute(
|
||||||
|
"""INSERT INTO data(hash,portrait,width,height,description,broken)
|
||||||
|
VALUES(?,?,?,?,?,?)""",
|
||||||
|
(
|
||||||
|
row[0],
|
||||||
|
image.get_portrait(),
|
||||||
|
image.get_width(),
|
||||||
|
image.get_height(),
|
||||||
|
image.get_description(),
|
||||||
|
False,
|
||||||
|
),
|
||||||
|
)
|
||||||
self.db_writer.commit()
|
self.db_writer.commit()
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -198,9 +211,14 @@ class ImageList:
|
|||||||
data.BB
|
data.BB
|
||||||
FROM data
|
FROM data
|
||||||
LEFT JOIN list ON data.hash = list.hash
|
LEFT JOIN list ON data.hash = list.hash
|
||||||
WHERE data.p_hash IS NULL
|
WHERE
|
||||||
OR data.sharpness IS NULL
|
(
|
||||||
OR data.R IS NULL
|
data.p_hash IS NULL
|
||||||
|
OR data.sharpness IS NULL
|
||||||
|
OR data.R IS NULL
|
||||||
|
)
|
||||||
|
AND
|
||||||
|
data.broken IS FALSE
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
.fetchall()
|
.fetchall()
|
||||||
@@ -214,22 +232,37 @@ class ImageList:
|
|||||||
if row[1] in duplicates:
|
if row[1] in duplicates:
|
||||||
continue
|
continue
|
||||||
duplicates.add(row[1])
|
duplicates.add(row[1])
|
||||||
image = ImageMeasure(filename)
|
try:
|
||||||
(
|
image = ImageMeasure(filename)
|
||||||
image.hash,
|
(
|
||||||
image.p_hash,
|
image.hash,
|
||||||
image.sharpness,
|
image.p_hash,
|
||||||
image.colors["R"],
|
image.sharpness,
|
||||||
image.colors["G"],
|
image.colors["R"],
|
||||||
image.colors["B"],
|
image.colors["G"],
|
||||||
image.colors["BR"],
|
image.colors["B"],
|
||||||
image.colors["BG"],
|
image.colors["BR"],
|
||||||
image.colors["BB"],
|
image.colors["BG"],
|
||||||
) = row[1:]
|
image.colors["BB"],
|
||||||
# Calculate if required
|
) = row[1:]
|
||||||
image.get_p_hash()
|
# Calculate if required
|
||||||
image.sharpness = image.get_sharpness()
|
image.get_p_hash()
|
||||||
image.colors.update(image.get_colors())
|
image.sharpness = image.get_sharpness()
|
||||||
|
image.colors.update(image.get_colors())
|
||||||
|
if image.broken:
|
||||||
|
print("image broke")
|
||||||
|
raise ImageBrokenError()
|
||||||
|
except ImageBrokenError:
|
||||||
|
self.db_writer.execute(
|
||||||
|
"""UPDATE data SET broken = ?
|
||||||
|
WHERE hash = ?
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
image.broken,
|
||||||
|
image.hash,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
self.db_writer.execute(
|
self.db_writer.execute(
|
||||||
"""UPDATE data SET
|
"""UPDATE data SET
|
||||||
@@ -290,6 +323,29 @@ class ImageList:
|
|||||||
table.append((entry[0], humanize_size(entry[0]), entry[1]))
|
table.append((entry[0], humanize_size(entry[0]), entry[1]))
|
||||||
table.print()
|
table.print()
|
||||||
|
|
||||||
|
def broken(self):
|
||||||
|
result = self.db.cursor().execute(
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
file FROM files
|
||||||
|
WHERE broken IS TRUE
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
print("#File")
|
||||||
|
for row in result:
|
||||||
|
print(row[0])
|
||||||
|
|
||||||
|
def db_print(self):
|
||||||
|
result = self.db.cursor().execute(
|
||||||
|
"""
|
||||||
|
SELECT * FROM files
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
table = Tabulate([c[0] for c in result.description])
|
||||||
|
for row in result:
|
||||||
|
table.append(row)
|
||||||
|
table.print()
|
||||||
|
|
||||||
def duplicates(self):
|
def duplicates(self):
|
||||||
result = self.db.cursor().execute(
|
result = self.db.cursor().execute(
|
||||||
"""
|
"""
|
||||||
@@ -338,7 +394,14 @@ class ImageList:
|
|||||||
result = self.db.cursor().execute(
|
result = self.db.cursor().execute(
|
||||||
"""
|
"""
|
||||||
WITH distances AS (
|
WITH distances AS (
|
||||||
SELECT hash, ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,BR,BG,BB FROM data ORDER BY distance LIMIT ?
|
SELECT
|
||||||
|
hash,
|
||||||
|
ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,
|
||||||
|
BR,BG,BB
|
||||||
|
FROM data
|
||||||
|
WHERE BR IS NOT NULL
|
||||||
|
ORDER BY distance
|
||||||
|
LIMIT ?
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
RELATIVE(list.file),
|
RELATIVE(list.file),
|
||||||
@@ -421,7 +484,7 @@ class ImageList:
|
|||||||
return self.db.cursor().execute(
|
return self.db.cursor().execute(
|
||||||
"""
|
"""
|
||||||
WITH
|
WITH
|
||||||
duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1)
|
duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1)
|
||||||
SELECT
|
SELECT
|
||||||
RELATIVE(files.file) AS file,
|
RELATIVE(files.file) AS file,
|
||||||
files.width,
|
files.width,
|
||||||
@@ -469,8 +532,8 @@ class ImageList:
|
|||||||
"""
|
"""
|
||||||
WITH disttab AS (
|
WITH disttab AS (
|
||||||
WITH
|
WITH
|
||||||
t1 AS ( SELECT * FROM files ),
|
t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ),
|
||||||
t2 AS ( SELECT * FROM files )
|
t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL )
|
||||||
SELECT
|
SELECT
|
||||||
RELATIVE(t1.file) AS file1,
|
RELATIVE(t1.file) AS file1,
|
||||||
t1.width AS width1,
|
t1.width AS width1,
|
||||||
@@ -700,6 +763,13 @@ def setup_options():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Follow symbolic links [%(default)s]",
|
help="Follow symbolic links [%(default)s]",
|
||||||
)
|
)
|
||||||
|
db.add_argument(
|
||||||
|
"--print",
|
||||||
|
action="store_true",
|
||||||
|
dest="print",
|
||||||
|
default=False,
|
||||||
|
help="Print the whole database [%(default)s]",
|
||||||
|
)
|
||||||
db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")
|
db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.")
|
||||||
|
|
||||||
du.add_argument(
|
du.add_argument(
|
||||||
@@ -720,6 +790,14 @@ def setup_options():
|
|||||||
nargs="?",
|
nargs="?",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
search.add_argument(
|
||||||
|
"--broken",
|
||||||
|
action="store_true",
|
||||||
|
dest="broken",
|
||||||
|
default=False,
|
||||||
|
help="Return a list of broken files [%(default)s]",
|
||||||
|
)
|
||||||
|
|
||||||
search.add_argument(
|
search.add_argument(
|
||||||
"--dup",
|
"--dup",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -820,6 +898,8 @@ def main():
|
|||||||
if options.measure:
|
if options.measure:
|
||||||
il.base_add()
|
il.base_add()
|
||||||
il.measure()
|
il.measure()
|
||||||
|
if options.print:
|
||||||
|
il.db_print()
|
||||||
if options.command == "du":
|
if options.command == "du":
|
||||||
il.disk_used()
|
il.disk_used()
|
||||||
if options.command == "search":
|
if options.command == "search":
|
||||||
@@ -831,6 +911,8 @@ def main():
|
|||||||
il.nearestcolor()
|
il.nearestcolor()
|
||||||
if options.similarity:
|
if options.similarity:
|
||||||
il.similarity()
|
il.similarity()
|
||||||
|
if options.broken:
|
||||||
|
il.broken()
|
||||||
if options.command == "tag":
|
if options.command == "tag":
|
||||||
il.tag_manage()
|
il.tag_manage()
|
||||||
print("")
|
print("")
|
||||||
|
|||||||
@@ -38,7 +38,8 @@ class DB:
|
|||||||
height INTEGER,
|
height INTEGER,
|
||||||
p_hash TEXT,
|
p_hash TEXT,
|
||||||
sharpness NUMERIC,
|
sharpness NUMERIC,
|
||||||
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
|
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL,
|
||||||
|
broken BOOLEAN
|
||||||
)"""
|
)"""
|
||||||
)
|
)
|
||||||
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
|
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ Border[:, 9] = True
|
|||||||
class ImageMeasure:
|
class ImageMeasure:
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
self.filename = filename
|
self.filename = filename
|
||||||
|
self.broken = None
|
||||||
self.hash = None
|
self.hash = None
|
||||||
self.time = None
|
self.time = None
|
||||||
self.size = None
|
self.size = None
|
||||||
@@ -53,6 +54,8 @@ class ImageMeasure:
|
|||||||
|
|
||||||
def set_all(self):
|
def set_all(self):
|
||||||
self.set_filename_absolute()
|
self.set_filename_absolute()
|
||||||
|
if self.is_broken():
|
||||||
|
raise ImageBrokenError()
|
||||||
self.get_hash()
|
self.get_hash()
|
||||||
self.get_time()
|
self.get_time()
|
||||||
self.get_size()
|
self.get_size()
|
||||||
@@ -64,6 +67,15 @@ class ImageMeasure:
|
|||||||
def set_filename_absolute(self):
|
def set_filename_absolute(self):
|
||||||
self.filename = os.path.realpath(self.filename)
|
self.filename = os.path.realpath(self.filename)
|
||||||
|
|
||||||
|
def is_broken(self):
|
||||||
|
if self.broken is None:
|
||||||
|
try:
|
||||||
|
read_image_size(self.filename)
|
||||||
|
self.broken = False
|
||||||
|
except Exception:
|
||||||
|
self.broken = True
|
||||||
|
return self.broken
|
||||||
|
|
||||||
def get_hash(self):
|
def get_hash(self):
|
||||||
"""Return hash of the file"""
|
"""Return hash of the file"""
|
||||||
if self.hash is None:
|
if self.hash is None:
|
||||||
@@ -120,7 +132,14 @@ class ImageMeasure:
|
|||||||
def get_image(self, image_type="numpy"):
|
def get_image(self, image_type="numpy"):
|
||||||
|
|
||||||
if self.image is None:
|
if self.image is None:
|
||||||
self.image, self.image_type = read_image(self.filename)
|
try:
|
||||||
|
self.image, self.image_type = read_image(self.filename)
|
||||||
|
except Exception as e:
|
||||||
|
print(self.filename, file=sys.stderr)
|
||||||
|
print(e, file=sys.stderr)
|
||||||
|
self.broken = True
|
||||||
|
raise ImageBrokenError()
|
||||||
|
|
||||||
if self.image_type == "numpy":
|
if self.image_type == "numpy":
|
||||||
if len(self.image.shape) > 2:
|
if len(self.image.shape) > 2:
|
||||||
# BGR -> RGB
|
# BGR -> RGB
|
||||||
@@ -153,6 +172,7 @@ class ImageMeasure:
|
|||||||
4,
|
4,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
self.broken = True
|
||||||
self.sharpness = 0
|
self.sharpness = 0
|
||||||
|
|
||||||
return self.sharpness
|
return self.sharpness
|
||||||
@@ -163,17 +183,23 @@ class ImageMeasure:
|
|||||||
return int(np.mean(im[Border]))
|
return int(np.mean(im[Border]))
|
||||||
|
|
||||||
if self.colors["R"] is None:
|
if self.colors["R"] is None:
|
||||||
im = self.get_image("PIL").convert("RGB")
|
try:
|
||||||
th = im.copy()
|
im = self.get_image("PIL").convert("RGB")
|
||||||
th.thumbnail((1, 1), resample=Image.BILINEAR)
|
th = im.copy()
|
||||||
th = np.array(th)
|
th.thumbnail((1, 1), resample=Image.BILINEAR)
|
||||||
im = np.array(im.resize((10, 10), resample=Image.BILINEAR))
|
th = np.array(th)
|
||||||
self.colors["R"] = int(th[0][0][0])
|
im = np.array(im.resize((10, 10), resample=Image.BILINEAR))
|
||||||
self.colors["G"] = int(th[0][0][1])
|
self.colors["R"] = int(th[0][0][0])
|
||||||
self.colors["B"] = int(th[0][0][2])
|
self.colors["G"] = int(th[0][0][1])
|
||||||
self.colors["BR"] = get_border(im[:, :, 0])
|
self.colors["B"] = int(th[0][0][2])
|
||||||
self.colors["BG"] = get_border(im[:, :, 1])
|
self.colors["BR"] = get_border(im[:, :, 0])
|
||||||
self.colors["BB"] = get_border(im[:, :, 2])
|
self.colors["BG"] = get_border(im[:, :, 1])
|
||||||
|
self.colors["BB"] = get_border(im[:, :, 2])
|
||||||
|
except Exception as e:
|
||||||
|
print(self.filename, file=sys.stderr)
|
||||||
|
print(e, file=sys.stderr)
|
||||||
|
self.broken = True
|
||||||
|
return self.colors
|
||||||
return self.colors
|
return self.colors
|
||||||
|
|
||||||
def similarity_difference(self, other):
|
def similarity_difference(self, other):
|
||||||
@@ -199,6 +225,11 @@ class ImageMeasure:
|
|||||||
return calculate_shape_difference(self.width, self.height, other.width, other.height)
|
return calculate_shape_difference(self.width, self.height, other.width, other.height)
|
||||||
|
|
||||||
|
|
||||||
|
class ImageBrokenError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.msg = "Image Broken: Can not read image"
|
||||||
|
|
||||||
|
|
||||||
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
|
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
|
||||||
JPEG_EXTENSIONS = (".jpg", ".jpeg")
|
JPEG_EXTENSIONS = (".jpg", ".jpeg")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user