diff --git a/py-packages/imagelist2/Makefile b/py-packages/imagelist2/Makefile index 0611533..911953a 100644 --- a/py-packages/imagelist2/Makefile +++ b/py-packages/imagelist2/Makefile @@ -32,7 +32,7 @@ PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate test: test-db test-du test-dup test-tag ## Test test-db: - set -e + set -ex . useve-runner useve imagelist2 echo ================================= @@ -58,6 +58,7 @@ test-db: image-list db -x imagelist2 eval ${PRINT_TABLE} rm folder1/black.png + dd if=folder1/wizard.jpg of=folder1/wizard.half.jpg count=1 bs=1024 image-list db -x imagelist2 eval ${PRINT_TABLE} mogrify -rotate 90 folder1/cyan.png @@ -102,6 +103,8 @@ test-dup: image-list search --similar 30 echo ========== Similar by file ====================== image-list search --similar folder1/wizard.jpg + echo ========== Broken files ====================== + image-list search --broken test-tag: set -e diff --git a/py-packages/imagelist2/imagelist2/__init__.py b/py-packages/imagelist2/imagelist2/__init__.py index c02b038..03e107f 100644 --- a/py-packages/imagelist2/imagelist2/__init__.py +++ b/py-packages/imagelist2/imagelist2/__init__.py @@ -7,12 +7,11 @@ from datetime import datetime import tabulate from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square -from imagelist2.image import ImageMeasure, is_image_extension +from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension from tqdm import tqdm -__version__ = "0.0.6" +__version__ = "0.0.7" SQLFILE = "image-list.sqlite" -# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I) BADDIRS = ["_tn", "_med", ".tn", ".med"] MINSIZE = 0 @@ -110,7 +109,7 @@ class ImageList: SELECT list.hash, list.file FROM list LEFT JOIN data ON data.hash = list.hash - WHERE data.hash IS NULL + WHERE data.hash IS NULL AND data.broken IS NULL """ ) .fetchall() @@ -126,11 +125,25 @@ class ImageList: if filename == None: continue image = ImageMeasure(filename) - self.db_writer.execute( - """INSERT INTO data(hash,portrait,width,height,description) - VALUES(?,?,?,?,?)""", - (row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()), - ) + if image.is_broken(): + self.db_writer.execute( + """INSERT INTO data(hash,broken) + VALUES(?,?)""", + (row[0], True), + ) + else: + self.db_writer.execute( + """INSERT INTO data(hash,portrait,width,height,description,broken) + VALUES(?,?,?,?,?,?)""", + ( + row[0], + image.get_portrait(), + image.get_width(), + image.get_height(), + image.get_description(), + False, + ), + ) self.db_writer.commit() return @@ -198,9 +211,14 @@ class ImageList: data.BB FROM data LEFT JOIN list ON data.hash = list.hash - WHERE data.p_hash IS NULL - OR data.sharpness IS NULL - OR data.R IS NULL + WHERE + ( + data.p_hash IS NULL + OR data.sharpness IS NULL + OR data.R IS NULL + ) + AND + data.broken IS FALSE """ ) .fetchall() @@ -214,22 +232,37 @@ class ImageList: if row[1] in duplicates: continue duplicates.add(row[1]) - image = ImageMeasure(filename) - ( - image.hash, - image.p_hash, - image.sharpness, - image.colors["R"], - image.colors["G"], - image.colors["B"], - image.colors["BR"], - image.colors["BG"], - image.colors["BB"], - ) = row[1:] - # Calculate if required - image.get_p_hash() - image.sharpness = image.get_sharpness() - image.colors.update(image.get_colors()) + try: + image = ImageMeasure(filename) + ( + image.hash, + image.p_hash, + image.sharpness, + image.colors["R"], + image.colors["G"], + image.colors["B"], + image.colors["BR"], + image.colors["BG"], + image.colors["BB"], + ) = row[1:] + # Calculate if required + image.get_p_hash() + image.sharpness = image.get_sharpness() + image.colors.update(image.get_colors()) + if image.broken: + print("image broke") + raise ImageBrokenError() + except ImageBrokenError: + self.db_writer.execute( + """UPDATE data SET broken = ? + WHERE hash = ? + """, + ( + image.broken, + image.hash, + ), + ) + continue self.db_writer.execute( """UPDATE data SET @@ -290,6 +323,29 @@ class ImageList: table.append((entry[0], humanize_size(entry[0]), entry[1])) table.print() + def broken(self): + result = self.db.cursor().execute( + """ + SELECT + file FROM files + WHERE broken IS TRUE + """, + ) + print("#File") + for row in result: + print(row[0]) + + def db_print(self): + result = self.db.cursor().execute( + """ + SELECT * FROM files + """, + ) + table = Tabulate([c[0] for c in result.description]) + for row in result: + table.append(row) + table.print() + def duplicates(self): result = self.db.cursor().execute( """ @@ -338,7 +394,14 @@ class ImageList: result = self.db.cursor().execute( """ WITH distances AS ( - SELECT hash, ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance,BR,BG,BB FROM data ORDER BY distance LIMIT ? + SELECT + hash, + ROUND(SQRT(SQUARE(BR-?)+SQUARE(BG-?)+SQUARE(BB-?)),1) as distance, + BR,BG,BB + FROM data + WHERE BR IS NOT NULL + ORDER BY distance + LIMIT ? ) SELECT RELATIVE(list.file), @@ -421,7 +484,7 @@ class ImageList: return self.db.cursor().execute( """ WITH - duplicates AS (SELECT p_hash FROM data GROUP BY p_hash HAVING count(p_hash) > 1) + duplicates AS (SELECT p_hash FROM data WHERE p_hash IS NOT NULL GROUP BY p_hash HAVING count(p_hash) > 1) SELECT RELATIVE(files.file) AS file, files.width, @@ -469,8 +532,8 @@ class ImageList: """ WITH disttab AS ( WITH - t1 AS ( SELECT * FROM files ), - t2 AS ( SELECT * FROM files ) + t1 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ), + t2 AS ( SELECT * FROM files WHERE p_hash IS NOT NULL ) SELECT RELATIVE(t1.file) AS file1, t1.width AS width1, @@ -700,6 +763,13 @@ def setup_options(): default=False, help="Follow symbolic links [%(default)s]", ) + db.add_argument( + "--print", + action="store_true", + dest="print", + default=False, + help="Print the whole database [%(default)s]", + ) db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.") du.add_argument( @@ -720,6 +790,14 @@ def setup_options(): nargs="?", ) + search.add_argument( + "--broken", + action="store_true", + dest="broken", + default=False, + help="Return a list of broken files [%(default)s]", + ) + search.add_argument( "--dup", action="store_true", @@ -820,6 +898,8 @@ def main(): if options.measure: il.base_add() il.measure() + if options.print: + il.db_print() if options.command == "du": il.disk_used() if options.command == "search": @@ -831,6 +911,8 @@ def main(): il.nearestcolor() if options.similarity: il.similarity() + if options.broken: + il.broken() if options.command == "tag": il.tag_manage() print("") diff --git a/py-packages/imagelist2/imagelist2/db.py b/py-packages/imagelist2/imagelist2/db.py index f234114..bc25963 100644 --- a/py-packages/imagelist2/imagelist2/db.py +++ b/py-packages/imagelist2/imagelist2/db.py @@ -38,7 +38,8 @@ class DB: height INTEGER, p_hash TEXT, sharpness NUMERIC, - R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL + R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL, + broken BOOLEAN )""" ) db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)") diff --git a/py-packages/imagelist2/imagelist2/image.py b/py-packages/imagelist2/imagelist2/image.py index c0aa2de..fdb74ab 100644 --- a/py-packages/imagelist2/imagelist2/image.py +++ b/py-packages/imagelist2/imagelist2/image.py @@ -26,6 +26,7 @@ Border[:, 9] = True class ImageMeasure: def __init__(self, filename): self.filename = filename + self.broken = None self.hash = None self.time = None self.size = None @@ -53,6 +54,8 @@ class ImageMeasure: def set_all(self): self.set_filename_absolute() + if self.is_broken(): + raise ImageBrokenError() self.get_hash() self.get_time() self.get_size() @@ -64,6 +67,15 @@ class ImageMeasure: def set_filename_absolute(self): self.filename = os.path.realpath(self.filename) + def is_broken(self): + if self.broken is None: + try: + read_image_size(self.filename) + self.broken = False + except Exception: + self.broken = True + return self.broken + def get_hash(self): """Return hash of the file""" if self.hash is None: @@ -120,7 +132,14 @@ class ImageMeasure: def get_image(self, image_type="numpy"): if self.image is None: - self.image, self.image_type = read_image(self.filename) + try: + self.image, self.image_type = read_image(self.filename) + except Exception as e: + print(self.filename, file=sys.stderr) + print(e, file=sys.stderr) + self.broken = True + raise ImageBrokenError() + if self.image_type == "numpy": if len(self.image.shape) > 2: # BGR -> RGB @@ -153,6 +172,7 @@ class ImageMeasure: 4, ) except Exception: + self.broken = True self.sharpness = 0 return self.sharpness @@ -163,17 +183,23 @@ class ImageMeasure: return int(np.mean(im[Border])) if self.colors["R"] is None: - im = self.get_image("PIL").convert("RGB") - th = im.copy() - th.thumbnail((1, 1), resample=Image.BILINEAR) - th = np.array(th) - im = np.array(im.resize((10, 10), resample=Image.BILINEAR)) - self.colors["R"] = int(th[0][0][0]) - self.colors["G"] = int(th[0][0][1]) - self.colors["B"] = int(th[0][0][2]) - self.colors["BR"] = get_border(im[:, :, 0]) - self.colors["BG"] = get_border(im[:, :, 1]) - self.colors["BB"] = get_border(im[:, :, 2]) + try: + im = self.get_image("PIL").convert("RGB") + th = im.copy() + th.thumbnail((1, 1), resample=Image.BILINEAR) + th = np.array(th) + im = np.array(im.resize((10, 10), resample=Image.BILINEAR)) + self.colors["R"] = int(th[0][0][0]) + self.colors["G"] = int(th[0][0][1]) + self.colors["B"] = int(th[0][0][2]) + self.colors["BR"] = get_border(im[:, :, 0]) + self.colors["BG"] = get_border(im[:, :, 1]) + self.colors["BB"] = get_border(im[:, :, 2]) + except Exception as e: + print(self.filename, file=sys.stderr) + print(e, file=sys.stderr) + self.broken = True + return self.colors return self.colors def similarity_difference(self, other): @@ -199,6 +225,11 @@ class ImageMeasure: return calculate_shape_difference(self.width, self.height, other.width, other.height) +class ImageBrokenError(Exception): + def __init__(self): + self.msg = "Image Broken: Can not read image" + + EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff") JPEG_EXTENSIONS = (".jpg", ".jpeg")