From 4fe95c765fea69407c2beeee88bdd8b16d913446 Mon Sep 17 00:00:00 2001 From: q Date: Thu, 12 Jun 2025 19:25:43 +0300 Subject: [PATCH] read images at add, to figure out broken images --- py-packages/imagelist2/imagelist2/__init__.py | 27 +++++++++---------- py-packages/imagelist2/imagelist2/db.py | 19 ++++--------- py-packages/imagelist2/imagelist2/image.py | 9 ++++++- 3 files changed, 25 insertions(+), 30 deletions(-) diff --git a/py-packages/imagelist2/imagelist2/__init__.py b/py-packages/imagelist2/imagelist2/__init__.py index c5e37d8..13abb7a 100644 --- a/py-packages/imagelist2/imagelist2/__init__.py +++ b/py-packages/imagelist2/imagelist2/__init__.py @@ -10,7 +10,7 @@ from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square from imagelist2.image import ImageBrokenError, ImageMeasure, is_image_extension from tqdm import tqdm -__version__ = "0.0.9" +__version__ = "0.0.10" SQLFILE = "image-list.sqlite" BADDIRS = ["_tn", "_med", ".tn", ".med"] MINSIZE = 0 @@ -199,10 +199,9 @@ class ImageList: def measure(self): duplicates = set() - missing_measurements = ( - self.db.cursor() - .execute( - """ + cursor = self.db.cursor() + missing_measurements = cursor.execute( + """ SELECT list.file, data.hash, @@ -227,11 +226,8 @@ class ImageList: data.broken IS FALSE OR data.broken IS NULL ) """ - ) - .fetchall() ) - if len(missing_measurements) == 0: - return + for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)): filename = row[0] if filename == None: @@ -261,9 +257,9 @@ class ImageList: raise ImageBrokenError() except ImageBrokenError: self.db_writer.execute( - """UPDATE data SET broken = ? - WHERE hash = ? - """, + """ UPDATE data SET broken = ? + WHERE hash = ? + """, ( image.broken, image.hash, @@ -272,7 +268,8 @@ class ImageList: continue self.db_writer.execute( - """UPDATE data SET + """ + UPDATE data SET p_hash = ?, sharpness = ?, R = ?, @@ -282,8 +279,8 @@ class ImageList: BG = ?, BB = ?, broken = ? - WHERE hash = ? - """, + WHERE hash = ? + """, ( image.p_hash, image.sharpness, diff --git a/py-packages/imagelist2/imagelist2/db.py b/py-packages/imagelist2/imagelist2/db.py index 9d6b813..97df092 100644 --- a/py-packages/imagelist2/imagelist2/db.py +++ b/py-packages/imagelist2/imagelist2/db.py @@ -96,21 +96,12 @@ class DB: cursor.execute("UPDATE data SET broken = ?;", (False,)) db.commit() - if config_version == "0.0.7": # => 0.0.8 - with sqlite3.connect(self.sqlfile, timeout=30) as db: - cursor = db.cursor() - config_version = "0.0.8" - cursor.execute("UPDATE config SET value = ? WHERE key = ?;", (config_version, "version")) - db.commit() + with sqlite3.connect(self.sqlfile, timeout=30) as db: + cursor = db.cursor() + cursor.execute("UPDATE config SET value = ? WHERE key = ?;", (running_version, "version")) + db.commit() - if config_version == "0.0.8": # => 0.0.9 - with sqlite3.connect(self.sqlfile, timeout=30) as db: - cursor = db.cursor() - config_version = "0.0.9" - cursor.execute("UPDATE config SET value = ? WHERE key = ?;", (config_version, "version")) - db.commit() - - print(f"Migrated to {config_version}. Restart", file=sys.stderr) + print(f"Migrated to {running_version}. Restart", file=sys.stderr) def connect(self): conn = sqlite3.connect(self.sqlfile, timeout=30) diff --git a/py-packages/imagelist2/imagelist2/image.py b/py-packages/imagelist2/imagelist2/image.py index 5cd9800..7b27e4b 100644 --- a/py-packages/imagelist2/imagelist2/image.py +++ b/py-packages/imagelist2/imagelist2/image.py @@ -68,12 +68,19 @@ class ImageMeasure: self.filename = os.path.realpath(self.filename) def is_broken(self): + """Note: Size reading does not necessarily mean file is okay for reading""" if self.broken is None: + self.broken = False try: read_image_size(self.filename) - self.broken = False except Exception: self.broken = True + if not self.broken: + try: + self.get_image(image_type="numpy") + self.get_image(image_type="PIL") + except Exception: + self.broken = True return self.broken def get_hash(self):