diff --git a/py-packages/imagelist2/Makefile b/py-packages/imagelist2/Makefile index b866bf9..0611533 100644 --- a/py-packages/imagelist2/Makefile +++ b/py-packages/imagelist2/Makefile @@ -36,6 +36,7 @@ test-db: . useve-runner useve imagelist2 echo ================================= + rm -f image-list.sqlite mkdir -p folder1/folder2 folder1/.hidden folder1/_med convert -size 600x300 xc:red red.jpg cp red.jpg folder1/.hidden/ @@ -97,7 +98,7 @@ test-dup: image-list search --color 255,0,0,10 echo ========== nearest from file ====================== image-list search --color red.jpg,3 - echo ========== Similar by dhash ====================== + echo ========== Similar by phash ====================== image-list search --similar 30 echo ========== Similar by file ====================== image-list search --similar folder1/wizard.jpg diff --git a/py-packages/imagelist2/imagelist2/__init__.py b/py-packages/imagelist2/imagelist2/__init__.py index ccce638..c02b038 100644 --- a/py-packages/imagelist2/imagelist2/__init__.py +++ b/py-packages/imagelist2/imagelist2/__init__.py @@ -6,11 +6,11 @@ from argparse import ArgumentParser from datetime import datetime import tabulate -from imagelist2.db import DB, sqlite_sqrt, sqlite_square +from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square from imagelist2.image import ImageMeasure, is_image_extension from tqdm import tqdm -__version__ = "0.0.5" +__version__ = "0.0.6" SQLFILE = "image-list.sqlite" # IMGMATCH = re.compile("|".join([".*\." 
+ x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I) BADDIRS = ["_tn", "_med", ".tn", ".med"] @@ -22,6 +22,7 @@ class ImageList: self.options = opts self.db = DB(self.options.sqlfile) + self.db_writer = DBCachedWriter(self.db) self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile)) self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H") @@ -69,7 +70,7 @@ class ImageList: if not self.options.no_add: image_count += 1 self.add_single(image, change=False) - self.db.conn.commit() + self.db_writer.commit() if image_count > 0: print(f"Added/changed {image_count} images") return @@ -84,7 +85,7 @@ class ImageList: error_msg = f"error changing image: {image.filename}" try: - self.db.cursor().execute( + self.db_writer.execute( query, ( image.get_hash(), @@ -110,13 +111,13 @@ class ImageList: FROM list LEFT JOIN data ON data.hash = list.hash WHERE data.hash IS NULL - """ + """ ) .fetchall() ) if len(missing_base) == 0: return - cursor = self.db.cursor() + for i, row in enumerate(tqdm(missing_base, desc="Base info", delay=1)): if row[0] in seen_hash: continue @@ -125,14 +126,12 @@ class ImageList: if filename == None: continue image = ImageMeasure(filename) - cursor.execute( + self.db_writer.execute( """INSERT INTO data(hash,portrait,width,height,description) VALUES(?,?,?,?,?)""", (row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()), ) - if i % 50 == 0: - self.db.conn.commit() - self.db.conn.commit() + self.db_writer.commit() return def delete_missing(self): @@ -189,8 +188,6 @@ class ImageList: SELECT list.file, data.hash, - data.fingerprint, - data.w_hash, data.p_hash, data.sharpness, data.R, @@ -201,7 +198,7 @@ class ImageList: data.BB FROM data LEFT JOIN list ON data.hash = list.hash - WHERE data.fingerprint IS NULL + WHERE data.p_hash IS NULL OR data.sharpness IS NULL OR data.R IS NULL """ @@ -210,7 +207,6 @@ class ImageList: ) if len(missing_measurements) == 0: return - cursor = 
self.db.cursor() for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)): filename = row[0] if filename == None: @@ -219,26 +215,24 @@ class ImageList: continue duplicates.add(row[1]) image = ImageMeasure(filename) - image.hash = row[1] - image.fingerprint = row[2] - image.w_hash = row[3] - image.p_hash = row[4] - image.sharpness = row[5] - image.colors["R"] = row[6] - image.colors["G"] = row[7] - image.colors["B"] = row[8] - image.colors["BR"] = row[9] - image.colors["BG"] = row[10] - image.colors["BB"] = row[11] + ( + image.hash, + image.p_hash, + image.sharpness, + image.colors["R"], + image.colors["G"], + image.colors["B"], + image.colors["BR"], + image.colors["BG"], + image.colors["BB"], + ) = row[1:] # Calculate if required - image.get_fingerprint() + image.get_p_hash() image.sharpness = image.get_sharpness() image.colors.update(image.get_colors()) - cursor.execute( + self.db_writer.execute( """UPDATE data SET - fingerprint = ?, - w_hash = ?, p_hash = ?, sharpness = ?, R = ?, @@ -250,8 +244,6 @@ class ImageList: WHERE hash = ? 
""", ( - image.fingerprint, - image.w_hash, image.p_hash, image.sharpness, image.colors["R"], @@ -264,9 +256,7 @@ class ImageList: ), ) - if i % 50 == 0: - self.db.conn.commit() - self.db.conn.commit() + self.db_writer.commit() return def disk_used(self): @@ -380,20 +370,6 @@ class ImageList: def similarity(self): - def set_image(row): - image = ImageMeasure(None) - image.hash = row[0] - image.fingerprint = row[1] - image.w_hash = row[2] - image.p_hash = row[3] - image.sharpness = row[4] - image.width = row[5] - image.height = row[6] - image.colors["R"] = row[7] - image.colors["G"] = row[8] - image.colors["B"] = row[9] - return image - def print_visually_similar(file, thr): cmp_image = ImageMeasure(file) cmp_image.set_all() @@ -654,7 +630,7 @@ def humanize_date(date): def setup_options(): - parser = ArgumentParser(description="Maintains a list of images sqlite file") + parser = ArgumentParser(description=f"Maintains a list of images sqlite file (v{__version__})") parser.add_argument( "-f", action="store", diff --git a/py-packages/imagelist2/imagelist2/db.py b/py-packages/imagelist2/imagelist2/db.py index 873338c..f234114 100644 --- a/py-packages/imagelist2/imagelist2/db.py +++ b/py-packages/imagelist2/imagelist2/db.py @@ -1,6 +1,7 @@ import os import sqlite3 import sys +import time from math import sqrt as sqlite_sqrt import sqlite_vec @@ -24,9 +25,10 @@ class DB: if os.path.exists(self.sqlfile): return - conn = sqlite3.connect(self.sqlfile) + conn = sqlite3.connect(self.sqlfile, timeout=30) db = conn.cursor() conn.text_factory = str + db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)") db.execute( """CREATE TABLE data ( hash TEXT PRIMARY KEY, @@ -34,14 +36,11 @@ class DB: portrait BOOLEAN, width INTEGER, height INTEGER, - fingerprint TEXT, p_hash TEXT, - w_hash TEXT, sharpness NUMERIC, R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL )""" ) - db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size 
class DBCachedWriter:
    """Buffer write statements and flush them to the database in batches.

    Statements passed to :meth:`execute` are queued in memory and written
    inside a single transaction when the queue grows past ``writemax``
    entries, when more than ``writeout`` seconds have passed since the last
    flush attempt, or when :meth:`commit` / :meth:`close` is called.

    The wrapped object must provide ``cursor()`` and a ``conn`` attribute
    with ``commit()`` and ``rollback()``.

    Attributes:
        cache: queued statements, each ``{"query": ..., "values": ...}``.
        cache_time: ``time.time()`` of the last flush attempt.
        writeout: seconds after which :meth:`execute` triggers a flush.
        writemax: queue length above which :meth:`execute` triggers a flush.
        max_retries: consecutive failed flushes tolerated before raising.
        try_count: consecutive failed flushes so far.
    """

    def __init__(self, DB):
        """DB = instance of the DB object"""
        self.db = DB
        self.cache = []
        self.cache_time = time.time()
        self.writeout = 30
        self.writemax = 499
        self.max_retries = 5
        self.try_count = 0

    def __del__(self):
        self.close()

    def commit(self):
        """Flush every queued statement, retrying until done or exhausted.

        Raises:
            sqlite3.OperationalError: when more than ``max_retries``
                consecutive flush attempts have failed.
        """
        # A single attempt is not enough here: callers treat commit() as a
        # durability point and there is no later writeout to fall back on.
        # The loop ends because write_cache() either empties the queue or
        # counts a failure and eventually raises.
        while self.cache:
            self.write_cache()

    def close(self):
        """Flush whatever is still queued (same guarantees as commit())."""
        self.commit()

    def execute(self, query, values):
        """Queue one statement; flush if the queue is too old or too long.

        Args:
            query: SQL text with placeholders.
            values: parameters bound to ``query``.
        """
        self.cache.append({"query": query, "values": values})

        if time.time() > self.cache_time + self.writeout or len(self.cache) > self.writemax:
            self.write_cache()

    def write_cache(self):
        """Try once to write all queued statements in one transaction.

        On ``sqlite3.OperationalError`` the transaction is rolled back and
        the queue is kept for a later attempt; the error is re-raised once
        more than ``max_retries`` consecutive attempts have failed.
        """
        if not self.cache:
            return

        current = None  # statement being executed, for the failure report
        try:
            cursor = self.db.cursor()
            for current in self.cache:
                cursor.execute(current["query"], current["values"])
            self.db.conn.commit()
        except sqlite3.OperationalError:
            # Undo the statements that did run: the whole queue is replayed
            # on the next attempt, so leaving them pending would apply them
            # twice (or trip a uniqueness constraint).
            self._rollback()
            print("Writing failed, waiting for next writeout...", file=sys.stderr)
            self.cache_time = time.time()
            self.try_count += 1
            if self.try_count > self.max_retries:
                # `current` is still None when cursor() itself failed.
                if current is not None:
                    print(f"Failed\nQuery: {current['query']}\nValues: {current['values']}", file=sys.stderr)
                raise
            return

        self.try_count = 0
        self.cache = []
        self.cache_time = time.time()

    def _rollback(self):
        """Best-effort rollback of a failed flush; report instead of raising."""
        try:
            self.db.conn.rollback()
        except sqlite3.Error as exc:
            print(f"Rollback failed: {exc}", file=sys.stderr)
+33,8 @@ class ImageMeasure: self.width = None self.height = None self.portrait = None - self.fingerprint = None - self.w_hash = None self.p_hash = None + self.p_hash16 = None self.sharpness = None self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")} self.similarity = {"distance": 0, "color": 0, "aspect": 0} @@ -58,7 +57,7 @@ class ImageMeasure: self.get_time() self.get_size() self.get_shape() - self.get_fingerprint() + self.get_p_hash() self.get_sharpness() self.get_colors() @@ -134,20 +133,6 @@ class ImageMeasure: if image_type == "PIL": return Image.fromarray(self.image) - def get_fingerprint(self): - - if self.fingerprint is None: - self.get_w_hash() - self.get_p_hash() - self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8)) - - return self.fingerprint - - def get_w_hash(self): - if self.w_hash is None: - self.w_hash = str(imagehash.whash(self.get_image("PIL"), hash_size=8)) - return self.w_hash - def get_p_hash(self): if self.p_hash is None: self.p_hash = str(imagehash.phash(self.get_image("PIL"), hash_size=8))