non-locking imagelist

This commit is contained in:
Q
2025-06-02 17:34:34 +03:00
parent df02a9b1bf
commit 4cf6b754b1
4 changed files with 85 additions and 72 deletions

View File

@@ -36,6 +36,7 @@ test-db:
. useve-runner
useve imagelist2
echo =================================
rm -f image-list.sqlite
mkdir -p folder1/folder2 folder1/.hidden folder1/_med
convert -size 600x300 xc:red red.jpg
cp red.jpg folder1/.hidden/
@@ -97,7 +98,7 @@ test-dup:
image-list search --color 255,0,0,10
echo ========== nearest from file ======================
image-list search --color red.jpg,3
echo ========== Similar by dhash ======================
echo ========== Similar by phash ======================
image-list search --similar 30
echo ========== Similar by file ======================
image-list search --similar folder1/wizard.jpg

View File

@@ -6,11 +6,11 @@ from argparse import ArgumentParser
from datetime import datetime
import tabulate
from imagelist2.db import DB, sqlite_sqrt, sqlite_square
from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
from imagelist2.image import ImageMeasure, is_image_extension
from tqdm import tqdm
__version__ = "0.0.5"
__version__ = "0.0.6"
SQLFILE = "image-list.sqlite"
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
BADDIRS = ["_tn", "_med", ".tn", ".med"]
@@ -22,6 +22,7 @@ class ImageList:
self.options = opts
self.db = DB(self.options.sqlfile)
self.db_writer = DBCachedWriter(self.db)
self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile))
self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H")
@@ -69,7 +70,7 @@ class ImageList:
if not self.options.no_add:
image_count += 1
self.add_single(image, change=False)
self.db.conn.commit()
self.db_writer.commit()
if image_count > 0:
print(f"Added/changed {image_count} images")
return
@@ -84,7 +85,7 @@ class ImageList:
error_msg = f"error changing image: {image.filename}"
try:
self.db.cursor().execute(
self.db_writer.execute(
query,
(
image.get_hash(),
@@ -116,7 +117,7 @@ class ImageList:
)
if len(missing_base) == 0:
return
cursor = self.db.cursor()
for i, row in enumerate(tqdm(missing_base, desc="Base info", delay=1)):
if row[0] in seen_hash:
continue
@@ -125,14 +126,12 @@ class ImageList:
if filename == None:
continue
image = ImageMeasure(filename)
cursor.execute(
self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description)
VALUES(?,?,?,?,?)""",
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
)
if i % 50 == 0:
self.db.conn.commit()
self.db.conn.commit()
self.db_writer.commit()
return
def delete_missing(self):
@@ -189,8 +188,6 @@ class ImageList:
SELECT
list.file,
data.hash,
data.fingerprint,
data.w_hash,
data.p_hash,
data.sharpness,
data.R,
@@ -201,7 +198,7 @@ class ImageList:
data.BB
FROM data
LEFT JOIN list ON data.hash = list.hash
WHERE data.fingerprint IS NULL
WHERE data.p_hash IS NULL
OR data.sharpness IS NULL
OR data.R IS NULL
"""
@@ -210,7 +207,6 @@ class ImageList:
)
if len(missing_measurements) == 0:
return
cursor = self.db.cursor()
for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)):
filename = row[0]
if filename == None:
@@ -219,26 +215,24 @@ class ImageList:
continue
duplicates.add(row[1])
image = ImageMeasure(filename)
image.hash = row[1]
image.fingerprint = row[2]
image.w_hash = row[3]
image.p_hash = row[4]
image.sharpness = row[5]
image.colors["R"] = row[6]
image.colors["G"] = row[7]
image.colors["B"] = row[8]
image.colors["BR"] = row[9]
image.colors["BG"] = row[10]
image.colors["BB"] = row[11]
(
image.hash,
image.p_hash,
image.sharpness,
image.colors["R"],
image.colors["G"],
image.colors["B"],
image.colors["BR"],
image.colors["BG"],
image.colors["BB"],
) = row[1:]
# Calculate if required
image.get_fingerprint()
image.get_p_hash()
image.sharpness = image.get_sharpness()
image.colors.update(image.get_colors())
cursor.execute(
self.db_writer.execute(
"""UPDATE data SET
fingerprint = ?,
w_hash = ?,
p_hash = ?,
sharpness = ?,
R = ?,
@@ -250,8 +244,6 @@ class ImageList:
WHERE hash = ?
""",
(
image.fingerprint,
image.w_hash,
image.p_hash,
image.sharpness,
image.colors["R"],
@@ -264,9 +256,7 @@ class ImageList:
),
)
if i % 50 == 0:
self.db.conn.commit()
self.db.conn.commit()
self.db_writer.commit()
return
def disk_used(self):
@@ -380,20 +370,6 @@ class ImageList:
def similarity(self):
def set_image(row):
image = ImageMeasure(None)
image.hash = row[0]
image.fingerprint = row[1]
image.w_hash = row[2]
image.p_hash = row[3]
image.sharpness = row[4]
image.width = row[5]
image.height = row[6]
image.colors["R"] = row[7]
image.colors["G"] = row[8]
image.colors["B"] = row[9]
return image
def print_visually_similar(file, thr):
cmp_image = ImageMeasure(file)
cmp_image.set_all()
@@ -654,7 +630,7 @@ def humanize_date(date):
def setup_options():
parser = ArgumentParser(description="Maintains a list of images sqlite file")
parser = ArgumentParser(description=f"Maintains a list of images sqlite file (v{__version__})")
parser.add_argument(
"-f",
action="store",

View File

@@ -1,6 +1,7 @@
import os
import sqlite3
import sys
import time
from math import sqrt as sqlite_sqrt
import sqlite_vec
@@ -24,9 +25,10 @@ class DB:
if os.path.exists(self.sqlfile):
return
conn = sqlite3.connect(self.sqlfile)
conn = sqlite3.connect(self.sqlfile, timeout=30)
db = conn.cursor()
conn.text_factory = str
db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)")
db.execute(
"""CREATE TABLE data (
hash TEXT PRIMARY KEY,
@@ -34,14 +36,11 @@ class DB:
portrait BOOLEAN,
width INTEGER,
height INTEGER,
fingerprint TEXT,
p_hash TEXT,
w_hash TEXT,
sharpness NUMERIC,
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
)"""
)
db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)")
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
db.execute(
"""CREATE VIEW files AS
@@ -56,7 +55,7 @@ class DB:
return
def connect(self):
conn = sqlite3.connect(self.sqlfile)
conn = sqlite3.connect(self.sqlfile, timeout=30)
conn.text_factory = str
conn.create_function("SQRT", 1, sqlite_sqrt)
conn.create_function("RELATIVE", 1, self.file2relative)
@@ -143,5 +142,57 @@ class DB:
return os.path.relpath(file, self.root_path)
class DBCachedWriter:
    """Queue write statements in memory and send them to the database in batches.

    Statements passed to ``execute`` are appended to ``self.cache`` and are
    executed and committed together by ``write_cache``.  A flush is attempted
    from ``execute`` once more than ``writemax`` statements are queued or
    more than ``writeout`` seconds have passed since the last flush attempt,
    and whenever ``commit`` or ``close`` is called.

    A flush that fails with ``sqlite3.OperationalError`` keeps the queue
    intact so that a later flush can retry it; after more than
    ``max_retries`` consecutive failures the error is re-raised.
    """

    def __init__(self, DB):
        """DB = instance of the DB object"""
        self.db = DB
        # Pending statements, each a {"query": ..., "values": ...} dict.
        self.cache = []
        # Timestamp of the last flush attempt (successful or not).
        self.cache_time = time.time()
        # Seconds after which ``execute`` triggers a flush.
        self.writeout = 30
        # ``execute`` triggers a flush once the queue is longer than this.
        self.writemax = 499
        # Consecutive failed flushes tolerated before the error is raised.
        self.max_retries = 5
        self.try_count = 0

    def __del__(self):
        self.close()

    def commit(self):
        """Attempt to flush all queued statements now."""
        self.write_cache()

    def close(self):
        """Flush the queue if anything is still pending."""
        if self.cache:
            self.write_cache()

    def execute(self, query, values):
        """Queue one statement; flush when the queue is old or large enough.

        query: SQL text with placeholders.
        values: parameters bound to the placeholders when the statement runs.
        """
        self.cache.append({"query": query, "values": values})
        too_old = time.time() > self.cache_time + self.writeout
        too_big = len(self.cache) > self.writemax
        if too_old or too_big:
            self.write_cache()

    def write_cache(self):
        """Execute every queued statement and commit them together.

        On ``sqlite3.OperationalError`` the open transaction is rolled back,
        the queue is kept and the method returns so a later call can retry.
        Once ``try_count`` exceeds ``max_retries`` the error is re-raised.
        """
        if self.cache:
            # Last statement handed to the cursor; stays None when the
            # failure happens before any statement was attempted.
            attempted = None
            try:
                # ~ print(f"Write cache: {len(self.cache)} rows...", file=sys.stderr)
                cursor = self.db.cursor()
                for row in self.cache:
                    # ~ print(row['query'], row['values'])
                    attempted = row
                    cursor.execute(row["query"], row["values"])
                self.db.conn.commit()
            except sqlite3.OperationalError:
                # Discard whatever part of the batch was already applied.
                # The whole queue is replayed on the next attempt, so leaving
                # those statements pending would apply them twice (or trip a
                # UNIQUE constraint on INSERT).
                try:
                    self.db.conn.rollback()
                except sqlite3.Error:
                    # A failing rollback must not hide the original error.
                    pass
                print("Writing failed, waiting for next writeout...", file=sys.stderr)
                self.cache_time = time.time()
                self.try_count += 1
                if self.try_count > self.max_retries:
                    if attempted is None:
                        print("Failed", file=sys.stderr)
                    else:
                        print(
                            f"Failed\nQuery: {attempted['query']}\nValues: {attempted['values']}",
                            file=sys.stderr,
                        )
                    raise
                return
        # Reached after a successful flush or when nothing was queued.
        self.try_count = 0
        self.cache = []
        self.cache_time = time.time()
def sqlite_square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

View File

@@ -33,9 +33,8 @@ class ImageMeasure:
self.width = None
self.height = None
self.portrait = None
self.fingerprint = None
self.w_hash = None
self.p_hash = None
self.p_hash16 = None
self.sharpness = None
self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")}
self.similarity = {"distance": 0, "color": 0, "aspect": 0}
@@ -58,7 +57,7 @@ class ImageMeasure:
self.get_time()
self.get_size()
self.get_shape()
self.get_fingerprint()
self.get_p_hash()
self.get_sharpness()
self.get_colors()
@@ -134,20 +133,6 @@ class ImageMeasure:
if image_type == "PIL":
return Image.fromarray(self.image)
def get_fingerprint(self):
if self.fingerprint is None:
self.get_w_hash()
self.get_p_hash()
self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8))
return self.fingerprint
def get_w_hash(self):
if self.w_hash is None:
self.w_hash = str(imagehash.whash(self.get_image("PIL"), hash_size=8))
return self.w_hash
def get_p_hash(self):
if self.p_hash is None:
self.p_hash = str(imagehash.phash(self.get_image("PIL"), hash_size=8))