non-locking imagelist

This commit is contained in:
Q
2025-06-02 17:34:34 +03:00
parent df02a9b1bf
commit 4cf6b754b1
4 changed files with 85 additions and 72 deletions

View File

@@ -36,6 +36,7 @@ test-db:
. useve-runner . useve-runner
useve imagelist2 useve imagelist2
echo ================================= echo =================================
rm -f image-list.sqlite
mkdir -p folder1/folder2 folder1/.hidden folder1/_med mkdir -p folder1/folder2 folder1/.hidden folder1/_med
convert -size 600x300 xc:red red.jpg convert -size 600x300 xc:red red.jpg
cp red.jpg folder1/.hidden/ cp red.jpg folder1/.hidden/
@@ -97,7 +98,7 @@ test-dup:
image-list search --color 255,0,0,10 image-list search --color 255,0,0,10
echo ========== nearest from file ====================== echo ========== nearest from file ======================
image-list search --color red.jpg,3 image-list search --color red.jpg,3
echo ========== Similar by dhash ====================== echo ========== Similar by phash ======================
image-list search --similar 30 image-list search --similar 30
echo ========== Similar by file ====================== echo ========== Similar by file ======================
image-list search --similar folder1/wizard.jpg image-list search --similar folder1/wizard.jpg

View File

@@ -6,11 +6,11 @@ from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
import tabulate import tabulate
from imagelist2.db import DB, sqlite_sqrt, sqlite_square from imagelist2.db import DB, DBCachedWriter, sqlite_sqrt, sqlite_square
from imagelist2.image import ImageMeasure, is_image_extension from imagelist2.image import ImageMeasure, is_image_extension
from tqdm import tqdm from tqdm import tqdm
__version__ = "0.0.5" __version__ = "0.0.6"
SQLFILE = "image-list.sqlite" SQLFILE = "image-list.sqlite"
# IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I) # IMGMATCH = re.compile("|".join([".*\." + x + "$" |.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$", re.I)
BADDIRS = ["_tn", "_med", ".tn", ".med"] BADDIRS = ["_tn", "_med", ".tn", ".med"]
@@ -22,6 +22,7 @@ class ImageList:
self.options = opts self.options = opts
self.db = DB(self.options.sqlfile) self.db = DB(self.options.sqlfile)
self.db_writer = DBCachedWriter(self.db)
self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile)) self.root_path = os.path.dirname(os.path.realpath(self.options.sqlfile))
self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H") self.similarity_header = ("#", "File", "PD", "CD", "RD", "Shp", "W", "H")
@@ -69,7 +70,7 @@ class ImageList:
if not self.options.no_add: if not self.options.no_add:
image_count += 1 image_count += 1
self.add_single(image, change=False) self.add_single(image, change=False)
self.db.conn.commit() self.db_writer.commit()
if image_count > 0: if image_count > 0:
print(f"Added/changed {image_count} images") print(f"Added/changed {image_count} images")
return return
@@ -84,7 +85,7 @@ class ImageList:
error_msg = f"error changing image: {image.filename}" error_msg = f"error changing image: {image.filename}"
try: try:
self.db.cursor().execute( self.db_writer.execute(
query, query,
( (
image.get_hash(), image.get_hash(),
@@ -110,13 +111,13 @@ class ImageList:
FROM list FROM list
LEFT JOIN data ON data.hash = list.hash LEFT JOIN data ON data.hash = list.hash
WHERE data.hash IS NULL WHERE data.hash IS NULL
""" """
) )
.fetchall() .fetchall()
) )
if len(missing_base) == 0: if len(missing_base) == 0:
return return
cursor = self.db.cursor()
for i, row in enumerate(tqdm(missing_base, desc="Base info", delay=1)): for i, row in enumerate(tqdm(missing_base, desc="Base info", delay=1)):
if row[0] in seen_hash: if row[0] in seen_hash:
continue continue
@@ -125,14 +126,12 @@ class ImageList:
if filename == None: if filename == None:
continue continue
image = ImageMeasure(filename) image = ImageMeasure(filename)
cursor.execute( self.db_writer.execute(
"""INSERT INTO data(hash,portrait,width,height,description) """INSERT INTO data(hash,portrait,width,height,description)
VALUES(?,?,?,?,?)""", VALUES(?,?,?,?,?)""",
(row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()), (row[0], image.get_portrait(), image.get_width(), image.get_height(), image.get_description()),
) )
if i % 50 == 0: self.db_writer.commit()
self.db.conn.commit()
self.db.conn.commit()
return return
def delete_missing(self): def delete_missing(self):
@@ -189,8 +188,6 @@ class ImageList:
SELECT SELECT
list.file, list.file,
data.hash, data.hash,
data.fingerprint,
data.w_hash,
data.p_hash, data.p_hash,
data.sharpness, data.sharpness,
data.R, data.R,
@@ -201,7 +198,7 @@ class ImageList:
data.BB data.BB
FROM data FROM data
LEFT JOIN list ON data.hash = list.hash LEFT JOIN list ON data.hash = list.hash
WHERE data.fingerprint IS NULL WHERE data.p_hash IS NULL
OR data.sharpness IS NULL OR data.sharpness IS NULL
OR data.R IS NULL OR data.R IS NULL
""" """
@@ -210,7 +207,6 @@ class ImageList:
) )
if len(missing_measurements) == 0: if len(missing_measurements) == 0:
return return
cursor = self.db.cursor()
for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)): for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)):
filename = row[0] filename = row[0]
if filename == None: if filename == None:
@@ -219,26 +215,24 @@ class ImageList:
continue continue
duplicates.add(row[1]) duplicates.add(row[1])
image = ImageMeasure(filename) image = ImageMeasure(filename)
image.hash = row[1] (
image.fingerprint = row[2] image.hash,
image.w_hash = row[3] image.p_hash,
image.p_hash = row[4] image.sharpness,
image.sharpness = row[5] image.colors["R"],
image.colors["R"] = row[6] image.colors["G"],
image.colors["G"] = row[7] image.colors["B"],
image.colors["B"] = row[8] image.colors["BR"],
image.colors["BR"] = row[9] image.colors["BG"],
image.colors["BG"] = row[10] image.colors["BB"],
image.colors["BB"] = row[11] ) = row[1:]
# Calculate if required # Calculate if required
image.get_fingerprint() image.get_p_hash()
image.sharpness = image.get_sharpness() image.sharpness = image.get_sharpness()
image.colors.update(image.get_colors()) image.colors.update(image.get_colors())
cursor.execute( self.db_writer.execute(
"""UPDATE data SET """UPDATE data SET
fingerprint = ?,
w_hash = ?,
p_hash = ?, p_hash = ?,
sharpness = ?, sharpness = ?,
R = ?, R = ?,
@@ -250,8 +244,6 @@ class ImageList:
WHERE hash = ? WHERE hash = ?
""", """,
( (
image.fingerprint,
image.w_hash,
image.p_hash, image.p_hash,
image.sharpness, image.sharpness,
image.colors["R"], image.colors["R"],
@@ -264,9 +256,7 @@ class ImageList:
), ),
) )
if i % 50 == 0: self.db_writer.commit()
self.db.conn.commit()
self.db.conn.commit()
return return
def disk_used(self): def disk_used(self):
@@ -380,20 +370,6 @@ class ImageList:
def similarity(self): def similarity(self):
def set_image(row):
image = ImageMeasure(None)
image.hash = row[0]
image.fingerprint = row[1]
image.w_hash = row[2]
image.p_hash = row[3]
image.sharpness = row[4]
image.width = row[5]
image.height = row[6]
image.colors["R"] = row[7]
image.colors["G"] = row[8]
image.colors["B"] = row[9]
return image
def print_visually_similar(file, thr): def print_visually_similar(file, thr):
cmp_image = ImageMeasure(file) cmp_image = ImageMeasure(file)
cmp_image.set_all() cmp_image.set_all()
@@ -654,7 +630,7 @@ def humanize_date(date):
def setup_options(): def setup_options():
parser = ArgumentParser(description="Maintains a list of images sqlite file") parser = ArgumentParser(description=f"Maintains a list of images sqlite file (v{__version__})")
parser.add_argument( parser.add_argument(
"-f", "-f",
action="store", action="store",

View File

@@ -1,6 +1,7 @@
import os import os
import sqlite3 import sqlite3
import sys import sys
import time
from math import sqrt as sqlite_sqrt from math import sqrt as sqlite_sqrt
import sqlite_vec import sqlite_vec
@@ -24,9 +25,10 @@ class DB:
if os.path.exists(self.sqlfile): if os.path.exists(self.sqlfile):
return return
conn = sqlite3.connect(self.sqlfile) conn = sqlite3.connect(self.sqlfile, timeout=30)
db = conn.cursor() db = conn.cursor()
conn.text_factory = str conn.text_factory = str
db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)")
db.execute( db.execute(
"""CREATE TABLE data ( """CREATE TABLE data (
hash TEXT PRIMARY KEY, hash TEXT PRIMARY KEY,
@@ -34,14 +36,11 @@ class DB:
portrait BOOLEAN, portrait BOOLEAN,
width INTEGER, width INTEGER,
height INTEGER, height INTEGER,
fingerprint TEXT,
p_hash TEXT, p_hash TEXT,
w_hash TEXT,
sharpness NUMERIC, sharpness NUMERIC,
R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
)""" )"""
) )
db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)")
db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)") db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
db.execute( db.execute(
"""CREATE VIEW files AS """CREATE VIEW files AS
@@ -56,7 +55,7 @@ class DB:
return return
def connect(self): def connect(self):
conn = sqlite3.connect(self.sqlfile) conn = sqlite3.connect(self.sqlfile, timeout=30)
conn.text_factory = str conn.text_factory = str
conn.create_function("SQRT", 1, sqlite_sqrt) conn.create_function("SQRT", 1, sqlite_sqrt)
conn.create_function("RELATIVE", 1, self.file2relative) conn.create_function("RELATIVE", 1, self.file2relative)
@@ -143,5 +142,57 @@ class DB:
return os.path.relpath(file, self.root_path) return os.path.relpath(file, self.root_path)
class DBCachedWriter:
    """Queue write statements and send them to the database in batches.

    Statements given to ``execute`` are held in memory and written inside a
    single transaction when one of these happens:

    * the queue holds more than ``writemax`` statements,
    * more than ``writeout`` seconds passed since the last flush attempt,
    * ``commit`` or ``close`` is called.

    A flush that fails with ``sqlite3.OperationalError`` is rolled back and
    the queue is kept, so the same statements are retried at the next flush.
    After more than ``max_retries`` consecutive failures the error is
    re-raised to the caller.
    """

    def __init__(self, DB):
        """DB = instance of the DB object"""
        self.db = DB
        # Pending statements, each a dict with "query" and "values" keys.
        self.cache = []
        # Timestamp of the last flush attempt (successful or not).
        self.cache_time = time.time()
        # Seconds allowed between flushes before execute() forces one.
        self.writeout = 30
        # Queue length above which execute() forces a flush.
        self.writemax = 499
        # Consecutive failed flushes tolerated before the error is raised.
        self.max_retries = 5
        self.try_count = 0

    def __del__(self):
        # Last chance to write whatever is still queued.
        self.close()

    def commit(self):
        """Flush the queued statements now."""
        self.write_cache()

    def close(self):
        """Flush the queue if it still holds statements."""
        if len(self.cache) > 0:
            self.write_cache()

    def execute(self, query, values):
        """Queue one statement; flush if the queue is too old or too long.

        query  = SQL text with placeholders
        values = parameters bound to the placeholders
        """
        self.cache.append({"query": query, "values": values})
        if time.time() > self.cache_time + self.writeout or len(self.cache) > self.writemax:
            self.write_cache()

    def write_cache(self):
        """Write all queued statements in one transaction.

        On ``sqlite3.OperationalError`` the transaction is rolled back and the
        queue is left untouched for a later retry.  The error is re-raised
        once ``try_count`` exceeds ``max_retries``.
        """
        if len(self.cache) > 0:
            # Tracks the statement being executed so a failure can report it;
            # stays None when the failure happens before the first statement.
            row = None
            try:
                cursor = self.db.cursor()
                for row in self.cache:
                    cursor.execute(row["query"], row["values"])
                self.db.conn.commit()
            except sqlite3.OperationalError:
                print("Writing failed, waiting for next writeout...", file=sys.stderr)
                # Undo the statements that did run: the retry replays the
                # whole queue, and without a rollback they would be applied
                # twice (or trip a uniqueness constraint).
                self.db.conn.rollback()
                self.cache_time = time.time()
                self.try_count += 1
                if self.try_count > self.max_retries:
                    if row is None:
                        print("Failed before any statement was executed", file=sys.stderr)
                    else:
                        print(f"Failed\nQuery: {row['query']}\nValues: {row['values']}", file=sys.stderr)
                    raise
                return
        self.try_count = 0
        self.cache = []
        self.cache_time = time.time()
def sqlite_square(x): def sqlite_square(x):
return x * x return x * x

View File

@@ -33,9 +33,8 @@ class ImageMeasure:
self.width = None self.width = None
self.height = None self.height = None
self.portrait = None self.portrait = None
self.fingerprint = None
self.w_hash = None
self.p_hash = None self.p_hash = None
self.p_hash16 = None
self.sharpness = None self.sharpness = None
self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")} self.colors = {x: None for x in ("R", "G", "B", "BR", "BG", "BB")}
self.similarity = {"distance": 0, "color": 0, "aspect": 0} self.similarity = {"distance": 0, "color": 0, "aspect": 0}
@@ -58,7 +57,7 @@ class ImageMeasure:
self.get_time() self.get_time()
self.get_size() self.get_size()
self.get_shape() self.get_shape()
self.get_fingerprint() self.get_p_hash()
self.get_sharpness() self.get_sharpness()
self.get_colors() self.get_colors()
@@ -134,20 +133,6 @@ class ImageMeasure:
if image_type == "PIL": if image_type == "PIL":
return Image.fromarray(self.image) return Image.fromarray(self.image)
def get_fingerprint(self):
if self.fingerprint is None:
self.get_w_hash()
self.get_p_hash()
self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8))
return self.fingerprint
def get_w_hash(self):
if self.w_hash is None:
self.w_hash = str(imagehash.whash(self.get_image("PIL"), hash_size=8))
return self.w_hash
def get_p_hash(self): def get_p_hash(self):
if self.p_hash is None: if self.p_hash is None:
self.p_hash = str(imagehash.phash(self.get_image("PIL"), hash_size=8)) self.p_hash = str(imagehash.phash(self.get_image("PIL"), hash_size=8))