diff --git a/py-packages/imagelist2/Makefile b/py-packages/imagelist2/Makefile new file mode 100644 index 0000000..b866bf9 --- /dev/null +++ b/py-packages/imagelist2/Makefile @@ -0,0 +1,125 @@ +SHELL := /bin/bash +.PHONY: +.ONESHELL: +help: ## *:・゚✧*:・゚✧ This help *:・゚✧*:・゚✧ + @printf "\033[36;1m %14s \033[0;32;1m %s\033[0m\n" Target Description + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + awk ' \ + BEGIN {FS = ":.*?## "}; \ + { if ( $$1 != "-") { \ + printf "\033[31;1m[ \033[36;1m%14s \033[31;1m]\033[0;32;1m %s\033[0m\n", $$1, $$2 \ + } else { \ + printf " \033[0;33;1m=^= %-25s =^=\033[0m\n", $$2 \ + } \ + } \ + ' + +-: ## Building + +all: clean install test + +install: ## Run installer + set -e + . useve-runner + useve imagelist2 + py-format ./ + pip install ./ + +PRINT_TABLE := 'sqlite3 -header image-list.sqlite "SELECT * FROM files" | tabulate -1 -s "\|"' +PRINT_DATA := 'sqlite3 -header image-list.sqlite "SELECT * FROM data" | tabulate -1 -s "\|"' +PRINT_LIST := 'sqlite3 -header image-list.sqlite "SELECT * FROM list" | tabulate -1 -s "\|"' + +test: test-db test-du test-dup test-tag ## Test + +test-db: + set -e + . 
useve-runner + useve imagelist2 + echo ================================= + mkdir -p folder1/folder2 folder1/.hidden folder1/_med + convert -size 600x300 xc:red red.jpg + cp red.jpg folder1/.hidden/ + convert -size 600x300 xc:cyan folder1/cyan.jpg + convert -size 600x300 xc:cyan folder1/cyan.png + cp folder1/cyan.png folder1/cyan_dup2.png + cp folder1/cyan.png folder1/cyan_dup3.png + cp folder1/cyan.jpg folder1/cyan_dup2.jpg + cp folder1/cyan.jpg folder1/cyan_dup3.jpg + convert -size 600x300 plasma: folder1/noisy.png + convert -size 600x300 plasma: -blur 0x3 folder1/blur.png + convert -size 300x600 xc:blue folder1/folder2/blue.tif + convert wizard: folder1/wizard.jpg + convert folder1/wizard.jpg -resize 95%x98% folder1/wizard.mod.jpg + convert folder1/wizard.jpg -flip -resize 95%x98% folder1/wizard.flip.jpg + image-list db -x imagelist2 + eval ${PRINT_TABLE} + convert -size 600x600 xc:black folder1/black.png + image-list db -x imagelist2 + eval ${PRINT_TABLE} + rm folder1/black.png + image-list db -x imagelist2 + eval ${PRINT_TABLE} + mogrify -rotate 90 folder1/cyan.png + image-list db -x imagelist2 -c + eval ${PRINT_LIST} + eval ${PRINT_DATA} + image-list db -x imagelist2 + eval ${PRINT_DATA} + image-list db -x imagelist2 --measure + eval ${PRINT_DATA} + eval ${PRINT_TABLE} + echo "========= check sha1 =============" + sqlite3 image-list.sqlite "SELECT hash,file FROM list" -separator ' ' | sha1sum -c - + + +test-du: + set -e + . useve-runner + useve imagelist2 + echo ================================= + image-list du + echo ================================= + image-list du -d 2 + echo ================================= + image-list du -d 1 + echo ================================= + image-list du -d 1 folder1/ + +test-dup: + set -e + . 
useve-runner + useve imagelist2 + echo ========== duplicates ======================= + image-list search --dup + echo ========== visual duplicates ======================= + image-list search --visdup + echo ========== nearest red ====================== + image-list search --color 255,0,0,10 + echo ========== nearest from file ====================== + image-list search --color red.jpg,3 + echo ========== Similar by dhash ====================== + image-list search --similar 30 + echo ========== Similar by file ====================== + image-list search --similar folder1/wizard.jpg + +test-tag: + set -e + . useve-runner + useve imagelist2 + echo ========== tag add ======================= + image-list tag -t plain -t red red.jpg + image-list tag -t red red.jpg + echo ========== tag list ======================= + image-list tag red.jpg + echo ========== tag delete ======================= + image-list tag -d red red.jpg + echo ========== tag list fail ======================= + image-list tag red.jpg.missing || true + + +init: ## Init test env + . useve-runner + useve mk imagelist2 + +clean: ## Clean testfiles + rm -rf folder1 image-list.sqlite red.jpg || true diff --git a/py-packages/imagelist2/imagelist2/__init__.py b/py-packages/imagelist2/imagelist2/__init__.py new file mode 100644 index 0000000..fc35874 --- /dev/null +++ b/py-packages/imagelist2/imagelist2/__init__.py @@ -0,0 +1,776 @@ +import os +import re +import sys +import traceback +from argparse import ArgumentParser +from datetime import datetime + +from imagelist2.db import DB, sqlite_sqrt, sqlite_square +from imagelist2.image import ImageMeasure, is_image_extension +from tqdm import tqdm + +__version__ = "0.0.1" +SQLFILE = "image-list.sqlite" +# IMGMATCH = re.compile("|".join([".*\." 
def add_single(self, image, change=False):
    """Insert or update the ``list`` row describing one image file.

    Args:
        image: Object exposing ``filename``, ``get_hash()``, ``get_time()``
            and ``get_size()`` (an ``ImageMeasure`` in normal use).
        change: When true, UPDATE the existing row for ``image.filename``;
            otherwise INSERT a new row.

    The statement is not committed here; the caller commits.  On any
    database error the message and traceback are printed to stdout and the
    process exits with status 1.
    """
    # Both statements take their parameters in the same order:
    # (hash, date, size, file).
    if change:
        query = "UPDATE list SET hash=?, date=? ,size=? WHERE file=?"
        error_msg = f"error changing file: {image.filename}"
    else:
        query = "INSERT INTO list(hash,date,size,file) VALUES (?,?,?,?)"
        error_msg = f"error adding file: {image.filename}"

    try:
        self.db.cursor().execute(
            query,
            (
                image.get_hash(),
                image.get_time(),
                image.get_size(),
                image.filename,
            ),
        )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not reported as a database failure.
        print(error_msg)
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
?", (row,)) + + self.db.conn.commit() + print(f"Cleaned {len(to_delete)} metadata") + + to_delete = [] + for row in self.db.cursor().execute( + """SELECT tags.hash FROM tags LEFT JOIN list ON list.hash = tags.hash WHERE list.hash IS NULL""" + ): + to_delete.append(row[0]) + + if len(to_delete) > 0: + cursor = self.db.cursor() + for row in to_delete: + cursor.execute("DELETE FROM tags where hash = ?", (row,)) + self.db.conn.commit() + print(f"Cleaned {len(to_delete)} tags") + + def measure(self): + + duplicates = set() + missing_measurements = ( + self.db.cursor() + .execute( + """ + SELECT + list.file, + data.hash, + data.fingerprint, + data.sharpness, + data.R, + data.G, + data.B, + data.BR, + data.BG, + data.BB + FROM data + LEFT JOIN list ON data.hash = list.hash + WHERE data.fingerprint IS NULL + OR data.sharpness IS NULL + OR data.R IS NULL + """ + ) + .fetchall() + ) + if len(missing_measurements) == 0: + return + cursor = self.db.cursor() + for i, row in enumerate(tqdm(missing_measurements, desc="Measure", delay=1, smoothing=0.01)): + filename = row[0] + if filename == None: + continue + if row[1] in duplicates: + continue + duplicates.add(row[1]) + image = ImageMeasure(filename) + image.hash = row[1] + image.fingerprint = row[2] + image.sharpness = row[3] + image.colors["R"] = row[4] + image.colors["G"] = row[5] + image.colors["B"] = row[6] + image.colors["BR"] = row[7] + image.colors["BG"] = row[8] + image.colors["BB"] = row[9] + # Calculate if required + image.fingerprint = image.get_fingerprint() + image.sharpness = image.get_sharpness() + image.colors.update(image.get_colors()) + + cursor.execute( + """UPDATE data SET + fingerprint = ?, + sharpness = ?, + R = ?, + G = ?, + B = ?, + BR = ?, + BG = ?, + BB = ? + WHERE hash = ? 
def disk_used(self):
    """Print summed file sizes per directory below ``options.path``.

    Reads ``size`` and ``file`` from the ``list`` table for every file whose
    path starts with the resolved ``options.path`` directory, truncates each
    relative path to ``options.diskused_depth`` components and prints one
    ``bytes| human| path`` line per resulting entry, in first-seen order.
    A ``None`` depth means "no limit".
    """
    if self.options.diskused_depth is None:
        self.options.diskused_depth = 9999999999
    depth = int(self.options.diskused_depth)

    # Compare against "<dir>/" as an exact prefix.  A LIKE "<dir>%" pattern
    # would also pick up sibling directories ("folder10" for "folder1") and
    # would treat "%" / "_" inside the path as wildcards.
    prefix = os.path.realpath(self.options.path).rstrip("/") + "/"
    result = self.db.cursor().execute(
        "SELECT size, SUBSTR(file, ?) AS path FROM list WHERE SUBSTR(file, 1, ?) = ?",
        (len(prefix) + 1, len(prefix), prefix),
    )

    # dict keeps insertion order and gives O(1) lookup per row.
    totals = {}
    for size, rel_path in result:
        start_path = "/".join(rel_path.split("/")[0:depth])
        if len(start_path) != len(rel_path):
            # The path was truncated, so the entry is a directory.
            start_path += "/"
        totals[start_path] = totals.get(start_path, 0) + (size or 0)

    for entry, size in totals.items():
        print("| ".join([str(size).ljust(14), humanize_size(size).rjust(8), entry]))
def similarity(self):
    """Print similar-image groups and/or visually exact duplicate groups.

    Driven by two options:

    * ``options.similarity`` - either an integer threshold (compare every
      measured image against every other) or ``file[,threshold]`` (compare
      one file against the database; threshold defaults to 20).
    * ``options.visual_duplicate`` - list images sharing the same
      fingerprint.

    Output is pipe-separated; a group starts with a ``=0`` reference row
    followed by ``>N`` rows for its matches.
    """

    def set_image(row):
        """Build an ImageMeasure from (hash, fingerprint, sharpness, width, height, R, G, B)."""
        image = ImageMeasure(None)
        image.hash = row[0]
        image.fingerprint = row[1]
        image.sharpness = row[2]
        image.width = row[3]
        image.height = row[4]
        image.colors["R"] = row[5]
        image.colors["G"] = row[6]
        image.colors["B"] = row[7]
        return image

    def get_matching(cmp_image):
        """Return measured images whose fingerprint distance to cmp_image is <= thr."""
        compare_list = self.db.cursor().execute(
            """SELECT hash,fingerprint,sharpness,width,height,R,G,B
            FROM data
            WHERE fingerprint IS NOT NULL AND sharpness > 0 AND hash != ?""",
            (cmp_image.hash,),
        )
        match_list = []
        for row2 in compare_list:
            other_image = set_image(row2)
            similarity = cmp_image.similarity_difference(other_image)
            if similarity <= thr:
                other_image.similarity["distance"] = similarity
                other_image.similarity["color"] = cmp_image.color_difference(other_image)
                other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
                other_image.filename = self.db.hash2file(other_image.hash)
                match_list.append(other_image)
        return match_list

    def get_duplicates():
        """Return rows of files sharing a fingerprint, numbered from "0" within each group."""
        return self.db.cursor().execute(
            """
            WITH
            duplicates AS (SELECT fingerprint FROM data GROUP BY fingerprint HAVING count(fingerprint) > 1 AND sharpness > 0),
            duphash AS (
                SELECT duplicates.fingerprint, data.hash, data.sharpness, data.width, data.height, data.R, data.G, data.B
                FROM duplicates
                LEFT JOIN data ON (duplicates.fingerprint = data.fingerprint)
            ),
            f AS (SELECT
                duphash.fingerprint, duphash.hash,list.file,
                duphash.sharpness,
                duphash.width, duphash.height,
                duphash.R, duphash.G, duphash.B
                FROM duphash
                LEFT JOIN list ON (list.hash = duphash.hash)
                WHERE list.file IS NOT NULL
                ORDER BY list.file
            )
            SELECT
                CAST((row_number() OVER (PARTITION BY f.fingerprint))-1 AS TEXT) AS row,
                file,
                hash,
                fingerprint,
                sharpness,width,height,R,G,B
            FROM f
            """,
        )

    def print_matching(match_list, cmp_image):
        """Print the reference row and its matches, nearest first; silent when there are none."""
        if len(match_list) > 0:
            match_list.sort(key=lambda i: i.similarity["distance"])
            print_similarity_row(cmp_image, "=", 0)
            for i, img in enumerate(match_list):
                print_similarity_row(img, ">", i + 1)

    def print_similarity_row(img, c, index):
        """Print one output row; img.filename is a list of paths."""
        fnames = ", ".join([self.db.file2relative(f) for f in img.filename])
        print(
            f"{c}{index}|{fnames}|{img.similarity['distance']}|{img.similarity['color']}|{img.similarity['aspect']}|{img.sharpness}|{img.width}|{img.height}"
        )

    print("|".join(("#", "File", "SD", "CD", "RD", "Shp", "W", "H")))
    if self.options.similarity:
        # A plain integer is a threshold for an all-against-all search;
        # anything else is "file" or "file,threshold".
        try:
            thr = int(self.options.similarity)
            file = None
        except ValueError:
            file = self.options.similarity.split(",")
            if len(file) == 1:
                thr = 20
            else:
                thr = int(file[1])
            file = file[0]

        if file is None:
            # Measure similarity on all files
            fingerprint_list = self.db.cursor().execute(
                """
                SELECT hash,fingerprint,sharpness,width,height,R,G,B
                FROM data
                WHERE fingerprint IS NOT NULL
                AND sharpness > 0"""
            )
            # Hashes already reported as a match are not used as a
            # reference again.
            checked = set()
            for row in fingerprint_list:
                if row[0] in checked:
                    continue
                cmp_image = set_image(row)
                cmp_image.filename = self.db.hash2file(cmp_image.hash)
                match_list = get_matching(cmp_image)
                for m in match_list:
                    checked.add(m.hash)
                print_matching(match_list, cmp_image)
        else:
            # Read a single image and find what is similar to it
            cmp_image = ImageMeasure(file)
            cmp_image.set_all()
            cmp_image.filename = [cmp_image.filename]
            match_list = get_matching(cmp_image)
            print_matching(match_list, cmp_image)

    if self.options.visual_duplicate:
        # Reference image of the group currently being collected.  It stays
        # None when the query returns no rows, in which case nothing must be
        # printed (previously this path raised UnboundLocalError).
        cmp_image = None
        match_list = []
        for row in get_duplicates():
            if row[0] == "0":
                # Row "0" opens a new group: flush the previous one first.
                if cmp_image is not None:
                    print_matching(match_list, cmp_image)
                cmp_image = set_image(row[2:])
                cmp_image.filename = [row[1]]
                match_list = []
            else:
                other_image = set_image(row[2:])
                other_image.filename = [row[1]]
                other_image.similarity["color"] = cmp_image.color_difference(other_image)
                other_image.similarity["aspect"] = cmp_image.shape_difference(other_image)
                match_list.append(other_image)
        if cmp_image is not None:
            print_matching(match_list, cmp_image)
def clean_dirs(dirs):
    """Drop excluded and hidden directory names from ``dirs`` in place.

    The list object itself is modified (not replaced) because ``os.walk``
    keeps a reference to it and only descends into what is left.  Names
    listed in ``BADDIRS`` and names starting with "." are removed.
    """
    dirs[:] = [d for d in dirs if not (d in BADDIRS or d.startswith("."))]


def clean_syms(files):
    """Return ``files`` without the entries that are symbolic links."""
    return [f for f in files if not os.path.islink(f)]


def humanize_size(size, precision=1):
    """Format a byte count with a binary-prefixed unit, e.g. ``1.5KB``.

    Args:
        size: Number of bytes, or ``None``.
        precision: Decimals shown once the value has been scaled at least
            once; plain byte counts are printed without decimals.

    Returns:
        ``"nan"`` for ``None``, otherwise the formatted string.  Values too
        large for the biggest unit are expressed in that unit.
    """
    if size is None:
        return "nan"
    suffixes = ["B", "KB", "MB", "GB", "TB", "PB", "EB"]
    suffix_index = 0
    digits = 0
    # ``>=`` so that exactly 1024 bytes reads "1.0KB"; the index bound keeps
    # huge values from running off the end of ``suffixes``.
    while size >= 1024 and suffix_index < len(suffixes) - 1:
        suffix_index += 1
        size = size / 1024.0
        digits = precision
    return "%.*f%s" % (digits, size, suffixes[suffix_index])


def humanize_date(date):
    """Format a Unix timestamp as local ``YYYY-MM-DD HH:MM:SS``; ``None`` gives ``""``."""
    if date is None:
        return ""
    return datetime.fromtimestamp(int(date)).strftime("%Y-%m-%d %H:%M:%S")
[%(default)s]", + ) + db.add_argument( + "--changed", + "-c", + action="store_true", + dest="changed", + default=False, + help="Search for changed files and update their entries [%(default)s]", + ) + db.add_argument( + "--no-delete", + "-d", + action="store_true", + dest="no_delete", + default=False, + help="Do not delete non-existing entries [%(default)s]", + ) + db.add_argument( + "--no-delete-data", + "-D", + action="store_true", + dest="no_delete_data", + default=False, + help="Do not delete unused metadata [%(default)s]", + ) + db.add_argument( + "-x", + action="append", + dest="exclude", + default=[], + help="Exclude folder name. This option may be issued several times.", + ) + db.add_argument( + "-l", + action="store_true", + dest="symlinks", + default=False, + help="Follow symbolic links [%(default)s]", + ) + db.add_argument("startpath", action="store", default=".", nargs="?", help="Path to start scanning for images.") + + du.add_argument( + "-d", + type=int, + action="store", + dest="diskused_depth", + default=None, + help="Depth of summarization for du.", + ) + + du.add_argument( + type=str, + action="store", + dest="path", + default=".", + help="Print directory sizes. Argument is the path where directories are listed from.", + nargs="?", + ) + + search.add_argument( + "--dup", + action="store_true", + dest="duplicate", + default=False, + help="Return a list of duplicate files, based on file hashes. [%(default)s]", + ) + + search.add_argument( + "--visdup", + action="store_true", + dest="visual_duplicate", + default=False, + help="Return a list of visually exact duplicate files, based on perceptual hashes. [%(default)s]", + ) + + search.add_argument( + "--color", + type=str, + dest="nearestcolor", + default=False, + help="Search list for nearest ambient color. format: R,G,B in uint8. Add fourth value to limit search to number of hits. 
Also accepts format file,hits to find nearest color to given file.", + ) + + search.add_argument( + "--similar", + type=str, + dest="similarity", + default=None, + help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. " + + "If value is a filename, search similar to that image. " + + "Append with ',value' to limit similarity. default to 20." + + "The output columns: SD SimilarityDiff., CD ColorDiff., " + + "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.", + ) + + tag.add_argument( + "-t", + action="append", + dest="add_tag", + default=[], + help="Give file a tag.", + ) + tag.add_argument( + "-d", + action="append", + dest="delete_tag", + default=[], + help="Delete a tag.", + ) + tag.add_argument( + type=str, + dest="file", + default=None, + help="File name for tagging.", + ) + + options = parser.parse_args() + + if options.command == "help": + parser.print_help() + print("\n====\nCommand: db") + db.print_help() + print("\n====\nCommand: search") + search.print_help() + print("\n====\nCommand: du") + du.print_help() + print("\n====\nCommand: tag") + tag.print_help() + sys.exit(0) + + if options.command == None: + parser.print_help() + sys.exit(0) + # options = db.parse_args() + # options.command = "db" + # options.sqlfile = SQLFILE + + if options.command == "db": + BADDIRS.extend(options.exclude) + + return options + + +def main(): + + options = setup_options() + il = ImageList(options) + + if options.command == "db": + if not options.no_delete: + il.delete_missing() + if not options.no_add: + il.recursive_add() + il.base_add() + if not options.no_delete_data: + il.clean_data() + if options.measure: + il.base_add() + il.measure() + if options.command == "du": + il.disk_used() + if options.command == "search": + if options.duplicate: + il.duplicates() + if options.visual_duplicate: + il.similarity() + if options.nearestcolor: + il.nearestcolor() + if options.similarity: + 
class DB:
    """Access layer for the image-list SQLite file.

    On construction the database file is created with its schema if it does
    not exist yet, and a connection is opened and kept in ``self.conn``.
    ``self.root_path`` is the directory holding the database file; the SQL
    function ``RELATIVE(path)`` and ``file2relative`` express paths relative
    to it.
    """

    def __init__(self, sqlfile):
        self.sqlfile = sqlfile
        self.root_path = os.path.dirname(os.path.realpath(sqlfile))
        self.create_db()
        self.connect()

    def create_db(self):
        """Create the database file and schema; do nothing if the file exists."""
        if os.path.exists(self.sqlfile):
            return

        conn = sqlite3.connect(self.sqlfile)
        try:
            conn.text_factory = str
            db = conn.cursor()
            db.execute(
                """CREATE TABLE data (
                    hash TEXT PRIMARY KEY,
                    description TEXT,
                    portrait BOOLEAN,
                    width INTEGER,
                    height INTEGER,
                    fingerprint TEXT,
                    sharpness NUMERIC,
                    R REAL, G REAL, B REAL, BR REAL, BG REAL, BB REAL
                    )"""
            )
            db.execute("CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)")
            db.execute("CREATE TABLE tags (hash TEXT,tag TEXT)")
            db.execute(
                """CREATE VIEW files AS
                    SELECT list.file, list.date, list.size, data.*
                    FROM list
                    LEFT JOIN data ON data.hash = list.hash"""
            )
            db.execute("CREATE UNIQUE INDEX data_hash ON data(hash)")
            db.execute("CREATE UNIQUE INDEX list_file ON list(file)")
            conn.commit()
        finally:
            # This connection is only used for setup; connect() opens the
            # one that is kept.
            conn.close()

    def connect(self):
        """Open the working connection, register ``RELATIVE`` and return it."""
        conn = sqlite3.connect(self.sqlfile)
        conn.text_factory = str
        conn.create_function("RELATIVE", 1, self.file2relative)
        self.conn = conn
        return conn

    def cursor(self):
        """Return a new cursor on the working connection."""
        return self.conn.cursor()

    def get_folder_contents(self, path):
        """Return the listed files located directly in ``path``.

        ``path`` is expected to end with "/".  Files in sub-folders are not
        included.
        """
        # Exact prefix comparison; the remainder must not contain another
        # path separator, otherwise the file lives in a sub-folder.
        res = self.cursor().execute(
            "SELECT file FROM list WHERE SUBSTR(file, 1, ?) = ?",
            (len(path), path),
        )
        return [row[0] for row in res if "/" not in row[0][len(path):]]

    def is_time_mismatch(self, image):
        """Return True when no row has both ``image.filename`` and its current mtime."""
        count = (
            self.cursor()
            .execute(
                "SELECT COUNT(1) FROM list WHERE file = ? AND date = ?",
                (
                    image.filename,
                    image.get_time(),
                ),
            )
            .fetchone()[0]
        )
        return count == 0

    def is_hash_mismatch(self, image):
        """Return True when no row has both ``image.filename`` and its current hash."""
        count = (
            self.cursor()
            .execute(
                "SELECT COUNT(1) FROM list WHERE file = ? AND hash = ?",
                (
                    image.filename,
                    image.get_hash(),
                ),
            )
            .fetchone()[0]
        )
        return count == 0

    def hash2file(self, hash):
        """Return the list of file paths recorded with the given content hash."""
        rows = self.cursor().execute(
            "SELECT file FROM LIST WHERE hash = ?",
            (hash,),
        )
        return [row[0] for row in rows]

    def file2hash(self, file):
        """Return the hash recorded for ``file``, or None when it is not listed.

        Database errors propagate instead of being reported as "not listed".
        """
        row = (
            self.cursor()
            .execute(
                "SELECT hash FROM LIST WHERE file = ?",
                (file,),
            )
            .fetchone()
        )
        return None if row is None else row[0]

    def file2relative(self, file):
        """Return ``file`` relative to the directory of the database file."""
        return os.path.relpath(file, self.root_path)


def sqlite_square(x):
    """Return ``x * x``; SQL NULL (passed in as None) stays NULL."""
    if x is None:
        return None
    return x * x
{x: None for x in ("R", "G", "B", "BR", "BG", "BB")} + self.similarity = {"distance": 0, "color": 0, "aspect": 0} + self.tags = [] + self.image = None + + def __str__(self): + + printable = [] + for k, v in self.__dict__.items(): + if k == "image": + if not self.image is None: + v = "Loaded..." + printable.append(f"{k}: {v}") + return "\n".join(printable) + + def set_all(self): + self.set_filename_absolute() + self.get_hash() + self.get_time() + self.get_size() + self.get_shape() + self.get_fingerprint() + self.get_sharpness() + self.get_colors() + + def set_filename_absolute(self): + self.filename = os.path.realpath(self.filename) + + def get_hash(self): + """Return hash of the file""" + if self.hash is None: + hasher = hashlib.sha1() + blk = 2**16 + with open(self.filename, "rb") as f: + while True: + d = f.read(blk) + if not d: + break + hasher.update(d) + + self.hash = hasher.hexdigest() + return self.hash + + def get_time(self): + """Return mtime of the file""" + if self.time is None: + self.time = int(os.path.getmtime(self.filename)) + return self.time + + def get_size(self): + if self.size is None: + self.size = os.path.getsize(self.filename) + return self.size + + def get_width(self): + if self.width is None: + self.width, _, _ = self.get_shape() + return self.width + + def get_height(self): + if self.height is None: + _, self.height, _ = self.get_shape() + return self.height + + def get_portrait(self): + if self.portrait is None: + _, _, self.portrait = self.get_shape() + return self.portrait + + def get_shape(self): + if self.width is None or self.height is None or self.portrait is None: + # self.height, self.width = self.get_image("numpy").shape[0:2] + self.width, self.height = read_image_size(self.filename) + self.portrait = self.height >= self.width + return self.width, self.height, self.portrait + + def get_image(self, image_type="numpy"): + + if self.image is None: + self.image, self.image_type = read_image(self.filename) + if self.image_type == 
"numpy": + if len(self.image.shape) > 2: + # BGR -> RGB + self.image = np.flip(self.image, axis=2) + + if self.image_type == image_type: + return self.image + if image_type == "numpy": + return np.array(self.image) + if image_type == "PIL": + return Image.fromarray(self.image) + + def get_fingerprint(self): + + if self.fingerprint is None: + # self.fingerprint = str(imagehash.phash(self.get_image("PIL"), hash_size=8)) + self.fingerprint = str(imagehash.dhash(self.get_image("PIL"), hash_size=8)) + + return self.fingerprint + + def get_sharpness(self): + + if self.sharpness is None: + try: + im = self.get_image("PIL").convert("L") + crop_box = (1, 1, im.width - 1, im.height - 1) + self.sharpness = round( + ( + np.sum(np.abs(np.array(im.filter(LaplaceX).crop(crop_box)).astype(float))) + + np.sum(np.abs(np.array(im.filter(LaplaceY).crop(crop_box)).astype(float))) + ) + / (2 * im.width * im.height), + 4, + ) + except Exception: + self.sharpness = 0 + + return self.sharpness + + def get_colors(self): + + def get_border(im): + return int(np.mean(im[Border])) + + if self.colors["R"] is None: + im = self.get_image("PIL").convert("RGB") + th = im.copy() + th.thumbnail((1, 1), resample=Image.BILINEAR) + th = np.array(th) + im = np.array(im.resize((10, 10), resample=Image.BILINEAR)) + self.colors["R"] = int(th[0][0][0]) + self.colors["G"] = int(th[0][0][1]) + self.colors["B"] = int(th[0][0][2]) + self.colors["BR"] = get_border(im[:, :, 0]) + self.colors["BG"] = get_border(im[:, :, 1]) + self.colors["BB"] = get_border(im[:, :, 2]) + return self.colors + + def similarity_difference(self, other): + + other_phash = imagehash.hex_to_hash(other.get_fingerprint()) + this_phash = imagehash.hex_to_hash(self.get_fingerprint()) + return other_phash - this_phash + + def color_difference(self, other): + + other_color = other.get_colors() + this_color = self.get_colors() + diff = round( + np.sqrt( + np.square(other_color["R"] - this_color["R"]) + + np.square(other_color["G"] - 
# Lower-case file extensions treated as images / as JPEG files.
EXTENSIONS = (".jpg", ".png", ".tif", ".gif", ".jpeg", ".tiff")
JPEG_EXTENSIONS = (".jpg", ".jpeg")


def is_image_extension(f):
    """Return True when the extension of ``f`` (case-insensitive) is in EXTENSIONS."""
    return os.path.splitext(f.lower())[1] in EXTENSIONS


def is_jpeg(f):
    """Return True when the extension of ``f`` (case-insensitive) is a JPEG one."""
    return os.path.splitext(f.lower())[1] in JPEG_EXTENSIONS


def read_image(fname):
    """Load an image and return ``(image, image_type)``.

    JPEG files are decoded with TurboJPEG when the module-level ``JPEG``
    decoder is available; if that fails for any reason, or for every other
    format, the file is opened with PIL.  ``image_type`` is always ``"PIL"``.
    """
    if is_jpeg(fname) and JPEG:
        try:
            with open(fname, "rb") as fp:
                data = fp.read()
            return Image.fromarray(JPEG.decode(data, pixel_format=TJPF_RGB)), "PIL"
        except Exception:
            # Best effort only: fall through to the PIL decoder below.
            pass
    im = Image.open(fname)
    return im, "PIL"


def read_image_size(fname):
    """Return ``(width, height)`` of the image file.

    Just reading the size is faster with PIL.  The file is closed before
    returning.
    """
    with Image.open(fname) as im:
        return im.width, im.height