Files
q-tools/image_list.py
2015-07-09 07:56:12 +03:00

1077 lines
41 KiB
Python
Executable File

#!/usr/bin/python
from __future__ import print_function
import sys
import os
import re
import sqlite3
import subprocess
import hashlib
import traceback
import csv
import datetime
from argparse import ArgumentParser
SQLFILE='list_of_images.sqlite'
DESCFILE='descriptions.csv'
# Case-insensitive match for supported image extensions.  Raw string avoids
# the invalid-escape-sequence warning '\.' triggers in newer Pythons.
IMGMATCH=re.compile(r'.*\.jpg$|.*\.jpeg$|.*\.png$|.*\.gif$|.*\.tif$',re.I)
BADDIRS=['_tn','_med']  # directory names pruned while walking (-x adds more)
MINSIZE=0               # default pixel threshold for --small
def setup_options():
    """Parse command-line options for the image-list maintenance tool.

    Returns the argparse namespace.  Search-style options (--dup, --small,
    --measure, --nearest, --similar, -s, --du) flip the default "add new
    files" flag so a plain search does not also rescan the disk; tag,
    import and export operations disable adding outright.
    """
    parser=ArgumentParser(description="Maintains the list of images sqlite file")
    parser.add_argument("-a",action="store_false",dest="add",default=True,
        help="Do not add new files [%(default)s]")
    parser.add_argument("-c",action="store_true",dest="changed",default=False,
        help="Modify changed files [%(default)s]")
    parser.add_argument("-d",action="store_true",dest="delete",default=False,
        help="Delete non-existing entries [%(default)s]")
    parser.add_argument("-D",action="store_true",dest="delete_data",default=False,
        help="Delete unused metadata [%(default)s]")
    parser.add_argument("--du",type=str,action='store',dest="diskused",default=False,
        help="Print directory sizes. Argument is the path where directories are listed from.")
    parser.add_argument("--du-depth",type=str,action='store',dest="diskused_depth",default=1,
        help="Depth of summarization for --du.")
    parser.add_argument("--exportDesc",action="store",dest="export_descriptions",default=None,
        help="Walk through folders, and write "+DESCFILE+" in each folder. Format descriptions with {desc} {width}x{height} {red} {green} {blue} {Bred} {Bgreen} {Bblue} {size} {date} {name} {tags}")
    parser.add_argument("--export",action="store",dest="exportfile",default=None,
        help="Export database to new sqlite database.")
    parser.add_argument("-f",action="store",dest="sqlfile",default=SQLFILE,
        help="SQL file name to use [%(default)s]")
    parser.add_argument("-i",action="store",dest="importfile",default=None,
        help="Import metadata from another sqlite database.")
    parser.add_argument("--importDesc",action="store_true",dest="import_descriptions",default=False,
        help="Import description field from "+DESCFILE+" file in each folder")
    parser.add_argument("-l",action="store_true",dest="symlinks",default=False,
        help="Follow symbolic links [%(default)s]")
    parser.add_argument("-m",type=int,dest="minsize",default=MINSIZE,
        help="Minimum pixel width/height of stored image for --small search [%(default)s]")
    parser.add_argument("-r",action="store_true",dest="random",default=False,
        help="Create randomized files for landscape and portrait images [%(default)s]")
    parser.add_argument("-R",action="store_true",dest="relative",default=False,
        help="Try to find relative path of files, if --measuring from a different machine [%(default)s]")
    parser.add_argument("-s",type=str,dest="search",default=False,
        help="Search list based on path pattern")
    parser.add_argument("--measure",action="store_true",dest="measure",default=False,
        help="Measure various statistics for similarity/color searches. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--nearest",type=str,dest="nearestcolor",default=False,
        help="Search list for nearest ambient color. format: R,G,B in float 0-1. Add fourth value to limit search to number of hits. Also accepts format file,hits to find nearest color to given file.")
    parser.add_argument("--dup",action="store_true",dest="duplicate",default=False,
        help="Return a list of duplicate files, based on hashes. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--del",action="store_true",dest="deleteFiles",default=False,
        help="Delete files listed with --small. [%(default)s]")
    parser.add_argument("--small",action="store_true",dest="searchsmall",default=False,
        help="Return a list of small files, smaller than -m INT. This option will flip the 'Add new files' option. [%(default)s]")
    parser.add_argument("--similar",type=str,dest="similarity",default=None,
        help="Search list for similar images. Value 0-255 for similarity threshold. 0=high similarity. "+
        "If value is a filename, search similar to that image. "+
        "Append with ',value' to limit similarity. "+
        "The output columns: SD SimilarityDiff., CD ColorDiff., "+
        "RD AspectRatioDiff.,Shp SharpnessIndex. This function does not return exact duplicates.")
    parser.add_argument("-t",type=str,dest="tag",default=None,
        help="Give file a tag. If argument is a file name, print the tags of the file.")
    parser.add_argument("--viewer",type=str,dest="viewer",default=None,
        help="Program to view images, %%f refers to filename(s)."+
        "If '1', defaults to: 'geeqie -l %%f'")
    parser.add_argument("-x",action="append",dest="exclude",default=[],
        help="Exclude folder name from the lists. This option may be issued several times.")
    parser.add_argument('startpath', action="store",default='.', nargs='?')
    options=parser.parse_args()
    # fold user-supplied exclusions into the module-wide skip list
    BADDIRS.extend(options.exclude)
    # query-like modes flip the 'add new files' default (see -a)
    if options.duplicate or \
        options.searchsmall or \
        options.measure or \
        options.nearestcolor or \
        options.similarity!=None or \
        options.search or \
        options.diskused:
        options.add=not options.add
    # tag/import/export never scan for new files
    if options.tag or\
        options.importfile or\
        options.exportfile:
        options.add=False
    return options
def createdb(sqlfile):
    """Create a fresh image-list database with the three expected tables:
    list (one row per file), data (per-content metadata), tags."""
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    cur.execute('CREATE TABLE data (hash TEXT PRIMARY KEY,'
                'description TEXT,portrait NUMERIC, '
                'width INTEGER,height INTEGER,'
                'fingerprint TEXT,sharpness NUMERIC,'
                'R REAL,G REAL, B REAL, BR REAL, BG REAL, BB REAL)')
    cur.execute('CREATE TABLE list (file TEXT PRIMARY KEY,hash TEXT,date INTEGER,size INTEGER)')
    cur.execute('CREATE TABLE tags (hash TEXT PRIMARY KEY,tag TEXT)')
    conn.commit()
    return
def delete_nonexisting(sqlfile):
    """Drop `list` rows whose file is no longer present on disk."""
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    reader = conn.cursor()
    writer = conn.cursor()
    reader.execute('SELECT file FROM list')
    removed = 0
    for (fname,) in reader:
        if os.path.exists(fname):
            continue
        print('removing.. ' + fname)
        writer.execute("DELETE FROM list where file == ?", (fname,))
        removed += 1
    print('Removed {0} entries'.format(removed))
    conn.commit()
    return
def delete_data(sqlfile):
    """Remove orphaned metadata and tags.

    Deletes `data` rows whose hash no longer appears in `list`, drops
    `tags` rows for unknown hashes and de-duplicates identical
    (hash, tag) pairs.  Prints how many rows were removed.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    db = conn.cursor()
    dbdel = conn.cursor()
    db.execute('''SELECT hash FROM data EXCEPT SELECT hash FROM list''')
    removed = 0
    for row in db:
        dbdel.execute("DELETE FROM data where hash == ?", (row[0],))
        removed += 1  # BUG FIX: old code reported the last loop index, not the count
    conn.commit()
    print('Removed {0} metadata'.format(removed))
    tagsbefore = db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
    db.execute('''SELECT hash FROM tags EXCEPT SELECT hash FROM list''')
    for row in db:
        dbdel.execute("DELETE FROM tags where hash == ?", (row[0],))
    # keep only the first occurrence of each (hash, tag) pair
    db.execute('''DELETE FROM tags WHERE rowid NOT IN
        ( SELECT MIN(rowid) FROM tags GROUP BY hash,tag )''')
    conn.commit()
    tagsafter = db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
    print('Removed {0} tags'.format(tagsbefore - tagsafter))
    return
def delete_files(files):
    ''' Actually deletes files! Lists them, asks for confirmation, removes. '''
    print_structure(files)
    if confirm(prompt="Sure to delete these files?"):
        print("now delling")
        for group in files:
            for entry in group[1]:
                print(entry[0])
                os.remove(entry[0])
    return
def add_recurse(options):
    """Walk options.startpath and add/refresh image files in the DB.

    New images are INSERTed (unless -a disabled adding); with -c, files
    whose stored mtime differs are re-hashed and updated.  Commits once
    at the end and then fills in basic metadata via append_data().
    """
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
        print('Checking '+path)
        # clean_dirs mutates the list in place so os.walk prunes the tree
        dirs=clean_dirs(dirs)
        if not options.symlinks:
            files=clean_syms(files)
        files.sort()
        dirs.sort()
        # base names already recorded for this folder (realpath-keyed)
        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
        for file in files:
            if IMGMATCH.match(file):
                filename=os.path.realpath(os.path.join(path,file))
                if file not in db_files:
                    if options.add:
                        try:
                            add_single(conn,filename,change=False)
                        except:
                            print('error adding file: '+filename)
                            traceback.print_exc(file=sys.stdout)
                            sys.exit(1)
                else:
                    if options.changed:
                        # mtime comparison is cheaper than re-hashing
                        ftime=int(os.path.getmtime(filename))
                        #hash=get_md5(filename)
                        #if not hash_match(db,filename,hash):
                        if not ftime_match(db,filename,ftime):
                            #file content changed
                            try:
                                add_single(conn,filename,change=True)
                            except:
                                print('error changing file: '+filename)
                                traceback.print_exc(file=sys.stdout)
                                sys.exit(1)
    # if file mentioned, and hash same, no need to change entry
    conn.commit()
    append_data(options)
    return
def add_single(conn, filename, change=False, hash=None, minsize=0):
    """Insert (or, with change=True, update) one file row in `list`.

    `hash` may be supplied to skip re-hashing; `minsize` is accepted for
    interface compatibility and unused here.
    """
    cur = conn.cursor()
    if hash is None:
        hash = get_md5(filename)
    ftime = int(os.path.getmtime(filename))
    fsize = os.path.getsize(filename)
    if change:
        cur.execute("UPDATE list SET hash=?, date=? ,size=? WHERE file=?",
                    (hash, ftime, fsize, filename))
        print("changing: %(f)s (%(x)s)" % {'f': filename, 'x': humanize_size(fsize)})
    else:
        cur.execute("INSERT INTO list(file,hash,size,date) VALUES(?,?,?,?)",
                    (filename, hash, fsize, ftime))
        print("adding: %(f)s (%(x)s)" % {'f': filename, 'x': humanize_size(fsize)})
    return
def append_data(options):
    """Fill in width/height/orientation metadata for newly listed files.

    For every hash present in `list` but missing from `data`, measure the
    image dimensions and record whether it is portrait oriented.  Prints a
    countdown and commits every 50 rows.
    """
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    print("Append basic metadata...")
    db = conn.cursor()
    dbh = conn.cursor()
    # Count pending hashes in SQL; the old code iterated the whole result
    # set just to learn the last index (which was also off by one).
    count = db.execute('SELECT COUNT(*) FROM '
                       '(SELECT hash FROM list EXCEPT SELECT hash FROM data)'
                       ).fetchall()[0][0]
    db.execute('''SELECT hash FROM list EXCEPT SELECT hash FROM data''')
    for i, row in enumerate(db):
        filename = hash2file(conn.cursor(), row[0])
        if filename is None:
            continue
        filename = check_path(filename, options)
        dims = get_dims(filename)  # (width, height) as strings from identify
        if int(dims[0]) < int(dims[1]):
            portrait = 1
        else:
            portrait = 0
        dbh.execute("INSERT OR REPLACE INTO data(hash,portrait,width,height) "
                    "VALUES(?,?,?,?)", (row[0], portrait, dims[0], dims[1]))
        if (i % 50 == 0):
            print("%(nr)i" % {'nr': count - i})
            conn.commit()
    conn.commit()
    return
def add_tag(options):
    """Attach options.tag to the image located at options.startpath."""
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    target = os.path.realpath(options.startpath)
    hash = file2hash(conn.cursor(), target)
    if hash is None:
        print("Image not found " + target)
        return
    cur = conn.cursor()
    cur.execute("INSERT INTO tags(hash,tag) VALUES(?,?)", (hash, options.tag))
    conn.commit()
    print(options.startpath + ":\"" + options.tag + "\"")
def print_tag(options):
    """Print the comma-separated tags stored for the image named by options.tag."""
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    target = os.path.realpath(options.tag)
    hash = file2hash(conn.cursor(), target)
    if hash is None:
        print("Image not found " + target)
        sys.exit(1)
    cur = conn.cursor()
    cur.execute("SELECT DISTINCT tag FROM tags WHERE hash = ?", (hash,))
    print(",".join(row[0] for row in cur))
def random_lists(sqlfile):
    """Write shuffled landscape.list / portrait.list files of all images.

    Uses GNU `sort -R` on a staging file for the shuffle, then removes
    the staging files.
    """
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    cur.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=0')
    with open('landscape.list.s', 'w') as lfile:
        for row in cur:
            lfile.write(row[0] + '\n')
    cur.execute('SELECT list.file FROM list LEFT JOIN data ON list.hash = data.hash WHERE data.portrait=1')
    with open('portrait.list.s', 'w') as pfile:
        for row in cur:
            pfile.write(row[0] + '\n')
    subprocess.call(['sort', '-R', '-o', 'landscape.list', 'landscape.list.s'])
    subprocess.call(['sort', '-R', '-o', 'portrait.list', 'portrait.list.s'])
    os.unlink('portrait.list.s')
    os.unlink('landscape.list.s')
    return
def is_listed(db, filename):
    """Return True when `filename` already has a row in the `list` table."""
    db.execute("SELECT COUNT(*) FROM list where file == ?", (filename,))
    return db.fetchall()[0][0] > 0
def get_folder_contents(db, path):
    ''' Return base names of listed files directly inside `path` (subfolders excluded). '''
    db.execute("SELECT file FROM list where file LIKE ?", (path + '%',))
    found = []
    for (fname,) in db.fetchall():
        base = fname.replace(path, '', 1)
        if '/' not in base:
            found.append(base)
    return found
def ftime_match(db, filename, ftime):
    """True when the stored mtime for `filename` equals `ftime`.
    Assumes the file is present in `list` (IndexError otherwise)."""
    db.execute("SELECT date FROM list WHERE file == ?", (filename,))
    rows = db.fetchall()
    return rows[0][0] == ftime
def hash_match(db, filename, hash):
    """True when the stored hash for `filename` equals `hash`.
    Assumes the file is present in `list` (IndexError otherwise)."""
    db.execute("SELECT hash FROM list where file == ?", (filename,))
    rows = db.fetchall()
    return rows[0][0] == hash
def hash2file(db, hash, firstOnly=True):
    """Map a content hash back to its file name(s).

    Returns None when unknown, the first name when firstOnly is true,
    otherwise the full list of names sharing the hash.
    """
    db.execute("SELECT file FROM list where hash == ?", (hash,))
    rows = db.fetchall()
    if not rows:
        return None
    return rows[0][0] if firstOnly else [r[0] for r in rows]
def file2hash(db, filename):
    """Return the stored hash for `filename`, or None when not listed."""
    db.execute("SELECT hash FROM list where file == ? LIMIT 1", (filename,))
    rows = db.fetchall()
    return rows[0][0] if rows else None
def hash_in_data(db, hash):
    """True when `hash` already has a metadata row in `data`."""
    db.execute("SELECT hash FROM data where hash == ? LIMIT 1", (hash,))
    return bool(db.fetchall())
def hash_in_list(db, hash):
    """True when some listed file carries `hash`."""
    db.execute("SELECT hash FROM list where hash == ? LIMIT 1", (hash,))
    return bool(db.fetchall())
def get_md5(filename):
    ''' Return MD5 hex digest of the first 15 megabytes of the file. '''
    # FIX: use a context manager so the handle is closed promptly
    # (the old one-liner leaked the file object until GC).
    with open(filename, 'rb') as fp:
        return hashlib.md5(fp.read(1024 * 1024 * 15)).hexdigest()
def get_dims(filename):
    """Return [width, height] strings via ImageMagick `identify` (first frame only)."""
    cmd = ['identify', '-format', '%wx%h', filename + '[0]']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    return out.strip().split('x')
def call_viewer(opts, files):
    """Run the configured viewer on `files`; '%f' in the command expands
    to the space-joined file names.  '1' selects the geeqie default."""
    if opts.viewer == "1":
        opts.viewer = "geeqie -l %f"
    # NOTE(review): shell=True with unquoted, space-joined names — files
    # containing shell metacharacters will break; kept because the viewer
    # option is a user-supplied shell command string.
    with open('/dev/null', 'w') as devnull:  # FIX: handle was never closed
        subprocess.call(opts.viewer.replace('%f', " ".join(files)),
                        stderr=devnull, shell=True)
def append_colors(opt):
    """Measure and store mean / border-mean RGB for rows missing color data.

    Prints a countdown and commits every 50 rows so an interrupted run
    keeps its progress.
    """
    conn=sqlite3.connect(opt.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    dbh=conn.cursor()
    # remaining-row count drives the countdown printout below
    count=dbh.execute("SELECT COUNT(hash) FROM data WHERE R IS NULL").fetchall()[0][0]
    db.execute("SELECT hash,R FROM data WHERE R IS NULL")
    dirname_old=""
    for i,row in enumerate(db):
        filename=hash2file(conn.cursor(),row[0])
        if filename==None:
            continue
        filename=check_path(filename,opt)
        # colors = (mean RGB triplet, border-mean RGB triplet), floats 0-1
        colors=get_colors(filename)
        dbh.execute("UPDATE data SET R=?, G=?, B=?, BR=?, BG=?, BB=? \
            WHERE hash=?",(colors[0][0],colors[0][1],colors[0][2],
            colors[1][0],colors[1][1],colors[1][2],row[0]))
        print("%(i)d: %(f)s (%(r)s %(g)s %(b)s)" % {'i':count-i, 'f':filename, 'r':colors[0][0],
            'g':colors[0][1], 'b':colors[0][2]})
        if (i%50==0):
            conn.commit();
    conn.commit()
    return
def find_color_nearest(opts):
    """ Find closest matching images to given RGB color.

    opts.nearestcolor is "R,G,B" (floats 0-1), optionally "R,G,B,limit".
    Matches are ranked by summed absolute distance to the stored
    border-mean color.
    """
    src=opts.nearestcolor
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    src=[float(i) for i in src.strip().strip('"').split(',')]
    if len(src)==3:
        src.append(1)  # default: show only the single best hit
    db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
        (src[0],src[1],src[2],src[3]))
    hits=[]
    for hit in db:
        fs=hash2file(conn.cursor(),hit[0],False)
        if fs==None:
            continue
        if hit[1]==None:
            print("Color information not found. Run again with --measure.")
            return
        for f in fs:
            hits.append((f,hit[1],hit[2],hit[3],hit[4]))
    if len(hits)==0:
        print("0 hits! You may have metadata of nonexisting files!, run with -D to erase")
        return  # BUG FIX: previously fell through and crashed on max() of an empty list
    file_len=str(max([len(x[0]) for x in hits]))
    for h in range(len(hits)):
        if h>=src[3]:
            break
        print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]))
    if opts.viewer:
        fnames=[x[0] for x in hits]
        call_viewer(opts, fnames)
    return
def find_color_nearest_file(opts):
    """ Find closest matching images to a given file's border RGB color.

    opts.nearestcolor is "path" or "path,hits".  When the file is already
    listed its stored border colors are reused; otherwise it is measured
    on the fly.
    """
    cmp=opts.nearestcolor.rsplit(",",1)
    if len(cmp)==1:
        thr=2
    else:
        thr=int(cmp[1])+1  # +1 because the query file itself may be among the hits
    cmp=os.path.realpath(cmp[0])
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    if is_listed(db, cmp):
        hash=file2hash(conn.cursor(), cmp)
        db1=conn.cursor()
        db1.execute("SELECT hash,fingerprint,sharpness,width,height,BR,BG,BB FROM data WHERE hash=?",(hash,))
        for hit1 in db1:
            fp=int(hit1[1])
            sp=hit1[2]
            dims=hit1[3:5]
            src=hit1[5:8]
    else:
        hash=get_md5(cmp)
        fp=int(get_fingerprint(cmp))
        sp=get_sharpness(cmp)
        dims=get_dims(cmp)
        src=get_colors(cmp)[1]
    src=[float(i) for i in src]
    db.execute("SELECT hash, ABS(BR-?)+ABS(BG-?)+ABS(BB-?) as K,BR,BG,BB FROM data ORDER BY K LIMIT ?",
        (src[0],src[1],src[2],thr))
    hits=[]
    for hit in db:
        fs=hash2file(conn.cursor(),hit[0],False)
        if fs==None:
            continue
        if hit[1]==None:
            print("Color information not found. Run again with --measure.")
            return
        for f in fs:
            if f==cmp:
                continue
            hits.append((f,hit[1],hit[2],hit[3],hit[4]))
    if len(hits)==0:
        # BUG FIX: without this guard max() below raised ValueError when
        # nothing (or only the query file itself) matched
        print("0 hits! You may have metadata of nonexisting files!, run with -D to erase")
        return
    file_len=str(max([len(x[0]) for x in hits]))
    for h in range(len(hits)):
        if h>=thr-1:
            break
        print( ('{: <'+file_len+'} D {:.2f} (RGB {:.2f},{:.2f},{:.2f})').format(*hits[h]) )
    if opts.viewer:
        fnames=[x[0] for x in hits]
        call_viewer(opts, fnames)
    return
def get_colors(filename):
    """Return (mean RGB, border-mean RGB) for an image, floats in 0-1.

    The image is shrunk once to a 10x10 ImageMagick TEXT thumbnail, which
    is then piped twice into `convert`: once for the overall mean, once
    with the four 1-pixel edge strips appended into a single strip for
    the border mean.
    """
    small_args=['convert','-define','jpeg:size=64x64',filename+'[0]','-resize','10x10!','TEXT:-']
    p=subprocess.Popen(small_args,stdout=subprocess.PIPE)
    img, err = p.communicate()
    mean_args=['convert','-','-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-']
    p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE)
    mean, err = p.communicate(input=img)
    # crop N/S/E/W edge strips, rotate the vertical ones, append, take mean
    mean_args=['convert','-',
        '(','+clone','-gravity','North','-crop','10x1+0+0','-write','mpr:top','+delete',')',
        '(','+clone','-gravity','South','-crop','10x1+0+0','-write','mpr:bot','+delete',')',
        '(','+clone','-gravity','West','-crop','1x10+0+0','-rotate','90','-write','mpr:lef','+delete',')',
        '(','+clone','-gravity','East','-crop','1x10+0+0','-rotate','90','-write','mpr:rig','+delete',')',
        '+delete','mpr:top','mpr:bot','mpr:lef','mpr:rig','+append',
        '-format','"%[fx:mean.r],%[fx:mean.g],%[fx:mean.b]"','info:-']
    p=subprocess.Popen(mean_args,stdout=subprocess.PIPE,stdin=subprocess.PIPE)
    border, err = p.communicate(input=img)
    mean=[float(i) for i in mean.strip().strip('"').split(',')]
    border=[float(i) for i in border.strip().strip('"').split(',')]
    return (mean,border)
def get_color_diff(c1, c2):
    """ Return color difference (Manhattan distance) between two RGB triplets """
    return sum(abs(a - b) for a, b in zip(c1, c2))
def get_ratio_diff(d1, d2):
    """ Return |aspect(d1) - aspect(d2)| for two (width, height) tuplets """
    ratio1 = float(d1[0]) / float(d1[1])
    ratio2 = float(d2[0]) / float(d2[1])
    return abs(ratio1 - ratio2)
def append_fingerprints(opt):
    """Compute and store perceptual fingerprints for rows missing one.

    Prints a countdown and commits every 50 rows so an interrupted run
    keeps its progress.
    """
    conn=sqlite3.connect(opt.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    dbh=conn.cursor()
    # remaining-row count drives the countdown printout below
    count=dbh.execute("SELECT COUNT(hash) FROM data WHERE fingerprint IS NULL").fetchall()[0][0]
    db.execute("SELECT hash FROM data WHERE fingerprint IS NULL")
    dirname_old=""
    for i,row in enumerate(db):
        filename=hash2file(conn.cursor(),row[0])
        if filename==None:
            continue
        filename=check_path(filename,opt)
        fp=get_fingerprint(filename)
        dbh.execute("UPDATE data SET fingerprint=? \
            WHERE hash=?",(fp,row[0]))
        print("%(nr)i %(f)s" % {'f':filename, 'nr':count-i})
        if (i%50==0):
            conn.commit();
    conn.commit()
def get_fingerprint(filename):
    """Return a 256-bit perceptual fingerprint as a decimal-number string.

    ImageMagick reduces the image to a normalized, equalized 16x16
    1-bit grayscale thumbnail in TEXT format; each pixel contributes
    one bit ('1' for white) to a 256-bit integer.
    """
    small_args=['convert','-define','jpeg:size=256x256',filename+'[0]','-resize','160x160!',
        '-colorspace','Gray','-blur','2x2','-normalize','-equalize','-resize','16x16','-depth','1','TEXT:-']
    p=subprocess.Popen(small_args,stdout=subprocess.PIPE)
    img, err = p.communicate()
    values=''
    # TEXT lines look like "x,y: (r,g,b) ..."; after split(',') the third
    # field is the green sample, "255" marking a white pixel.
    # NOTE(review): assumes Python 2 str pipes; Python 3 would need .decode().
    for row in img.split('\n'):
        gray=row.split(',')
        if len(gray)<3:
            continue
        if gray[2]=="255":
            values+='1'
        else:
            values+='0'
    return str(int(values,2))
def find_fingerprint_similar(opts):
    ''' Find all similar images, nearest match more similar than thr.

    For each measured image, collect others whose fingerprint Hamming
    distance is below opts.similarity and print them as a group.
    Output columns: SD similarity diff, CD color diff, RD aspect-ratio
    diff, Shp sharpness.  Exact duplicates (same hash) are skipped.
    '''
    thr=int(opts.similarity)
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db1=conn.cursor()
    db2=conn.cursor()
    db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
    hits=[]
    hit_list=[]  # files/hashes already reported, to avoid duplicate groups
    i=None
    for i,hit1 in enumerate(db1):
        cmp=hit1[0]
        cmpf=hash2file(conn.cursor(),hit1[0])
        if cmpf==None:
            continue
        if cmpf in hit_list:
            continue
        fp=int(hit1[1])
        sp=hit1[2]
        dims=hit1[3:5]
        pixels=dims[0]*dims[1]
        colors=hit1[5:8]
        db2.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
        this1=[ [cmpf, 0,sp,int(hit1[3]),int(hit1[4]),0,pixels,0] ]
        for hit2 in db2:
            if hit2[0]==cmp:
                continue
            # Hamming distance between the two fingerprints
            similarity=bin(fp^int(hit2[1])).count('1')
            if similarity<thr:
                this2=[hit2[0], similarity, hit2[2],
                    int(hit2[3]),int(hit2[4]),
                    get_color_diff(hit2[5:8],colors),
                    int(hit2[3])*int(hit2[4]),
                    get_ratio_diff(hit2[3:5],dims)]
                fs=hash2file(conn.cursor(), hit2[0], False)
                if fs==None:
                    continue
                for f in fs:
                    # BUG FIX: copy the row per file name; the old code
                    # appended the same list object repeatedly, so every
                    # entry ended up showing the last file name.
                    thisf=list(this2)
                    thisf[0]=f
                    this1.append(thisf)
                    hit_list.append(f)
        this1.sort(key=lambda x: x[1])
        if len(this1)>1:
            hits.append(this1)
        hit_list.append(cmp)
    if i==None:
        print("No measurements found")
        sys.exit(1)
    for src in hits:
        file_len=str(max([len(x[0]) for x in src]))
        print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
        for c in range(len(src)):
            print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(src[c][0],src[c][1],
                "%.2f"%src[c][5],"%.2f"%src[c][7],
                "%.1f" % src[c][2],src[c][3],src[c][4]))
        if opts.viewer:
            fnames=[x[0] for x in src]
            call_viewer(opts, fnames)
    return
def find_fingerprint_nearest(opts):
    ''' Find nearest match to given file.

    opts.similarity is "path" or "path,threshold".  When the file is
    listed its stored measurements are reused, otherwise it is measured
    on the fly.  Prints the query and the best match side by side.
    '''
    cmp=os.path.realpath(opts.similarity.rsplit(",")[0])
    # NOTE(review): sys.maxint exists only on Python 2; Python 3 would
    # need sys.maxsize here.
    thr=sys.maxint
    if len(opts.similarity.rsplit(","))>1:
        thr=int(opts.similarity.rsplit(",",1)[1])
    conn=sqlite3.connect(opts.sqlfile)
    conn.text_factory=str
    db1=conn.cursor()
    if is_listed(db1, cmp):
        # file already in DB: reuse stored measurements
        hash=file2hash(conn.cursor(),cmp)
        db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE hash=?",(hash,))
        for hit1 in db1:
            fp=int(hit1[1])
            sp=hit1[2]
            dims=hit1[3:5]
            colors=hit1[5:8]
    else:
        # unknown file: measure everything now
        hash=get_md5(cmp)
        fp=int(get_fingerprint(cmp))
        sp=get_sharpness(cmp)
        dims=get_dims(cmp)
        colors=get_colors(cmp)[0]
    db1.execute("SELECT hash,fingerprint,sharpness,width,height,R,G,B FROM data WHERE sharpness > 0")
    # best match so far: [hash, similarity, sharpness, w, h, colordiff, ratiodiff]
    this=['',thr,0,0,0,0,0]
    hit1=None
    for i,hit1 in enumerate(db1):
        if hit1[0] == hash:
            continue
        # Hamming distance between fingerprints
        similarity=bin(fp^int(hit1[1])).count('1')
        if similarity<this[1]:
            this[0]=hit1[0]
            this[1]=similarity
            this[2]=hit1[2]
            this[3]=int(hit1[3])
            this[4]=int(hit1[4])
            this[5]=get_color_diff(colors, hit1[5:8]) #abs( hit1[5] - colors[0] )+abs( hit1[6] - colors[1] )+abs( hit1[7] - colors[2] )
            this[6]=get_ratio_diff(this[3:5], dims)# (this[3]/float(this[4])) / (float(dims[0])/float(dims[1]))
    if hit1==None:
        print("No measurements found")
        sys.exit(1)
    if this[1]==thr:
        print("No similarities < "+str(thr)+": "+cmp, file=sys.stderr)
        return
    this[0]=hash2file(conn.cursor(),this[0])
    file_len=str(max(len(cmp), len(this[0])))
    print( ('{: <'+file_len+'} {: ^4} {: ^4} {: ^4} {: ^4} {: ^5}x{: ^5}').format("File","SD","CD","RD","Shp","W","H"))
    print( ('{: <'+file_len+'} {: >4} {: ^4} {: ^4} {: ^4} {: >5}x{: >5}').format(cmp,"","","","%.1f" % sp,dims[0],dims[1]))
    print( ('{: <'+file_len+'} {: >4} {: >4} {: >4} {: >4} {: >5}x{: >5}').format(this[0], this[1],"%.2f"%this[5],
        "%.2f"%this[6], "%.1f" % this[2],this[3], this[4]))
    if opts.viewer:
        call_viewer(opts, (cmp,this[0]))
def append_sharpness(opt):
    """Measure and store a sharpness index for rows missing one.

    Prints a countdown and commits every 25 rows so an interrupted run
    keeps its progress.
    """
    conn=sqlite3.connect(opt.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    dbh=conn.cursor()
    # remaining-row count drives the countdown printout below
    count=dbh.execute("SELECT COUNT(hash) FROM data WHERE sharpness IS NULL").fetchall()[0][0]
    db.execute("SELECT hash FROM data WHERE sharpness IS NULL")
    dirname_old=""
    for i,row in enumerate(db):
        filename=hash2file(conn.cursor(),row[0])
        if filename==None:
            continue
        filename=check_path(filename,opt)
        sp=get_sharpness(filename)
        dbh.execute("UPDATE data SET sharpness=? \
            WHERE hash=?",(sp,row[0]))
        print("%(nr)i %(f)s %(s)f" % {'f':filename, 'nr':count-i,'s':sp})
        if (i%25==0):
            conn.commit();
    conn.commit()
def get_sharpness(filename):
    """ Difference in X, Difference in Y, get smaller diff = smaller sharpness.
    May change if bugs found.. """
    # Resize to 1024 smaller axis, crop with golden ratio
    # grayscale and equalize histogram. calculate difference between x neighbor
    # and y neigbor. smaller difference = less sharp.
    small_args="convert ( -define jpeg:size=2048x2048 -resize 1024^ -gravity center -crop 62%x62%+0+0 -colorspace gray XXX -equalize ) \
( -clone 0 ( -clone 0 -roll +1+0 ) -compose Difference -composite -shave 1x1 ) \
( -clone 0 ( -clone 0 -roll +0+1 ) -compose Difference -composite -shave 1x1 ) \
-delete 0 -compose Darken -composite -format %[fx:mean*1000] info:".split(" ")
    # token 12 of the split template is the 'XXX' placeholder — replace it
    # with the actual file name (first frame only)
    small_args[12]=filename+'[0]'
    p=subprocess.Popen(small_args,stdout=subprocess.PIPE)
    sharpness, err = p.communicate()
    return float(sharpness.strip())
def searchdb(sqlfile, needle):
    """Return [('search', rows)] where each row is [file, width, height]
    for listed files whose path contains `needle`."""
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    meta = conn.cursor()
    cur.execute("SELECT file,hash FROM list WHERE file LIKE ? ORDER BY file",
                ('%' + needle + '%',))
    rows = []
    for fname, fhash in cur.fetchall():
        data = meta.execute("SELECT hash,width,height FROM data WHERE hash = ?",
                            (fhash,)).fetchall()
        rows.append([fname, data[0][1], data[0][2]])
    return [('search', rows)]
def clean_dirs(dirs):
    """Prune excluded and hidden directory names.

    Mutates `dirs` in place (slice assignment) so os.walk skips the
    pruned subtrees, and returns it for convenience.
    """
    dirs[:] = [name for name in dirs
               if name not in BADDIRS and not name.startswith(".")]
    return dirs
def clean_syms(files):
    """Drop symlink entries, mutating `files` in place and returning it.

    NOTE(review): islink() is called on bare names, so this only detects
    links relative to the current working directory — confirm callers
    chdir or pass resolvable names.
    """
    files[:] = [name for name in files if not os.path.islink(name)]
    return files
def confirm(prompt=None, resp=False):
    """prompts for yes or no response from the user. Returns True for yes and
    False for no.
    'resp' should be set to the default value assumed by the caller when
    user simply types ENTER.
    """
    if prompt is None:
        prompt = 'Confirm'
    # show the default answer first inside the brackets
    if resp:
        prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
    else:
        prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
    while True:
        ans = raw_input(prompt)
        if not ans:
            return resp
        if ans in ('y', 'Y'):
            return True
        if ans in ('n', 'N'):
            return False
        print('please enter y or n.')
def find_duplicates(sqlfile,search):
    """Return duplicate groups as [(hash, [[file, width, height, date], ...])].

    `search` is a path substring filter ('.' matches everything).  Files
    inside each group are sorted oldest-first; the groups themselves are
    sorted by their first file entry.
    """
    if (search=='.'):
        search='%'
    else:
        search='%'+search+'%'
    conn=sqlite3.connect(sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    dbh=conn.cursor()
    dbf=conn.cursor()
    # hashes that occur more than once among matching paths
    db.execute("SELECT hash,count(*) FROM list WHERE file LIKE ? group by hash HAVING count(*) > 1 ",(search,))
    duphash=[]
    for row in db:
        hash=row[0]
        dbh.execute("SELECT hash,width,height FROM data WHERE hash = ?",(hash,))
        flist=[]
        for h in dbh:
            dbf.execute("SELECT file,date FROM list WHERE hash = ?",(hash,))
            for f in dbf:
                flist.append([f[0],h[1],h[2],f[1]])
        flist.sort(key=lambda file: file[3])  # oldest first
        duphash.append((hash, flist))
    duphash.sort(key=lambda file: file[1][0])
    return duphash
def find_smalls(minsize, sqlfile):
    """Return [('smalls', rows)] for files whose stored width or height
    is below `minsize`; each row is [file, width, height]."""
    conn = sqlite3.connect(sqlfile)
    conn.text_factory = str
    cur = conn.cursor()
    cur.execute("SELECT hash,width,height FROM data WHERE width < ? OR height < ?",
                (minsize, minsize))
    smalls = []
    for row in cur.fetchall():
        names = hash2file(conn.cursor(), row[0], False)
        if names is None:
            continue
        for name in names:
            smalls.append([name, row[1], row[2]])
    return [('smalls', smalls)]
def disk_used(options):
    """Print cumulative file sizes grouped by directory below the --du root.

    options.diskused is the root path; options.diskused_depth selects how
    many path components below the root form a group.  Output columns:
    raw bytes, human-readable size, group path.
    """
    conn = sqlite3.connect(options.sqlfile)
    conn.text_factory = str
    db = conn.cursor()
    root = os.path.realpath(options.diskused)
    db.execute('SELECT size,replace(file,?,"") as path FROM list WHERE file LIKE ?',
               (root + "/", root + "%"))
    entries = []   # group names in first-seen order
    sizes = {}     # PERF FIX: dict lookup replaces O(n) list.index per row
    depth = int(options.diskused_depth)
    for size, relpath in db:
        group = "/".join(relpath.split('/')[0:depth])
        if group not in sizes:
            entries.append(group)
            sizes[group] = size
        else:
            sizes[group] += size
    for group in entries:
        print("| ".join([str(sizes[group]).ljust(14),
                         humanize_size(sizes[group]).rjust(8),
                         group]))
def print_structure(files):
    """Print numbered '(WxH):file' lines for each (label, rows) group."""
    for group in files:
        #print(group[0])
        for idx, row in enumerate(group[1]):
            print("%(i)d: (%(x)dx%(y)d):%(f)s " % {'i': idx + 1, 'f': row[0],
                                                   'x': row[1], 'y': row[2]})
    return
def print_dup_structure(files, opts):
    """Print duplicate groups as 'N:count:"file","file"...' and optionally
    hand each group to the configured viewer."""
    for idx, group in enumerate(files, 1):
        #print(group[0])
        quoted = [' "' + entry[0] + '"' for entry in group[1]]
        print("%(i)d:%(n)d:%(f)s " % {'i': idx, 'n': len(quoted), 'f': ",".join(quoted)})
        if opts.viewer:
            names = [entry[0] for entry in group[1]]
            subprocess.call(opts.viewer.replace('%f', " ".join(names)), shell=True)
    return
def humanize_size(size, precision=1):
    """Format a byte count as a short human-readable string (e.g. '2.0KB').

    None yields 'nan'; values up to 1024 are printed without decimals.
    """
    if size == None:
        return 'nan'
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    suffixIndex = 0
    defPrecision = 0
    # FIX: stop at the last suffix instead of indexing past the list
    # for very large values (old code raised IndexError beyond TB range).
    while size > 1024 and suffixIndex < len(suffixes) - 1:
        suffixIndex += 1
        size = size / 1024.0
        defPrecision = precision
    return "%.*f%s" % (defPrecision, size, suffixes[suffixIndex])
def humanize_date(date):
    """Format a unix timestamp as 'YYYY-mm-dd HH:MM:SS' (local time);
    returns '' for None."""
    if date is None:
        return ''
    stamp = datetime.datetime.fromtimestamp(int(date))
    return stamp.strftime('%Y-%m-%d %H:%M:%S')
def import_descriptions(options):
    """ Walk through the path from given [startpath] and read
    any DESCFILE, importing the contents in the DB.

    Each DESCFILE is tab-separated with rows of [file name, description];
    descriptions are written into data.description keyed by the file's
    stored hash.  Commits once per folder.
    """
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
        dirs=clean_dirs(dirs)  # in-place prune so os.walk skips bad dirs
        if not options.symlinks:
            files=clean_syms(files)
        files.sort()
        dirs.sort()
        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
        if len(db_files)==0:
            continue
        if not os.path.exists( os.path.join(path,DESCFILE) ):
            continue
        read_file=open(os.path.join(path,DESCFILE),'r')
        reader=csv.reader(read_file, dialect='excel-tab')
        for row in reader:
            if row[0] in db_files:
                hash=file2hash(db,os.path.realpath(os.path.join(path,row[0])))
                if hash==None:
                    continue
                db.execute("UPDATE data SET description=? \
                    WHERE hash = ?",(row[1],hash))
        conn.commit()
        read_file.close()
def export_descriptions(options):
    """ Walk through folders, and write DESCFILE csv descriptions.

    Every folder containing listed files gets a tab-separated DESCFILE
    with a header row and one row per file, formatted through
    description_parse() using the --exportDesc template.
    """
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    for path,dirs,files in os.walk(options.startpath,followlinks=options.symlinks):
        dirs=clean_dirs(dirs)  # in-place prune so os.walk skips bad dirs
        if not options.symlinks:
            files=clean_syms(files)
        files.sort()
        dirs.sort()
        db_files=get_folder_contents(db,os.path.realpath(path)+'/')
        if len(db_files)==0:
            continue
        print('Writing to '+os.path.join(path,DESCFILE))
        # if exist DESCFILE
        write_file=open(os.path.join(path,DESCFILE),'w')
        writer=csv.writer(write_file, dialect='excel-tab')
        writer.writerow(["File","Description"])
        for f in db_files:
            fullname=os.path.realpath(os.path.join(path,f))
            hash=file2hash(db,fullname)
            if hash==None:
                continue
            # full list row, full data row and joined tags feed the template
            l=db.execute("SELECT * FROM list WHERE hash = ?",(hash,)).fetchall()[0]
            d=db.execute("SELECT * FROM data WHERE hash = ?",(hash,)).fetchall()[0]
            t=",".join([x[0] for x in db.execute("SELECT tag FROM tags WHERE hash = ?",(hash,)).fetchall()])
            writer.writerow([f,description_parse(options.export_descriptions, l,d,t)])
        write_file.close()
def description_parse(s, l, d, t):
    """{desc} {width}x{height} {red} {green} {blue} {Bred} {Bgreen} {Bblue} {size} {date} {name} {tags}

    Fill template `s` from a `list` row `l`, a `data` row `d` and the
    joined tag string `t`; None fields in `d` become empty strings.
    """
    clean = ["" if field is None else field for field in d]
    return s.format(
        desc=clean[1], width=clean[3], height=clean[4],
        red=clean[7], green=clean[8], blue=clean[9],
        Bred=clean[10], Bgreen=clean[11], Bblue=clean[12],
        size=humanize_size(l[3]),
        date=humanize_date(l[2]),
        name=os.path.basename(l[0]),
        tags=t,
    )
def import_metadata(options):
    """ import data table from another sqlite file.

    `data` rows from the attached DB overwrite existing ones (REPLACE);
    tags are merged (IGNORE) and then de-duplicated on (hash, tag).
    Exits with status 1 when the source file is missing.
    """
    if not os.path.exists(options.importfile):
        print("SQLite file {:} missing".format(options.importfile))
        sys.exit(1)
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    tagsbefore=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
    db.execute("ATTACH ? as fromDB", (options.importfile, ))
    db.execute("INSERT OR REPLACE INTO main.data SELECT * FROM fromDB.data")
    db.execute("INSERT OR IGNORE INTO main.tags SELECT * FROM fromDB.tags")
    conn.commit()
    # keep only the first occurrence of each (hash, tag) pair
    db.execute("""DELETE FROM main.tags WHERE rowid NOT IN
        ( SELECT MIN(rowid) FROM main.tags GROUP BY hash,tag )""")
    conn.commit()
    count=db.execute("SELECT COUNT(hash) FROM fromDB.data").fetchall()[0][0]
    tagsafter=db.execute("SELECT COUNT(hash) FROM tags").fetchall()[0][0]
    print("Imported %d metadata, %d tags." % (count,tagsafter-tagsbefore))
def export_database(options):
    """ export data to new sqlite file. Minimize file size of sqlite.

    Creates the target schema when missing, then copies list/data
    (REPLACE) and tags (IGNORE) into the attached destination DB.
    """
    if not os.path.exists(options.exportfile):
        createdb(options.exportfile)
    conn=sqlite3.connect(options.sqlfile)
    conn.text_factory=str
    db=conn.cursor()
    db.execute("ATTACH ? as toDB", (options.exportfile, ))
    db.execute("INSERT OR REPLACE INTO toDB.list SELECT * FROM main.list")
    db.execute("INSERT OR REPLACE INTO toDB.data SELECT * FROM main.data")
    db.execute("INSERT OR IGNORE INTO toDB.tags SELECT * FROM main.tags")
    conn.commit()
    count=db.execute("SELECT COUNT(hash) FROM toDB.list").fetchall()[0][0]
    metacount=db.execute("SELECT COUNT(hash) FROM toDB.data").fetchall()[0][0]
    tagscount=db.execute("SELECT COUNT(hash) FROM toDB.tags").fetchall()[0][0]
    print("Exported %d files, %d metadata, %d tags." % (count,metacount,tagscount))
def check_path(path, opt):
    """ Return relative path name to DB if real path doesnt exist.

    Returns `path` unchanged when it exists.  Otherwise, with -R/--relative,
    progressively strips leading path components and retries relative to
    the directory holding the sqlite file.  Raises IOError when nothing
    matches.
    """
    if os.path.isfile(path):
        return path
    if not opt.relative:
        # (old code re-tested os.path.isfile here — always False at this point)
        raise IOError("File %s not found. try -R for relative search" % path)
    sqlpath = os.path.dirname(os.path.realpath(opt.sqlfile))
    rel = os.path.join(sqlpath, path)
    searchpath = path.split(os.sep)
    while not os.path.isfile(rel):
        searchpath = searchpath[1:]
        if len(searchpath) == 0:
            raise IOError("File %s not found" % path)
        rel = os.path.join(sqlpath, os.sep.join(searchpath))
    #~ print(rel)
    return rel
def main():
    """Dispatch the selected maintenance/search operations in a fixed order."""
    options=setup_options();
    if not os.path.exists(options.sqlfile):
        createdb(options.sqlfile);
    # -- maintenance ---------------------------------------------------
    if options.delete:
        print('Deleting entries...')
        delete_nonexisting(options.sqlfile)
    if options.add or options.changed:
        print('Adding entries...')
        add_recurse(options)
    if options.delete_data:
        print('Deleting metadata...')
        delete_data(options.sqlfile)
    # -- searches / measurements --------------------------------------
    if options.search:
        print_structure(searchdb(options.sqlfile,options.search))
    if options.measure:
        append_data(options)
        print('Measure colors...')
        append_colors(options)
        print('Measure fingerprints...')
        append_fingerprints(options)
        print('Measure sharpness...')
        append_sharpness(options)
    if options.random:
        print('Random lists...')
        random_lists(options.sqlfile)
    if options.nearestcolor:
        # argument may be an existing file name or an R,G,B triplet
        if os.path.exists(options.nearestcolor.rsplit(",")[0]):
            find_color_nearest_file(options)
        else:
            find_color_nearest(options)
    if options.similarity!=None:
        # likewise: file name => nearest match, number => group search
        if os.path.exists(options.similarity.rsplit(",")[0]):
            find_fingerprint_nearest(options)
        else:
            find_fingerprint_similar(options)
    if options.duplicate:
        files=find_duplicates(options.sqlfile,options.startpath)
        print_dup_structure(files,options)
    if options.searchsmall:
        files=find_smalls(options.minsize,options.sqlfile)
        # --del deletes the found small files after confirmation
        if options.deleteFiles:
            if len(files[0][1])>0:
                delete_files(files)
                delete_nonexisting(options.sqlfile)
        else:
            print_structure(files)
    if options.diskused:
        disk_used(options)
        #print(files)
    # -- tags / import / export ---------------------------------------
    if options.tag:
        # with no startpath argument, -t FILE prints the file's tags
        if options.startpath==".":
            print_tag(options)
        else:
            add_tag(options)
    if options.importfile:
        print("Importing metadata")
        import_metadata(options)
    if options.exportfile:
        print("Exporting database")
        export_database(options)
    if options.import_descriptions:
        print("Import descriptions")
        import_descriptions(options)
    if options.export_descriptions:
        print("Export descriptions")
        export_descriptions(options)
    sys.exit(0)
# Script entry point: run main() only when executed directly.
if __name__ == "__main__":
    main()