reorganization, and output filename for SimpleWeb

This commit is contained in:
q
2015-12-20 12:31:29 +02:00
parent 810d485755
commit 9869257aaa
18 changed files with 16 additions and 15 deletions

164
files/FolderSplit.py Executable file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/python
import os,sys
import math,shutil,re
from random import shuffle
VERSION="0.1"
def setup_options(argv=None):
    ''' Setup the command line options.

    argv: optional list of argument strings. When None (the default)
          argparse reads the arguments from sys.argv[1:], exactly as before.

    Returns the argparse namespace with the attributes order, move, files,
    regexp, n and path. When a regular expression is given, order is forced
    to "regexp" and -n is ignored.

    Exits through parser.error() (SystemExit) when neither -n nor -r is
    given, or when -n is used and is not a positive integer.
    '''
    from argparse import ArgumentParser
    parser=ArgumentParser(description="Splits files to subfolders equally.")
    parser.add_argument("--order",'-o',type=str,action='store', dest='order',default="sequence",
                        help="Splitting method.",
                        choices=['sequence','sparse','regexp','random'])
    parser.add_argument("-m",action='store_true', dest='move',default=False,
                        help="Move entries instead of hardlink.")
    parser.add_argument("-f",action='store_true', dest='files',default=False,
                        help="Split files only, skipping folders")
    parser.add_argument("-r",'--regexp',type=str,action='store', dest='regexp',default="",
                        help="Regular expression for splitting. When set, order regexp used, -n not used.")
    parser.add_argument("-n",'-N',type=int,action='store', dest='n',
                        help="Number of subfolders to split into.")
    parser.add_argument("path",type=str,action="store",default=".",nargs="?",
                        help="Folder to split.")
    options=parser.parse_args(argv)
    if options.n is None and options.regexp=="":
        parser.print_help()
        parser.error("Either -n or -r must be passed")
    if options.regexp!="":
        # A regular expression always selects the regexp method; -n is unused.
        options.order="regexp"
    elif options.n<1:
        # Zero or negative folder counts would only fail later with a
        # division by zero or an index error, so reject them up front.
        parser.error("-n must be a positive integer")
    return options
def linktree(src, dst):
    """Recursively link a directory tree using os.link.

    Modified from shutil.copytree: the directory structure below src is
    recreated under dst, and every non-directory entry is hard linked
    instead of copied.

    src: existing directory to mirror.
    dst: directory to create; os.makedirs() raises if it already exists.

    Raises shutil.Error with a list of (srcname, dstname, reason) tuples
    when one or more entries could not be linked. The remaining entries
    are still processed before the error is raised.
    """
    names = os.listdir(src)
    os.makedirs(dst)
    errors = []
    for name in names:
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.isdir(srcname):
                linktree(srcname, dstname)
            else:
                os.link(srcname, dstname)
        # shutil.Error must be caught first: it is itself an
        # EnvironmentError and carries the failures of the recursive call.
        except shutil.Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    if errors:
        raise shutil.Error(errors)
def copyfileorfolder(basename,source,target,move):
    ''' Places one entry of the source folder under the target folder.

    basename: name of the entry inside source.
    source:   folder that currently holds the entry.
    target:   existing folder that receives the entry under the same name.
    move:     when true the entry is moved with shutil.move; otherwise a
              file is hard linked and a folder is mirrored with linktree.

    Raises RuntimeError when the entry is neither a file nor a folder
    (and move is false).
    '''
    srcpath=os.path.join(source,basename)
    dstpath=os.path.join(target,basename)
    if move:
        shutil.move(srcpath,dstpath)
    elif os.path.isfile(srcpath):
        os.link(srcpath,dstpath)
    elif os.path.isdir(srcpath):
        linktree(srcpath,dstpath)
    else:
        # Name the offending entry itself, not just the folder it lives in.
        raise RuntimeError(srcpath+' was neither file nor folder.')
def portorder(inFiles,inFolder,outFolders,N,link):
    ''' Deal the entries out round-robin (sparse): the first entry goes to
    the first output folder, the second to the next one, and so on,
    starting over at the first folder after N entries.

    The link argument is handed to copyfileorfolder unchanged as its
    last (move) argument.
    '''
    slot=0
    for entry in inFiles:
        copyfileorfolder(entry,inFolder,outFolders[slot],link)
        # Advance to the next folder, wrapping back to the first after N
        slot=slot+1 if slot+1<N else 0
def fileorder(inFiles,inFolder,outFolders,N,link):
    ''' Hand out the entries in input order (sequence): the first run of
    consecutive entries goes to the first output folder, the next run to
    the second folder, and so on.

    Every folder receives len(inFiles)/N entries rounded down; the
    leftover entries are given one each to the leading folders. The link
    argument is handed to copyfileorfolder unchanged as its last (move)
    argument.
    '''
    total=len(inFiles)
    sizes=[int(math.floor(float(total)/float(N)))]*int(N)
    # Spread what is left after the even split over the first folders
    for extra in range(total-sum(sizes)):
        sizes[extra]+=1
    start=0
    for folderidx in range(N):
        stop=start+sizes[folderidx]
        for entry in inFiles[start:stop]:
            copyfileorfolder(entry,inFolder,outFolders[folderidx],link)
        start=stop
def regexorder(inFiles,inFolder,outFolders,matcher,uniqlabel,link):
    ''' Sort the entries into folders by regular expression: the text
    captured by group 1 of matcher is looked up in uniqlabel, and the
    entry goes to the output folder at the same position. Entries that
    the expression does not match are left alone.

    The link argument is handed to copyfileorfolder unchanged as its
    last (move) argument.
    '''
    for entry in inFiles:
        found=matcher.search(entry)
        if not found:
            continue
        position=uniqlabel.index(found.group(1))
        copyfileorfolder(entry,inFolder,outFolders[position],link)
def regexmatches(inFiles, opts):
    ''' Work out the output folders for a regular expression split.

    inFiles: names of the entries to classify.
    opts:    object with the attributes regexp (pattern whose group 1
             gives the label of an entry) and path (folder being split).

    Prints the sorted unique labels and the number of entries that did
    not match.

    Returns the tuple (folders, uniqlabel, matcher): the output folder
    path of every label, the sorted unique labels in the same order, and
    the compiled pattern.
    '''
    matcher=re.compile(opts.regexp)
    matches=[]
    skipped=0
    for f in inFiles:
        m=matcher.search(f)
        if m:
            matches.append(m.group(1))
        else:
            skipped+=1
    uniqlabel=sorted(set(matches))
    print("Unique matches",uniqlabel)
    print("Not matching %d files."% skipped)
    # Build a fresh list instead of appending to a module-level variable,
    # so the function works on its own and repeated calls do not pile up.
    folders=[os.path.join(opts.path,x) for x in uniqlabel]
    return (folders, uniqlabel, matcher)
def offset(it):
    ''' Generate the running total of the values in it: after each value
    is consumed, the sum of everything seen so far is yielded. '''
    running = 0
    for step in it:
        running += step
        yield running
def report(outFolders):
    ''' Print one "name:count" line per output folder, where count is the
    number of entries os.listdir finds in that folder. '''
    for folder in outFolders:
        count=len(os.listdir(folder))
        print( ":".join( (os.path.basename( folder ), str(count)) ) )
# Splits a folder input in N outputs
options=setup_options()
# Kept as a module-level list: regexmatches may fill it in place.
outFolders=[]
method = options.order.lower().strip()
# list files, and remove hidden (.files)
inFiles=sorted(f for f in os.listdir(options.path) if not f.startswith('.'))
if options.files:
    inFiles=[ f for f in inFiles if os.path.isfile( os.path.join( options.path, f ) ) ]
if method=='regexp':
    (outFolders, uniqlabel, matcher)=regexmatches(inFiles, options)
    # Pause so the printed labels can be checked (Ctrl-C aborts). The
    # answer itself is not used. raw_input exists only on Python 2;
    # input is the Python 3 equivalent.
    try:
        ask=raw_input
    except NameError:
        ask=input
    ask("correct?")
else:
    outFolders=[os.path.join(options.path,'folder'+str(x+1)) for x in range(options.n)]
# Output folders left over from an earlier run live inside the folder
# being split; they must not be linked or moved into themselves.
targets=set(outFolders)
inFiles=[ f for f in inFiles if os.path.join( options.path, f ) not in targets ]
for x in outFolders:
    if not os.path.isdir(x):
        os.mkdir(x)
if method=='random':
    # Random order is a round-robin split of the shuffled entries
    shuffle(inFiles)
    portorder(inFiles,options.path,outFolders,options.n,options.move)
elif method=='regexp':
    regexorder(inFiles,options.path,outFolders,matcher,uniqlabel,options.move)
elif method=='sparse':
    portorder(inFiles,options.path,outFolders,options.n,options.move)
elif method=='sequence':
    fileorder(inFiles,options.path,outFolders,options.n,options.move)
report(outFolders)