python3izing scripts
This commit is contained in:
@@ -1,41 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os,sys
|
||||
import math,shutil,re
|
||||
import os, sys
|
||||
import math, shutil, re
|
||||
from random import shuffle
|
||||
|
||||
# Version string of this script.
VERSION = "0.1"
|
||||
|
||||
|
||||
def setup_options():
    """Parse the command line and return the resulting options.

    Returns:
        argparse.Namespace with the attributes ``order``, ``move``,
        ``files``, ``regexp``, ``n`` and ``path``. When a regular
        expression is given, ``order`` is forced to ``"regexp"``.

    Exits via ``parser.error`` (``SystemExit``) when neither ``-n`` nor
    ``-r`` is given, when ``--order regexp`` is requested without ``-r``,
    or when ``-n`` is needed but is not a positive integer.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Splits files to subfolders equally.")

    parser.add_argument(
        "--order",
        "-o",
        type=str,
        action="store",
        dest="order",
        default="sequence",
        help="Splitting method.",
        choices=["sequence", "sparse", "regexp", "random"],
    )
    parser.add_argument(
        "-m",
        action="store_true",
        dest="move",
        default=False,
        help="Move entries instead of hardlink.",
    )
    parser.add_argument(
        "-f",
        action="store_true",
        dest="files",
        default=False,
        help="Split files only, skipping folders",
    )
    parser.add_argument(
        "-r",
        "--regexp",
        type=str,
        action="store",
        dest="regexp",
        default="",
        help="Regular expression for splitting. When set, order regexp used, -n not used.",
    )
    parser.add_argument(
        "-n",
        "-N",
        type=int,
        action="store",
        dest="n",
        help="Number of subfolders to split into.",
    )
    parser.add_argument(
        "path",
        type=str,
        action="store",
        default=".",
        nargs="?",
        help="Folder to split.",
    )
    options = parser.parse_args()

    if options.regexp != "":
        # A regular expression always selects the regexp method; -n is unused.
        options.order = "regexp"
        return options

    if options.n is None:
        parser.print_help()
        parser.error("Either -n or -r must be passed")
    if options.order == "regexp":
        # Without a pattern there is no capture group to split on.
        parser.error("--order regexp requires -r/--regexp")
    if options.n < 1:
        # Zero or negative folder counts cannot be split into.
        parser.error("-n must be a positive integer")
    return options
|
||||
|
||||
|
||||
def linktree(src, dst):
|
||||
"""Recursively link a directory tree using os.link.
|
||||
Modified from shutil.copytree
|
||||
Modified from shutil.copytree
|
||||
"""
|
||||
names = os.listdir(src)
|
||||
os.makedirs(dst)
|
||||
@@ -49,116 +87,124 @@ def linktree(src, dst):
|
||||
else:
|
||||
# Will raise a SpecialFileError for unsupported file types
|
||||
os.link(srcname, dstname)
|
||||
except Error, err:
|
||||
except Error as err:
|
||||
errors.extend(err.args[0])
|
||||
except EnvironmentError, why:
|
||||
except EnvironmentError as why:
|
||||
errors.append((srcname, dstname, str(why)))
|
||||
|
||||
if errors:
|
||||
raise Error, errors
|
||||
raise Error(errors)
|
||||
|
||||
def copyfileorfolder(basename, source, target, move):
    """Place the entry ``basename`` from ``source`` under ``target``.

    Args:
        basename: Name of the file or folder inside ``source``.
        source: Folder that currently holds the entry.
        target: Folder that receives the entry under the same name.
        move: When true, the entry is moved with ``shutil.move``;
            otherwise a file is hard-linked with ``os.link`` and a
            folder is handed to ``linktree``.

    Raises:
        RuntimeError: When not moving and the entry is neither a regular
            file nor a directory.
    """
    src_path = os.path.join(source, basename)
    dst_path = os.path.join(target, basename)

    if move:
        shutil.move(src_path, dst_path)
        return
    if os.path.isfile(src_path):
        os.link(src_path, dst_path)
        return
    if os.path.isdir(src_path):
        linktree(src_path, dst_path)
        return
    # Report the full entry path, not just the containing folder.
    raise RuntimeError(src_path + " was neither file nor folder.")
|
||||
|
||||
def portorder(inFiles, inFolder, outFolders, N, link):
    """Distribute entries round-robin over the output folders (sparse).

    Entry ``i`` of ``inFiles`` goes to ``outFolders[i % N]``.

    Args:
        inFiles: Entry names inside ``inFolder``, in distribution order.
        inFolder: Folder that holds the entries.
        outFolders: Destination folders; the first ``N`` are used.
        N: Number of destination folders to cycle through.
        link: Forwarded unchanged as the ``move`` argument of
            ``copyfileorfolder``.
    """
    for position, entry in enumerate(inFiles):
        copyfileorfolder(entry, inFolder, outFolders[position % N], link)
|
||||
|
||||
def fileorder(inFiles, inFolder, outFolders, N, link):
    """Distribute entries in contiguous runs, keeping input order (sequence).

    The entries are cut into ``N`` consecutive slices whose sizes differ by
    at most one; the first ``len(inFiles) % N`` slices get the extra entry.
    Slice ``k`` goes to ``outFolders[k]``.

    Args:
        inFiles: Entry names inside ``inFolder``, in distribution order.
        inFolder: Folder that holds the entries.
        outFolders: Destination folders, one per slice.
        N: Number of slices / destination folders.
        link: Forwarded unchanged as the ``move`` argument of
            ``copyfileorfolder``.
    """
    folder_count = int(N)
    base_size, remainder = divmod(len(inFiles), folder_count)

    start = 0
    for outidx in range(folder_count):
        # The leading `remainder` folders take one extra entry each.
        size = base_size + (1 if outidx < remainder else 0)
        for entry in inFiles[start:start + size]:
            copyfileorfolder(entry, inFolder, outFolders[outidx], link)
        start += size
|
||||
|
||||
|
||||
def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link):
    """Distribute entries by the first capture group of a regex match.

    Entries whose name does not match ``matcher`` are skipped. For a match,
    the position of ``group(1)`` in ``uniqlabel`` picks the folder in
    ``outFolders``.

    Args:
        inFiles: Entry names inside ``inFolder``.
        inFolder: Folder that holds the entries.
        outFolders: Destination folders, parallel to ``uniqlabel``.
        matcher: Compiled pattern searched in each entry name.
        uniqlabel: List of labels; index ``i`` maps to ``outFolders[i]``.
        link: Forwarded unchanged as the ``move`` argument of
            ``copyfileorfolder``.
    """
    for f in inFiles:
        m = matcher.search(f)
        if not m:
            continue
        outidx = uniqlabel.index(m.group(1))
        copyfileorfolder(f, inFolder, outFolders[outidx], link)
|
||||
|
||||
|
||||
def regexmatches(inFiles, opts):
    """Collect the distinct regex labels found in the entry names.

    Each name in ``inFiles`` is searched with ``opts.regexp``; the first
    capture group of every match is a label. The sorted unique labels and
    the number of non-matching names are printed.

    Args:
        inFiles: Entry names to inspect.
        opts: Object with the attributes ``regexp`` (pattern string) and
            ``path`` (folder under which label folders are placed).

    Returns:
        Tuple ``(outFolders, uniqlabel, matcher)``: one folder path per
        label under ``opts.path``, the sorted unique labels, and the
        compiled pattern.
    """
    matcher = re.compile(opts.regexp)
    matches = []
    skipped = 0
    for f in inFiles:
        m = matcher.search(f)
        if m:
            matches.append(m.group(1))
        else:
            skipped += 1

    uniqlabel = sorted(set(matches))
    print("Unique matches", uniqlabel)
    print("Not matching %d files." % skipped)

    # Build the result locally instead of appending to a module-level list.
    outFolders = [os.path.join(opts.path, x) for x in uniqlabel]
    return (outFolders, uniqlabel, matcher)
|
||||
|
||||
|
||||
def offset(it):
    """Yield the running total of the numbers in ``it``.

    After each element the sum of all elements seen so far is yielded, so
    the output has as many items as the input.
    """
    total = 0
    for x in it:
        total += x
        yield total
|
||||
|
||||
|
||||
def report(outFolders):
    """Print ``<folder name>:<entry count>`` for every folder given.

    Args:
        outFolders: Paths of existing folders; each is listed with
            ``os.listdir`` to count its entries.
    """
    for x in outFolders:
        n = len(os.listdir(x))
        print(os.path.basename(x) + ":" + str(n))
|
||||
|
||||
# Splits a folder input in N outputs
options = setup_options()
outFolders = []
method = options.order.lower().strip()

# list files, and remove hidden (.files)
inFiles = sorted(
    name for name in os.listdir(options.path) if not name.startswith(".")
)
if options.files:
    inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))]

if method == "regexp":
    (outFolders, uniqlabel, matcher) = regexmatches(inFiles, options)
    # NOTE(review): the answer is discarded; this only pauses so the user
    # can abort (e.g. Ctrl-C) after seeing the printed labels.
    input("correct?")
else:
    for x in range(options.n):
        outFolders.append(os.path.join(options.path, "folder" + str(x + 1)))

# Create any destination folder that does not exist yet.
for x in outFolders:
    if not os.path.isdir(x):
        os.mkdir(x)

if method == "random":
    shuffle(inFiles)
    portorder(inFiles, options.path, outFolders, options.n, options.move)
elif method == "regexp":
    regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move)
elif method == "sparse":
    portorder(inFiles, options.path, outFolders, options.n, options.move)
elif method == "sequence":
    fileorder(inFiles, options.path, outFolders, options.n, options.move)

report(outFolders)
|
||||
|
||||
Reference in New Issue
Block a user