simplify splitter

This commit is contained in:
Q
2024-08-10 19:35:23 +03:00
parent 17cb614f24
commit 00b0f07f51

View File

@@ -34,6 +34,14 @@ def setup_options():
default=False, default=False,
help="Move entries instead of hardlink.", help="Move entries instead of hardlink.",
) )
parser.add_argument(
"--exclude",
type=str,
action="append",
default=[],
nargs="*",
help="Exclude files/folders. Accepts regex",
)
parser.add_argument( parser.add_argument(
"-f", "-f",
action="store_true", action="store_true",
@@ -84,7 +92,13 @@ def setup_options():
dest="i", dest="i",
help="Max number of files in one folder. Can not be used together with -n or -r", help="Max number of files in one folder. Can not be used together with -n or -r",
) )
parser.add_argument(
"--verbose",
action="store_true",
dest="verbose",
default=False,
help="Verbose",
)
parser.add_argument( parser.add_argument(
"path", "path",
type=str, type=str,
@@ -159,66 +173,46 @@ def copyfileorfolder(basename, source, target, move):
raise RuntimeError(source + " was neither file nor folder.") raise RuntimeError(source + " was neither file nor folder.")
def portmatches(inFiles, inFolder, n, i):
    """Assign each input entry to a numbered folder in round-robin (sparse) order.

    Entry number k (0-based) is mapped to folder ``(k % n) + 1``, so
    consecutive entries end up in different folders.

    Args:
        inFiles: Sequence of entry names; only its length is used here.
        inFolder: Directory under which the ``folder-NN`` paths are built.
        n: Number of target folders.
        i: Unused by this function.

    Returns:
        A list holding one target folder path per entry of ``inFiles``.
    """
    # Nothing to distribute. Returning early also avoids dividing by
    # n == 0, which is a possible folder count when there are no entries.
    if not inFiles:
        return []
    # Zero-pad the folder number to the width of the largest number.
    padding = "{:0" + str(len(str(n))) + "d}"
    folders = [
        os.path.join(inFolder, ("folder-" + padding).format(idx + 1))
        for idx in range(n)
    ]
    # Repeat the folder cycle often enough to cover every entry, then trim.
    repeats = 1 + len(inFiles) // n
    return (folders * repeats)[: len(inFiles)]
def filematches(inFiles, inFolder, n, i):
    """Assign each input entry to a numbered folder in input (sequence) order.

    The first ``i`` entries map to folder 1, the next ``i`` to folder 2,
    and so on for at most ``n`` folders.

    Args:
        inFiles: Sequence of entry names; only its length is used here.
        inFolder: Directory under which the ``folder-NN`` paths are built.
        n: Number of target folders.
        i: Number of entries assigned to each folder.

    Returns:
        A list of target folder paths, positionally aligned with
        ``inFiles``. It is shorter than ``inFiles`` when ``n * i`` is
        smaller than the number of entries.
    """
    # Zero-pad the folder number to the width of the largest number.
    padding = "{:0" + str(len(str(n))) + "d}"
    wanted = len(inFiles)
    outFolders = []
    for idx in range(n):
        # Stop as soon as every entry has a target; no need to build
        # paths for folders that would be trimmed away again.
        if len(outFolders) >= wanted:
            break
        folder = os.path.join(inFolder, ("folder-" + padding).format(idx + 1))
        outFolders.extend([folder] * i)
    return outFolders[:wanted]
def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link):
    """Copy every entry whose name matches to the folder of its label.

    The first capture group of ``matcher`` is looked up in ``uniqlabel``;
    the folder at the same position in ``outFolders`` is the target.
    Entries that do not match are left alone.
    """
    for name in inFiles:
        found = matcher.search(name)
        if not found:
            continue
        position = uniqlabel.index(found.group(1))
        copyfileorfolder(name, inFolder, outFolders[position], link)
def dateorder(inFiles, inFolder, outFolders, move):
    """Copy each entry to the folder at the same position in ``outFolders``."""
    pairs = zip(inFiles, outFolders)
    for name, destination in pairs:
        copyfileorfolder(name, inFolder, destination, move)
def regexmatches(inFiles, opts):
    """Select entries matching ``opts.regexp`` and derive their target folders.

    The first capture group of the pattern is the folder label; the target
    folder is that label joined onto ``opts.path``. A summary of the unique
    target folders and the number of skipped entries is printed.

    Args:
        inFiles: Iterable of entry names to test against the pattern.
        opts: Object providing ``regexp`` (pattern string) and ``path``
            (base directory for the target folders).

    Returns:
        A tuple ``(included, matches)`` of two equally long lists: the
        entries that matched, and the target folder for each of them.

    Raises:
        re.error: If ``opts.regexp`` is not a valid pattern.
        IndexError: If the pattern matches but has no capture group.
    """
    matcher = re.compile(opts.regexp)
    matches = []
    included = []
    skipped = 0
    for f in inFiles:
        m = matcher.search(f)
        label = m.group(1) if m else None
        # A group that did not participate yields None (os.path.join would
        # raise TypeError) and an empty capture would make the target the
        # input folder itself, so both are treated as "no match".
        if label:
            matches.append(os.path.join(opts.path, label))
            included.append(f)
        else:
            skipped += 1
    uniqlabel = sorted(set(matches))
    print("Unique matches", ", ".join(uniqlabel))
    print("Did not match %d files." % skipped)
    return included, matches
def datematches(inFiles, opts): def datematches(inFiles, opts):
@@ -240,7 +234,7 @@ def offset(it):
def report(outFolders):
    """Print ``name:count`` for every distinct folder, in sorted order.

    ``count`` is the number of directory entries listed in the folder.
    """
    unique_folders = set(outFolders)
    for folder in sorted(unique_folders):
        entry_count = len(os.listdir(folder))
        print(os.path.basename(folder), entry_count, sep=":")
@@ -251,11 +245,17 @@ def main():
method = options.order.lower().strip() method = options.order.lower().strip()
# list files, and remove hidden (.files) # list files, and remove hidden (.files)
inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path))) inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path)))
for exclude_list in options.exclude:
for exclude in exclude_list:
inFiles = [x for x in inFiles if not re.fullmatch(exclude, x)]
if options.files: if options.files:
inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))] inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))]
if method == "random":
shuffle(inFiles)
if method == "regexp": if method == "regexp":
(outFolders, uniqlabel, matcher) = regexmatches(inFiles, options) inFiles, outFolders = regexmatches(inFiles, options)
elif method == "date": elif method == "date":
outFolders = datematches(inFiles, options) outFolders = datematches(inFiles, options)
else: else:
@@ -266,29 +266,27 @@ def main():
n = math.ceil(len(inFiles) / options.i) n = math.ceil(len(inFiles) / options.i)
i = options.i i = options.i
print("Splitting to {} folders, <= {} files / folder".format(n, i)) print("Splitting to {} folders, <= {} files / folder".format(n, i))
outFolders = [] if method in ("random", "sparse"):
padding = "{:0" + str(len(str(n))) + "d}" outFolders = portmatches(inFiles, options.path, n, i)
for x in range(n): if method == "sequence":
outFolders.append(os.path.join(options.path, ("folder-" + padding).format(x + 1))) outFolders = filematches(inFiles, options.path, n, i)
if options.verbose:
print("Input paths:")
[print("{}\t-> {}".format(i, o)) for i, o in zip(inFiles, outFolders)]
print("----")
if options.dry: if options.dry:
print("Not doing anything, --dry") print("Not doing anything, --dry")
return return
for x in outFolders: for x in sorted(set(outFolders)):
if not os.path.isdir(x): if not os.path.isdir(x):
os.mkdir(x) os.mkdir(x)
if method == "regexp":
regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move)
if method == "random":
shuffle(inFiles)
portorder(inFiles, options.path, outFolders, n, options.move)
if method == "sparse":
portorder(inFiles, options.path, outFolders, n, options.move)
if method == "sequence":
fileorder(inFiles, options.path, outFolders, n, options.move)
if method == "date":
dateorder(inFiles, options.path, outFolders, options.move)
for f, d in zip(inFiles, outFolders):
copyfileorfolder(f, options.path, d, options.move)
if options.verbose:
report(outFolders) report(outFolders)