From 00b0f07f5128c780fee5aecd70016d8256731cb5 Mon Sep 17 00:00:00 2001 From: Q Date: Sat, 10 Aug 2024 19:35:23 +0300 Subject: [PATCH] simplify splitter --- files/FolderSplit.py | 128 +++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 65 deletions(-) diff --git a/files/FolderSplit.py b/files/FolderSplit.py index 897414a..c077b0a 100755 --- a/files/FolderSplit.py +++ b/files/FolderSplit.py @@ -34,6 +34,14 @@ def setup_options(): default=False, help="Move entries instead of hardlink.", ) + parser.add_argument( + "--exclude", + type=str, + action="append", + default=[], + nargs="*", + help="Exclude files/folders. Accepts regex", + ) parser.add_argument( "-f", action="store_true", @@ -84,7 +92,13 @@ def setup_options(): dest="i", help="Max number of files in one folder. Can not be used together with -n or -r", ) - + parser.add_argument( + "--verbose", + action="store_true", + dest="verbose", + default=False, + help="Verbose", + ) parser.add_argument( "path", type=str, @@ -159,66 +173,46 @@ def copyfileorfolder(basename, source, target, move): raise RuntimeError(source + " was neither file nor folder.") -def portorder(inFiles, inFolder, outFolders, N, link): - """Copy files in port order (sparse)""" - outidx = 0 - for row in inFiles: - copyfileorfolder(row, inFolder, outFolders[outidx], link) - outidx += 1 - if outidx + 1 > N: - outidx = 0 +def portmatches(inFiles, inFolder, n, i): + """files in port order (sparse)""" + outFolders = [] + padding = "{:0" + str(len(str(n))) + "d}" + for idx in range(n): + outFolders.append(os.path.join(inFolder, ("folder-" + padding).format(idx + 1))) + multiplier = 1 + int(len(inFiles) / n) + outFolders = outFolders * int(multiplier) + outFolders = outFolders[0 : len(inFiles)] + return outFolders -def fileorder(inFiles, inFolder, outFolders, N, link): - """Copy files in input file order (sequnce)""" +def filematches(inFiles, inFolder, n, i): + """files in input file order (sequence)""" - bins = [int(math.floor(float(len(inFiles)) / float(N)))] * int(N) - binidx = 0 - while sum(bins) < len(inFiles): - bins[binidx] += 1 - binidx += 1 - offsets = list(offset(bins)) - offsets.insert(0, 0) + padding = "{:0" + str(len(str(n))) + "d}" + outFolders = [] + for idx, nidx in enumerate([i for x in range(n)]): + outFolders.extend(nidx * [os.path.join(inFolder, ("folder-" + padding).format(idx + 1))]) - for outidx in range(N): - for f in range(offsets[outidx], offsets[outidx] + bins[outidx]): - copyfileorfolder(inFiles[f], inFolder, outFolders[outidx], link) - - -def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link): - """Copy files by regex match""" - - for f in inFiles: - m = matcher.search(f) - if m: - outidx = uniqlabel.index(m.group(1)) - copyfileorfolder(f, inFolder, outFolders[outidx], link) - - -def dateorder(inFiles, inFolder, outFolders, move): - """Copy files by regex match""" - - for f, d in zip(inFiles, outFolders): - copyfileorfolder(f, inFolder, d, move) + outFolders = outFolders[0 : len(inFiles)] + return outFolders def regexmatches(inFiles, opts): matcher = re.compile(opts.regexp) matches = [] + included = [] skipped = 0 for f in inFiles: m = matcher.search(f) if m: - matches.append(m.group(1)) + matches.append(os.path.join(opts.path, m.group(1))) + included.append(f) else: skipped += 1 uniqlabel = sorted(set(matches)) - print("Unique matches", uniqlabel) - print("Not matching %d files." % skipped) - outFolders = [] - for x in uniqlabel: - outFolders.append(os.path.join(opts.path, x)) - return (outFolders, uniqlabel, matcher) + print("Unique matches", ", ".join(uniqlabel)) + print("Did not match %d files." % skipped) + return included, matches def datematches(inFiles, opts): @@ -240,7 +234,7 @@ def offset(it): def report(outFolders): - for x in outFolders: + for x in sorted(set(outFolders)): n = len(os.listdir(x)) print(os.path.basename(x) + ":" + str(n)) @@ -251,11 +245,17 @@ def main(): method = options.order.lower().strip() # list files, and remove hidden (.files) inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path))) + for exclude_list in options.exclude: + for exclude in exclude_list: + inFiles = [x for x in inFiles if not re.fullmatch(exclude, x)] + if options.files: inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))] + if method == "random": + shuffle(inFiles) if method == "regexp": - (outFolders, uniqlabel, matcher) = regexmatches(inFiles, options) + inFiles, outFolders = regexmatches(inFiles, options) elif method == "date": outFolders = datematches(inFiles, options) else: @@ -265,31 +265,29 @@ def main(): else: n = math.ceil(len(inFiles) / options.i) i = options.i - print("Splitting to {} folders, <={} files / folder".format(n, i)) - outFolders = [] - padding = "{:0" + str(len(str(n))) + "d}" - for x in range(n): - outFolders.append(os.path.join(options.path, ("folder-" + padding).format(x + 1))) + print("Splitting to {} folders, <= {} files / folder".format(n, i)) + if method in ("random", "sparse"): + outFolders = portmatches(inFiles, options.path, n, i) + if method == "sequence": + outFolders = filematches(inFiles, options.path, n, i) + + if options.verbose: + print("Input paths:") + [print("{}\t-> {}".format(i, o)) for i, o in zip(inFiles, outFolders)] + print("----") if options.dry: print("Not doing anything, --dry") return - for x in outFolders: + for x in sorted(set(outFolders)): if not os.path.isdir(x): os.mkdir(x) - if method == "regexp": - regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move) - if method == "random": - shuffle(inFiles) - portorder(inFiles, options.path, outFolders, n, options.move) - if method == "sparse": - portorder(inFiles, options.path, outFolders, n, options.move) - if method == "sequence": - fileorder(inFiles, options.path, outFolders, n, options.move) - if method == "date": - dateorder(inFiles, options.path, outFolders, options.move) - report(outFolders) + for f, d in zip(inFiles, outFolders): + copyfileorfolder(f, options.path, d, options.move) + + if options.verbose: + report(outFolders) if __name__ == "__main__":