From 890216e348bdbd880bc1a292eec913ee6a40f15d Mon Sep 17 00:00:00 2001 From: Ville Rantanen Date: Wed, 15 Jun 2022 15:19:09 +0300 Subject: [PATCH] split by max number of files --- files/FolderSplit.py | 114 ++++++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/files/FolderSplit.py b/files/FolderSplit.py index 9865ce3..481a4e4 100755 --- a/files/FolderSplit.py +++ b/files/FolderSplit.py @@ -4,11 +4,11 @@ import os, sys import math, shutil, re from random import shuffle -VERSION = "0.1" +VERSION = "0.2" def setup_options(): - """ Setup the command line options """ + """Setup the command line options""" from argparse import ArgumentParser parser = ArgumentParser(description="Splits files to subfolders equally.") @@ -37,6 +37,13 @@ def setup_options(): default=False, help="Split files only, skipping folders", ) + parser.add_argument( + "--dry", + action="store_true", + dest="dry", + default=False, + help="Dry run", + ) parser.add_argument( "-r", "--regexp", @@ -44,16 +51,27 @@ def setup_options(): action="store", dest="regexp", default="", - help="Regular expression for splitting. When set, order regexp used, -n not used.", + help="Regular expression for splitting. When set, order regexp used, -n or -i not used.", ) parser.add_argument( "-n", "-N", type=int, + default=None, action="store", dest="n", - help="Number of subfolders to split into.", + help="Number of subfolders to split into. Default 10.", ) + parser.add_argument( + "-i", + "-I", + type=int, + default=None, + action="store", + dest="i", + help="Max number of files in one folder. Can not be used together with -n or -r", + ) + parser.add_argument( "path", type=str, @@ -63,11 +81,15 @@ def setup_options(): help="Folder to split.", ) options = parser.parse_args() - if options.n == None and options.regexp == "": + if options.n is None and options.i is None and options.regexp == "": parser.print_help() - parser.error("Either -n or -r must be passed") + parser.error("Either -n, -i or -r must be passed") if options.regexp != "": options.order = "regexp" + if options.regexp == "": + if not options.i is None and not options.n is None: + parser.print_help() + parser.error("Both -n and -i cannot be used at the same time.") return options @@ -97,7 +119,7 @@ def linktree(src, dst): def copyfileorfolder(basename, source, target, move): - """ Copies a file or folder structure under target folder """ + """Copies a file or folder structure under target folder""" if move: shutil.move(os.path.join(source, basename), os.path.join(target, basename)) return @@ -111,7 +133,7 @@ def copyfileorfolder(basename, source, target, move): def portorder(inFiles, inFolder, outFolders, N, link): - """ Copy files in port order (sparse) """ + """Copy files in port order (sparse)""" outidx = 0 for row in inFiles: copyfileorfolder(row, inFolder, outFolders[outidx], link) @@ -121,7 +143,7 @@ def portorder(inFiles, inFolder, outFolders, N, link): def fileorder(inFiles, inFolder, outFolders, N, link): - """ Copy files in input file order (sequnce) """ + """Copy files in input file order (sequnce)""" bins = [int(math.floor(float(len(inFiles)) / float(N)))] * int(N) binidx = 0 @@ -137,7 +159,7 @@ def fileorder(inFiles, inFolder, outFolders, N, link): def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link): - """ Copy files by regex match """ + """Copy files by regex match""" for f in inFiles: m = matcher.search(f) @@ -159,6 +181,7 @@ def regexmatches(inFiles, opts): uniqlabel = sorted(set(matches)) print("Unique matches", uniqlabel) print("Not matching %d files." % skipped) + outFolders = [] for x in uniqlabel: outFolders.append(os.path.join(opts.path, x)) return (outFolders, uniqlabel, matcher) @@ -177,35 +200,50 @@ def report(outFolders): print(os.path.basename(x) + ":" + str(n)) -""" Splits a folder input in N outputs """ -options = setup_options() -outFolders = [] -method = options.order.lower().strip() -# list files, and remove hidden (.files) -inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path))) -if options.files: - inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))] +def main(): + """Splits a folder input in N outputs""" + options = setup_options() + method = options.order.lower().strip() + # list files, and remove hidden (.files) + inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path))) + if options.files: + inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))] -if method == "regexp": - (outFolders, uniqlabel, matcher) = regexmatches(inFiles, options) - input("correct?") -else: - padding = "{:0" + str(len(str(options.n))) + "d}" - for x in range(options.n): - outFolders.append(os.path.join(options.path, ("folder-" + padding).format(x + 1))) + if options.n: + n = options.n + i = math.ceil(len(inFiles) / n) + else: + n = math.ceil(len(inFiles) / options.i) + i = options.i -for x in outFolders: - if not os.path.isdir(x): - os.mkdir(x) -if method == "random": - shuffle(inFiles) - portorder(inFiles, options.path, outFolders, options.n, options.move) -if method == "regexp": - regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move) -if method == "sparse": - portorder(inFiles, options.path, outFolders, options.n, options.move) -if method == "sequence": - fileorder(inFiles, options.path, outFolders, options.n, options.move) + if method == "regexp": + (outFolders, uniqlabel, matcher) = regexmatches(inFiles, options) + else: + print("Splitting to {} folders, <={} files / folder".format(n, i)) + outFolders = [] + padding = "{:0" + str(len(str(n))) + "d}" + for x in range(n): + outFolders.append( + os.path.join(options.path, ("folder-" + padding).format(x + 1)) + ) + + if options.dry: + print("Not doing anything, --dry") + return + for x in outFolders: + if not os.path.isdir(x): + os.mkdir(x) + if method == "regexp": + regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move) + if method == "random": + shuffle(inFiles) + portorder(inFiles, options.path, outFolders, n, options.move) + if method == "sparse": + portorder(inFiles, options.path, outFolders, n, options.move) + if method == "sequence": + fileorder(inFiles, options.path, outFolders, n, options.move) + report(outFolders) -report(outFolders) +if __name__ == "__main__": + main()