split by max number of files

This commit is contained in:
Ville Rantanen
2022-06-15 15:19:09 +03:00
parent 78daebf232
commit 890216e348

View File

@@ -4,11 +4,11 @@ import os, sys
import math, shutil, re import math, shutil, re
from random import shuffle from random import shuffle
VERSION = "0.1" VERSION = "0.2"
def setup_options(): def setup_options():
""" Setup the command line options """ """Setup the command line options"""
from argparse import ArgumentParser from argparse import ArgumentParser
parser = ArgumentParser(description="Splits files to subfolders equally.") parser = ArgumentParser(description="Splits files to subfolders equally.")
@@ -37,6 +37,13 @@ def setup_options():
default=False, default=False,
help="Split files only, skipping folders", help="Split files only, skipping folders",
) )
parser.add_argument(
"--dry",
action="store_true",
dest="dry",
default=False,
help="Dry run",
)
parser.add_argument( parser.add_argument(
"-r", "-r",
"--regexp", "--regexp",
@@ -44,16 +51,27 @@ def setup_options():
action="store", action="store",
dest="regexp", dest="regexp",
default="", default="",
help="Regular expression for splitting. When set, order regexp used, -n not used.", help="Regular expression for splitting. When set, order regexp used, -n or -i not used.",
) )
parser.add_argument( parser.add_argument(
"-n", "-n",
"-N", "-N",
type=int, type=int,
default=None,
action="store", action="store",
dest="n", dest="n",
help="Number of subfolders to split into.", help="Number of subfolders to split into. Default 10.",
) )
parser.add_argument(
"-i",
"-I",
type=int,
default=None,
action="store",
dest="i",
help="Max number of files in one folder. Can not be used together with -n or -r",
)
parser.add_argument( parser.add_argument(
"path", "path",
type=str, type=str,
@@ -63,11 +81,15 @@ def setup_options():
help="Folder to split.", help="Folder to split.",
) )
options = parser.parse_args() options = parser.parse_args()
if options.n == None and options.regexp == "": if options.n is None and options.i is None and options.regexp == "":
parser.print_help() parser.print_help()
parser.error("Either -n or -r must be passed") parser.error("Either -n, -i or -r must be passed")
if options.regexp != "": if options.regexp != "":
options.order = "regexp" options.order = "regexp"
if options.regexp == "":
if not options.i is None and not options.n is None:
parser.print_help()
parser.error("Both -n and -i cannot be used at the same time.")
return options return options
@@ -97,7 +119,7 @@ def linktree(src, dst):
def copyfileorfolder(basename, source, target, move): def copyfileorfolder(basename, source, target, move):
""" Copies a file or folder structure under target folder """ """Copies a file or folder structure under target folder"""
if move: if move:
shutil.move(os.path.join(source, basename), os.path.join(target, basename)) shutil.move(os.path.join(source, basename), os.path.join(target, basename))
return return
@@ -111,7 +133,7 @@ def copyfileorfolder(basename, source, target, move):
def portorder(inFiles, inFolder, outFolders, N, link): def portorder(inFiles, inFolder, outFolders, N, link):
""" Copy files in port order (sparse) """ """Copy files in port order (sparse)"""
outidx = 0 outidx = 0
for row in inFiles: for row in inFiles:
copyfileorfolder(row, inFolder, outFolders[outidx], link) copyfileorfolder(row, inFolder, outFolders[outidx], link)
@@ -121,7 +143,7 @@ def portorder(inFiles, inFolder, outFolders, N, link):
def fileorder(inFiles, inFolder, outFolders, N, link): def fileorder(inFiles, inFolder, outFolders, N, link):
""" Copy files in input file order (sequnce) """ """Copy files in input file order (sequnce)"""
bins = [int(math.floor(float(len(inFiles)) / float(N)))] * int(N) bins = [int(math.floor(float(len(inFiles)) / float(N)))] * int(N)
binidx = 0 binidx = 0
@@ -137,7 +159,7 @@ def fileorder(inFiles, inFolder, outFolders, N, link):
def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link): def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link):
""" Copy files by regex match """ """Copy files by regex match"""
for f in inFiles: for f in inFiles:
m = matcher.search(f) m = matcher.search(f)
@@ -159,6 +181,7 @@ def regexmatches(inFiles, opts):
uniqlabel = sorted(set(matches)) uniqlabel = sorted(set(matches))
print("Unique matches", uniqlabel) print("Unique matches", uniqlabel)
print("Not matching %d files." % skipped) print("Not matching %d files." % skipped)
outFolders = []
for x in uniqlabel: for x in uniqlabel:
outFolders.append(os.path.join(opts.path, x)) outFolders.append(os.path.join(opts.path, x))
return (outFolders, uniqlabel, matcher) return (outFolders, uniqlabel, matcher)
@@ -177,35 +200,50 @@ def report(outFolders):
print(os.path.basename(x) + ":" + str(n)) print(os.path.basename(x) + ":" + str(n))
""" Splits a folder input in N outputs """ def main():
options = setup_options() """Splits a folder input in N outputs"""
outFolders = [] options = setup_options()
method = options.order.lower().strip() method = options.order.lower().strip()
# list files, and remove hidden (.files) # list files, and remove hidden (.files)
inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path))) inFiles = sorted(filter(lambda x: not x.startswith("."), os.listdir(options.path)))
if options.files: if options.files:
inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))] inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))]
if method == "regexp": if options.n:
(outFolders, uniqlabel, matcher) = regexmatches(inFiles, options) n = options.n
input("correct?") i = math.ceil(len(inFiles) / n)
else: else:
padding = "{:0" + str(len(str(options.n))) + "d}" n = math.ceil(len(inFiles) / options.i)
for x in range(options.n): i = options.i
outFolders.append(os.path.join(options.path, ("folder-" + padding).format(x + 1)))
for x in outFolders: if method == "regexp":
(outFolders, uniqlabel, matcher) = regexmatches(inFiles, options)
else:
print("Splitting to {} folders, <={} files / folder".format(n, i))
outFolders = []
padding = "{:0" + str(len(str(n))) + "d}"
for x in range(n):
outFolders.append(
os.path.join(options.path, ("folder-" + padding).format(x + 1))
)
if options.dry:
print("Not doing anything, --dry")
return
for x in outFolders:
if not os.path.isdir(x): if not os.path.isdir(x):
os.mkdir(x) os.mkdir(x)
if method == "random": if method == "regexp":
shuffle(inFiles)
portorder(inFiles, options.path, outFolders, options.n, options.move)
if method == "regexp":
regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move) regexorder(inFiles, options.path, outFolders, matcher, uniqlabel, options.move)
if method == "sparse": if method == "random":
portorder(inFiles, options.path, outFolders, options.n, options.move) shuffle(inFiles)
if method == "sequence": portorder(inFiles, options.path, outFolders, n, options.move)
fileorder(inFiles, options.path, outFolders, options.n, options.move) if method == "sparse":
portorder(inFiles, options.path, outFolders, n, options.move)
if method == "sequence":
fileorder(inFiles, options.path, outFolders, n, options.move)
report(outFolders)
report(outFolders) if __name__ == "__main__":
main()