#!/usr/bin/env python3
"""Split the entries of a folder into N subfolders.

Entries are either hard-linked (default) or moved into the subfolders.
The distribution order is one of: sequence, sparse, random or regexp.
"""

import math
import os
import re
import shutil
import sys
from argparse import ArgumentParser
from random import shuffle

VERSION = "0.1"


def setup_options():
    """Parse and validate the command line options.

    Returns:
        The argparse namespace with attributes ``order``, ``move``,
        ``files``, ``regexp``, ``n`` and ``path``.  ``order`` is forced to
        ``"regexp"`` whenever a regular expression was passed.

    Exits through ``parser.error`` when neither ``-n`` nor ``-r`` is given,
    when ``-n`` is not positive, or when regexp order is requested without
    a regular expression.
    """
    parser = ArgumentParser(description="Splits files to subfolders equally.")
    parser.add_argument(
        "--order",
        "-o",
        type=str,
        action="store",
        dest="order",
        default="sequence",
        help="Splitting method.",
        choices=["sequence", "sparse", "regexp", "random"],
    )
    parser.add_argument(
        "-m",
        action="store_true",
        dest="move",
        default=False,
        help="Move entries instead of hardlink.",
    )
    parser.add_argument(
        "-f",
        action="store_true",
        dest="files",
        default=False,
        help="Split files only, skipping folders",
    )
    parser.add_argument(
        "-r",
        "--regexp",
        type=str,
        action="store",
        dest="regexp",
        default="",
        help="Regular expression for splitting. When set, order regexp used, -n not used.",
    )
    parser.add_argument(
        "-n",
        "-N",
        type=int,
        action="store",
        dest="n",
        help="Number of subfolders to split into.",
    )
    parser.add_argument(
        "path",
        type=str,
        action="store",
        default=".",
        nargs="?",
        help="Folder to split.",
    )
    options = parser.parse_args()

    if options.regexp != "":
        # A regular expression always wins over the requested order.
        options.order = "regexp"
        return options

    if options.order == "regexp":
        # Without a pattern there is no capture group to split on.
        parser.error("Order regexp requires a regular expression (-r)")
    if options.n is None:
        parser.print_help()
        parser.error("Either -n or -r must be passed")
    if options.n < 1:
        # Zero or negative counts would fail later with ZeroDivisionError
        # or IndexError while distributing the entries.
        parser.error("-n must be a positive integer")
    return options


def linktree(src, dst):
    """Recursively hard-link a directory tree using os.link.

    Modified from shutil.copytree.  ``dst`` is created with os.makedirs and
    must therefore not exist yet.

    Raises:
        shutil.Error: after the whole tree was visited, carrying a list of
            ``(srcname, dstname, reason)`` tuples for every entry that
            could not be linked.
    """
    names = os.listdir(src)
    os.makedirs(dst)
    errors = []
    for name in names:
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.isdir(srcname):
                linktree(srcname, dstname)
            else:
                # os.link raises OSError when the entry cannot be linked
                os.link(srcname, dstname)
        except shutil.Error as err:
            # Failures collected by a recursive call: flatten into our list.
            # shutil.Error subclasses OSError, so it must be caught first.
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    if errors:
        raise shutil.Error(errors)


def copyfileorfolder(basename, source, target, move):
    """Place the entry ``basename`` of folder ``source`` under ``target``.

    Args:
        basename: Name of the file or folder inside ``source``.
        source: Folder currently holding the entry.
        target: Folder receiving the entry.
        move: When true the entry is moved with shutil.move; otherwise a
            file is hard-linked and a folder is hard-linked recursively.

    Raises:
        RuntimeError: when not moving and the entry is neither a file nor
            a folder.
    """
    src_path = os.path.join(source, basename)
    dst_path = os.path.join(target, basename)
    if move:
        shutil.move(src_path, dst_path)
    elif os.path.isfile(src_path):
        os.link(src_path, dst_path)
    elif os.path.isdir(src_path):
        linktree(src_path, dst_path)
    else:
        # Report the offending entry, not just the folder that contains it.
        raise RuntimeError(src_path + " was neither file nor folder.")


def portorder(inFiles, inFolder, outFolders, N, link):
    """Distribute entries round-robin over the output folders (sparse).

    Entry ``i`` goes to ``outFolders[i % N]``.

    Args:
        inFiles: Entry names inside ``inFolder``.
        inFolder: Folder holding the entries.
        outFolders: Output folder paths; the first ``N`` are used.
        N: Number of output folders to cycle through.
        link: Despite its name, forwarded as the ``move`` flag of
            copyfileorfolder (true moves, false hard-links).
    """
    for position, entry in enumerate(inFiles):
        copyfileorfolder(entry, inFolder, outFolders[position % N], link)


def fileorder(inFiles, inFolder, outFolders, N, link):
    """Distribute entries in contiguous runs, keeping input order (sequence).

    The first ``len(inFiles) % N`` folders receive one entry more than the
    remaining ones.

    Args:
        inFiles: Entry names inside ``inFolder``.
        inFolder: Folder holding the entries.
        outFolders: Output folder paths; the first ``N`` are used.
        N: Number of output folders.
        link: Despite its name, forwarded as the ``move`` flag of
            copyfileorfolder (true moves, false hard-links).
    """
    count = int(N)
    base, extra = divmod(len(inFiles), count)
    bins = [base + 1 if idx < extra else base for idx in range(count)]
    start = 0
    # offset() yields the running end position of every bin.
    for outidx, end in enumerate(offset(bins)):
        for entry in inFiles[start:end]:
            copyfileorfolder(entry, inFolder, outFolders[outidx], link)
        start = end


def regexorder(inFiles, inFolder, outFolders, matcher, uniqlabel, link):
    """Distribute entries by the first capture group of a regex match.

    Entries that do not match are left untouched.

    Args:
        inFiles: Entry names inside ``inFolder``.
        inFolder: Folder holding the entries.
        outFolders: Output folder paths, parallel to ``uniqlabel``.
        matcher: Compiled pattern; it must contain a capture group because
            ``group(1)`` is read.
        uniqlabel: Labels whose position selects the output folder.
        link: Despite its name, forwarded as the ``move`` flag of
            copyfileorfolder (true moves, false hard-links).
    """
    for entry in inFiles:
        found = matcher.search(entry)
        if found:
            outidx = uniqlabel.index(found.group(1))
            copyfileorfolder(entry, inFolder, outFolders[outidx], link)


def regexmatches(inFiles, opts):
    """Collect the distinct capture-group labels found in the entry names.

    Prints the unique labels and the number of non-matching entries.

    Args:
        inFiles: Entry names to inspect.
        opts: Object with ``regexp`` (pattern text, must contain a capture
            group) and ``path`` (folder under which outputs are placed).

    Returns:
        Tuple ``(outFolders, uniqlabel, matcher)``: one output folder path
        per label, the sorted unique labels, and the compiled pattern.
    """
    matcher = re.compile(opts.regexp)
    matches = []
    skipped = 0
    for entry in inFiles:
        found = matcher.search(entry)
        if found:
            matches.append(found.group(1))
        else:
            skipped += 1
    uniqlabel = sorted(set(matches))
    print("Unique matches", uniqlabel)
    print("Not matching %d files." % skipped)
    # Build a fresh list instead of appending to a module-level global.
    outFolders = [os.path.join(opts.path, label) for label in uniqlabel]
    return (outFolders, uniqlabel, matcher)


def offset(it):
    """Yield the running total of the numbers in ``it``."""
    total = 0
    for x in it:
        total += x
        yield total


def report(outFolders):
    """Print ``<folder name>:<entry count>`` for every output folder."""
    for folder in outFolders:
        n = len(os.listdir(folder))
        print(os.path.basename(folder) + ":" + str(n))


def main():
    """Split the folder given on the command line into subfolders."""
    options = setup_options()
    outFolders = []
    method = options.order.lower().strip()

    # list files, and remove hidden (.files)
    inFiles = sorted(
        name for name in os.listdir(options.path) if not name.startswith(".")
    )
    if options.files:
        inFiles = [
            f for f in inFiles if os.path.isfile(os.path.join(options.path, f))
        ]

    uniqlabel = matcher = None
    if method == "regexp":
        (outFolders, uniqlabel, matcher) = regexmatches(inFiles, options)
        # Pause so the user can abort (Ctrl-C) after seeing the labels;
        # the typed answer itself is not inspected.
        input("correct?")
    else:
        # Zero-pad the folder number to the width of N.
        width = len(str(options.n))
        for x in range(options.n):
            folder_name = "folder-{:0{width}d}".format(x + 1, width=width)
            outFolders.append(os.path.join(options.path, folder_name))

    for folder in outFolders:
        if not os.path.isdir(folder):
            os.mkdir(folder)

    if method == "random":
        shuffle(inFiles)
        portorder(inFiles, options.path, outFolders, options.n, options.move)
    elif method == "regexp":
        regexorder(
            inFiles, options.path, outFolders, matcher, uniqlabel, options.move
        )
    elif method == "sparse":
        portorder(inFiles, options.path, outFolders, options.n, options.move)
    elif method == "sequence":
        fileorder(inFiles, options.path, outFolders, options.n, options.move)

    report(outFolders)


if __name__ == "__main__":
    main()