295 lines
8.2 KiB
Python
Executable File
295 lines
8.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import math
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
from datetime import datetime
|
|
from random import shuffle
|
|
|
|
# Version string of this script.
VERSION = "0.3"
|
|
|
|
|
|
def setup_options():
    """Parse and validate the command line options.

    Reads ``sys.argv`` through argparse and post-processes the result:

    * a non-empty ``-r/--regexp`` forces ``order`` to ``"regexp"``;
    * a ``-d/--datefmt`` value forces ``order`` to ``"date"`` (it is applied
      after the regexp rule, so it wins when both are given);
    * the count based orders (``sequence``, ``sparse``, ``random``) require
      exactly one of ``-n`` / ``-i``, and that value must be positive;
    * ``order == "date"`` without an explicit format uses ``%Y-%m-%d``.

    Returns:
        argparse.Namespace: with attributes ``order``, ``move``, ``exclude``
        (a list of lists, one inner list per ``--exclude`` occurrence),
        ``files``, ``dry``, ``regexp``, ``datefmt``, ``n``, ``i``,
        ``verbose`` and ``path``.

    Raises:
        SystemExit: raised by ``parser.error`` when the option combination
            is invalid.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Splits files to subfolders equally.")

    parser.add_argument(
        "--order",
        "-o",
        type=str,
        action="store",
        dest="order",
        default="sequence",
        help="Splitting method.",
        choices=["sequence", "sparse", "regexp", "random", "date"],
    )
    parser.add_argument(
        "-m",
        action="store_true",
        dest="move",
        default=False,
        help="Move entries instead of hardlink.",
    )
    parser.add_argument(
        "--exclude",
        type=str,
        action="append",
        default=[],
        nargs="*",
        help="Exclude files/folders. Accepts regex",
    )
    parser.add_argument(
        "-f",
        action="store_true",
        dest="files",
        default=False,
        help="Split files only, skipping folders",
    )
    parser.add_argument(
        "--dry",
        action="store_true",
        dest="dry",
        default=False,
        help="Dry run",
    )
    parser.add_argument(
        "-r",
        "--regexp",
        type=str,
        action="store",
        dest="regexp",
        default="",
        help="Regular expression for splitting. When set, order regexp used, -n or -i not used.",
    )
    parser.add_argument(
        "-d",
        "--datefmt",
        type=str,
        action="store",
        dest="datefmt",
        default=None,
        # "%%" because argparse runs %-formatting on help strings.
        help="Date format for 'date' split. Defaults to %%Y-%%m-%%d",
    )
    parser.add_argument(
        "-n",
        "-N",
        type=int,
        default=None,
        action="store",
        dest="n",
        # There is no implicit default: one of -n / -i is mandatory for the
        # count based orders (checked below).
        help="Number of subfolders to split into. Can not be used together with -i.",
    )
    parser.add_argument(
        "-i",
        "-I",
        type=int,
        default=None,
        action="store",
        dest="i",
        help="Max number of files in one folder. Can not be used together with -n or -r",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help="Verbose",
    )
    parser.add_argument(
        "path",
        type=str,
        action="store",
        default=".",
        nargs="?",
        help="Folder to split. Defaults to current folder.",
    )
    options = parser.parse_args()

    # Giving a regexp or a date format implies the matching split order.
    if options.regexp != "":
        options.order = "regexp"
    if options.datefmt is not None:
        options.order = "date"

    if options.order in ("sequence", "sparse", "random"):
        if options.n is None and options.i is None:
            parser.print_help()
            parser.error("Either -n or -i must be used")
        if options.i is not None and options.n is not None:
            parser.print_help()
            parser.error("Both -n and -i cannot be used at the same time.")
        # Exactly one of the two is set here.  Zero or negative counts would
        # later end in a division by zero / TypeError or in a silent no-op.
        count = options.n if options.n is not None else options.i
        if count < 1:
            parser.print_help()
            parser.error("-n and -i must be positive integers.")
    if options.order == "regexp":
        if options.regexp == "":
            parser.print_help()
            parser.error("-r must be used")
    if options.order == "date":
        if options.datefmt is None:
            options.datefmt = "%Y-%m-%d"

    return options
|
|
|
|
|
|
def linktree(src, dst):
    """Recursively hard-link a directory tree using os.link.

    Modified from shutil.copytree: directories are recreated under ``dst``
    and every non-directory entry is hard-linked instead of copied.

    Args:
        src: Existing directory to replicate.
        dst: Directory to create; it is created with ``os.makedirs`` and
            therefore must not exist yet.

    Raises:
        shutil.Error: after the whole tree has been processed, if any entry
            could not be linked. ``args[0]`` is a list of
            ``(srcname, dstname, reason)`` tuples.
        OSError: if ``src`` cannot be listed or ``dst`` cannot be created.
    """
    names = os.listdir(src)
    os.makedirs(dst)
    errors = []
    for name in names:
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.isdir(srcname):
                linktree(srcname, dstname)
            else:
                os.link(srcname, dstname)
        # shutil.Error is an OSError subclass, so it must be tested first.
        except shutil.Error as err:
            # A nested call already collected its failures; merge them.
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))

    if errors:
        raise shutil.Error(errors)
|
|
|
|
|
|
def copyfileorfolder(basename, source, target, move):
    """Place ``source/basename`` under ``target`` by moving or hard-linking.

    Args:
        basename: Name of the entry inside ``source``.
        source: Folder that currently contains the entry.
        target: Folder that receives the entry.
        move: When true the entry is moved with ``shutil.move``; otherwise a
            file is hard-linked and a folder is replicated with ``linktree``.

    If ``target/basename`` already exists a ``FileExists!`` line is printed
    and nothing else happens.

    Raises:
        RuntimeError: when not moving and the source entry is neither a
            regular file nor a folder.
    """
    src_path = os.path.join(source, basename)
    dst_path = os.path.join(target, basename)

    if os.path.exists(dst_path):
        print("FileExists!: " + dst_path)
        return
    if move:
        shutil.move(src_path, dst_path)
        return
    if os.path.isfile(src_path):
        os.link(src_path, dst_path)
        return
    if os.path.isdir(src_path):
        linktree(src_path, dst_path)
        return
    # Report the offending entry itself, not just the folder it lives in.
    raise RuntimeError(src_path + " was neither file nor folder.")
|
|
|
|
|
|
def portmatches(inFiles, inFolder, n, i):
    """Assign entries to folders round-robin (the "sparse" order).

    Entry 0 goes to ``folder-1``, entry 1 to ``folder-2`` and so on, wrapping
    around after ``n`` folders. Folder numbers are zero padded to the number
    of digits of ``n``.

    Args:
        inFiles: Entries to distribute; only the length is used.
        inFolder: Parent folder of the generated ``folder-N`` paths.
        n: Number of folders to cycle through.
        i: Unused; present so the signature parallels ``filematches``.

    Returns:
        list: one target folder path per entry of ``inFiles``.
    """
    template = "folder-{:0" + str(len(str(n))) + "d}"
    one_round = [
        os.path.join(inFolder, template.format(number))
        for number in range(1, n + 1)
    ]
    # Repeat the round often enough to cover every entry, then trim.
    rounds = 1 + int(len(inFiles) / n)
    return (one_round * rounds)[: len(inFiles)]
|
|
|
|
|
|
def filematches(inFiles, inFolder, n, i):
    """Assign entries to folders in consecutive runs (the "sequence" order).

    The first ``i`` entries go to ``folder-1``, the next ``i`` to
    ``folder-2`` and so on for ``n`` folders. Folder numbers are zero padded
    to the number of digits of ``n``.

    Args:
        inFiles: Entries to distribute; only the length is used.
        inFolder: Parent folder of the generated ``folder-N`` paths.
        n: Number of folders.
        i: Number of entries per folder.

    Returns:
        list: target folder paths, at most one per entry of ``inFiles``.
    """
    template = "folder-{:0" + str(len(str(n))) + "d}"
    assigned = []
    for number in range(1, n + 1):
        folder = os.path.join(inFolder, template.format(number))
        assigned += [folder] * i
    return assigned[: len(inFiles)]
|
|
|
|
|
|
def regexmatches(inFiles, opts):
    """Group entries by the text a regular expression extracts from them.

    ``opts.regexp`` is searched in every entry name. The first capture group
    names the target folder below ``opts.path``; a pattern without any
    capture group uses the whole match instead. Entries that do not match,
    or whose label is empty or missing, are skipped and counted.

    Args:
        inFiles: Entry names to classify.
        opts: Object with ``regexp`` (pattern string) and ``path``
            (parent folder of the targets) attributes.

    Returns:
        tuple: ``(included, matches)`` - the entries that produced a label
        and, in the same order, their target folder paths.
    """
    matcher = re.compile(opts.regexp)
    # group(1) raises IndexError when the pattern has no capture group, so
    # fall back to the whole match in that case.
    group_index = 1 if matcher.groups else 0
    matches = []
    included = []
    skipped = 0
    for f in inFiles:
        m = matcher.search(f)
        # A None label (optional group that did not participate) cannot be
        # joined to a path, and an empty label would point at opts.path itself.
        label = m.group(group_index) if m else None
        if label:
            matches.append(os.path.join(opts.path, label))
            included.append(f)
        else:
            skipped += 1
    uniqlabel = sorted(set(matches))
    print("Unique matches", ", ".join(uniqlabel))
    print("Did not match %d files." % skipped)
    return included, matches
|
|
|
|
|
|
def datematches(inFiles, opts):
    """Map every entry to a folder named after its modification time.

    The modification time of ``opts.path/<entry>`` is formatted with
    ``opts.datefmt`` and used as the folder name below ``opts.path``. The
    number of distinct date labels is printed.

    Args:
        inFiles: Entry names located in ``opts.path``.
        opts: Object with ``path`` and ``datefmt`` attributes.

    Returns:
        list: one target folder path per entry, in input order.
    """
    labels = [
        datetime.fromtimestamp(
            os.path.getmtime(os.path.join(opts.path, entry))
        ).strftime(opts.datefmt)
        for entry in inFiles
    ]
    targets = [os.path.join(opts.path, label) for label in labels]
    print("Unique dates", len(set(labels)))
    return targets
|
|
|
|
|
|
def offset(it):
    """Yield the running sum of ``it``, one value per input item.

    The accumulator starts at ``0`` and each item is added to it before the
    new sum is yielded.
    """
    running = 0
    for step in it:
        running += step
        yield running
|
|
|
|
|
|
def report(outFolders):
    """Print ``<folder name>:<entry count>`` for each distinct folder.

    Folders are listed once each, in sorted order; the count is the number
    of entries ``os.listdir`` returns for the folder.
    """
    unique_folders = list(set(outFolders))
    unique_folders.sort()
    for folder in unique_folders:
        entry_count = len(os.listdir(folder))
        print("{}:{}".format(os.path.basename(folder), entry_count))
|
|
|
|
|
|
def main():
    """Split the entries of a folder into subfolders.

    Steps:

    1. Parse the options with ``setup_options``.
    2. List ``options.path``, dropping hidden (dot) entries, entries fully
       matching an ``--exclude`` regex and, with ``-f``, everything that is
       not a regular file.
    3. Compute one target folder per entry according to the split order.
    4. Unless ``--dry`` is given, create the target folders and move or
       hard-link every entry into its folder.
    """
    options = setup_options()
    method = options.order.lower().strip()
    # list entries, and remove hidden (.files)
    inFiles = sorted(
        name for name in os.listdir(options.path) if not name.startswith(".")
    )
    for exclude_list in options.exclude:
        for exclude in exclude_list:
            inFiles = [x for x in inFiles if not re.fullmatch(exclude, x)]

    if options.files:
        inFiles = [f for f in inFiles if os.path.isfile(os.path.join(options.path, f))]

    if method == "random":
        shuffle(inFiles)
    if method == "regexp":
        # Entries that do not match the pattern are dropped from inFiles.
        inFiles, outFolders = regexmatches(inFiles, options)
    elif method == "date":
        outFolders = datematches(inFiles, options)
    else:
        if options.n:
            n = options.n
            i = math.ceil(len(inFiles) / n)
        else:
            # With no entries the quotient is 0, and portmatches would then
            # divide by zero; always keep at least one folder.
            n = max(1, math.ceil(len(inFiles) / options.i))
            i = options.i
        print("Splitting to {} folders, <= {} files / folder".format(n, i))
        if method in ("random", "sparse"):
            outFolders = portmatches(inFiles, options.path, n, i)
        if method == "sequence":
            outFolders = filematches(inFiles, options.path, n, i)

    if options.verbose:
        print("Input paths:")
        for entry, folder in zip(inFiles, outFolders):
            print("{}\t-> {}".format(entry, folder))
        print("----")

    if options.dry:
        print("Not doing anything, --dry")
        return
    for x in sorted(set(outFolders)):
        if not os.path.isdir(x):
            # Labels may contain path separators (e.g. --datefmt %Y/%m), so
            # intermediate folders have to be created as well.
            os.makedirs(x)

    for f, d in zip(inFiles, outFolders):
        copyfileorfolder(f, options.path, d, options.move)

    if options.verbose:
        report(outFolders)
|
|
|
|
|
|
# Run the splitter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|