Files
q-tools/files/mvregex
Ville Rantanen 1d89290e49 expand alnum
2019-12-21 17:32:34 +02:00

182 lines
6.1 KiB
Bash
Executable File

#!/bin/bash
# Print the usage text on stdout and terminate the script.
# Takes no arguments. The bare `exit` propagates the status of the
# preceding printf, i.e. 0 when the text was written successfully.
helpexit() {
  printf '%s\n' \
    'Rename files in current folder, replacing with regex: arg1 -> arg2.' \
    'Example: "_\([0-9]\)" "_X\1"' \
    'Options:' \
    ' --ng Do _not_ replace globally (applies to regex replace only)' \
    ' --nc Do _not_ color output' \
    'Modes:' \
    ' -n to match non-ascii and non-printable characters, and replace to [arg1]' \
    ' -f to replace match [arg1] with format [arg2], ex: -f "[0-9]\+" "%04d"' \
    ' -p to replace problematic characters [^\w()[]-.] with [arg1]' \
    ' -a to replace non-alphanumeric(+dot) characters [^\w_-.] with [arg1]' \
    ' -u Unidecode to nearest ascii representation'
  exit
}
# No arguments at all: show the usage text (helpexit terminates the script).
[[ -z "$1" ]] && helpexit

#######################################
# Parse the command line into the global settings used by the mode sections.
# Globals written:
#   HIGHLIGHTSRC, HIGHLIGHTTGT, RESET - escape sequences for the preview
#                                       (emptied by --nc)
#   GLOBAL - sed "g" flag, emptied by --ng
#   MODE   - regex (default) | nonascii (-n) | printf (-f) | unidecode (-u)
#   SRC    - match pattern: preset by -p / -a, otherwise the first
#            non-option argument
#   TGT    - every later non-option argument (the last one wins); left unset
#            when there is none
# Arguments: the script's own "$@"
# Note: -h anywhere on the command line prints the help and exits.
#######################################
parse_arguments() {
  local arg

  HIGHLIGHTSRC='\x1b[1;33;40m'
  HIGHLIGHTTGT='\x1b[1;32;40m'
  RESET='\x1b[0m'
  GLOBAL="g"
  MODE=regex
  # Start from a clean slate: a SRC inherited from the environment would push
  # the first positional argument into TGT, and an inherited ERROR or
  # FORMATTING_ERROR would make the preview loops abort.
  unset SRC TGT ERROR FORMATTING_ERROR

  for arg in "$@"; do
    case "$arg" in
      -h)
        helpexit
        ;;
      --nc)
        RESET=""
        HIGHLIGHTSRC=""
        HIGHLIGHTTGT=""
        ;;
      --ng)
        GLOBAL=""
        ;;
      -n)
        MODE=nonascii
        ;;
      -f)
        MODE=printf
        ;;
      -u)
        MODE=unidecode
        ;;
      -p)
        SRC='[^]\[0-9a-zA-Z_.()-]'
        ;;
      -a)
        SRC='[^0-9a-zA-Z._-]'
        ;;
      *)
        # The first free argument fills SRC (unless -p / -a already did);
        # anything after that is the replacement.
        if [[ -z "$SRC" ]]; then
          SRC="$arg"
        else
          TGT="$arg"
        fi
        ;;
    esac
  done
}

parse_arguments "$@"
# Shared state for every mode section below.
# Split unquoted expansions on newlines only, so names with spaces survive
# the `for file in $( ... )` loops.
IFS=$'\n'
# Listing command; switched to "ls -v" when ls identifies itself as GNU.
LS="ls"
if $LS --version | grep -q GNU; then
  LS="ls -v"
fi
# Minimum width of the first preview column.
LONGEST=8
# Running count of matching files.
c=0
#######################################
# Sanitize one file name: drop CR/LF, then turn every character that tr does
# not class as printable into the replacement string.
# Arguments: $1 - file name, $2 - replacement (may be empty)
# Outputs:   the sanitized name on stdout, without a trailing newline
#######################################
nonascii_clean_name() {
  # '[:print:]' must stay quoted: unquoted it is a glob that a file called
  # "t", "p", ":" ... in the working directory would match.
  # Non-printables are first mapped to NUL so that sed can substitute a
  # replacement of any length (including none).
  printf '%s' "$1" \
    | tr -d '\n\r' \
    | tr -c '[:print:]' '\000' \
    | sed s/'\x0'/"$2"/g
}

#######################################
# Non-ascii mode (-n): preview and then rename every file whose name contains
# a control character or a character in the \x80-\xFF range.
# Globals read: LS, LONGEST, c, SRC, TGT
# Input:        one line from stdin as confirmation (abort with Ctrl-C)
#######################################
run_nonascii_mode() {
  # The help text promises "replace to [arg1]". With a single argument the
  # parser stores it in SRC, so fall back to SRC when no second argument
  # (TGT) was given; an explicit TGT keeps its previous meaning.
  local replacement=${TGT-$SRC}
  local longest=$LONGEST count=$c
  local name reply idx
  local -a names=() cleaned_names=()

  # List once, reading line by line: names are never glob-expanded, and a
  # file that has both control and high characters is handled only once.
  # NOTE(review): how grep -P interprets \x80-\xFF depends on the locale -
  # confirm it matches the non-ascii names you expect.
  while IFS= read -r name; do
    names+=("$name")
    cleaned_names+=("$(nonascii_clean_name "$name" "$replacement")")
    if [[ ${#name} -gt $longest ]]; then
      longest=${#name}
    fi
  done < <(eval "$LS" | grep -P '[\x00-\x1F\x80-\xFF]')

  printf "%-${longest}s\t|%s\n" Matching Replaced
  for idx in "${!names[@]}"; do
    printf "%-${longest}s\t|%s\n" "${names[idx]}" "${cleaned_names[idx]}"
    count=$(( count + 1 ))
  done

  printf '%s matches. Sure?\n' "$count"
  read -r reply

  for idx in "${!names[@]}"; do
    # Nothing to do when sanitizing left the name untouched.
    if [[ "${names[idx]}" != "${cleaned_names[idx]}" ]]; then
      mv -iv -- "${names[idx]}" "${cleaned_names[idx]}"
    fi
  done
}

if [ "$MODE" = "nonascii" ]; then
  ## Remove non-ascii characters
  run_nonascii_mode
  exit
fi
#######################################
# Format mode (-f): in every file name matching SRC, replace the first match
# with that match rendered through the printf format TGT.
# Globals read: LS, LONGEST, c, SRC, TGT, HIGHLIGHTSRC, HIGHLIGHTTGT, RESET
# Input:        one line from stdin as confirmation (abort with Ctrl-C)
# Exits:        with status 1 as soon as TGT cannot format a match
#######################################
run_printf_mode() {
  local longest=$LONGEST count=$c
  local name new_name match formatted src_colored tgt_colored pad reply idx
  local failed=0
  local -a names=() replacements=()

  # helpexit terminates the script; the return only guards against ever
  # continuing without a format.
  [[ -z "$TGT" ]] && { helpexit; return 1; }

  # List once and read line by line, so names containing * ? [ are taken
  # literally instead of being glob-expanded by an unquoted $( ... ).
  while IFS= read -r name; do
    names+=("$name")
    if [[ ${#name} -gt $longest ]]; then
      longest=${#name}
    fi
  done < <(eval "$LS" | grep -- "$SRC")

  ## string/number formatting
  printf "%-${longest}s\t|%s\n" Matching Formatted
  for name in "${names[@]}"; do
    match=$(printf '%s\n' "$name" | grep -h -o -m 1 -- "$SRC" | head -n1)
    # remove zeroes from beginning, if matching integers ("08" would
    # otherwise be rejected by %d as invalid octal). Only plain digit strings
    # are touched, so file name text is never evaluated as arithmetic.
    if [[ $match =~ ^[0-9]+$ ]]; then
      match=$(( 10#$match ))
    fi
    formatted=$(printf "$TGT" "$match") || failed=1
    replacements+=("$formatted")
    src_colored=$(printf '%s\n' "$name" \
      | sed "s/\($SRC\)/${HIGHLIGHTSRC}\1${RESET}/")
    tgt_colored=$(printf '%s\n' "$name" \
      | sed "s/$SRC/${HIGHLIGHTTGT}${formatted}${RESET}/")
    # Widen the column by the length of the (invisible) colour codes.
    pad=$(( longest + ${#src_colored} - ${#name} ))
    printf "%-${pad}s\t|%s\n" "$src_colored" "$tgt_colored"
    [[ $failed -eq 1 ]] && exit 1
    count=$(( count + 1 ))
  done

  printf '%s matches. Sure?\n' "$count"
  read -r reply

  # Rename exactly the files that were previewed. The list is filtered with
  # SRC; the positional "$2" used previously is only the pattern when -f
  # happens to be the first argument.
  for idx in "${!names[@]}"; do
    name=${names[idx]}
    new_name=$(printf '%s\n' "$name" | sed "s/$SRC/${replacements[idx]}/")
    if [[ "$name" != "$new_name" ]]; then
      mv -iv -- "$name" "$new_name"
    fi
  done
}

if [ "$MODE" = "printf" ]; then
  run_printf_mode
  exit
fi
#######################################
# Transliterate one string with the Python "unidecode" module.
# Arguments: $1 - text to convert
# Outputs:   the converted text on stdout, without a trailing newline
# Returns:   python3's exit status (non-zero e.g. when the module is missing)
#######################################
function decode() {
python3 -c "
import sys, unidecode
sys.stdout.write(unidecode.unidecode(sys.argv[1]))
" "$1"
}

#######################################
# Unidecode mode (-u): preview and then rename every file in the current
# folder whose name changes when passed through decode().
# Globals read: LS, LONGEST, c
# Input:        one line from stdin as confirmation; because of `set -e`,
#               end-of-file on the prompt aborts before anything is renamed
#######################################
run_unidecode_mode() {
  # Abort on the first failing command, e.g. when decode() cannot run.
  set -e
  local longest=$LONGEST count=$c
  local name ascii reply idx
  local -a names=() targets=()

  # List once and read line by line, so names containing * ? [ are taken
  # literally instead of being glob-expanded by an unquoted $( ... ).
  while IFS= read -r name; do
    names+=("$name")
    if [[ ${#name} -gt $longest ]]; then
      longest=${#name}
    fi
  done < <(eval "$LS")

  printf "%-${longest}s\t|%s\n" Matching Replaced
  for name in "${names[@]}"; do
    # Convert each name once and remember the result for the rename pass,
    # instead of starting python twice per file.
    ascii=$(decode "$name")
    targets+=("$ascii")
    if [[ "$name" != "$ascii" ]]; then
      printf "%-${longest}s\t|%s\n" "$name" "$ascii"
      count=$(( count + 1 ))
    fi
  done

  printf '%s matches. Sure?\n' "$count"
  read -r reply

  for idx in "${!names[@]}"; do
    if [[ "${names[idx]}" != "${targets[idx]}" ]]; then
      mv -iv -- "${names[idx]}" "${targets[idx]}"
    fi
  done
}

if [ "$MODE" = "unidecode" ]; then
  run_unidecode_mode
  exit
fi
## Normal regex replace
#######################################
# Regex mode (default): preview and then rename every file whose name matches
# SRC, substituting TGT through sed.
# Globals read: LS, LONGEST, c, SRC, TGT, GLOBAL, HIGHLIGHTSRC, HIGHLIGHTTGT,
#               RESET
# Input:        one line from stdin as confirmation (abort with Ctrl-C)
# Exits:        with status 1, after printing both expressions, when sed
#               rejects the pattern or the replacement
#######################################
run_regex_mode() {
  local longest=$LONGEST count=$c
  local name new_name src_colored tgt_colored pad reply
  local failed=0
  local -a names=()

  # helpexit terminates the script; the return only guards against ever
  # renaming with an empty pattern.
  [[ -z "$SRC" ]] && { helpexit; return 1; }

  # List once and read line by line, so names containing * ? [ are taken
  # literally instead of being glob-expanded by an unquoted $( ... ).
  while IFS= read -r name; do
    names+=("$name")
    if [[ ${#name} -gt $longest ]]; then
      longest=${#name}
    fi
  done < <(eval "$LS" | grep -- "$SRC")

  printf "%-${longest}s\t|%s\n" Matching Replaced
  for name in "${names[@]}"; do
    src_colored=$(printf '%s\n' "$name" \
      | sed "s/\($SRC\)/${HIGHLIGHTSRC}\1${RESET}/$GLOBAL") || failed=1
    tgt_colored=$(printf '%s\n' "$name" \
      | sed "s/$SRC/${HIGHLIGHTTGT}${TGT}${RESET}/$GLOBAL") || failed=1
    # Widen the column by the length of the (invisible) colour codes.
    pad=$(( longest + ${#src_colored} - ${#name} ))
    printf "%-${pad}s\t|%s\n" "$src_colored" "$tgt_colored"
    if [[ $failed -eq 1 ]]; then
      printf "Match='%s'\n" "$SRC"
      printf "Replace='%s'\n" "$TGT"
      exit 1
    fi
    count=$(( count + 1 ))
  done

  printf '%s matches. Sure?\n' "$count"
  read -r reply

  for name in "${names[@]}"; do
    new_name=$(printf '%s\n' "$name" | sed "s/$SRC/$TGT/$GLOBAL")
    # Like the other modes, leave files alone when the name does not change;
    # mv would only complain that source and target are the same file.
    if [[ "$name" != "$new_name" ]]; then
      mv -iv -- "$name" "$new_name"
    fi
  done
}

run_regex_mode