unidecode for mvregex

This commit is contained in:
ville rantanen
2019-09-30 19:12:34 +03:00
parent bd887f1db5
commit 2af0c618f4

View File

@@ -11,6 +11,7 @@ function helpexit() {
echo ' -f to replace match [arg1] with format [arg2], ex: -f "[0-9]\+" "%04d"'
echo ' -p to replace problematic characters [^\w()[]-.] with [arg1]'
echo ' -a to replace non-alphanumeric(+dot) characters [^\w.] with [arg1]'
echo ' -u Unidecode to nearest ascii representation'
exit
}
@@ -41,6 +42,10 @@ do [[ "${!i}" = "-h" ]] && helpexit
MODE=printf
continue
}
[[ "${!i}" = "-u" ]] && {
MODE=unidecode
continue
}
[[ "${!i}" = "-p" ]] && {
SRC='[^]\[0-9a-zA-Z_.()-]'
continue
@@ -122,6 +127,35 @@ if [ "$MODE" = "printf" ]; then
exit
fi
if [ "$MODE" = "unidecode" ]; then
    # Unidecode mode: rename every listed file whose name differs from its
    # Unidecode (nearest-ASCII) transliteration.
    # Reads:  LS (command string producing the candidate names, one per line),
    #         LONGEST (current column width for the preview table).
    # Flow:   build the rename plan once, print it, wait for Enter, then rename.
    # Any failing command (python3, mv, read at EOF) aborts the script.
    set -e

    # Print the Unidecode transliteration of $1 on stdout (no trailing newline).
    # Needs python3 with the third-party "unidecode" module importable.
    function decode() {
        python3 -c 'import sys, unidecode; sys.stdout.write(unidecode.unidecode(sys.argv[1]))' "$1"
    }

    # Build the plan in a single pass so that each name is decoded only once
    # and the list shown to the user is exactly the list that gets renamed.
    # Names are read line by line, so embedded spaces or glob characters are
    # kept intact instead of being word-split.
    sources=()
    targets=()
    while IFS= read -r file; do
        if [[ -z "$file" ]]; then
            continue
        fi
        target_file=$( decode "$file" )
        if [[ "$file" = "$target_file" ]]; then
            continue
        fi
        if [[ -z "$target_file" ]]; then
            # An empty target would make mv fail and, under set -e, abort the
            # whole batch half-way through; skip the file instead.
            printf 'Skipping "%s": empty ASCII representation\n' "$file" >&2
            continue
        fi
        # NOTE(review): a transliteration may contain "/" (for example a
        # fraction character); mv will then fail on that name - confirm
        # whether such names should be sanitised.
        sources+=( "$file" )
        targets+=( "$target_file" )
        # Only names that are actually displayed influence the column width.
        if (( ${#file} > ${LONGEST:-0} )); then
            LONGEST=${#file}
        fi
    done < <( eval "$LS" )

    # Preview table: original name | transliterated name.
    printf "%-${LONGEST}s\t|%s\n" Matching Replaced
    for idx in "${!sources[@]}"; do
        printf "%-${LONGEST}s\t|%s\n" "${sources[$idx]}" "${targets[$idx]}"
    done

    # Always a number, also when nothing matched.
    c=${#sources[@]}
    echo "$c matches. Sure?"
    # Enter continues, Ctrl-C aborts. At end-of-input read fails and set -e
    # stops the script before anything is renamed.
    read -r

    for idx in "${!sources[@]}"; do
        # -i asks before overwriting an existing target, -v reports each rename.
        mv -iv -- "${sources[$idx]}" "${targets[$idx]}"
    done
    exit
fi
## Normal regex replace
[[ -z "$SRC" ]] && helpexit
for file in $( eval $LS | grep -- "$SRC"); do [[ ${#file} -gt ${LONGEST} ]] && LONGEST=${#file}; done