Compare commits

...

1 Commits

Author SHA1 Message Date
Safihre
9cbae891d0 WIP on release313 2024-10-16 10:37:53 +02:00
7 changed files with 37 additions and 23 deletions

View File

@@ -23,6 +23,7 @@ pytz==2024.2
sgmllib3k==1.0.0
portend==3.2.0
chardet==5.2.0
pyunormalize==16.0.0
PySocks==1.7.1
puremagic==1.28
guessit==3.8.0

View File

@@ -33,7 +33,7 @@ import os
import re
import sabnzbd
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext, get_basename
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext, get_basename, listdir_normalized
from sabnzbd.par2file import is_parfile, parse_par2_file
import sabnzbd.utils.file_extension as file_extension
from sabnzbd.misc import match_str
@@ -59,7 +59,7 @@ def decode_par2(parfile: str) -> List[str]:
# Parse all files in the folder
dirname = os.path.dirname(parfile)
new_files = [] # list of new files generated
for fn in os.listdir(dirname):
for fn in listdir_normalized(dirname):
filepath = os.path.join(dirname, fn)
# Only check files
if os.path.isfile(filepath):

View File

@@ -20,6 +20,7 @@ sabnzbd.encoding - Unicode/byte translation functions
"""
import locale
import pyunormalize
import chardet
from xml.sax.saxutils import escape
from typing import AnyStr
@@ -27,6 +28,11 @@ from typing import AnyStr
CODEPAGE = locale.getpreferredencoding()
def normalize_utf8(inputstring: str) -> str:
    """Return `inputstring` converted to Unicode Normalization Form C (NFC).

    The actual normalization is delegated to pyunormalize.NFC.
    """
    normalized = pyunormalize.NFC(inputstring)
    return normalized
def utob(str_in: AnyStr) -> bytes:
"""Shorthand for converting UTF-8 string to bytes"""
if isinstance(str_in, bytes):
@@ -37,22 +43,19 @@ def utob(str_in: AnyStr) -> bytes:
def ubtou(str_in: AnyStr) -> str:
"""Shorthand for converting unicode bytes to UTF-8 string"""
if isinstance(str_in, str):
return str_in
return str_in.decode("utf-8")
return normalize_utf8(str_in)
return normalize_utf8(str_in.decode("utf-8"))
def platform_btou(str_in: AnyStr) -> str:
"""Return Unicode string, if not already Unicode, decode with locale encoding.
NOTE: Used for POpen because universal_newlines/text parameter doesn't
always work! We cannot use encoding-parameter because it's Python 3.7+
"""
"""Return Unicode string, if not already Unicode, decode with locale encoding"""
if isinstance(str_in, bytes):
try:
return ubtou(str_in)
except UnicodeDecodeError:
return str_in.decode(CODEPAGE, errors="replace").replace("?", "!")
return normalize_utf8(str_in.decode(CODEPAGE, errors="replace").replace("?", "!"))
else:
return str_in
return normalize_utf8(str_in)
def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str:
@@ -71,10 +74,10 @@ def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str:
except UnicodeDecodeError:
try:
# Try using 8-bit ASCII, if came from Windows
return str_or_bytes_in.decode("ISO-8859-1")
return normalize_utf8(str_or_bytes_in.decode("ISO-8859-1"))
except ValueError:
# Last resort we use the slow chardet package
return str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"])
return normalize_utf8(str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"]))
def correct_cherrypy_encoding(inputstring: str) -> str:

View File

@@ -46,7 +46,7 @@ except ImportError:
import sabnzbd
from sabnzbd.decorators import synchronized, cache_maintainer
from sabnzbd.constants import FUTURE_Q_FOLDER, JOB_ADMIN, GIGI, DEF_FILE_MAX, IGNORED_FILES_AND_FOLDERS, DEF_LOG_FILE
from sabnzbd.encoding import correct_unknown_encoding, utob, ubtou
from sabnzbd.encoding import correct_unknown_encoding, utob, ubtou, normalize_utf8
from sabnzbd.utils import rarfile
@@ -561,7 +561,7 @@ def globber(path: str, pattern: str = "*") -> List[str]:
"""Return matching base file/folder names in folder `path`"""
# Cannot use glob.glob() because it doesn't support Windows long name notation
if os.path.exists(path):
return [f for f in os.listdir(path) if safe_fnmatch(f, pattern)]
return [f for f in listdir_normalized(path) if safe_fnmatch(f, pattern)]
return []
@@ -569,7 +569,8 @@ def globber_full(path: str, pattern: str = "*") -> List[str]:
"""Return matching full file/folder names in folder `path`"""
# Cannot use glob.glob() because it doesn't support Windows long name notation
if os.path.exists(path):
return [os.path.join(path, f) for f in os.listdir(path) if safe_fnmatch(f, pattern)]
path = normalize_utf8(path)
return [os.path.join(path, f) for f in listdir_normalized(path) if safe_fnmatch(f, pattern)]
return []
@@ -581,7 +582,7 @@ def fix_unix_encoding(folder: str):
if not sabnzbd.WIN32 and not sabnzbd.MACOS:
for root, dirs, files in os.walk(folder):
for name in files:
new_name = correct_unknown_encoding(name)
new_name = normalize_utf8(correct_unknown_encoding(name))
if name != new_name:
try:
renamer(os.path.join(root, name), os.path.join(root, new_name))
@@ -804,6 +805,12 @@ def get_unique_filename(path: str) -> str:
return path
def listdir_normalized(input_dir: str) -> List[str]:
    """List the entries of `input_dir`, passing every name through normalize_utf8.

    macOS hands back un-normalized names, so the same normalization is
    applied on all platforms to keep the results consistent.
    """
    entries = os.listdir(input_dir)
    return list(map(normalize_utf8, entries))
@synchronized(DIR_LOCK)
def listdir_full(input_dir: str, recursive: bool = True) -> List[str]:
"""List all files in dirs and sub-dirs"""
@@ -812,7 +819,7 @@ def listdir_full(input_dir: str, recursive: bool = True) -> List[str]:
for file in files:
# Ignore special folders and resources files created by macOS
if not sabnzbd.misc.match_str(root, IGNORED_FILES_AND_FOLDERS) and not file.startswith("._"):
filelist.append(os.path.join(root, file))
filelist.append(normalize_utf8(os.path.join(root, file)))
if not recursive:
break
return filelist
@@ -1386,7 +1393,7 @@ def pathbrowser(path: str, show_hidden: bool = False, show_files: bool = False)
# List all files and folders
file_list = []
for filename in os.listdir(path):
for filename in listdir_normalized(path):
fpath = os.path.join(path, filename)
isdir = os.path.isdir(fpath)

View File

@@ -63,6 +63,7 @@ from sabnzbd.filesystem import (
SEVENMULTI_RE,
is_size,
get_basename,
listdir_normalized,
)
from sabnzbd.nzbstuff import NzbObject
import sabnzbd.cfg as cfg
@@ -1020,7 +1021,7 @@ def par2_repair(nzo: NzbObject, setname: str) -> Tuple[bool, bool]:
return False, True
parfile = os.path.join(nzo.download_path, parfile_nzf.filename)
old_dir_content = os.listdir(nzo.download_path)
old_dir_content = listdir_normalized(nzo.download_path)
used_joinables = ()
joinables = ()
used_for_repair = ()
@@ -1080,7 +1081,7 @@ def par2_repair(nzo: NzbObject, setname: str) -> Tuple[bool, bool]:
try:
if cfg.enable_par_cleanup():
deletables = []
new_dir_content = os.listdir(nzo.download_path)
new_dir_content = listdir_normalized(nzo.download_path)
# If Multipar or par2cmdline repairs a broken part of a joinable, it doesn't list it as such.
# So we need to manually add all joinables of the set to the list of used joinables.

View File

@@ -73,6 +73,7 @@ from sabnzbd.filesystem import (
get_filename,
directory_is_writable,
check_filesystem_capabilities,
listdir_normalized,
)
from sabnzbd.nzbstuff import NzbObject
from sabnzbd.sorting import Sorter
@@ -961,7 +962,7 @@ def rar_renamer(nzo: NzbObject) -> int:
volnrext = {}
# Scan rar files in workdir, but not subdirs
workdir_files = os.listdir(nzo.download_path)
workdir_files = listdir_normalized(nzo.download_path)
for file_to_check in workdir_files:
file_to_check = os.path.join(nzo.download_path, file_to_check)
@@ -1185,7 +1186,7 @@ def one_file_or_folder(folder: str) -> str:
"""If the dir only contains one file or folder, join that file/folder onto the path"""
if os.path.exists(folder) and os.path.isdir(folder):
try:
cont = os.listdir(folder)
cont = listdir_normalized(folder)
if len(cont) == 1:
folder = os.path.join(folder, cont[0])
folder = one_file_or_folder(folder)

View File

@@ -37,6 +37,7 @@ from sabnzbd.filesystem import (
renamer,
sanitize_foldername,
clip_path,
listdir_normalized,
)
import sabnzbd.config as config
import sabnzbd.cfg as cfg
@@ -616,7 +617,7 @@ def move_to_parent_directory(workdir: str) -> Tuple[str, bool]:
logging.debug("Moving all files from %s to %s", workdir, dest)
# Check for DVD folders and bail out if found
for item in os.listdir(workdir):
for item in listdir_normalized(workdir):
if item.lower() in IGNORED_MOVIE_FOLDERS:
return workdir, True