Compare commits

...

1 Commits

Author SHA1 Message Date
Safihre
9cbae891d0 WIP on release313 2024-10-16 10:37:53 +02:00
7 changed files with 37 additions and 23 deletions

View File

@@ -23,6 +23,7 @@ pytz==2024.2
sgmllib3k==1.0.0 sgmllib3k==1.0.0
portend==3.2.0 portend==3.2.0
chardet==5.2.0 chardet==5.2.0
pyunormalize==16.0.0
PySocks==1.7.1 PySocks==1.7.1
puremagic==1.28 puremagic==1.28
guessit==3.8.0 guessit==3.8.0

View File

@@ -33,7 +33,7 @@ import os
import re import re
import sabnzbd import sabnzbd
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext, get_basename from sabnzbd.filesystem import get_unique_filename, renamer, get_ext, get_basename, listdir_normalized
from sabnzbd.par2file import is_parfile, parse_par2_file from sabnzbd.par2file import is_parfile, parse_par2_file
import sabnzbd.utils.file_extension as file_extension import sabnzbd.utils.file_extension as file_extension
from sabnzbd.misc import match_str from sabnzbd.misc import match_str
@@ -59,7 +59,7 @@ def decode_par2(parfile: str) -> List[str]:
# Parse all files in the folder # Parse all files in the folder
dirname = os.path.dirname(parfile) dirname = os.path.dirname(parfile)
new_files = [] # list of new files generated new_files = [] # list of new files generated
for fn in os.listdir(dirname): for fn in listdir_normalized(dirname):
filepath = os.path.join(dirname, fn) filepath = os.path.join(dirname, fn)
# Only check files # Only check files
if os.path.isfile(filepath): if os.path.isfile(filepath):

View File

@@ -20,6 +20,7 @@ sabnzbd.encoding - Unicode/byte translation functions
""" """
import locale import locale
import pyunormalize
import chardet import chardet
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from typing import AnyStr from typing import AnyStr
@@ -27,6 +28,11 @@ from typing import AnyStr
CODEPAGE = locale.getpreferredencoding() CODEPAGE = locale.getpreferredencoding()
def normalize_utf8(inputstring: str) -> str:
    """Return `inputstring` converted to Unicode Normalization Form C (NFC).

    The conversion is delegated to ``pyunormalize.NFC``, so the result does
    not depend on the Unicode tables bundled with the running interpreter.
    """
    normalized = pyunormalize.NFC(inputstring)
    return normalized
def utob(str_in: AnyStr) -> bytes: def utob(str_in: AnyStr) -> bytes:
"""Shorthand for converting UTF-8 string to bytes""" """Shorthand for converting UTF-8 string to bytes"""
if isinstance(str_in, bytes): if isinstance(str_in, bytes):
@@ -37,22 +43,19 @@ def utob(str_in: AnyStr) -> bytes:
def ubtou(str_in: AnyStr) -> str: def ubtou(str_in: AnyStr) -> str:
"""Shorthand for converting unicode bytes to UTF-8 string""" """Shorthand for converting unicode bytes to UTF-8 string"""
if isinstance(str_in, str): if isinstance(str_in, str):
return str_in return normalize_utf8(str_in)
return str_in.decode("utf-8") return normalize_utf8(str_in.decode("utf-8"))
def platform_btou(str_in: AnyStr) -> str: def platform_btou(str_in: AnyStr) -> str:
"""Return Unicode string, if not already Unicode, decode with locale encoding. """Return Unicode string, if not already Unicode, decode with locale encoding"""
NOTE: Used for POpen because universal_newlines/text parameter doesn't
always work! We cannot use encoding-parameter because it's Python 3.7+
"""
if isinstance(str_in, bytes): if isinstance(str_in, bytes):
try: try:
return ubtou(str_in) return ubtou(str_in)
except UnicodeDecodeError: except UnicodeDecodeError:
return str_in.decode(CODEPAGE, errors="replace").replace("?", "!") return normalize_utf8(str_in.decode(CODEPAGE, errors="replace").replace("?", "!"))
else: else:
return str_in return normalize_utf8(str_in)
def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str: def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str:
@@ -71,10 +74,10 @@ def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str:
except UnicodeDecodeError: except UnicodeDecodeError:
try: try:
# Try using 8-bit ASCII, if came from Windows # Try using 8-bit ASCII, if came from Windows
return str_or_bytes_in.decode("ISO-8859-1") return normalize_utf8(str_or_bytes_in.decode("ISO-8859-1"))
except ValueError: except ValueError:
# Last resort we use the slow chardet package # Last resort we use the slow chardet package
return str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"]) return normalize_utf8(str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"]))
def correct_cherrypy_encoding(inputstring: str) -> str: def correct_cherrypy_encoding(inputstring: str) -> str:

View File

@@ -46,7 +46,7 @@ except ImportError:
import sabnzbd import sabnzbd
from sabnzbd.decorators import synchronized, cache_maintainer from sabnzbd.decorators import synchronized, cache_maintainer
from sabnzbd.constants import FUTURE_Q_FOLDER, JOB_ADMIN, GIGI, DEF_FILE_MAX, IGNORED_FILES_AND_FOLDERS, DEF_LOG_FILE from sabnzbd.constants import FUTURE_Q_FOLDER, JOB_ADMIN, GIGI, DEF_FILE_MAX, IGNORED_FILES_AND_FOLDERS, DEF_LOG_FILE
from sabnzbd.encoding import correct_unknown_encoding, utob, ubtou from sabnzbd.encoding import correct_unknown_encoding, utob, ubtou, normalize_utf8
from sabnzbd.utils import rarfile from sabnzbd.utils import rarfile
@@ -561,7 +561,7 @@ def globber(path: str, pattern: str = "*") -> List[str]:
"""Return matching base file/folder names in folder `path`""" """Return matching base file/folder names in folder `path`"""
# Cannot use glob.glob() because it doesn't support Windows long name notation # Cannot use glob.glob() because it doesn't support Windows long name notation
if os.path.exists(path): if os.path.exists(path):
return [f for f in os.listdir(path) if safe_fnmatch(f, pattern)] return [f for f in listdir_normalized(path) if safe_fnmatch(f, pattern)]
return [] return []
@@ -569,7 +569,8 @@ def globber_full(path: str, pattern: str = "*") -> List[str]:
"""Return matching full file/folder names in folder `path`""" """Return matching full file/folder names in folder `path`"""
# Cannot use glob.glob() because it doesn't support Windows long name notation # Cannot use glob.glob() because it doesn't support Windows long name notation
if os.path.exists(path): if os.path.exists(path):
return [os.path.join(path, f) for f in os.listdir(path) if safe_fnmatch(f, pattern)] path = normalize_utf8(path)
return [os.path.join(path, f) for f in listdir_normalized(path) if safe_fnmatch(f, pattern)]
return [] return []
@@ -581,7 +582,7 @@ def fix_unix_encoding(folder: str):
if not sabnzbd.WIN32 and not sabnzbd.MACOS: if not sabnzbd.WIN32 and not sabnzbd.MACOS:
for root, dirs, files in os.walk(folder): for root, dirs, files in os.walk(folder):
for name in files: for name in files:
new_name = correct_unknown_encoding(name) new_name = normalize_utf8(correct_unknown_encoding(name))
if name != new_name: if name != new_name:
try: try:
renamer(os.path.join(root, name), os.path.join(root, new_name)) renamer(os.path.join(root, name), os.path.join(root, new_name))
@@ -804,6 +805,12 @@ def get_unique_filename(path: str) -> str:
return path return path
def listdir_normalized(input_dir: str) -> List[str]:
    """List the entries of `input_dir`, passing every name through normalize_utf8.

    macOS returns un-normalized names from the OS, so the same
    normalization is applied on all platforms to keep results comparable.
    """
    # NOTE(review): on a normalization-sensitive filesystem a returned name may
    # differ from the on-disk name; confirm callers that join it back onto a path.
    return list(map(normalize_utf8, os.listdir(input_dir)))
@synchronized(DIR_LOCK) @synchronized(DIR_LOCK)
def listdir_full(input_dir: str, recursive: bool = True) -> List[str]: def listdir_full(input_dir: str, recursive: bool = True) -> List[str]:
"""List all files in dirs and sub-dirs""" """List all files in dirs and sub-dirs"""
@@ -812,7 +819,7 @@ def listdir_full(input_dir: str, recursive: bool = True) -> List[str]:
for file in files: for file in files:
# Ignore special folders and resources files created by macOS # Ignore special folders and resources files created by macOS
if not sabnzbd.misc.match_str(root, IGNORED_FILES_AND_FOLDERS) and not file.startswith("._"): if not sabnzbd.misc.match_str(root, IGNORED_FILES_AND_FOLDERS) and not file.startswith("._"):
filelist.append(os.path.join(root, file)) filelist.append(normalize_utf8(os.path.join(root, file)))
if not recursive: if not recursive:
break break
return filelist return filelist
@@ -1386,7 +1393,7 @@ def pathbrowser(path: str, show_hidden: bool = False, show_files: bool = False)
# List all files and folders # List all files and folders
file_list = [] file_list = []
for filename in os.listdir(path): for filename in listdir_normalized(path):
fpath = os.path.join(path, filename) fpath = os.path.join(path, filename)
isdir = os.path.isdir(fpath) isdir = os.path.isdir(fpath)

View File

@@ -63,6 +63,7 @@ from sabnzbd.filesystem import (
SEVENMULTI_RE, SEVENMULTI_RE,
is_size, is_size,
get_basename, get_basename,
listdir_normalized,
) )
from sabnzbd.nzbstuff import NzbObject from sabnzbd.nzbstuff import NzbObject
import sabnzbd.cfg as cfg import sabnzbd.cfg as cfg
@@ -1020,7 +1021,7 @@ def par2_repair(nzo: NzbObject, setname: str) -> Tuple[bool, bool]:
return False, True return False, True
parfile = os.path.join(nzo.download_path, parfile_nzf.filename) parfile = os.path.join(nzo.download_path, parfile_nzf.filename)
old_dir_content = os.listdir(nzo.download_path) old_dir_content = listdir_normalized(nzo.download_path)
used_joinables = () used_joinables = ()
joinables = () joinables = ()
used_for_repair = () used_for_repair = ()
@@ -1080,7 +1081,7 @@ def par2_repair(nzo: NzbObject, setname: str) -> Tuple[bool, bool]:
try: try:
if cfg.enable_par_cleanup(): if cfg.enable_par_cleanup():
deletables = [] deletables = []
new_dir_content = os.listdir(nzo.download_path) new_dir_content = listdir_normalized(nzo.download_path)
# If Multipar or par2cmdline repairs a broken part of a joinable, it doesn't list it as such. # If Multipar or par2cmdline repairs a broken part of a joinable, it doesn't list it as such.
# So we need to manually add all joinables of the set to the list of used joinables. # So we need to manually add all joinables of the set to the list of used joinables.

View File

@@ -73,6 +73,7 @@ from sabnzbd.filesystem import (
get_filename, get_filename,
directory_is_writable, directory_is_writable,
check_filesystem_capabilities, check_filesystem_capabilities,
listdir_normalized,
) )
from sabnzbd.nzbstuff import NzbObject from sabnzbd.nzbstuff import NzbObject
from sabnzbd.sorting import Sorter from sabnzbd.sorting import Sorter
@@ -961,7 +962,7 @@ def rar_renamer(nzo: NzbObject) -> int:
volnrext = {} volnrext = {}
# Scan rar files in workdir, but not subdirs # Scan rar files in workdir, but not subdirs
workdir_files = os.listdir(nzo.download_path) workdir_files = listdir_normalized(nzo.download_path)
for file_to_check in workdir_files: for file_to_check in workdir_files:
file_to_check = os.path.join(nzo.download_path, file_to_check) file_to_check = os.path.join(nzo.download_path, file_to_check)
@@ -1185,7 +1186,7 @@ def one_file_or_folder(folder: str) -> str:
"""If the dir only contains one file or folder, join that file/folder onto the path""" """If the dir only contains one file or folder, join that file/folder onto the path"""
if os.path.exists(folder) and os.path.isdir(folder): if os.path.exists(folder) and os.path.isdir(folder):
try: try:
cont = os.listdir(folder) cont = listdir_normalized(folder)
if len(cont) == 1: if len(cont) == 1:
folder = os.path.join(folder, cont[0]) folder = os.path.join(folder, cont[0])
folder = one_file_or_folder(folder) folder = one_file_or_folder(folder)

View File

@@ -37,6 +37,7 @@ from sabnzbd.filesystem import (
renamer, renamer,
sanitize_foldername, sanitize_foldername,
clip_path, clip_path,
listdir_normalized,
) )
import sabnzbd.config as config import sabnzbd.config as config
import sabnzbd.cfg as cfg import sabnzbd.cfg as cfg
@@ -616,7 +617,7 @@ def move_to_parent_directory(workdir: str) -> Tuple[str, bool]:
logging.debug("Moving all files from %s to %s", workdir, dest) logging.debug("Moving all files from %s to %s", workdir, dest)
# Check for DVD folders and bail out if found # Check for DVD folders and bail out if found
for item in os.listdir(workdir): for item in listdir_normalized(workdir):
if item.lower() in IGNORED_MOVIE_FOLDERS: if item.lower() in IGNORED_MOVIE_FOLDERS:
return workdir, True return workdir, True