refactor title processing to remove special characters and brackets

This commit is contained in:
maxid
2025-12-20 20:00:16 +01:00
parent 1f50b18b9f
commit cfe34358a0
3 changed files with 23 additions and 7 deletions

View File

@@ -37,9 +37,9 @@ from media_manager.torrent.utils import (
import_file,
get_files_for_import,
remove_special_characters,
strip_trailing_year,
get_importable_media_directories,
extract_external_id_from_string,
remove_special_chars_and_parentheses,
)
from media_manager.indexer.service import IndexerService
from media_manager.metadataProvider.abstractMetaDataProvider import (
@@ -643,7 +643,7 @@ class MovieService:
self, movie: Path, metadata_provider: AbstractMetadataProvider
) -> MediaImportSuggestion:
search_result = self.search_for_movie(
strip_trailing_year(movie.name), metadata_provider
remove_special_chars_and_parentheses(movie.name), metadata_provider
)
import_candidates = MediaImportSuggestion(
directory=movie, candidates=search_result

View File

@@ -190,11 +190,27 @@ def remove_special_characters(filename: str) -> str:
return sanitized
def strip_trailing_year(title: str) -> str:
    """
    Removes a trailing space + (4-digit year) at end of string

    :param title: The original title.
    :return: The title without a trailing "(YYYY)", stripped of surrounding whitespace.
    """
    return re.sub(r"\s*\(\d{4}\)\s*$", "", title).strip()


def remove_special_chars_and_parentheses(title: str) -> str:
    """
    Removes special characters and bracketed information from the title.

    Square-bracketed and curly-bracketed segments are dropped, as is any
    parenthesised 4-digit year. Other parenthesised text is left in place.
    Whitespace left behind by the removed segments is collapsed to single
    spaces and trimmed from both ends.

    :param title: The original title.
    :return: A sanitized version of the title.
    """
    # Remove content within brackets
    sanitized = re.sub(r"\[.*?\]", "", title)
    # Remove content within curly brackets
    sanitized = re.sub(r"\{.*?\}", "", sanitized)
    # Remove year within parentheses
    sanitized = re.sub(r"\(\d{4}\)", "", sanitized)
    # Remove special characters
    sanitized = remove_special_characters(sanitized)
    # Deleting segments leaves doubled/trailing spaces ("Name (2020) [x]" ->
    # "Name  "); normalise them so the result is a clean search term.
    return " ".join(sanitized.split())
def get_importable_media_directories(path: Path) -> list[Path]:

View File

@@ -42,9 +42,9 @@ from media_manager.torrent.utils import (
import_file,
get_files_for_import,
remove_special_characters,
strip_trailing_year,
get_importable_media_directories,
extract_external_id_from_string,
remove_special_chars_and_parentheses,
)
from media_manager.indexer.service import IndexerService
from media_manager.metadataProvider.abstractMetaDataProvider import (
@@ -877,7 +877,7 @@ class TvService:
self, tv_show: Path, metadata_provider: AbstractMetadataProvider
) -> MediaImportSuggestion:
search_result = self.search_for_show(
strip_trailing_year(tv_show.name), metadata_provider
remove_special_chars_and_parentheses(tv_show.name), metadata_provider
)
import_candidates = MediaImportSuggestion(
directory=tv_show, candidates=search_result