mirror of
https://github.com/morpheus65535/bazarr.git
synced 2026-04-18 13:19:12 -04:00
219 lines
6.8 KiB
Python
219 lines
6.8 KiB
Python
# BSD 2-Clause License
|
|
#
|
|
# Apprise - Push Notification Library.
|
|
# Copyright (c) 2026, Chris Caron <lead2gold@gmail.com>
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
from apprise.common import NotifyFormat
|
|
|
|
# Punctuation marks that qualify as a soft split point when they are
# immediately followed by whitespace.
PUNCTUATION_CHARS = ".!?:;"
PUNCT_SPLIT_PATTERN = re.compile(
    "[" + re.escape(PUNCTUATION_CHARS) + "][ \t\r\n\x0b\x0c]+"
)

# HTML entities (&...;): number of characters inspected before and after
# a candidate split point.
HTML_ENTITY_LOOKBACK = 16
HTML_ENTITY_LOOKAHEAD = 16

# Markdown constructs (e.g., links, formatting): a longer window is used
# because links of the form [text](url) are longer than entities.
MARKDOWN_CONSTRUCT_LOOKBACK = 32
|
|
|
|
|
|
def html_adjust(
    text: str,
    window_start: int,
    split_at: int,
) -> int:
    """
    Adjust the split point to avoid splitting inside short HTML entities
    such as '&nbsp;'.

    The last '&' within HTML_ENTITY_LOOKBACK characters before `split_at`
    (but never before `window_start`) is paired with the first ';' that
    follows it, searching no further than HTML_ENTITY_LOOKAHEAD characters
    past `split_at`. If `split_at` falls strictly after that '&' and at or
    before that ';', and the text between them has the form of a character
    reference, the split is moved back to the '&' so the entire entity is
    kept in the next chunk.

    Args:
        text: The full text being split.
        window_start: Index at which the current chunk begins.
        split_at: Proposed index at which the next chunk would begin.

    Returns:
        The index of the '&' when the proposed split would cut an entity
        in two, otherwise `split_at` unchanged. `split_at` is also returned
        unchanged when it does not lie in (window_start, len(text)], or
        when the '&' sits at `window_start` (moving there would leave an
        empty chunk).
    """
    if split_at <= window_start or split_at > len(text):
        return split_at

    search_start = max(window_start, split_at - HTML_ENTITY_LOOKBACK)

    # rfind() excludes split_at itself, so a hit is always < split_at.
    amp_index = text.rfind("&", search_start, split_at)
    if amp_index == -1 or amp_index <= window_start:
        return split_at

    forward_end = min(len(text), split_at + HTML_ENTITY_LOOKAHEAD)
    semi_index = text.find(";", amp_index, forward_end)
    if semi_index == -1 or semi_index < split_at:
        # No terminator nearby, or the '&...;' span closed before the split.
        return split_at

    # Only treat '&...;' as an entity when its body looks like a named
    # (&nbsp;), decimal (&#160;) or hexadecimal (&#xA0;) reference. This
    # keeps ordinary prose such as "Tom & Jerry; cats" from dragging the
    # split point backwards for no reason.
    entity_body = text[amp_index + 1:semi_index]
    if entity_body.startswith("#"):
        entity_body = entity_body[1:]

    if entity_body.isascii() and entity_body.isalnum():
        return amp_index

    return split_at
|
|
|
|
|
|
def markdown_adjust(
    text: str,
    window_start: int,
    split_at: int,
) -> int:
    """
    Adjust the split point to avoid splitting inside simple Markdown
    link / image constructs like [Text](URL) or ![Alt](URL).

    This is a best-effort heuristic and does not attempt full Markdown
    parsing. If the boundary falls between '['/'!' and the closing ')'
    of a nearby link/image (including directly in front of that ')'),
    move the split back to that start.

    Args:
        text: The full text being split.
        window_start: Index at which the current chunk begins.
        split_at: Proposed index at which the next chunk would begin.

    Returns:
        The index of the construct's opening '[' (or of the '!' that
        prefixes it) when the proposed split would cut the construct in
        two, otherwise `split_at` unchanged. `split_at` is also returned
        unchanged when it does not lie in (window_start, len(text)].
    """
    if split_at <= window_start or split_at > len(text):
        return split_at

    search_start = max(window_start, split_at - MARKDOWN_CONSTRUCT_LOOKBACK)

    # Prefer '[' as the starting marker for links/images.
    link_start_idx = text.rfind("[", search_start, split_at)
    if link_start_idx == -1:
        # As a fallback, consider '!' as a possible start, e.g. the '!' of
        # '![Alt](URL)' when its '[' lies at or beyond the split point.
        link_start_idx = text.rfind("!", search_start, split_at)

    elif (
        link_start_idx - 1 > window_start
        and text[link_start_idx - 1] == "!"
    ):
        # Image syntax: keep the leading '!' attached to its '[' instead
        # of stranding it at the end of the previous chunk. The '!' is only
        # claimed when it does not sit at window_start, since a split there
        # would produce an empty chunk.
        link_start_idx -= 1

    if link_start_idx == -1:
        return split_at

    # Look ahead for a closing ')' to bound the construct.
    forward_end = min(len(text), split_at + MARKDOWN_CONSTRUCT_LOOKBACK)
    link_end_idx = text.find(")", link_start_idx, forward_end)

    # split_at is the first index of the NEXT chunk, so split_at equal to
    # link_end_idx would push the closing ')' into the next chunk; that
    # still cuts the construct and must be adjusted as well.
    if link_end_idx != -1 and link_start_idx < split_at <= link_end_idx:
        return link_start_idx

    return split_at
|
|
|
|
|
|
def smart_split(
    text: str,
    limit: int,
    body_format: NotifyFormat,
) -> list[str]:
    """
    Split `text` into chunks of at most `limit` characters.

    Soft split priority (each candidate is searched for only within the
    next `limit` characters, and the split is placed directly after it):
      1. Last newline (\\n or \\r)
      2. Last space or tab
      3. Last punctuation+whitespace (one of PUNCTUATION_CHARS followed by
         whitespace, as matched by PUNCT_SPLIT_PATTERN)
      4. Hard split at `limit`

    `body_format` controls additional safety rules:
      - NotifyFormat.TEXT: generic splitting only
      - NotifyFormat.HTML: avoid splitting inside '&...;' entities
      - NotifyFormat.MARKDOWN: same as HTML, plus a best-effort check to
        avoid splitting inside [Text](URL) / ![Alt](URL) patterns.

    Args:
        text: The content to split.
        limit: Maximum number of characters allowed per chunk.
        body_format: Format of `text`; selects the extra adjustments above.

    Returns:
        The chunks in order; concatenating them reproduces `text`. A list
        holding a single empty string is returned when `text` is empty or
        `limit` is not positive.
    """

    if not text or limit <= 0:
        return [""]

    result: list[str] = []
    start = 0
    length = len(text)

    # Keep carving chunks off the front while more than `limit` characters
    # remain; whatever is left afterwards fits into one final chunk.
    while length - start > limit:
        # More than `limit` characters remain, so this never passes the end.
        window_end = start + limit

        #
        # Priority 1: Search for newline
        #
        last_nl_idx = max(
            text.rfind("\n", start, window_end),
            text.rfind("\r", start, window_end),
        )
        if last_nl_idx != -1:
            split_at = last_nl_idx + 1

        else:
            #
            # Priority 2: Search for ending Space and/or Tab
            #
            last_space_tab_idx = max(
                text.rfind(" ", start, window_end),
                text.rfind("\t", start, window_end),
            )
            if last_space_tab_idx != -1:
                split_at = last_space_tab_idx + 1

            else:
                #
                # Priority 4: Hard split, used unless Priority 3 below
                # finds something better.
                #
                split_at = window_end

                #
                # Priority 3: Last punctuation + whitespace. Only scanned
                # when the cheaper searches above found nothing; the last
                # match in the window wins.
                #
                for match in PUNCT_SPLIT_PATTERN.finditer(
                    text, start, window_end
                ):
                    split_at = match.end()

        #
        # Conditional Content-specific adjustments
        #
        orig_split = split_at
        if body_format is NotifyFormat.HTML:
            split_at = html_adjust(text, start, split_at)

        elif body_format is NotifyFormat.MARKDOWN:
            # Markdown may also contain HTML entities.
            split_at = html_adjust(text, start, split_at)
            split_at = markdown_adjust(text, start, split_at)

        if split_at <= start:
            # An adjustment that would yield an empty chunk is discarded so
            # the loop always makes progress.
            split_at = orig_split

        result.append(text[start:split_at])
        start = split_at

    result.append(text[start:])
    return result
|