From 95b8aadb239bdce8d7f7a03e4ab995e56bf4e820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Louis=20V=C3=A9zina?= <5130500+morpheus65535@users.noreply.github.com> Date: Wed, 29 Jan 2020 06:53:29 -0500 Subject: [PATCH] WIP --- bazarr/config.py | 6 +- bazarr/get_subtitle.py | 19 +- bazarr/libs.py | 5 - bazarr/list_subtitles.py | 10 +- bazarr/logger.py | 3 +- bazarr/main.py | 7 +- libs2/bs4/AUTHORS.txt | 43 - libs2/bs4/COPYING.txt | 27 - libs2/bs4/NEWS.txt | 1190 ---------- libs2/bs4/README.txt | 63 - libs2/bs4/TODO.txt | 31 - libs2/bs4/__init__.py | 529 ----- libs2/bs4/builder/__init__.py | 333 --- libs2/bs4/builder/_html5lib.py | 426 ---- libs2/bs4/builder/_htmlparser.py | 314 --- libs2/bs4/builder/_lxml.py | 258 -- libs2/bs4/dammit.py | 842 ------- libs2/bs4/diagnose.py | 219 -- libs2/bs4/element.py | 1808 -------------- libs2/bs4/formatter.py | 99 - libs2/bs4/testing.py | 770 ------ libs2/bs4/tests/__init__.py | 1 - libs2/bs4/tests/test_builder_registry.py | 147 -- libs2/bs4/tests/test_docs.py | 36 - libs2/bs4/tests/test_html5lib.py | 130 - libs2/bs4/tests/test_htmlparser.py | 34 - libs2/bs4/tests/test_lxml.py | 76 - libs2/bs4/tests/test_soup.py | 501 ---- libs2/bs4/tests/test_tree.py | 2050 ---------------- libs2/concurrent/__init__.py | 3 - libs2/concurrent/futures/__init__.py | 23 - libs2/concurrent/futures/_base.py | 607 ----- libs2/concurrent/futures/process.py | 359 --- libs2/concurrent/futures/thread.py | 134 -- libs2/enum/LICENSE | 32 - libs2/enum/README | 3 - libs2/enum/__init__.py | 837 ------- libs2/enum/doc/enum.pdf | 2237 ------------------ libs2/enum/doc/enum.rst | 735 ------ libs2/enum/test.py | 1820 -------------- libs2/yaml/__init__.py | 406 ---- libs2/yaml/composer.py | 139 -- libs2/yaml/constructor.py | 709 ------ libs2/yaml/cyaml.py | 101 - libs2/yaml/dumper.py | 62 - libs2/yaml/emitter.py | 1144 --------- libs2/yaml/error.py | 75 - libs2/yaml/events.py | 86 - libs2/yaml/loader.py | 63 - libs2/yaml/nodes.py | 49 - libs2/yaml/parser.py | 589 ----- libs2/yaml/reader.py | 188 -- libs2/yaml/representer.py | 488 ---- libs2/yaml/resolver.py | 227 -- libs2/yaml/scanner.py | 1444 ----------- libs2/yaml/serializer.py | 111 - libs2/yaml/tokens.py | 104 - libs3/bs4/__init__.py | 616 ----- libs3/bs4/builder/__init__.py | 367 --- libs3/bs4/builder/_html5lib.py | 426 ---- libs3/bs4/builder/_htmlparser.py | 350 --- libs3/bs4/builder/_lxml.py | 296 --- libs3/bs4/dammit.py | 850 ------- libs3/bs4/diagnose.py | 224 -- libs3/bs4/element.py | 1579 ------------ libs3/bs4/formatter.py | 99 - libs3/bs4/testing.py | 992 -------- libs3/bs4/tests/__init__.py | 1 - libs3/bs4/tests/test_builder_registry.py | 147 -- libs3/bs4/tests/test_docs.py | 36 - libs3/bs4/tests/test_html5lib.py | 170 -- libs3/bs4/tests/test_htmlparser.py | 47 - libs3/bs4/tests/test_lxml.py | 100 - libs3/bs4/tests/test_soup.py | 567 ----- libs3/bs4/tests/test_tree.py | 2205 ----------------- libs3/engineio/__init__.py | 25 - libs3/engineio/async_drivers/__init__.py | 0 libs3/engineio/async_drivers/aiohttp.py | 128 - libs3/engineio/async_drivers/asgi.py | 214 -- libs3/engineio/async_drivers/eventlet.py | 30 - libs3/engineio/async_drivers/gevent.py | 63 - libs3/engineio/async_drivers/gevent_uwsgi.py | 156 -- libs3/engineio/async_drivers/sanic.py | 144 -- libs3/engineio/async_drivers/threading.py | 17 - libs3/engineio/async_drivers/tornado.py | 184 -- libs3/engineio/asyncio_client.py | 585 ----- libs3/engineio/asyncio_server.py | 472 ---- libs3/engineio/asyncio_socket.py | 236 -- libs3/engineio/client.py | 680 ------ libs3/engineio/exceptions.py | 22 - libs3/engineio/middleware.py | 87 - libs3/engineio/packet.py | 92 - libs3/engineio/payload.py | 81 - libs3/engineio/server.py | 675 ------ libs3/engineio/socket.py | 248 -- libs3/engineio/static_files.py | 55 - libs3/flask_socketio/__init__.py | 922 -------- libs3/flask_socketio/namespace.py | 47 - libs3/flask_socketio/test_client.py | 205 -- libs3/socketio/__init__.py | 38 - libs3/socketio/asgi.py | 36 - libs3/socketio/asyncio_aiopika_manager.py | 105 - libs3/socketio/asyncio_client.py | 475 ---- libs3/socketio/asyncio_manager.py | 58 - libs3/socketio/asyncio_namespace.py | 204 -- libs3/socketio/asyncio_pubsub_manager.py | 163 -- libs3/socketio/asyncio_redis_manager.py | 107 - libs3/socketio/asyncio_server.py | 526 ---- libs3/socketio/base_manager.py | 178 -- libs3/socketio/client.py | 620 ----- libs3/socketio/exceptions.py | 30 - libs3/socketio/kafka_manager.py | 63 - libs3/socketio/kombu_manager.py | 122 - libs3/socketio/middleware.py | 42 - libs3/socketio/namespace.py | 191 -- libs3/socketio/packet.py | 179 -- libs3/socketio/pubsub_manager.py | 154 -- libs3/socketio/redis_manager.py | 115 - libs3/socketio/server.py | 730 ------ libs3/socketio/tornado.py | 11 - libs3/socketio/zmq_manager.py | 111 - libs3/yaml/__init__.py | 402 ---- libs3/yaml/composer.py | 139 -- libs3/yaml/constructor.py | 720 ------ libs3/yaml/cyaml.py | 101 - libs3/yaml/dumper.py | 62 - libs3/yaml/emitter.py | 1137 --------- libs3/yaml/error.py | 75 - libs3/yaml/events.py | 86 - libs3/yaml/loader.py | 63 - libs3/yaml/nodes.py | 49 - libs3/yaml/parser.py | 589 ----- libs3/yaml/reader.py | 185 -- libs3/yaml/representer.py | 389 --- libs3/yaml/resolver.py | 227 -- libs3/yaml/scanner.py | 1435 ----------- libs3/yaml/serializer.py | 111 - libs3/yaml/tokens.py | 104 - views/episodes.html | 2 +- 139 files changed, 12 insertions(+), 47314 deletions(-) delete mode 100644 libs2/bs4/AUTHORS.txt delete mode 100644 libs2/bs4/COPYING.txt delete mode 100644 libs2/bs4/NEWS.txt delete mode 100644 libs2/bs4/README.txt delete mode 100644 libs2/bs4/TODO.txt delete mode 100644 libs2/bs4/__init__.py delete mode 100644 libs2/bs4/builder/__init__.py delete mode 100644 libs2/bs4/builder/_html5lib.py delete mode 100644 libs2/bs4/builder/_htmlparser.py delete mode 100644 libs2/bs4/builder/_lxml.py delete mode 100644 libs2/bs4/dammit.py delete mode 100644 libs2/bs4/diagnose.py delete mode 100644 libs2/bs4/element.py delete mode 100644 libs2/bs4/formatter.py delete mode 100644 libs2/bs4/testing.py delete mode 100644 libs2/bs4/tests/__init__.py delete mode 100644 libs2/bs4/tests/test_builder_registry.py delete mode 100644 libs2/bs4/tests/test_docs.py delete mode 100644 libs2/bs4/tests/test_html5lib.py delete mode 100644 libs2/bs4/tests/test_htmlparser.py delete mode 100644 libs2/bs4/tests/test_lxml.py delete mode 100644 libs2/bs4/tests/test_soup.py delete mode 100644 libs2/bs4/tests/test_tree.py delete mode 100644 libs2/concurrent/__init__.py delete mode 100644 libs2/concurrent/futures/__init__.py delete mode 100644 libs2/concurrent/futures/_base.py delete mode 100644 libs2/concurrent/futures/process.py delete mode 100644 libs2/concurrent/futures/thread.py delete mode 100644 libs2/enum/LICENSE delete mode 100644 libs2/enum/README delete mode 100644 libs2/enum/__init__.py delete mode 100644 libs2/enum/doc/enum.pdf delete mode 100644 libs2/enum/doc/enum.rst delete mode 100644 libs2/enum/test.py delete mode 100644 libs2/yaml/__init__.py delete mode 100644 libs2/yaml/composer.py delete mode 100644 libs2/yaml/constructor.py delete mode 100644 libs2/yaml/cyaml.py delete mode 100644 libs2/yaml/dumper.py delete mode 100644 libs2/yaml/emitter.py delete mode 100644 libs2/yaml/error.py delete mode 100644 libs2/yaml/events.py delete mode 100644 libs2/yaml/loader.py delete mode 100644 libs2/yaml/nodes.py delete mode 100644 libs2/yaml/parser.py delete mode 100644 libs2/yaml/reader.py delete mode 100644 libs2/yaml/representer.py delete mode 100644 libs2/yaml/resolver.py delete mode 100644 libs2/yaml/scanner.py delete mode 100644 libs2/yaml/serializer.py delete mode 100644 libs2/yaml/tokens.py delete mode 100644 libs3/bs4/__init__.py delete mode 100644 libs3/bs4/builder/__init__.py delete mode 100644 libs3/bs4/builder/_html5lib.py delete mode 100644 libs3/bs4/builder/_htmlparser.py delete mode 100644 libs3/bs4/builder/_lxml.py delete mode 100644 libs3/bs4/dammit.py delete mode 100644 libs3/bs4/diagnose.py delete mode 100644 libs3/bs4/element.py delete mode 100644 libs3/bs4/formatter.py delete mode 100644 libs3/bs4/testing.py delete mode 100644 libs3/bs4/tests/__init__.py delete mode 100644 libs3/bs4/tests/test_builder_registry.py delete mode 100644 libs3/bs4/tests/test_docs.py delete mode 100644 libs3/bs4/tests/test_html5lib.py delete mode 100644 libs3/bs4/tests/test_htmlparser.py delete mode 100644 libs3/bs4/tests/test_lxml.py delete mode 100644 libs3/bs4/tests/test_soup.py delete mode 100644 libs3/bs4/tests/test_tree.py delete mode 100644 libs3/engineio/__init__.py delete mode 100644 libs3/engineio/async_drivers/__init__.py delete mode 100644 libs3/engineio/async_drivers/aiohttp.py delete mode 100644 libs3/engineio/async_drivers/asgi.py delete mode 100644 libs3/engineio/async_drivers/eventlet.py delete mode 100644 libs3/engineio/async_drivers/gevent.py delete mode 100644 libs3/engineio/async_drivers/gevent_uwsgi.py delete mode 100644 libs3/engineio/async_drivers/sanic.py delete mode 100644 libs3/engineio/async_drivers/threading.py delete mode 100644 libs3/engineio/async_drivers/tornado.py delete mode 100644 libs3/engineio/asyncio_client.py delete mode 100644 libs3/engineio/asyncio_server.py delete mode 100644 libs3/engineio/asyncio_socket.py delete mode 100644 libs3/engineio/client.py delete mode 100644 libs3/engineio/exceptions.py delete mode 100644 libs3/engineio/middleware.py delete mode 100644 libs3/engineio/packet.py delete mode 100644 libs3/engineio/payload.py delete mode 100644 libs3/engineio/server.py delete mode 100644 libs3/engineio/socket.py delete mode 100644 libs3/engineio/static_files.py delete mode 100644 libs3/flask_socketio/__init__.py delete mode 100644 libs3/flask_socketio/namespace.py delete mode 100644 libs3/flask_socketio/test_client.py delete mode 100644 libs3/socketio/__init__.py delete mode 100644 libs3/socketio/asgi.py delete mode 100644 libs3/socketio/asyncio_aiopika_manager.py delete mode 100644 libs3/socketio/asyncio_client.py delete mode 100644 libs3/socketio/asyncio_manager.py delete mode 100644 libs3/socketio/asyncio_namespace.py delete mode 100644 libs3/socketio/asyncio_pubsub_manager.py delete mode 100644 libs3/socketio/asyncio_redis_manager.py delete mode 100644 libs3/socketio/asyncio_server.py delete mode 100644 libs3/socketio/base_manager.py delete mode 100644 libs3/socketio/client.py delete mode 100644 libs3/socketio/exceptions.py delete mode 100644 libs3/socketio/kafka_manager.py delete mode 100644 libs3/socketio/kombu_manager.py delete mode 100644 libs3/socketio/middleware.py delete mode 100644 libs3/socketio/namespace.py delete mode 100644 libs3/socketio/packet.py delete mode 100644 libs3/socketio/pubsub_manager.py delete mode 100644 libs3/socketio/redis_manager.py delete mode 100644 libs3/socketio/server.py delete mode 100644 libs3/socketio/tornado.py delete mode 100644 libs3/socketio/zmq_manager.py delete mode 100644 libs3/yaml/__init__.py delete mode 100644 libs3/yaml/composer.py delete mode 100644 libs3/yaml/constructor.py delete mode 100644 libs3/yaml/cyaml.py delete mode 100644 libs3/yaml/dumper.py delete mode 100644 libs3/yaml/emitter.py delete mode 100644 libs3/yaml/error.py delete mode 100644 libs3/yaml/events.py delete mode 100644 libs3/yaml/loader.py delete mode 100644 libs3/yaml/nodes.py delete mode 100644 libs3/yaml/parser.py delete mode 100644 libs3/yaml/reader.py delete mode 100644 libs3/yaml/representer.py delete mode 100644 libs3/yaml/resolver.py delete mode 100644 libs3/yaml/scanner.py delete mode 100644 libs3/yaml/serializer.py delete mode 100644 libs3/yaml/tokens.py diff --git a/bazarr/config.py b/bazarr/config.py index 6028bc56c..5eaacc92d 100644 --- a/bazarr/config.py +++ b/bazarr/config.py @@ -5,7 +5,6 @@ import os from simpleconfigparser import simpleconfigparser from get_args import args -from six import PY3 defaults = { 'general': { @@ -140,10 +139,7 @@ defaults = { } } -if PY3: - settings = simpleconfigparser(defaults=defaults, interpolation=None) -else: - settings = simpleconfigparser(defaults=defaults) +settings = simpleconfigparser(defaults=defaults, interpolation=None) settings.read(os.path.join(args.config_dir, 'config', 'config.ini')) base_url = settings.general.base_url diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py index 698afd853..ea5e31495 100644 --- a/bazarr/get_subtitle.py +++ b/bazarr/get_subtitle.py @@ -1126,28 +1126,17 @@ def postprocessing(command, path): try: encoding = getpreferredencoding() if os.name == 'nt': - if six.PY3: - codepage = subprocess.Popen("chcp", shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, encoding=getpreferredencoding()) - else: - codepage = subprocess.Popen("chcp", shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + codepage = subprocess.Popen("chcp", shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding=getpreferredencoding()) # wait for the process to terminate out_codepage, err_codepage = codepage.communicate() encoding = out_codepage.split(':')[-1].strip() - if six.PY3: - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, encoding=encoding) - else: - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding=encoding) # wait for the process to terminate out, err = process.communicate() - if six.PY2: - out = out.decode(encoding) - out = out.replace('\n', ' ').replace('\r', ' ') except Exception as e: diff --git a/bazarr/libs.py b/bazarr/libs.py index 078fbec3e..a3450e315 100644 --- a/bazarr/libs.py +++ b/bazarr/libs.py @@ -18,11 +18,6 @@ def clean_libs(): def set_libs(): sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs/')) - from six import PY3 - if PY3: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs3/')) - else: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs2/')) clean_libs() diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 34641d1e0..f7cf3b0e6 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -379,17 +379,11 @@ def guess_external_subtitles(dest_folder, subtitles): continue detected_language = None - if six.PY3: - with open(subtitle_path, 'r', errors='ignore') as f: - text = f.read() - else: - with open(subtitle_path, 'r') as f: - text = f.read() + with open(subtitle_path, 'r', errors='ignore') as f: + text = f.read() try: encoding = UnicodeDammit(text) - if six.PY2: - text = text.decode(encoding.original_encoding) detected_language = langdetect.detect(text) except Exception as e: logging.exception('BAZARR Error trying to detect language for this subtitles file: ' + diff --git a/bazarr/logger.py b/bazarr/logger.py index 1bf8a6476..49af82261 100644 --- a/bazarr/logger.py +++ b/bazarr/logger.py @@ -42,8 +42,7 @@ class NoExceptionFormatter(logging.Formatter): def configure_logging(debug=False): - if six.PY3: - warnings.simplefilter('ignore', category=ResourceWarning) + warnings.simplefilter('ignore', category=ResourceWarning) if not debug: log_level = "INFO" diff --git a/bazarr/main.py b/bazarr/main.py index f4ab85977..e850eaff4 100644 --- a/bazarr/main.py +++ b/bazarr/main.py @@ -1,6 +1,6 @@ # coding=utf-8 -bazarr_version = '0.8.4.1' +bazarr_version = '0.9' import os os.environ["SZ_USER_AGENT"] = "Bazarr/1" @@ -41,7 +41,7 @@ from notifier import update_notifier from cherrypy.wsgiserver import CherryPyWSGIServer from io import BytesIO -from six import text_type, PY2 +from six import text_type from datetime import timedelta from get_languages import load_language_in_db, language_from_alpha3, language_from_alpha2, alpha2_from_alpha3 from flask import make_response, request, redirect, abort, render_template, Response, session, flash, url_for, \ @@ -1647,8 +1647,7 @@ def movie_history(no): # Mute DeprecationWarning warnings.simplefilter("ignore", DeprecationWarning) -if six.PY3: - warnings.simplefilter("ignore", BrokenPipeError) +warnings.simplefilter("ignore", BrokenPipeError) if args.dev: server = app.run( host=str(settings.general.ip), port=(int(args.port) if args.port else int(settings.general.port))) diff --git a/libs2/bs4/AUTHORS.txt b/libs2/bs4/AUTHORS.txt deleted file mode 100644 index 2ac8fcc8c..000000000 --- a/libs2/bs4/AUTHORS.txt +++ /dev/null @@ -1,43 +0,0 @@ -Behold, mortal, the origins of Beautiful Soup... -================================================ - -Leonard Richardson is the primary programmer. - -Aaron DeVore is awesome. - -Mark Pilgrim provided the encoding detection code that forms the base -of UnicodeDammit. - -Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful -Soup 4 working under Python 3. - -Simon Willison wrote soupselect, which was used to make Beautiful Soup -support CSS selectors. - -Sam Ruby helped with a lot of edge cases. - -Jonathan Ellis was awarded the prestigous Beau Potage D'Or for his -work in solving the nestable tags conundrum. - -An incomplete list of people have contributed patches to Beautiful -Soup: - - Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang, - Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris - Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren, - Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed - Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko - Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn - Webster, Paul Wright, Danny Yoo - -An incomplete list of people who made suggestions or found bugs or -found ways to break Beautiful Soup: - - Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel, - Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes, - Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams, - warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison, - Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed - Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart - Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de - Sousa Rocha, Yichun Wei, Per Vognsen diff --git a/libs2/bs4/COPYING.txt b/libs2/bs4/COPYING.txt deleted file mode 100644 index b91188869..000000000 --- a/libs2/bs4/COPYING.txt +++ /dev/null @@ -1,27 +0,0 @@ -Beautiful Soup is made available under the MIT license: - - Copyright (c) 2004-2015 Leonard Richardson - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - -Beautiful Soup incorporates code from the html5lib library, which is -also made available under the MIT license. Copyright (c) 2006-2013 -James Graham and other contributors diff --git a/libs2/bs4/NEWS.txt b/libs2/bs4/NEWS.txt deleted file mode 100644 index 3726c570a..000000000 --- a/libs2/bs4/NEWS.txt +++ /dev/null @@ -1,1190 +0,0 @@ -= 4.4.1 (20150928) = - -* Fixed a bug that deranged the tree when part of it was - removed. Thanks to Eric Weiser for the patch and John Wiseman for a - test. [bug=1481520] - -* Fixed a parse bug with the html5lib tree-builder. Thanks to Roel - Kramer for the patch. [bug=1483781] - -* Improved the implementation of CSS selector grouping. Thanks to - Orangain for the patch. [bug=1484543] - -* Fixed the test_detect_utf8 test so that it works when chardet is - installed. [bug=1471359] - -* Corrected the output of Declaration objects. [bug=1477847] - - -= 4.4.0 (20150703) = - -Especially important changes: - -* Added a warning when you instantiate a BeautifulSoup object without - explicitly naming a parser. [bug=1398866] - -* __repr__ now returns an ASCII bytestring in Python 2, and a Unicode - string in Python 3, instead of a UTF8-encoded bytestring in both - versions. In Python 3, __str__ now returns a Unicode string instead - of a bytestring. [bug=1420131] - -* The `text` argument to the find_* methods is now called `string`, - which is more accurate. `text` still works, but `string` is the - argument described in the documentation. `text` may eventually - change its meaning, but not for a very long time. [bug=1366856] - -* Changed the way soup objects work under copy.copy(). Copying a - NavigableString or a Tag will give you a new NavigableString that's - equal to the old one but not connected to the parse tree. Patch by - Martijn Peters. [bug=1307490] - -* Started using a standard MIT license. [bug=1294662] - -* Added a Chinese translation of the documentation by Delong .w. - -New features: - -* Introduced the select_one() method, which uses a CSS selector but - only returns the first match, instead of a list of - matches. [bug=1349367] - -* You can now create a Tag object without specifying a - TreeBuilder. Patch by Martijn Pieters. [bug=1307471] - -* You can now create a NavigableString or a subclass just by invoking - the constructor. [bug=1294315] - -* Added an `exclude_encodings` argument to UnicodeDammit and to the - Beautiful Soup constructor, which lets you prohibit the detection of - an encoding that you know is wrong. [bug=1469408] - -* The select() method now supports selector grouping. Patch by - Francisco Canas [bug=1191917] - -Bug fixes: - -* Fixed yet another problem that caused the html5lib tree builder to - create a disconnected parse tree. [bug=1237763] - -* Force object_was_parsed() to keep the tree intact even when an element - from later in the document is moved into place. [bug=1430633] - -* Fixed yet another bug that caused a disconnected tree when html5lib - copied an element from one part of the tree to another. [bug=1270611] - -* Fixed a bug where Element.extract() could create an infinite loop in - the remaining tree. - -* The select() method can now find tags whose names contain - dashes. Patch by Francisco Canas. [bug=1276211] - -* The select() method can now find tags with attributes whose names - contain dashes. Patch by Marek Kapolka. [bug=1304007] - -* Improved the lxml tree builder's handling of processing - instructions. [bug=1294645] - -* Restored the helpful syntax error that happens when you try to - import the Python 2 edition of Beautiful Soup under Python - 3. [bug=1213387] - -* In Python 3.4 and above, set the new convert_charrefs argument to - the html.parser constructor to avoid a warning and future - failures. Patch by Stefano Revera. [bug=1375721] - -* The warning when you pass in a filename or URL as markup will now be - displayed correctly even if the filename or URL is a Unicode - string. [bug=1268888] - -* If the initial tag contains a CDATA list attribute such as - 'class', the html5lib tree builder will now turn its value into a - list, as it would with any other tag. [bug=1296481] - -* Fixed an import error in Python 3.5 caused by the removal of the - HTMLParseError class. [bug=1420063] - -* Improved docstring for encode_contents() and - decode_contents(). [bug=1441543] - -* Fixed a crash in Unicode, Dammit's encoding detector when the name - of the encoding itself contained invalid bytes. [bug=1360913] - -* Improved the exception raised when you call .unwrap() or - .replace_with() on an element that's not attached to a tree. - -* Raise a NotImplementedError whenever an unsupported CSS pseudoclass - is used in select(). Previously some cases did not result in a - NotImplementedError. - -* It's now possible to pickle a BeautifulSoup object no matter which - tree builder was used to create it. However, the only tree builder - that survives the pickling process is the HTMLParserTreeBuilder - ('html.parser'). If you unpickle a BeautifulSoup object created with - some other tree builder, soup.builder will be None. [bug=1231545] - -= 4.3.2 (20131002) = - -* Fixed a bug in which short Unicode input was improperly encoded to - ASCII when checking whether or not it was the name of a file on - disk. [bug=1227016] - -* Fixed a crash when a short input contains data not valid in - filenames. [bug=1232604] - -* Fixed a bug that caused Unicode data put into UnicodeDammit to - return None instead of the original data. [bug=1214983] - -* Combined two tests to stop a spurious test failure when tests are - run by nosetests. [bug=1212445] - -= 4.3.1 (20130815) = - -* Fixed yet another problem with the html5lib tree builder, caused by - html5lib's tendency to rearrange the tree during - parsing. [bug=1189267] - -* Fixed a bug that caused the optimized version of find_all() to - return nothing. [bug=1212655] - -= 4.3.0 (20130812) = - -* Instead of converting incoming data to Unicode and feeding it to the - lxml tree builder in chunks, Beautiful Soup now makes successive - guesses at the encoding of the incoming data, and tells lxml to - parse the data as that encoding. Giving lxml more control over the - parsing process improves performance and avoids a number of bugs and - issues with the lxml parser which had previously required elaborate - workarounds: - - - An issue in which lxml refuses to parse Unicode strings on some - systems. [bug=1180527] - - - A returning bug that truncated documents longer than a (very - small) size. [bug=963880] - - - A returning bug in which extra spaces were added to a document if - the document defined a charset other than UTF-8. [bug=972466] - - This required a major overhaul of the tree builder architecture. If - you wrote your own tree builder and didn't tell me, you'll need to - modify your prepare_markup() method. - -* The UnicodeDammit code that makes guesses at encodings has been - split into its own class, EncodingDetector. A lot of apparently - redundant code has been removed from Unicode, Dammit, and some - undocumented features have also been removed. - -* Beautiful Soup will issue a warning if instead of markup you pass it - a URL or the name of a file on disk (a common beginner's mistake). - -* A number of optimizations improve the performance of the lxml tree - builder by about 33%, the html.parser tree builder by about 20%, and - the html5lib tree builder by about 15%. - -* All find_all calls should now return a ResultSet object. Patch by - Aaron DeVore. [bug=1194034] - -= 4.2.1 (20130531) = - -* The default XML formatter will now replace ampersands even if they - appear to be part of entities. That is, "<" will become - "<". The old code was left over from Beautiful Soup 3, which - didn't always turn entities into Unicode characters. - - If you really want the old behavior (maybe because you add new - strings to the tree, those strings include entities, and you want - the formatter to leave them alone on output), it can be found in - EntitySubstitution.substitute_xml_containing_entities(). [bug=1182183] - -* Gave new_string() the ability to create subclasses of - NavigableString. [bug=1181986] - -* Fixed another bug by which the html5lib tree builder could create a - disconnected tree. [bug=1182089] - -* The .previous_element of a BeautifulSoup object is now always None, - not the last element to be parsed. [bug=1182089] - -* Fixed test failures when lxml is not installed. [bug=1181589] - -* html5lib now supports Python 3. Fixed some Python 2-specific - code in the html5lib test suite. [bug=1181624] - -* The html.parser treebuilder can now handle numeric attributes in - text when the hexidecimal name of the attribute starts with a - capital X. Patch by Tim Shirley. [bug=1186242] - -= 4.2.0 (20130514) = - -* The Tag.select() method now supports a much wider variety of CSS - selectors. - - - Added support for the adjacent sibling combinator (+) and the - general sibling combinator (~). Tests by "liquider". [bug=1082144] - - - The combinators (>, +, and ~) can now combine with any supported - selector, not just one that selects based on tag name. - - - Added limited support for the "nth-of-type" pseudo-class. Code - by Sven Slootweg. [bug=1109952] - -* The BeautifulSoup class is now aliased to "_s" and "_soup", making - it quicker to type the import statement in an interactive session: - - from bs4 import _s - or - from bs4 import _soup - - The alias may change in the future, so don't use this in code you're - going to run more than once. - -* Added the 'diagnose' submodule, which includes several useful - functions for reporting problems and doing tech support. - - - diagnose(data) tries the given markup on every installed parser, - reporting exceptions and displaying successes. If a parser is not - installed, diagnose() mentions this fact. - - - lxml_trace(data, html=True) runs the given markup through lxml's - XML parser or HTML parser, and prints out the parser events as - they happen. This helps you quickly determine whether a given - problem occurs in lxml code or Beautiful Soup code. - - - htmlparser_trace(data) is the same thing, but for Python's - built-in HTMLParser class. - -* In an HTML document, the contents of a -
-Hello, world! - - -''' - soup = self.soup(html) - self.assertEqual("text/javascript", soup.find('script')['type']) - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "foobaz
" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - # The comment is properly integrated into the tree. - foo = soup.find(text="foo") - self.assertEqual(comment, foo.next_element) - baz = soup.find(text="baz") - self.assertEqual(comment, baz.previous_element) - - def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved inand