You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Enso-Bot/venv/Lib/site-packages/dsdev_utils/compat.py

67 lines
2.3 KiB
Python

5 years ago
# ------------------------------------------------------------------------------
# The MIT License (MIT)
#
# Copyright (c) 2014-2019 Digital Sapphire
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ------------------------------------------------------------------------------
import chardet
import logging
import six
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Python 2/3 compatibility shim: when not running on Python 2, bind the
# name ``unicode`` to ``str`` so the code below can refer to the text type
# by a single name on either major version.
if not six.PY2:
    unicode = str
def make_compat_str(in_str):
    """
    Tries to guess the encoding of [str/bytes] and decode it into
    a unicode object.

    Text (unicode) input is returned unchanged and empty input yields an
    empty unicode string. Byte input is decoded with the encoding that
    chardet reports; if chardet cannot name an encoding, the bytes are
    decoded as UTF-8 with undecodable bytes replaced.

    :param in_str: bytes, str or unicode value to convert.
    :return: unicode object with the decoded text.
    :raises AssertionError: if ``in_str`` is not bytes, str or unicode.
    """
    # Kept as an assert so callers keep seeing the same exception type.
    assert isinstance(in_str, (bytes, str, unicode))
    if not in_str:
        return unicode()

    # Chardet in Py2 works on str + bytes objects
    if six.PY2 and isinstance(in_str, unicode):
        return in_str

    # Chardet in Py3 works on bytes objects
    if not six.PY2 and not isinstance(in_str, bytes):
        return in_str

    # Detect the encoding now
    encoding = chardet.detect(in_str)['encoding']

    if encoding is None:
        # chardet reports None when it cannot make a guess; passing that
        # to decode() would raise TypeError, so fall back to a lossy but
        # always-successful UTF-8 decode.
        return in_str.decode('utf-8', 'replace')

    # Decode the object into a unicode object
    out_str = in_str.decode(encoding)

    # Cleanup: the endian-specific UTF-16/UTF-32 codecs (LE as well as BE)
    # do not consume a leading byte order mark (BOM), so drop it here.
    # The u'' prefix keeps the literal a real U+FEFF on Python 2 as well.
    if encoding.upper().startswith(('UTF-16', 'UTF-32')):
        if out_str.startswith(u'\ufeff'):
            out_str = out_str[1:]

    # Return the decoded string
    return out_str