mirror of https://github.com/sgoudham/Enso-Bot.git
67 lines
2.3 KiB
Python
67 lines
2.3 KiB
Python
# ------------------------------------------------------------------------------
|
|
# The MIT License (MIT)
|
|
#
|
|
# Copyright (c) 2014-2019 Digital Sapphire
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
# ------------------------------------------------------------------------------
|
|
import chardet
|
|
import logging
|
|
|
|
import six
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Python 2 and 3 compatibility helper: outside Python 2 there is no
# ``unicode`` builtin, so bind the name to ``str`` for the code below.
if not six.PY2:
    unicode = str
|
|
|
|
|
|
def make_compat_str(in_str):
    """
    Tries to guess the encoding of [str/bytes] and decode it into
    a unicode object.

    :param in_str: ``bytes``, ``str`` or ``unicode`` value to convert.
    :return: the text as a ``unicode`` object (``str`` on Python 3).
        Input that is already text is returned unchanged, and empty
        input yields an empty unicode string. When chardet cannot guess
        an encoding, the bytes are decoded as UTF-8 with undecodable
        sequences replaced.
    :raises AssertionError: if ``in_str`` is not a bytes/str/unicode
        object (only while assertions are enabled, i.e. not under ``-O``).
    :raises UnicodeDecodeError: if the bytes are not valid in the
        encoding that chardet guessed.
    """
    # Kept as an assert (rather than a raise) so the exception type seen
    # by existing callers does not change.
    assert isinstance(in_str, (bytes, str, unicode))
    if not in_str:
        return unicode()

    # Chardet in Py2 works on str + bytes objects; unicode is already text
    if six.PY2 and isinstance(in_str, unicode):
        return in_str

    # Chardet in Py3 works on bytes objects; str is already text
    if not six.PY2 and not isinstance(in_str, bytes):
        return in_str

    # Detect the encoding now
    encoding = chardet.detect(in_str)['encoding']

    if encoding is None:
        # chardet could not make a guess. Passing None to decode() would
        # raise TypeError, so fall back to a lossy UTF-8 decode instead.
        log.debug("chardet could not detect an encoding; "
                  "falling back to UTF-8 with replacement")
        return in_str.decode('utf-8', 'replace')

    # Decode the object into a unicode object
    out_str = in_str.decode(encoding)

    # Cleanup: the endian-specific UTF-16 codecs do not consume a leading
    # byte order mark (BOM), so it survives as the first character.
    if encoding.upper() in ('UTF-16BE', 'UTF-16LE'):
        # The u'' prefix matters on Python 2, where '\ufeff' in a plain
        # str literal is six literal characters rather than the BOM.
        if out_str.startswith(u'\ufeff'):
            out_str = out_str[1:]

    # Return the decoded string
    return out_str
|