Coverage for sources/detextive/decoders.py: 100%
30 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 18:02 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 18:02 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Conversion of bytes arrays to Unicode text. '''
24from . import __
25from . import charsets as _charsets
26from . import core as _core
27from . import exceptions as _exceptions
28from . import inference as _inference
29from . import mimetypes as _mimetypes
30from . import nomina as _nomina
31from . import validation as _validation
33from .core import ( # isort: skip
34 BEHAVIORS_DEFAULT as _BEHAVIORS_DEFAULT,
35 CHARSET_DEFAULT as _CHARSET_DEFAULT,
36 MIMETYPE_DEFAULT as _MIMETYPE_DEFAULT,
37 BehaviorTristate as _BehaviorTristate,
38 BehaviorsArgument as _BehaviorsArgument,
39 CharsetResult as _CharsetResult,
40)
def decode( # noqa: PLR0913
    content: _nomina.Content, /, *,
    behaviors: _BehaviorsArgument = _BEHAVIORS_DEFAULT,
    profile: _validation.ProfileArgument = _validation.PROFILE_TEXTUAL,
    charset_default: _nomina.CharsetDefaultArgument = _CHARSET_DEFAULT,
    mimetype_default: _nomina.MimetypeDefaultArgument = _MIMETYPE_DEFAULT,
    http_content_type: _nomina.HttpContentTypeArgument = __.absent,
    location: _nomina.LocationArgument = __.absent,
    charset_supplement: _nomina.CharsetSupplementArgument = __.absent,
    mimetype_supplement: _nomina.MimetypeSupplementArgument = __.absent,
) -> str:
    ''' Decodes bytes array to Unicode text.

        Empty content yields the empty string without any inference.

        Otherwise, MIME type and charset are inferred with a variant of
        ``behaviors`` whose ``trial_decode`` is set to ``Never``. If the
        inference raises ``Omnierror``, the charset falls back to
        ``charset_supplement`` when supplied, else to ``'utf-8-sig'``.
        The content is then decoded via ``attempt_decodes`` using the
        caller's original ``behaviors``.

        The decoded text is checked against ``profile`` when
        ``behaviors.text_validate`` is ``Always``, or when it is
        ``AsNeeded`` and the decode confidence is below
        ``behaviors.text_validate_confidence``.

        Raises ``ContentDecodeImpossibility`` if inference succeeds but
        produces no charset together with a non-textual MIME type.

        Raises ``TextInvalidity`` if validation is performed and
        ``profile`` rejects the text.
    '''
    if content == b'':
        return ''
    utf8_fallback = 'utf-8-sig'
    # Inference must not attempt trial decodes; decoding happens below.
    inference_behaviors = __.dcls.replace(
        behaviors, trial_decode = _BehaviorTristate.Never )
    try:
        mimetype_result, charset_result = (
            _inference.infer_mimetype_charset_confidence(
                content,
                behaviors = inference_behaviors,
                charset_default = charset_default,
                mimetype_default = mimetype_default,
                http_content_type = http_content_type,
                charset_supplement = charset_supplement,
                mimetype_supplement = mimetype_supplement,
                location = location ) )
    except _exceptions.Omnierror:
        # Inference failed: synthesize a result from the supplement, if
        # any, so that a decode can still be attempted.
        if __.is_absent( charset_supplement ):
            charset_fallback = utf8_fallback
        else:
            charset_fallback = charset_supplement
        charset_result = _CharsetResult(
            charset = charset_fallback,
            confidence = _core.confidence_from_bytes_quantity(
                content, behaviors ) )
    else:
        if charset_result.charset is None:
            # No charset and a non-textual MIME type: nothing to decode.
            if not _mimetypes.is_textual_mimetype(
                mimetype_result.mimetype
            ):
                raise _exceptions.ContentDecodeImpossibility(
                    location = location )
    charset_inferred = charset_result.charset
    if charset_inferred is None:
        charset_inferred = utf8_fallback
    text, result = _charsets.attempt_decodes(
        content,
        behaviors = behaviors,
        inference = charset_inferred,
        supplement = charset_supplement,
        location = location )
    validation_mode = behaviors.text_validate
    if validation_mode == _BehaviorTristate.Always:
        validation_needed = True
    elif validation_mode == _BehaviorTristate.AsNeeded:
        validation_needed = (
            result.confidence < behaviors.text_validate_confidence )
    else:
        # Covers ``Never`` and any unrecognized mode: no validation.
        validation_needed = False
    if validation_needed:
        if not profile( text ):
            raise _exceptions.TextInvalidity( location = location )
    return text