Coverage for sources/detextive/decoders.py: 100%

30 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 18:02 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Conversion of bytes arrays to Unicode text. ''' 

22 

23 

24from . import __ 

25from . import charsets as _charsets 

26from . import core as _core 

27from . import exceptions as _exceptions 

28from . import inference as _inference 

29from . import mimetypes as _mimetypes 

30from . import nomina as _nomina 

31from . import validation as _validation 

32 

33from .core import ( # isort: skip 

34 BEHAVIORS_DEFAULT as _BEHAVIORS_DEFAULT, 

35 CHARSET_DEFAULT as _CHARSET_DEFAULT, 

36 MIMETYPE_DEFAULT as _MIMETYPE_DEFAULT, 

37 BehaviorTristate as _BehaviorTristate, 

38 BehaviorsArgument as _BehaviorsArgument, 

39 CharsetResult as _CharsetResult, 

40) 

41 

42 

def decode( # noqa: PLR0913
    content: _nomina.Content, /, *,
    behaviors: _BehaviorsArgument = _BEHAVIORS_DEFAULT,
    profile: _validation.ProfileArgument = _validation.PROFILE_TEXTUAL,
    charset_default: _nomina.CharsetDefaultArgument = _CHARSET_DEFAULT,
    mimetype_default: _nomina.MimetypeDefaultArgument = _MIMETYPE_DEFAULT,
    http_content_type: _nomina.HttpContentTypeArgument = __.absent,
    location: _nomina.LocationArgument = __.absent,
    charset_supplement: _nomina.CharsetSupplementArgument = __.absent,
    mimetype_supplement: _nomina.MimetypeSupplementArgument = __.absent,
) -> str:
    ''' Decodes bytes array to Unicode text.

        Empty content short-circuits to the empty string. Otherwise, the
        MIME type and charset are inferred from the content and the
        supplied hints, and the content is decoded via
        ``_charsets.attempt_decodes``.

        If inference raises ``Omnierror``, the charset falls back to
        ``charset_supplement`` when supplied, else to ``'utf-8-sig'``.
        If inference succeeds but yields no charset,
        ``ContentDecodeImpossibility`` is raised when the inferred MIME
        type is not textual; otherwise ``'utf-8-sig'`` is attempted.

        Depending on ``behaviors.text_validate``, the decoded text is
        checked with ``profile``: always, never, or only when the decode
        confidence is below ``behaviors.text_validate_confidence``.
        ``TextInvalidity`` is raised if the check fails.
    '''
    if content == b'':
        return ''
    # Inference runs with trial decoding switched off; the decode attempt
    # further below is made with the caller's original behaviors.
    inference_behaviors = __.dcls.replace(
        behaviors, trial_decode = _BehaviorTristate.Never )
    try:
        mimetype_result, charset_result = (
            _inference.infer_mimetype_charset_confidence(
                content,
                behaviors = inference_behaviors,
                charset_default = charset_default,
                mimetype_default = mimetype_default,
                http_content_type = http_content_type,
                charset_supplement = charset_supplement,
                mimetype_supplement = mimetype_supplement,
                location = location ) )
    except _exceptions.Omnierror:
        # Inference failed: synthesize a charset result from the
        # supplement (if any) so that a decode can still be attempted.
        if __.is_absent( charset_supplement ):
            fallback_charset = 'utf-8-sig'
        else:
            fallback_charset = charset_supplement
        fallback_confidence = _core.confidence_from_bytes_quantity(
            content, behaviors )
        charset_result = _CharsetResult(
            charset = fallback_charset, confidence = fallback_confidence )
    else:
        # Kept outside the ``try`` body so that this error is never
        # intercepted by the ``Omnierror`` handler above.
        if charset_result.charset is None:
            if not _mimetypes.is_textual_mimetype( mimetype_result.mimetype ):
                raise _exceptions.ContentDecodeImpossibility(
                    location = location )
    inferred_charset = charset_result.charset
    if inferred_charset is None:
        inferred_charset = 'utf-8-sig'
    text, decode_result = _charsets.attempt_decodes(
        content,
        behaviors = behaviors,
        inference = inferred_charset,
        supplement = charset_supplement,
        location = location )
    validation_mode = behaviors.text_validate
    if validation_mode == _BehaviorTristate.Always:
        should_validate = True
    elif validation_mode == _BehaviorTristate.AsNeeded:
        should_validate = (
            decode_result.confidence < behaviors.text_validate_confidence )
    else:
        should_validate = False
    if should_validate and not profile( text ):
        raise _exceptions.TextInvalidity( location = location )
    return text