Coverage for sources/detextive/lineseparators.py: 100%

1# vim: set filetype=python fileencoding=utf-8:

2# -*- coding: utf-8 -*-

4#============================================================================#

5# #

6# Licensed under the Apache License, Version 2.0 (the "License"); #

7# you may not use this file except in compliance with the License. #

8# You may obtain a copy of the License at #

9# #

10# http://www.apache.org/licenses/LICENSE-2.0 #

11# #

12# Unless required by applicable law or agreed to in writing, software #

13# distributed under the License is distributed on an "AS IS" BASIS, #

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #

15# See the License for the specific language governing permissions and #

16# limitations under the License. #

17# #

18#============================================================================#

21''' Line separator enumeration and utilities. '''

24from . import __

class LineSeparators( __.enum.Enum ):
    ''' Line separators for cross-platform text processing.

        Each member's value is the literal separator string. Detection
        reports the first separator encountered in a sample; conversion
        methods translate between a member's separator and Unix LF.
    '''

    CR = '\r'       # Classic MacOS (0xD)
    CRLF = '\r\n'   # DOS/Windows (0xD 0xA)
    LF = '\n'       # Unix/Linux (0xA)

    @classmethod
    def detect_bytes(
        selfclass,
        content: __.cabc.Sequence[ int ] | bytes,
        limit: int = 1024,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from byte content sample.

            Inspects at most ``limit`` leading bytes. If the sample ends
            on a carriage return, the single byte after the sample (when
            present) is consulted to distinguish CR from CRLF.

            Returns detected LineSeparators enum member or None.
        '''
        return selfclass._detect( content, limit, 0xd, 0xa )

    @classmethod
    def detect_text(
        selfclass, text: str, limit: int = 1024
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from text (Unicode string).

            Inspects at most ``limit`` leading characters. If the sample
            ends on a carriage return, the single character after the
            sample (when present) is consulted to distinguish CR from
            CRLF.

            Returns detected LineSeparators enum member or None.
        '''
        return selfclass._detect( text, limit, '\r', '\n' )

    @classmethod
    def _detect(
        selfclass,
        content: __.cabc.Sequence[ object ],
        limit: int,
        cr: object,
        lf: object,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Finds first separator, given the CR and LF element values. '''
        sample = content[ : limit ]
        pending_cr = False
        for element in sample:
            if element == lf:
                return selfclass.CRLF if pending_cr else selfclass.LF
            # Anything other than LF right after a CR means the CR stood
            # alone (this includes a second consecutive CR).
            if pending_cr: return selfclass.CR
            pending_cr = element == cr
        if not pending_cr: return None
        # The sample ended on a CR. Without lookahead, a CRLF pair split by
        # the limit or a lone trailing CR would go undetected, so examine
        # the one element immediately after the sample, if it exists.
        position = len( sample )
        if position < len( content ) and content[ position ] == lf:
            return selfclass.CRLF
        return selfclass.CR

    @classmethod
    def normalize_universal( selfclass, content: str ) -> str:
        ''' Normalizes all line separators to Unix LF format. '''
        # Collapse CRLF to CR first so that each pair becomes exactly one
        # LF in the second pass rather than two.
        return content.replace( '\r\n', '\r' ).replace( '\r', '\n' )

    def normalize( self, content: str ) -> str:
        ''' Normalizes specific line separator to Unix LF format.

            Only occurrences of this member's separator are replaced;
            content is returned unchanged for the LF member.
        '''
        if LineSeparators.LF is self: return content
        return content.replace( self.value, '\n' )

    def nativize( self, content: str ) -> str:
        ''' Converts Unix LF to this platform's line separator.

            Every LF is replaced with this member's separator, so content
            is expected to be LF-normalized already; it is returned
            unchanged for the LF member.
        '''
        if LineSeparators.LF is self: return content
        return content.replace( '\n', self.value )

Coverage for sources / detextive / lineseparators.py: 100%

44 statements