Coverage for sources / detextive / lineseparators.py: 100%

44 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-14 04:38 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Line separator enumeration and utilities. ''' 

22 

23 

24from . import __ 

25 

26 

class LineSeparators( __.enum.Enum ):
    ''' Line separators for cross-platform text processing.

        Each member's value is the literal separator string. Detection
        reports the first separator encountered in a bounded sample;
        conversion methods translate between a member's separator and LF.
    '''

    CR = '\r'       # Classic MacOS (0xD)
    CRLF = '\r\n'   # DOS/Windows (0xD 0xA)
    LF = '\n'       # Unix/Linux (0xA)

    @classmethod
    def detect_bytes(
        selfclass,
        content: __.cabc.Sequence[ int ] | bytes,
        limit: int = 1024,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from byte content sample.

            Scans at most ``limit`` leading elements of ``content`` and
            reports the first separator found. If the sample ends with a
            carriage return, the single element immediately after the
            sample (if any) is consulted to tell CRLF apart from CR.

            Returns detected LineSeparators enum member or None if the
            sample contains neither a carriage return nor a linefeed.
        '''
        return selfclass._detect( content, limit, 0xd, 0xa )

    @classmethod
    def detect_text(
        selfclass, text: str, limit: int = 1024
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from text (Unicode string).

            Scans at most ``limit`` leading characters of ``text`` and
            reports the first separator found. If the sample ends with a
            carriage return, the single character immediately after the
            sample (if any) is consulted to tell CRLF apart from CR.

            Returns detected LineSeparators enum member or None if the
            sample contains neither a carriage return nor a linefeed.
        '''
        return selfclass._detect( text, limit, '\r', '\n' )

    @classmethod
    def _detect(
        selfclass,
        content: __.cabc.Sequence[ object ],
        limit: int,
        cr: object,
        lf: object,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Finds first separator in sample, given CR and LF elements.

            Shared scanner behind :meth:`detect_bytes` and
            :meth:`detect_text`; ``cr`` and ``lf`` are the carriage return
            and linefeed in the element type of ``content``.
        '''
        sample = content[ : limit ]
        found_cr = False
        for element in sample:
            if element == lf:
                # A linefeed directly after a carriage return is CRLF.
                return selfclass.CRLF if found_cr else selfclass.LF
            # Anything other than a linefeed after a carriage return
            # (including a second carriage return) means a bare CR.
            if found_cr: return selfclass.CR
            found_cr = element == cr
        if not found_cr: return None
        # Sample ended on a carriage return. Peek at the one element just
        # past the sample so that a CRLF pair which straddles the limit is
        # not misreported; with nothing after it, the CR stands alone.
        position = len( sample )
        if position < len( content ) and content[ position ] == lf:
            return selfclass.CRLF
        return selfclass.CR

    @classmethod
    def normalize_universal( selfclass, content: str ) -> str:
        ''' Normalizes all line separators to Unix LF format.

            Handles CRLF, CR, and LF within the same content.
        '''
        # Collapse CRLF pairs first so that each pair yields exactly one
        # linefeed rather than two.
        return content.replace( '\r\n', '\r' ).replace( '\r', '\n' )

    def normalize( self, content: str ) -> str:
        ''' Normalizes specific line separator to Unix LF format.

            Only occurrences of this member's own separator are replaced;
            content is returned unchanged for the LF member.
        '''
        if self is LineSeparators.LF: return content
        return content.replace( self.value, '\n' )

    def nativize( self, content: str ) -> str:
        ''' Converts Unix LF to this platform's line separator.

            Every linefeed in ``content`` is replaced, so content should
            already be LF-normalized; content is returned unchanged for
            the LF member.
        '''
        if self is LineSeparators.LF: return content
        return content.replace( '\n', self.value )