"FileLineEndingDetector - Character-based implementation"
"Declares FileLineEndingDetector as a direct subclass of Object in the 'FileUtilities'
 package. The class declares no instance variables and no class variables; the methods
 visible in this file are all defined on the class side, so it is used without
 instantiation."
Object subclass: #FileLineEndingDetector
instanceVariableNames: ''
classVariableNames: ''
package: 'FileUtilities'
"Class-side methods"
FileLineEndingDetector class >> detectLineEndingIn: aFilename
    "Answer the line ending convention of the file designated by aFilename, as decided
     by #detectLineEndingInString: applied to the file's raw bytes.
     aFilename is anything that understands #asFileReference (typically a path String).
     Signals an Error when the file does not exist."

    | fileReference contents |
    fileReference := aFilename asFileReference.
    fileReference exists ifFalse: [
        "asString is a no-op for Strings and keeps the concatenation valid for other argument kinds"
        ^ self error: 'File does not exist: ', aFilename asString ].
    "Read the raw bytes so that no decoding step can alter CR/LF characters.
     The ...Do: form guarantees the stream is closed once the block finishes."
    contents := fileReference binaryReadStreamDo: [ :stream | stream upToEnd asString ].
    ^ self detectLineEndingInString: contents
FileLineEndingDetector class >> detectLineEndingInString: aString
    "Scan aString once, tallying CR+LF pairs, lone CRs and lone LFs, then let
     #determineConvention:lf:crlf: pick the answer. Answers #unknown for an empty string."

    | loneCR loneLF pairs pendingCR |
    aString isEmpty ifTrue: [ ^ #unknown ].
    loneCR := 0.
    loneLF := 0.
    pairs := 0.
    "pendingCR is true while the previous character was a CR that has not been classified yet"
    pendingCR := false.
    aString do: [ :ch |
        ch = Character lf
            ifTrue: [
                "An LF right after a CR completes a pair; otherwise it stands alone"
                pendingCR
                    ifTrue: [ pairs := pairs + 1 ]
                    ifFalse: [ loneLF := loneLF + 1 ].
                pendingCR := false ]
            ifFalse: [
                "Any non-LF character proves that a pending CR was standalone"
                pendingCR ifTrue: [ loneCR := loneCR + 1 ].
                pendingCR := ch = Character cr ] ].
    "A CR as the very last character is standalone as well"
    pendingCR ifTrue: [ loneCR := loneCR + 1 ].
    ^ self determineConvention: loneCR lf: loneLF crlf: pairs
FileLineEndingDetector class >> detectLineEndingSimple: aFilename
    "Presence-based detection: answer #crlf if the file contains any CR+LF pair,
     else #lf if it contains any LF, else #cr if it contains any CR, else #unknown
     (which is also the answer for an empty file).
     aFilename is anything that understands #asFileReference.
     Signals an Error when the file does not exist."

    | fileReference contents |
    fileReference := aFilename asFileReference.
    fileReference exists ifFalse: [
        ^ self error: 'File does not exist: ', aFilename asString ].
    "Raw bytes are read through the ...Do: form so the stream is always closed"
    contents := fileReference binaryReadStreamDo: [ :stream | stream upToEnd asString ].
    contents isEmpty ifTrue: [ ^ #unknown ].
    "Order matters: a CR+LF pair also contains an LF and a CR, so it must be tested first"
    (contents includesSubstring: String crlf) ifTrue: [ ^ #crlf ].
    (contents includes: Character lf) ifTrue: [ ^ #lf ].
    (contents includes: Character cr) ifTrue: [ ^ #cr ].
    ^ #unknown
FileLineEndingDetector class >> detectLineEndingPrecise: aFilename
    "Count-based detection: count CR+LF pairs, then lone LFs and lone CRs in what is
     left after removing the pairs, and let #determineConvention:lf:crlf: decide.
     Answers #unknown for an empty file.
     aFilename is anything that understands #asFileReference.
     Signals an Error when the file does not exist."

    "All temporaries are declared here: Smalltalk does not accept a second
     temporaries declaration between statements of a method body."
    | fileReference contents withoutCRLF crlfCount lfCount crCount |
    fileReference := aFilename asFileReference.
    fileReference exists ifFalse: [
        ^ self error: 'File does not exist: ', aFilename asString ].
    "Raw bytes are read through the ...Do: form so the stream is always closed"
    contents := fileReference binaryReadStreamDo: [ :stream | stream upToEnd asString ].
    contents isEmpty ifTrue: [ ^ #unknown ].
    "Same counting helper as #analyzeLineEndings:, so both report identical numbers"
    crlfCount := self countOccurrences: String crlf in: contents.
    "Strip the pairs first so their CR and LF are not counted again as standalone"
    withoutCRLF := contents copyReplaceAll: String crlf with: ''.
    lfCount := withoutCRLF occurrencesOf: Character lf.
    crCount := withoutCRLF occurrencesOf: Character cr.
    ^ self determineConvention: crCount lf: lfCount crlf: crlfCount
FileLineEndingDetector class >> determineConvention: crCount lf: lfCount crlf: crlfCount
    "Pick a convention symbol from the three tallies.
     Answers #unknown when all tallies are zero. Otherwise answers the kind that
     accounts for at least 80% of all line endings; if none does, answers the first
     kind with a non-zero tally in the order #crlf, #lf, #cr."

    | total threshold candidates |
    total := crCount + lfCount + crlfCount.
    total = 0 ifTrue: [ ^ #unknown ].
    threshold := total * 0.8.
    "Listed in priority order; both passes below rely on that order"
    candidates := { #crlf -> crlfCount. #lf -> lfCount. #cr -> crCount }.
    "First pass: a clearly predominant kind wins"
    candidates do: [ :each |
        (each value > 0 and: [ each value >= threshold ])
            ifTrue: [ ^ each key ] ].
    "Second pass: no predominant kind, so fall back to plain priority"
    candidates do: [ :each |
        each value > 0 ifTrue: [ ^ each key ] ].
    "Only reached if no tally is positive although the total is non-zero,
     which cannot happen when all tallies are non-negative"
    ^ #mixed
FileLineEndingDetector class >> analyzeLineEndings: aFilename
    "Answer a Dictionary describing the line endings of the file designated by aFilename.
     Keys: #filename (the argument as given), #size (as reported by the file reference),
     #crlf, #lf and #cr (occurrence counts, where #lf and #cr exclude characters that
     belong to a CR+LF pair), #convention (see #determineConvention:lf:crlf:) and
     #lineCount (number of elements answered by String>>lines).
     When the file does not exist, answers a Dictionary holding only an #error entry
     instead of signalling."

    "All temporaries are declared here: Smalltalk does not accept a second
     temporaries declaration between statements of a method body."
    | fileReference contents withoutCRLF info |
    fileReference := aFilename asFileReference.
    fileReference exists ifFalse: [
        ^ Dictionary new
            at: #error put: 'File does not exist';
            yourself ].
    "Raw bytes are read through the ...Do: form so the stream is always closed"
    contents := fileReference binaryReadStreamDo: [ :stream | stream upToEnd asString ].
    info := Dictionary new.
    info at: #filename put: aFilename.
    info at: #size put: fileReference size.
    info at: #crlf put: (self countOccurrences: String crlf in: contents).
    "Strip the pairs first so their CR and LF are not counted again as standalone"
    withoutCRLF := contents copyReplaceAll: String crlf with: ''.
    info at: #lf put: (withoutCRLF occurrencesOf: Character lf).
    info at: #cr put: (withoutCRLF occurrencesOf: Character cr).
    info at: #convention put: (self
        determineConvention: (info at: #cr)
        lf: (info at: #lf)
        crlf: (info at: #crlf)).
    info at: #lineCount put: contents lines size.
    ^ info
FileLineEndingDetector class >> countOccurrences: aSubstring in: aString
    "Answer how many times aSubstring occurs in aString, scanning left to right and
     resuming each search just past the previous match (matches never overlap)."

    | tally searchFrom hit |
    tally := 0.
    searchFrom := 1.
    "Stop when the search position runs off the end or when no further match exists"
    [ searchFrom <= aString size
        and: [ (hit := aString findString: aSubstring startingAt: searchFrom) > 0 ] ]
        whileTrue: [
            tally := tally + 1.
            searchFrom := hit + aSubstring size ].
    ^ tally
FileLineEndingDetector class >> convertFile: aFilename to: aConvention
    "Rewrite the file designated by aFilename so that every line ending uses the string
     answered by #lineStringFor: for aConvention. A line terminator at the very end of
     the file is preserved (converted), not dropped.
     aFilename is anything that understands #asFileReference.
     Answers true on success; signals an Error when the file does not exist."

    | fileReference contents newLineString result |
    fileReference := aFilename asFileReference.
    fileReference exists ifFalse: [
        ^ self error: 'File does not exist: ', aFilename asString ].
    contents := fileReference contents.
    newLineString := self lineStringFor: aConvention.
    result := contents lines joinUsing: newLineString.
    "#lines discards the terminator of the last line; put it back so that a file which
     ended with a line ending still ends with one after conversion"
    (contents notEmpty
        and: [ contents last = Character cr or: [ contents last = Character lf ] ])
        ifTrue: [ result := result , newLineString ].
    "Writing over an existing file does not truncate it, so a shorter result
     (e.g. CRLF -> LF) would leave stale bytes at the end. The whole content is
     already in memory, so remove the old file before writing the new one."
    fileReference ensureDelete.
    fileReference writeStreamDo: [ :stream | stream nextPutAll: result ].
    ^ true
FileLineEndingDetector class >> lineStringFor: aConvention
    "Answer the line terminator string for aConvention: CR+LF for #crlf, CR for #cr,
     and a single LF for #lf as well as for any other value."

    aConvention = #crlf ifTrue: [ ^ String crlf ].
    aConvention = #cr ifTrue: [ ^ String cr ].
    "#lf and every unrecognised convention end up here"
    ^ String lf
"Simplified single method version"
FileLineEndingDetector class >> detectLineEndingQuick: aFilename
    "Presence-based detection on the file's contents as answered by
     FileReference>>contents. Answers #crlf if any CR+LF pair is present, else #lf if
     any LF is present, else #cr if any CR is present, else #unknown (also for an
     empty file). No existence check is made before reading."

    | text |
    text := aFilename asFileReference contents.
    text isEmpty ifTrue: [ ^ #unknown ].
    "The pair is tested first because it contains both single characters"
    (text includesSubstring: String crlf) ifTrue: [ ^ #crlf ].
    (text includes: Character lf) ifTrue: [ ^ #lf ].
    (text includes: Character cr) ifTrue: [ ^ #cr ].
    ^ #unknown
"Extension methods for FileReference"
FileReference >> detectLineEnding
    "Answer the convention symbol that FileLineEndingDetector class>>detectLineEndingIn:
     reports for this file, identified by its full name."

    | path |
    path := self fullName.
    ^ FileLineEndingDetector detectLineEndingIn: path
FileReference >> lineEndingInfo
    "Answer the Dictionary that FileLineEndingDetector class>>analyzeLineEndings:
     builds for this file, identified by its full name."

    | path |
    path := self fullName.
    ^ FileLineEndingDetector analyzeLineEndings: path
FileReference >> hasUnixLineEndings
    "Answer true when #detectLineEnding reports #lf for this file."

    | detected |
    detected := self detectLineEnding.
    ^ detected = #lf
FileReference >> hasWindowsLineEndings
    "Answer true when #detectLineEnding reports #crlf for this file."

    | detected |
    detected := self detectLineEnding.
    ^ detected = #crlf
FileReference >> hasMacClassicLineEndings
    "Answer true when #detectLineEnding reports #cr for this file."

    | detected |
    detected := self detectLineEnding.
    ^ detected = #cr
FileReference >> normalizeLineEndings
    "Rewrite this file through FileLineEndingDetector class>>convertFile:to:, using
     #crlf when Smalltalk os isWindows answers true and #lf otherwise.
     Answers whatever convertFile:to: answers."

    | target |
    target := #lf.
    Smalltalk os isWindows ifTrue: [ target := #crlf ].
    ^ FileLineEndingDetector convertFile: self fullName to: target
"Usage examples:"
"
""Quick detection""
FileLineEndingDetector detectLineEndingQuick: '/path/to/file.txt'.
""Detailed detection""
FileLineEndingDetector detectLineEndingIn: '/path/to/file.txt'.
""Simple check""
FileLineEndingDetector detectLineEndingSimple: '/path/to/file.txt'.
""Using FileReference extensions""
'/path/to/file.txt' asFileReference hasUnixLineEndings.
'/path/to/file.txt' asFileReference hasWindowsLineEndings.
""Normalize to system convention""
'/path/to/file.txt' asFileReference normalizeLineEndings.
""Get detailed analysis""
info := FileLineEndingDetector analyzeLineEndings: '/path/to/file.txt'.
info at: #lineCount. ""Number of lines""
info at: #convention. ""Detected convention""
"