!===============================================================
! Program module: global MAP for the date-detection procedures,
! followed by the (empty) main program code section.
!===============================================================
  PROGRAM

  MAP
    ! NOTE(review): both procedures below are implemented later in this
    ! same source file, yet they are prototyped inside MODULE('API') with
    ! the PASCAL calling convention. Confirm that the MODULE wrapper and
    ! the PASCAL attribute are intended; they are left untouched here so
    ! that existing callers and linkage are not affected.
    MODULE('API')
      ! TRUE when pText contains a "<month> <day>[suffix][,] <yyyy>" date.
      MatchDatePattern(STRING pText), BOOL, PASCAL
      ! Returns the first such date found in pText, or blank when none.
      ExtractDateFromText(STRING pText), STRING, PASCAL
    END
  END

  ! A PROGRAM needs its own CODE section before the first procedure
  ! definition. There is no start-up work to do, so return immediately.
  CODE
  RETURN

!---------------------------------------------------------------
! MatchDatePattern
!
! Purpose : Report whether pText contains a date written as
!             <month name> <day>[st|nd|rd|th][,] <4-digit year>
!           e.g. "January 5th, 2020" or "Sep 1 1999".
! Params  : pText - text to scan (trailing blanks are ignored).
! Returns : TRUE as soon as one complete date is found, else FALSE.
!
! Matching rules
!   * The month must be a whole alphabetic word equal (ignoring
!     case) to a full or three-letter English month name. Fragments
!     such as "a" or "ary", and month names buried at the end of a
!     longer word ("Dismay"), are rejected.
!   * The day is one or two digits in the range 1..31.
!   * The ordinal suffix is optional and case-insensitive.
!---------------------------------------------------------------
MatchDatePattern    PROCEDURE(STRING pText)
Pos         LONG                ! current scan position in pText (1-based)
TextLen     LONG                ! length of pText without trailing blanks
WordEnd     LONG                ! first position after the alphabetic run
Word        STRING(20)          ! upper-cased candidate month token
Suffix      STRING(2)           ! upper-cased two characters after the day
DayText     STRING(10)          ! collected day digits (at most 2)
YearText    STRING(10)          ! collected year digits (at most 4)
DayNum      LONG                ! numeric value of DayText
  CODE
  TextLen = LEN(CLIP(pText))
  Pos = 1

  LOOP WHILE Pos <= TextLen
    ! Skip leading whitespace
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END
    IF Pos > TextLen THEN BREAK.

    ! Locate the end of the run of letters that starts at Pos
    WordEnd = Pos
    LOOP WHILE WordEnd <= TextLen AND |
               ((pText[WordEnd] >= 'A' AND pText[WordEnd] <= 'Z') OR |
                (pText[WordEnd] >= 'a' AND pText[WordEnd] <= 'z'))
      WordEnd += 1
    END

    ! Not a letter at all: step over this single character
    IF WordEnd = Pos
      Pos += 1
      CYCLE
    END

    ! Take the whole word and move past it, so the tail of a rejected
    ! word is never rescanned. A word longer than Word is truncated on
    ! assignment, which is harmless: no month name is that long.
    Word = UPPER(pText[Pos : WordEnd - 1])
    Pos  = WordEnd

    ! Whole-token comparison: wrapping both the token and every list
    ! entry in '|' stops partial words from matching.
    IF NOT INSTRING('|' & CLIP(Word) & '|', |
                    '|JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER|' & |
                    'JAN|FEB|MAR|APR|JUN|JUL|AUG|SEP|OCT|NOV|DEC|', 1, 1)
      CYCLE
    END

    ! Skip whitespace after month
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END

    ! Day: one or two digits
    DayText = ''
    LOOP WHILE Pos <= TextLen AND (pText[Pos] >= '0' AND pText[Pos] <= '9') AND LEN(CLIP(DayText)) < 2
      DayText = CLIP(DayText) & pText[Pos]
      Pos += 1
    END
    IF LEN(CLIP(DayText)) = 0 THEN CYCLE.
    DayNum = DayText
    IF DayNum < 1 OR DayNum > 31 THEN CYCLE.

    ! Skip optional ordinal suffix (st, nd, rd, th) in any letter case
    IF Pos + 1 <= TextLen
      Suffix = UPPER(pText[Pos : Pos + 1])
      IF Suffix = 'ST' OR Suffix = 'ND' OR Suffix = 'RD' OR Suffix = 'TH'
        Pos += 2
      END
    END

    ! Skip optional comma
    IF Pos <= TextLen AND pText[Pos] = ','
      Pos += 1
    END

    ! Skip whitespace before the year
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END

    ! Year: exactly four digits are collected
    YearText = ''
    LOOP WHILE Pos <= TextLen AND (pText[Pos] >= '0' AND pText[Pos] <= '9') AND LEN(CLIP(YearText)) < 4
      YearText = CLIP(YearText) & pText[Pos]
      Pos += 1
    END
    IF LEN(CLIP(YearText)) = 4
      RETURN TRUE                       ! month + day + year all present
    END
    ! Incomplete date: keep scanning from the current position
  END

  RETURN FALSE
!---------------------------------------------------------------
! ExtractDateFromText
!
! Purpose : Find the first date in pText written as
!             <month name> <day>[st|nd|rd|th][,] <4-digit year>
!           and return it exactly as it appears in the text.
! Params  : pText - text to scan (trailing blanks are ignored).
! Returns : The matched text, from the first letter of the month
!           through the last digit of the year, in a 50-character
!           buffer (longer matches are truncated). Blank when no
!           date is found.
!
! Matching rules
!   * The month must be a whole alphabetic word equal (ignoring
!     case) to a full or three-letter English month name. Fragments
!     such as "e" or "ust", and month names buried at the end of a
!     longer word, are rejected.
!   * The day is one or two digits in the range 1..31.
!   * The ordinal suffix is optional and case-insensitive.
!---------------------------------------------------------------
ExtractDateFromText PROCEDURE(STRING pText)
Result      STRING(50)          ! returned date text (blank = not found)
Pos         LONG                ! current scan position in pText (1-based)
TextLen     LONG                ! length of pText without trailing blanks
DateStart   LONG                ! position of the candidate month's first letter
WordEnd     LONG                ! first position after the alphabetic run
Word        STRING(20)          ! upper-cased candidate month token
Suffix      STRING(2)           ! upper-cased two characters after the day
DayText     STRING(10)          ! collected day digits (at most 2)
YearText    STRING(10)          ! collected year digits (at most 4)
DayNum      LONG                ! numeric value of DayText
  CODE
  Result  = ''
  TextLen = LEN(CLIP(pText))
  Pos     = 1

  LOOP WHILE Pos <= TextLen
    ! Skip leading whitespace
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END
    IF Pos > TextLen THEN BREAK.

    ! A date, if any, starts at the first non-blank character
    DateStart = Pos

    ! Locate the end of the run of letters that starts at Pos
    WordEnd = Pos
    LOOP WHILE WordEnd <= TextLen AND |
               ((pText[WordEnd] >= 'A' AND pText[WordEnd] <= 'Z') OR |
                (pText[WordEnd] >= 'a' AND pText[WordEnd] <= 'z'))
      WordEnd += 1
    END

    ! Not a letter at all: step over this single character
    IF WordEnd = Pos
      Pos += 1
      CYCLE
    END

    ! Take the whole word and move past it, so the tail of a rejected
    ! word is never rescanned. A word longer than Word is truncated on
    ! assignment, which is harmless: no month name is that long.
    Word = UPPER(pText[Pos : WordEnd - 1])
    Pos  = WordEnd

    ! Whole-token comparison: wrapping both the token and every list
    ! entry in '|' stops partial words from matching.
    IF NOT INSTRING('|' & CLIP(Word) & '|', |
                    '|JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER|' & |
                    'JAN|FEB|MAR|APR|JUN|JUL|AUG|SEP|OCT|NOV|DEC|', 1, 1)
      CYCLE
    END

    ! Skip whitespace after month
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END

    ! Day: one or two digits
    DayText = ''
    LOOP WHILE Pos <= TextLen AND (pText[Pos] >= '0' AND pText[Pos] <= '9') AND LEN(CLIP(DayText)) < 2
      DayText = CLIP(DayText) & pText[Pos]
      Pos += 1
    END
    IF LEN(CLIP(DayText)) = 0 THEN CYCLE.
    DayNum = DayText
    IF DayNum < 1 OR DayNum > 31 THEN CYCLE.

    ! Skip optional ordinal suffix (st, nd, rd, th) in any letter case
    IF Pos + 1 <= TextLen
      Suffix = UPPER(pText[Pos : Pos + 1])
      IF Suffix = 'ST' OR Suffix = 'ND' OR Suffix = 'RD' OR Suffix = 'TH'
        Pos += 2
      END
    END

    ! Skip optional comma
    IF Pos <= TextLen AND pText[Pos] = ','
      Pos += 1
    END

    ! Skip whitespace before the year
    LOOP WHILE Pos <= TextLen AND pText[Pos] = ' '
      Pos += 1
    END

    ! Year: exactly four digits are collected
    YearText = ''
    LOOP WHILE Pos <= TextLen AND (pText[Pos] >= '0' AND pText[Pos] <= '9') AND LEN(CLIP(YearText)) < 4
      YearText = CLIP(YearText) & pText[Pos]
      Pos += 1
    END
    IF LEN(CLIP(YearText)) = 4
      ! Pos now sits just past the year, so the span is Pos - DateStart long
      Result = SUB(pText, DateStart, Pos - DateStart)
      BREAK
    END
    ! Incomplete date: keep scanning from the current position
  END

  RETURN Result