aboutsummaryrefslogtreecommitdiff
path: root/ch08/8_a_2.hs
blob: 92f130aa7b8019e095313b070970f9e9fe173eb0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
-- While filesystems on Unix are usually case-sensitive (e.g. “G” vs. “g”) in
-- file names, Windows filesystems are not. Add a parameter to the globToRegex
-- and matchesGlob functions that allows control over case sensitive matching.

-- For simplicity, let's assume that only a-z and A-Z letters are affected by
-- the case sensitivity option. Also, probably not all error cases are detected
-- when converting a glob to regex.

{-- From examples/examples/ch08/GlobRegex.hs modified according to the assignment --}
import Data.Char
import Text.Regex.Posix ((=~))

data MatchOptions = DontIgnoreCase | IgnoreCase

globToRegex :: String -> MatchOptions -> String
globToRegex cs opt = '^' : globToRegex' cs opt ++ "$"

globToRegex' :: String -> MatchOptions -> String
globToRegex' "" _ = ""
globToRegex' ('*':cs) opt = ".*" ++ globToRegex' cs opt
globToRegex' ('?':cs) opt = '.' : globToRegex' cs opt
globToRegex' ('[':'!':cs) opt = "[^" ++ charClass cs opt
globToRegex' ('[':cs) opt     = '[' : charClass cs opt
globToRegex' (c:cs) opt       = escape c opt ++ globToRegex' cs opt

escape :: Char -> MatchOptions -> String
escape c opt | c `elem` regexChars = '\\' : [c]
             | otherwise = caseOutsideCharClass c opt
    where regexChars = "\\+()^$.{}]|"

charClass :: String -> MatchOptions -> String
charClass (']':cs) opt     = ']' : globToRegex' cs opt
charClass (a:'-':c:cs) opt = caseRange a c opt ++ charClass cs opt
charClass (c:cs) opt       = (caseInsideCharClass c opt) ++ charClass cs opt
charClass [] _             = error "unterminated character class"


matchesGlob :: FilePath -> String -> MatchOptions -> Bool
matchesGlob name pat opt = name =~ (globToRegex pat opt)
{-- End of code from examples --}

oppositeCase :: Char -> Char
oppositeCase c = if isAlpha c
                 then if isLower c
                      then toUpper c
                      else toLower c
                 else error (c:" is not a caseable character")

caseRange :: Char -> Char -> MatchOptions -> String
caseRange a c DontIgnoreCase = a : '-' : [c]
caseRange a c IgnoreCase = if sameCase
                                   then a:'-':c:opA:'-':[opC]
                                   -- Patterns like W-d which means W-Za-d are
                                   -- converted like w-za-dW-ZA-D
                                   else opA:"-za-" ++ c:a:"-ZA-" ++ [opC]
  where sameCase = ((isLower a) && (isLower c)) || ((isUpper a) && (isUpper c))
        opA = oppositeCase a
        opC = oppositeCase c

caseInsideCharClass :: Char -> MatchOptions -> String
caseInsideCharClass c DontIgnoreCase = [c]
caseInsideCharClass c IgnoreCase = if isAlpha c
                                   then (oppositeCase c) : [c]
                                   else [c]

caseOutsideCharClass :: Char -> MatchOptions -> String
caseOutsideCharClass c DontIgnoreCase = [c]
caseOutsideCharClass c opt = if shouldBeInBrackets
                             then '[' : cased ++ "]"
                             else cased
  where cased = caseInsideCharClass c opt
        shouldBeInBrackets = (length cased) > 1

-- ghci> :l 8_a_2.hs
-- [1 of 1] Compiling Main             ( 8_a_2.hs, interpreted )
-- Ok, one module loaded.

-- ghci> globToRegex "f[o-1].c" IgnoreCase
-- "^[Ff][O-za-1o-ZA-*** Exception: 1 is not a caseable character
-- CallStack (from HasCallStack):
--   error, called at 8_a_2.hs:48:23 in main:Main

-- ghci> globToRegex "f[gH,i-kL-N]*.c" DontIgnoreCase
-- "^f[gH,i-kL-N].*\\.c$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fHjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- True

-- ghci> globToRegex "f[gH,i-kL-N]*.c" IgnoreCase
-- "^[Ff][GghH,i-kI-KL-Nl-n].*\\.[Cc]$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fGhJm.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True

-- ghci> globToRegex "f[W-d]*.c" DontIgnoreCase
-- "^f[W-d].*\\.c$"
-- ghci> matchesGlob "fxa.c" "f[W-d]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fXa.c" "f[W-d]*.c" DontIgnoreCase
-- True

-- ghci> globToRegex "f[W-d]*.c" IgnoreCase
-- "^[Ff][w-za-dW-ZA-D].*\\.[Cc]$"
-- ghci> matchesGlob "fwa.c" "f[W-d]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fXawD.c" "f[W-d]*.c" IgnoreCase
-- True