aboutsummaryrefslogtreecommitdiff
path: root/ch08/8_a_2.hs
blob: d2047f9a2dd01373f1f4d3aa5a72b4058bac6b61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
-- 1. While filesystems on Unix are usually case-sensitive (e.g. “G” vs. “g”) in
--    file names, Windows filesystems are not. Add a parameter to the
--    globToRegex and matchesGlob functions that allows control over case
--    sensitive matching.
--
-- 2. Although we've gone to some lengths to write a portable namesMatching
--    function, the function uses our case sensitive globToRegex function. Find
--    a way to modify namesMatching to be case sensitive on Unix, and case
--    insensitive on Windows, without modifying its type signature. (Hint:
--    consider reading the documentation for System.FilePath to look for a
--    variable that tells us whether we're running on a Unix-like system, or on
--    Windows.)

-- For simplicity, let's assume that only a-z and A-Z letters are affected by
-- the case sensitivity option. Also, probably not all error cases are detected
-- when converting a glob to regex.

{-- From examples/examples/ch08/Glob.hs modified according to the assignment --}
import Control.Exception
import Control.Monad (forM)

import System.Directory (doesDirectoryExist, doesFileExist,
                         getCurrentDirectory, getDirectoryContents)

import System.FilePath (dropTrailingPathSeparator, splitFileName, (</>), searchPathSeparator)
{-- End of code from examples --}

{-- From examples/examples/ch08/GlobRegex.hs modified according to the assignment --}
import Data.Char
import Text.Regex.Posix ((=~))

data MatchOptions = DontIgnoreCase | IgnoreCase

globToRegex :: String -> MatchOptions -> String
globToRegex cs opt = '^' : globToRegex' cs opt ++ "$"

globToRegex' :: String -> MatchOptions -> String
globToRegex' "" _ = ""
globToRegex' ('*':cs) opt = ".*" ++ globToRegex' cs opt
globToRegex' ('?':cs) opt = '.' : globToRegex' cs opt
globToRegex' ('[':'!':cs) opt = "[^" ++ charClass cs opt
globToRegex' ('[':cs) opt     = '[' : charClass cs opt
globToRegex' (c:cs) opt       = escape c opt ++ globToRegex' cs opt

escape :: Char -> MatchOptions -> String
escape c opt | c `elem` regexChars = '\\' : [c]
             | otherwise = caseOutsideCharClass c opt
    where regexChars = "\\+()^$.{}]|"

charClass :: String -> MatchOptions -> String
charClass (']':cs) opt     = ']' : globToRegex' cs opt
charClass (a:'-':c:cs) opt = caseRange a c opt ++ charClass cs opt
charClass (c:cs) opt       = (caseInsideCharClass c opt) ++ charClass cs opt
charClass [] _             = error "unterminated character class"


matchesGlob :: FilePath -> String -> MatchOptions -> Bool
matchesGlob name pat opt = name =~ (globToRegex pat opt)
{-- End of code from examples --}

oppositeCase :: Char -> Char
oppositeCase c = if isAlpha c
                 then if isLower c
                      then toUpper c
                      else toLower c
                 else error (c:" is not a caseable character")

caseRange :: Char -> Char -> MatchOptions -> String
caseRange a c DontIgnoreCase = a : '-' : [c]
caseRange a c IgnoreCase = if sameCase
                                   then a:'-':c:opA:'-':[opC]
                                   -- Patterns like W-d which means W-Za-d are
                                   -- converted like w-za-dW-ZA-D
                                   else opA:"-za-" ++ c:a:"-ZA-" ++ [opC]
  where sameCase = ((isLower a) && (isLower c)) || ((isUpper a) && (isUpper c))
        opA = oppositeCase a
        opC = oppositeCase c

caseInsideCharClass :: Char -> MatchOptions -> String
caseInsideCharClass c DontIgnoreCase = [c]
caseInsideCharClass c IgnoreCase = if isAlpha c
                                   then (oppositeCase c) : [c]
                                   else [c]

caseOutsideCharClass :: Char -> MatchOptions -> String
caseOutsideCharClass c DontIgnoreCase = [c]
caseOutsideCharClass c opt = if shouldBeInBrackets
                             then '[' : cased ++ "]"
                             else cased
  where cased = caseInsideCharClass c opt
        shouldBeInBrackets = (length cased) > 1

-- ghci> :l 8_a_2.hs
-- [1 of 1] Compiling Main             ( 8_a_2.hs, interpreted )
-- Ok, one module loaded.

-- ghci> globToRegex "f[o-1].c" IgnoreCase
-- "^[Ff][O-za-1o-ZA-*** Exception: 1 is not a caseable character
-- CallStack (from HasCallStack):
--   error, called at 8_a_2.hs:48:23 in main:Main

-- ghci> globToRegex "f[gH,i-kL-N]*.c" DontIgnoreCase
-- "^f[gH,i-kL-N].*\\.c$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fHjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- True

-- ghci> globToRegex "f[gH,i-kL-N]*.c" IgnoreCase
-- "^[Ff][GghH,i-kI-KL-Nl-n].*\\.[Cc]$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fGhJm.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True

-- ghci> globToRegex "f[W-d]*.c" DontIgnoreCase
-- "^f[W-d].*\\.c$"
-- ghci> matchesGlob "fxa.c" "f[W-d]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fXa.c" "f[W-d]*.c" DontIgnoreCase
-- True

-- ghci> globToRegex "f[W-d]*.c" IgnoreCase
-- "^[Ff][w-za-dW-ZA-D].*\\.[Cc]$"
-- ghci> matchesGlob "fwa.c" "f[W-d]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fXawD.c" "f[W-d]*.c" IgnoreCase
-- True


-- There are three possible variables in System.FilePath that could be used for
-- differentiating between the platforms: pathSeparator, pathSeparators, and
-- searchPathSeparator. Even though they don't seem as a direct way for
-- detecting the platform, it is possible to use them.

{-- From examples/examples/ch08/Glob.hs modified according to the assignment
 --
 -- The required imports have been moved from here to the top of this source
 -- file.
 --
 -- The following error in the listMatches function
 --
 --   Ambiguous type variable ‘e0’ arising from a use of ‘handle’
 --   prevents the constraint ‘(Exception e0)’ from being solved.
 --   Probable fix: use a type annotation to specify what ‘e0’ should be.
 --
 -- has been fixed according to
 --
 --   https://stackoverflow.com/questions/12030977/ambiguous-type-variable-arising-from-the-use-of-handle
 --}
namesMatching :: String -> IO [FilePath]

isPattern :: String -> Bool
isPattern = any (`elem` "[*?")

namesMatching pat
  | not (isPattern pat) = do
    exists <- doesNameExist pat
    return (if exists then [pat] else [])

  | otherwise = do
    case splitFileName pat of
      ("", baseName) -> do
          curDir <- getCurrentDirectory
          listMatches curDir baseName
      (dirName, baseName) -> do
          dirs <- if isPattern dirName
                  then namesMatching (dropTrailingPathSeparator dirName)
                  else return [dirName]
          let listDir = if isPattern baseName
                        then listMatches
                        else listPlain
          pathNames <- forM dirs $ \dir -> do
                           baseNames <- listDir dir baseName
                           return (map (dir </>) baseNames)
          return (concat pathNames)

listMatches :: FilePath -> String -> IO [String]
listMatches dirName pat = do
    dirName' <- if null dirName
                then getCurrentDirectory
                else return dirName
    handle (\(SomeException _)  -> return []) $ do
        names <- getDirectoryContents dirName'
        let names' = if isHidden pat
                     then filter isHidden names
                     else filter (not . isHidden) names
        return (filter (`filterMatchesGlob` pat) names')

isHidden ('.':_) = True
isHidden _       = False

listPlain :: FilePath -> String -> IO [String]
listPlain dirName baseName = do
    exists <- if null baseName
              then doesDirectoryExist dirName
              else doesNameExist (dirName </> baseName)
    return (if exists then [baseName] else [])

doesNameExist :: FilePath -> IO Bool

doesNameExist name = do
    fileExists <- doesFileExist name
    if fileExists
      then return True
      else doesDirectoryExist name
{-- End of code from examples --}

filterMatchesGlob :: FilePath -> String -> Bool
filterMatchesGlob path glob = matchesGlob path glob caseSensitiveness

caseSensitiveness :: MatchOptions
caseSensitiveness = if isPlatformWindows
                    then IgnoreCase
                    else DontIgnoreCase

isPlatformWindows :: Bool
isPlatformWindows = (searchPathSeparator == windowsSearchPathSeparator)
  where windowsSearchPathSeparator = ';'

-- I haven't tested this solution on Windows, just Linux. On Linux, the case
-- should not be ignored.

-- ghci> namesMatching "8_A_1.hs"
-- []
-- ghci> namesMatching "8_a_1.hs"
-- ["8_a_1.hs"]
-- ghci> namesMatching "*regex*"
-- []
-- ghci> namesMatching "*Regex*"
-- ["./GlobRegex.hs"]