1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
|
-- 1. While filesystems on Unix are usually case-sensitive (e.g. “G” vs. “g”) in
-- file names, Windows filesystems are not. Add a parameter to the
-- globToRegex and matchesGlob functions that allows control over case
-- sensitive matching.
--
-- 2. Although we've gone to some lengths to write a portable namesMatching
-- function, the function uses our case sensitive globToRegex function. Find
-- a way to modify namesMatching to be case sensitive on Unix, and case
-- insensitive on Windows, without modifying its type signature. (Hint:
-- consider reading the documentation for System.FilePath to look for a
-- variable that tells us whether we're running on a Unix-like system, or on
-- Windows.)
-- For simplicity, let's assume that only a-z and A-Z letters are affected by
-- the case sensitivity option. Also, probably not all error cases are detected
-- when converting a glob to regex.
{-- From examples/examples/ch08/Glob.hs modified according to the assignment --}
import Control.Exception
import Control.Monad (forM)
import System.Directory (doesDirectoryExist, doesFileExist,
getCurrentDirectory, getDirectoryContents)
import System.FilePath (dropTrailingPathSeparator, splitFileName, (</>), searchPathSeparator)
{-- End of code from examples --}
{-- From examples/examples/ch08/GlobRegex.hs modified according to the assignment --}
import Data.Char
import Text.Regex.Posix ((=~))
data MatchOptions = DontIgnoreCase | IgnoreCase
globToRegex :: String -> MatchOptions -> String
globToRegex cs opt = '^' : globToRegex' cs opt ++ "$"
globToRegex' :: String -> MatchOptions -> String
globToRegex' "" _ = ""
globToRegex' ('*':cs) opt = ".*" ++ globToRegex' cs opt
globToRegex' ('?':cs) opt = '.' : globToRegex' cs opt
globToRegex' ('[':'!':cs) opt = "[^" ++ charClass cs opt
globToRegex' ('[':cs) opt = '[' : charClass cs opt
globToRegex' (c:cs) opt = escape c opt ++ globToRegex' cs opt
escape :: Char -> MatchOptions -> String
escape c opt | c `elem` regexChars = '\\' : [c]
| otherwise = caseOutsideCharClass c opt
where regexChars = "\\+()^$.{}]|"
charClass :: String -> MatchOptions -> String
charClass (']':cs) opt = ']' : globToRegex' cs opt
charClass (a:'-':c:cs) opt = caseRange a c opt ++ charClass cs opt
charClass (c:cs) opt = (caseInsideCharClass c opt) ++ charClass cs opt
charClass [] _ = error "unterminated character class"
matchesGlob :: FilePath -> String -> MatchOptions -> Bool
matchesGlob name pat opt = name =~ (globToRegex pat opt)
{-- End of code from examples --}
oppositeCase :: Char -> Char
oppositeCase c = if isAlpha c
then if isLower c
then toUpper c
else toLower c
else error (c:" is not a caseable character")
caseRange :: Char -> Char -> MatchOptions -> String
caseRange a c DontIgnoreCase = a : '-' : [c]
caseRange a c IgnoreCase = if sameCase
then a:'-':c:opA:'-':[opC]
-- Patterns like W-d which means W-Za-d are
-- converted like w-za-dW-ZA-D
else opA:"-za-" ++ c:a:"-ZA-" ++ [opC]
where sameCase = ((isLower a) && (isLower c)) || ((isUpper a) && (isUpper c))
opA = oppositeCase a
opC = oppositeCase c
caseInsideCharClass :: Char -> MatchOptions -> String
caseInsideCharClass c DontIgnoreCase = [c]
caseInsideCharClass c IgnoreCase = if isAlpha c
then (oppositeCase c) : [c]
else [c]
caseOutsideCharClass :: Char -> MatchOptions -> String
caseOutsideCharClass c DontIgnoreCase = [c]
caseOutsideCharClass c opt = if shouldBeInBrackets
then '[' : cased ++ "]"
else cased
where cased = caseInsideCharClass c opt
shouldBeInBrackets = (length cased) > 1
-- ghci> :l 8_a_2.hs
-- [1 of 1] Compiling Main ( 8_a_2.hs, interpreted )
-- Ok, one module loaded.
-- ghci> globToRegex "f[o-1].c" IgnoreCase
-- "^[Ff][O-za-1o-ZA-*** Exception: 1 is not a caseable character
-- CallStack (from HasCallStack):
-- error, called at 8_a_2.hs:48:23 in main:Main
-- ghci> globToRegex "f[gH,i-kL-N]*.c" DontIgnoreCase
-- "^f[gH,i-kL-N].*\\.c$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fHjM.c" "f[gH,i-kL-N]*.c" DontIgnoreCase
-- True
-- ghci> globToRegex "f[gH,i-kL-N]*.c" IgnoreCase
-- "^[Ff][GghH,i-kI-KL-Nl-n].*\\.[Cc]$"
-- ghci> matchesGlob "fhjM.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fGhJm.c" "f[gH,i-kL-N]*.c" IgnoreCase
-- True
-- ghci> globToRegex "f[W-d]*.c" DontIgnoreCase
-- "^f[W-d].*\\.c$"
-- ghci> matchesGlob "fxa.c" "f[W-d]*.c" DontIgnoreCase
-- False
-- ghci> matchesGlob "fXa.c" "f[W-d]*.c" DontIgnoreCase
-- True
-- ghci> globToRegex "f[W-d]*.c" IgnoreCase
-- "^[Ff][w-za-dW-ZA-D].*\\.[Cc]$"
-- ghci> matchesGlob "fwa.c" "f[W-d]*.c" IgnoreCase
-- True
-- ghci> matchesGlob "fXawD.c" "f[W-d]*.c" IgnoreCase
-- True
-- There are three possible variables in System.FilePath that could be used for
-- differentiating between the platforms: pathSeparator, pathSeparators, and
-- searchPathSeparator. Even though they don't seem as a direct way for
-- detecting the platform, it is possible to use them.
{-- From examples/examples/ch08/Glob.hs modified according to the assignment
--
-- The required imports have been moved from here to the top of this source
-- file.
--
-- The following error in the listMatches function
--
-- Ambiguous type variable ‘e0’ arising from a use of ‘handle’
-- prevents the constraint ‘(Exception e0)’ from being solved.
-- Probable fix: use a type annotation to specify what ‘e0’ should be.
--
-- has been fixed according to
--
-- https://stackoverflow.com/questions/12030977/ambiguous-type-variable-arising-from-the-use-of-handle
--}
namesMatching :: String -> IO [FilePath]
isPattern :: String -> Bool
isPattern = any (`elem` "[*?")
namesMatching pat
| not (isPattern pat) = do
exists <- doesNameExist pat
return (if exists then [pat] else [])
| otherwise = do
case splitFileName pat of
("", baseName) -> do
curDir <- getCurrentDirectory
listMatches curDir baseName
(dirName, baseName) -> do
dirs <- if isPattern dirName
then namesMatching (dropTrailingPathSeparator dirName)
else return [dirName]
let listDir = if isPattern baseName
then listMatches
else listPlain
pathNames <- forM dirs $ \dir -> do
baseNames <- listDir dir baseName
return (map (dir </>) baseNames)
return (concat pathNames)
listMatches :: FilePath -> String -> IO [String]
listMatches dirName pat = do
dirName' <- if null dirName
then getCurrentDirectory
else return dirName
handle (\(SomeException _) -> return []) $ do
names <- getDirectoryContents dirName'
let names' = if isHidden pat
then filter isHidden names
else filter (not . isHidden) names
return (filter (`filterMatchesGlob` pat) names')
isHidden ('.':_) = True
isHidden _ = False
listPlain :: FilePath -> String -> IO [String]
listPlain dirName baseName = do
exists <- if null baseName
then doesDirectoryExist dirName
else doesNameExist (dirName </> baseName)
return (if exists then [baseName] else [])
doesNameExist :: FilePath -> IO Bool
doesNameExist name = do
fileExists <- doesFileExist name
if fileExists
then return True
else doesDirectoryExist name
{-- End of code from examples --}
filterMatchesGlob :: FilePath -> String -> Bool
filterMatchesGlob path glob = matchesGlob path glob caseSensitiveness
caseSensitiveness :: MatchOptions
caseSensitiveness = if isPlatformWindows
then IgnoreCase
else DontIgnoreCase
isPlatformWindows :: Bool
isPlatformWindows = (searchPathSeparator == windowsSearchPathSeparator)
where windowsSearchPathSeparator = ';'
-- I haven't tested this solution on Windows, just Linux. On Linux, the case
-- should not be ignored.
-- ghci> namesMatching "8_A_1.hs"
-- []
-- ghci> namesMatching "8_a_1.hs"
-- ["8_a_1.hs"]
-- ghci> namesMatching "*regex*"
-- []
-- ghci> namesMatching "*Regex*"
-- ["./GlobRegex.hs"]
|