1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
-- The * wild card only matches names within a single directory. Many shells
-- have an extended wild card syntax, **, that matches names recursively in all
-- directories. For example, **.c would mean “match a name ending in .c in this
-- directory or any subdirectory at any depth”. Implement matching on **
-- wildcards.
-- The assignment seems incorrect. Or maybe it was correct at the time of
-- writing the book. I used bash and Python's glob for checking this
-- functionality (globstar). From bash manual page:
--
-- "..., two adjacent *s used as a single pattern will match all files and
-- zero or more directories and subdirectories. If followed by a /, two
-- adjacent *s will match only directories and subdirectories."
--
-- According to those, patterns like '**.c' don't match files recursively. That
-- behavior can be achieved by patterns like '**/*.c'.
--
-- To follow the assignment regardless, I implemented what I call an extended
-- globstar: If a pattern in any component of a file path contains '**' among
-- other characters, each '**' sequence is converted to a single '*'. This new
-- pattern is used for matching files and directories in a current directory. In
-- matching directories, the searching continues with the tail of the file path,
-- and in all directories (including the matching ones) the searching continues
-- with the whole file path.
{-- From examples/examples/ch08/Glob.hs modified according to the assignment --}
import Data.List (sort, group)
import Control.Monad (forM)
import GlobRegex (matchesGlob)
import System.Directory (doesDirectoryExist, doesFileExist,
getCurrentDirectory, getDirectoryContents)
import System.FilePath ((</>), splitPath)
find :: String -> IO [FilePath]
find path
| not (isPattern path) = do
exists <- doesNameExist path
return (if exists then [path] else [])
| otherwise = do
(rootDir, rest) <- splitRootDir path
found' <- findPattern rootDir "" rest
-- Keep only one match of any single file.
--
-- In this implementation of the globstar matching, some matches can be
-- present multiple times in the result. For example, for file C/B/A and
-- pattern '**/A', the file can be found both as 'A' in directory C/B and
-- '*/A' in directory C.
let found = map head ((group.sort) found')
return found
splitRootDir :: FilePath -> IO (FilePath, [FilePath])
splitRootDir path = do
currentDir <- getCurrentDirectory
let split = splitPath path
let first = head split
return (if first == "/"
then (first, (tail split))
else (currentDir, split))
-- Parameters:
-- rootDir - the root directory where the searching started
-- matchingRoot - part of the search path/pattern that matches. The path for
-- listing directory content is rootDir/matchingRoot.
-- path - the remaining part of the search path/pattern to be matched.
findPattern :: FilePath -> FilePath -> [String] -> IO [FilePath]
findPattern rootDir matchingRoot [] = do
return []
findPattern rootDir matchingRoot path = do
let pat = head path
let rest = tail path
let isGlobstar = isPatternGlobstar pat
let dirOnly = ((last pat) == '/')
let hidden = ((head pat) == '.')
let patToMatch = getPatternForMatching pat
-- List regular files and directories separately for matching the current
-- search path/pattern component
files <- listDir (rootDir </> matchingRoot) doesFileExist hidden
dirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist hidden
-- For recursing in the case of globstar pattern, get all directories
-- regardless of whether they are hidden or not
globstarDirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist False
let filesMatch = if not dirOnly
then filter (`matchesGlob` patToMatch) files
else []
let dirsMatch = filter (`matchesGlob` patToMatch) dirs
let allMatch = filesMatch ++ dirsMatch
-- For globstar, search all directories again with the same 'path' regardless
-- of whether they match or not
globstarFound <- if isGlobstar
then do
namesRecursive <- forM globstarDirs $ \dir -> do
findPattern rootDir (matchingRoot </> dir) path
-- Globstar also means no directory. Search directly in the
-- matching root.
namesCurrent <- findPattern rootDir matchingRoot rest
return ((concat namesRecursive) ++ namesCurrent)
else return []
found <- if null rest
-- This is the last component of the search path. Return matches.
then do
return (map ((</>) matchingRoot) allMatch)
-- Not the last component. Recurse into matching directories.
else do
names <- forM dirsMatch $ \dir -> do
findPattern rootDir (matchingRoot </> dir) rest
return (concat names)
-- The overall result consists of the normal matches and the globstar
-- recursive matches
return (found ++ globstarFound)
listDir :: FilePath -> (FilePath -> IO Bool) -> Bool -> IO [FilePath]
listDir rootDir existsFunc hidden = do
dirContents <- getDirectoryContents rootDir
files <- forM dirContents $ \file -> do
let path = (</>) rootDir file
exists <- existsFunc path
if exists
then return file
else return ""
-- Keep only files of wanted type
let filesWanted = filter (\f -> f /= "") files
let filesHidden = if hidden
then filter (\f -> isHidden f && f /= "." && f /= "..") filesWanted
else filter (not . isHidden) filesWanted
return filesHidden
getPatternForMatching :: String -> String
getPatternForMatching ('*':'*':rest) = '*':(getPatternForMatching rest)
getPatternForMatching ('/':rest) = getPatternForMatching rest
getPatternForMatching (p:rest) = p:(getPatternForMatching rest)
getPatternForMatching p = p
isPattern :: String -> Bool
isPattern = any (`elem` "[*?")
isPatternGlobstar :: String -> Bool
isPatternGlobstar ('*':'*':_) = True
isPatternGlobstar (_:rest) = isPatternGlobstar rest
isPatternGlobstar _ = False
isHidden :: FilePath -> Bool
isHidden ('.':_) = True
isHidden _ = False
doesNameExist :: FilePath -> IO Bool
doesNameExist name = do
fileExists <- doesFileExist name
if fileExists
then return True
else doesDirectoryExist name
{-- End of code from examples --}
-- The testing directory structure is
--
-- test-8_b_3/
-- |-- A.c
-- |-- dir1
-- | |-- .hiddenC.c
-- | |-- .hiddenD.c
-- | |-- .hidden_dirG_c
-- | | `-- A.c
-- | |-- A.c
-- | |-- B.c
-- | |-- dirE_c
-- | | `-- file
-- | `-- dirF_c
-- | `-- file
-- `-- dir2
-- `-- dir1
-- `-- dir3
-- |-- .hiddenC.c
-- |-- .hiddenD.c
-- |-- .hidden_dirG_c
-- | `-- A.c
-- |-- A.c
-- |-- B.c
-- |-- dirE_c
-- | `-- file
-- `-- dirF_c
-- `-- file
--
-- 10 directories, 15 files
-- ghci> :l 8_b_3.hs
-- [1 of 2] Compiling GlobRegex ( GlobRegex.hs, interpreted )
-- [2 of 2] Compiling Main ( 8_b_3.hs, interpreted )
-- Ok, two modules loaded.
-- ghci> find "test-8_b_3/dir1/nonexistent.c"
-- []
-- Not a pattern
-- ghci> find "test-8_b_3/dir1/A.c"
-- ["test-8_b_3/dir1/A.c"]
-- Normal pattern for files and directories
-- ghci> find "test-8_b_3/dir1/*c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
-- Normal pattern for hidden files and directories
-- ghci> find "test-8_b_3/dir1/.*c"
-- ["test-8_b_3/dir1/.hiddenD.c","test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]
-- Normal pattern for directories only
-- ghci> find "test-8_b_3/dir1/*c/"
-- ["test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
-- Globstar also means 'zero directories' (searching in current directory)
-- ghci> find "test-8_b_3/dir1/**/*.c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c"]
-- Globstar for files and directories
-- ghci> find "test-8_b_3/**c"
-- ["test-8_b_3/A.c","test-8_b_3/dir2/dir1/dir3/A.c","test-8_b_3/dir2/dir1/dir3/B.c", \
-- "test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c","test-8_b_3/dir1/A.c", \
-- "test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
-- Globstar for hidden files and directories
-- ghci> find "test-8_b_3/.**c"
-- ["test-8_b_3/dir2/dir1/dir3/.hiddenD.c","test-8_b_3/dir2/dir1/dir3/.hiddenC.c", \
-- "test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c","test-8_b_3/dir1/.hiddenD.c", \
-- "test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]
-- Globstar for directories only
-- ghci> find "test-8_b_3/**c/"
-- ["test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c", \
-- "test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
-- Globstar matching hidden directories in the path
-- Note that globstar also means 'zero directories'
-- ghci> find "test-8_b_3/.**/A.c"
-- ["test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c","test-8_b_3/dir2/dir1/dir3/A.c", \
-- "test-8_b_3/dir1/.hidden_dirG_c/A.c","test-8_b_3/dir1/A.c","test-8_b_3/A.c"]
-- Globstar in multiple occurrences
-- ghci> find "test-8_b_3/**/dir1/**/A.c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir2/dir1/dir3/A.c"]
-- Globstar matching paths that are prefixes of other paths
-- ghci> find "test-8_b_3/**/dir1"
-- ["test-8_b_3/dir1","test-8_b_3/dir2/dir1"]
|