aboutsummaryrefslogtreecommitdiff
path: root/ch08/8_b_3.hs
blob: 88006380ee3c4d2b8bb283636ac8fc6ae79acd22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
-- The * wild card only matches names within a single directory. Many shells
-- have an extended wild card syntax, **, that matches names recursively in all
-- directories. For example, **.c would mean “match a name ending in .c in this
-- directory or any subdirectory at any depth”. Implement matching on **
-- wildcards.

-- The assignment seems incorrect. Or maybe it was correct at the time of
-- writing the book. I used bash and Python's glob for checking this
-- functionality (globstar). From bash manual page:
--
--   "..., two adjacent *s used as a single pattern will match all files and
--   zero or more directories and subdirectories. If followed by a /, two
--   adjacent *s will match only directories and subdirectories."
--
-- According to those, patterns like '**.c' don't match files recursively. That
-- behavior can be achieved by patterns like '**/*.c'.
--
-- To follow the assignment regardless, I implemented what I call an extended
-- globstar: If a pattern in any component of a file path contains '**' among
-- other characters, each '**' sequence is converted to a single '*'. This new
-- pattern is used for matching files and directories in a current directory. In
-- matching directories, the searching continues with the tail of the file path,
-- and in all directories (including the matching ones) the searching continues
-- with the whole file path.


{-- From examples/examples/ch08/Glob.hs modified according to the assignment --}
import Data.List (sort, group)

import Control.Monad (forM)
import GlobRegex (matchesGlob)

import System.Directory (doesDirectoryExist, doesFileExist,
                         getCurrentDirectory, getDirectoryContents)

import System.FilePath ((</>), splitPath)


find :: String -> IO [FilePath]
find path
  | not (isPattern path) = do
      exists <- doesNameExist path
      return (if exists then [path] else [])
  | otherwise = do
      (rootDir, rest) <- splitRootDir path
      found' <- findPattern rootDir "" rest
      -- Keep only one match of any single file.
      --
      -- In this implementation of the globstar matching, some matches can be
      -- present multiple times in the result. For example, for file C/B/A and
      -- pattern '**/A', the file can be found both as 'A' in directory C/B and
      -- '*/A' in directory C.
      let found = map head ((group.sort) found')
      return found

splitRootDir :: FilePath -> IO (FilePath, [FilePath])
splitRootDir path = do
  currentDir <- getCurrentDirectory
  let split = splitPath path
  let first = head split
  return (if first == "/"
          then (first, (tail split))
          else (currentDir, split))

-- Parameters:
--   rootDir      - the root directory where the searching started
--   matchingRoot - part of the search path/pattern that matches. The path for
--                  listing directory content is rootDir/matchingRoot.
--   path         - the remaining part of the search path/pattern to be matched.
findPattern :: FilePath -> FilePath -> [String] -> IO [FilePath]
findPattern rootDir matchingRoot [] = do
  return []
findPattern rootDir matchingRoot path = do
  let pat = head path
  let rest = tail path
  let isGlobstar = isPatternGlobstar pat
  let dirOnly = ((last pat) == '/')
  let hidden = ((head pat) == '.')
  let patToMatch = getPatternForMatching pat

  -- List regular files and directories separately for matching the current
  -- search path/pattern component
  files <- listDir (rootDir </> matchingRoot) doesFileExist hidden
  dirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist hidden

  -- For recursing in the case of globstar pattern, get all directories
  -- regardless of whether they are hidden or not
  globstarDirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist False
  let filesMatch = if not dirOnly
                   then filter (`matchesGlob` patToMatch) files
                   else []
  let dirsMatch = filter (`matchesGlob` patToMatch) dirs
  let allMatch = filesMatch ++ dirsMatch

  -- For globstar, search all directories again with the same 'path' regardless
  -- of whether they match or not
  globstarFound <- if isGlobstar
                   then do
                     namesRecursive <- forM globstarDirs $ \dir -> do
                       findPattern rootDir (matchingRoot </> dir) path
                     -- Globstar also means no directory. Search directly in the
                     -- matching root.
                     namesCurrent <- findPattern rootDir matchingRoot rest
                     return ((concat namesRecursive) ++ namesCurrent)
                   else return []
  found <- if null rest
         -- This is the last component of the search path. Return matches.
         then do
           return (map ((</>) matchingRoot) allMatch)
         -- Not the last component. Recurse into matching directories.
         else do
           names <- forM dirsMatch $ \dir -> do
             findPattern rootDir (matchingRoot </> dir) rest
           return (concat names)

  -- The overall result consists of the normal matches and the globstar
  -- recursive matches
  return (found ++ globstarFound)

listDir :: FilePath -> (FilePath -> IO Bool) -> Bool -> IO [FilePath]
listDir rootDir existsFunc hidden = do
  dirContents <- getDirectoryContents rootDir
  files <- forM dirContents $ \file -> do
    let path = (</>) rootDir file
    exists <- existsFunc path
    if exists
      then return file
      else return ""
  -- Keep only files of wanted type
  let filesWanted = filter (\f -> f /= "") files
  let filesHidden = if hidden
                    then filter (\f -> isHidden f && f /= "." && f /= "..") filesWanted
                    else filter (not . isHidden) filesWanted
  return filesHidden

getPatternForMatching :: String -> String
getPatternForMatching ('*':'*':rest)  = '*':(getPatternForMatching rest)
getPatternForMatching ('/':rest)      = getPatternForMatching rest
getPatternForMatching (p:rest)        = p:(getPatternForMatching rest)
getPatternForMatching  p              = p

isPattern :: String -> Bool
isPattern = any (`elem` "[*?")

isPatternGlobstar :: String -> Bool
isPatternGlobstar ('*':'*':_)  = True
isPatternGlobstar (_:rest)     = isPatternGlobstar rest
isPatternGlobstar _            = False

isHidden :: FilePath -> Bool
isHidden ('.':_) = True
isHidden _       = False

doesNameExist :: FilePath -> IO Bool
doesNameExist name = do
    fileExists <- doesFileExist name
    if fileExists
      then return True
      else doesDirectoryExist name
{-- End of code from examples --}


-- The testing directory structure is
--
-- test-8_b_3/
-- |-- A.c
-- |-- dir1
-- |   |-- .hiddenC.c
-- |   |-- .hiddenD.c
-- |   |-- .hidden_dirG_c
-- |   |   `-- A.c
-- |   |-- A.c
-- |   |-- B.c
-- |   |-- dirE_c
-- |   |   `-- file
-- |   `-- dirF_c
-- |       `-- file
-- `-- dir2
--     `-- dir1
--         `-- dir3
--             |-- .hiddenC.c
--             |-- .hiddenD.c
--             |-- .hidden_dirG_c
--             |   `-- A.c
--             |-- A.c
--             |-- B.c
--             |-- dirE_c
--             |   `-- file
--             `-- dirF_c
--                 `-- file
--
-- 10 directories, 15 files


-- ghci> :l 8_b_3.hs
-- [1 of 2] Compiling GlobRegex        ( GlobRegex.hs, interpreted )
-- [2 of 2] Compiling Main             ( 8_b_3.hs, interpreted )
-- Ok, two modules loaded.

-- ghci> find "test-8_b_3/dir1/nonexistent.c"
-- []

-- Not a pattern
-- ghci> find "test-8_b_3/dir1/A.c"
-- ["test-8_b_3/dir1/A.c"]


-- Normal pattern for files and directories
-- ghci> find "test-8_b_3/dir1/*c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]

-- Normal pattern for hidden files and directories
-- ghci> find "test-8_b_3/dir1/.*c"
-- ["test-8_b_3/dir1/.hiddenD.c","test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]

-- Normal pattern for directories only
-- ghci> find "test-8_b_3/dir1/*c/"
-- ["test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]


-- Globstar also means 'zero directories' (searching in current directory)
-- ghci> find "test-8_b_3/dir1/**/*.c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c"]

-- Globstar for files and directories
-- ghci> find "test-8_b_3/**c"
-- ["test-8_b_3/A.c","test-8_b_3/dir2/dir1/dir3/A.c","test-8_b_3/dir2/dir1/dir3/B.c", \
--  "test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c","test-8_b_3/dir1/A.c", \
--  "test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]

-- Globstar for hidden files and directories
-- ghci> find "test-8_b_3/.**c"
-- ["test-8_b_3/dir2/dir1/dir3/.hiddenD.c","test-8_b_3/dir2/dir1/dir3/.hiddenC.c", \
--  "test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c","test-8_b_3/dir1/.hiddenD.c", \
--  "test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]

-- Globstar for directories only
-- ghci> find "test-8_b_3/**c/"
-- ["test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c", \
--  "test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]

-- Globstar matching hidden directories in the path
-- Note that globstar also means 'zero directories'
-- ghci> find "test-8_b_3/.**/A.c"
-- ["test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c","test-8_b_3/dir2/dir1/dir3/A.c", \
--  "test-8_b_3/dir1/.hidden_dirG_c/A.c","test-8_b_3/dir1/A.c","test-8_b_3/A.c"]

-- Globstar in multiple occurrences
-- ghci> find "test-8_b_3/**/dir1/**/A.c"
-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir2/dir1/dir3/A.c"]

-- Globstar matching paths that are prefixes of other paths
-- ghci> find "test-8_b_3/**/dir1"
-- ["test-8_b_3/dir1","test-8_b_3/dir2/dir1"]