aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--ch08/8_b_3.hs254
-rw-r--r--ch08/test-8_b_3/A.c0
-rw-r--r--ch08/test-8_b_3/dir1/.hiddenC.c0
-rw-r--r--ch08/test-8_b_3/dir1/.hiddenD.c0
-rw-r--r--ch08/test-8_b_3/dir1/.hidden_dirG_c/A.c0
-rw-r--r--ch08/test-8_b_3/dir1/A.c0
-rw-r--r--ch08/test-8_b_3/dir1/B.c0
-rw-r--r--ch08/test-8_b_3/dir1/dirE_c/file0
-rw-r--r--ch08/test-8_b_3/dir1/dirF_c/file0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/.hiddenC.c0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/.hiddenD.c0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/A.c0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/B.c0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/dirE_c/file0
-rw-r--r--ch08/test-8_b_3/dir2/dir1/dir3/dirF_c/file0
17 files changed, 255 insertions, 1 deletions
diff --git a/README.md b/README.md
index dccd064..ba989f5 100644
--- a/README.md
+++ b/README.md
@@ -115,7 +115,7 @@ of a group is in bold italics.
| 8_a_2 | yes | | |
| **_8_b_1_** | yes, in 8_a_2 | 210 | |
| 8_b_2 | yes | | |
-| 8_b_3 | | | |
+| 8_b_3 | yes | | |
| **_8_b_1_** | | 211 | |
| 8_b_2 | | | |
| **_8_d_1_** | | 212 | |
diff --git a/ch08/8_b_3.hs b/ch08/8_b_3.hs
new file mode 100644
index 0000000..8800638
--- /dev/null
+++ b/ch08/8_b_3.hs
@@ -0,0 +1,254 @@
+-- The * wild card only matches names within a single directory. Many shells
+-- have an extended wild card syntax, **, that matches names recursively in all
+-- directories. For example, **.c would mean “match a name ending in .c in this
+-- directory or any subdirectory at any depth”. Implement matching on **
+-- wildcards.
+
+-- The assignment seems incorrect. Or maybe it was correct at the time of
+-- writing the book. I used bash and Python's glob for checking this
+-- functionality (globstar). From bash manual page:
+--
+-- "..., two adjacent *s used as a single pattern will match all files and
+-- zero or more directories and subdirectories. If followed by a /, two
+-- adjacent *s will match only directories and subdirectories."
+--
+-- According to those, patterns like '**.c' don't match files recursively. That
+-- behavior can be achieved by patterns like '**/*.c'.
+--
+-- To follow the assignment regardless, I implemented what I call an extended
+-- globstar: If a pattern in any component of a file path contains '**' among
+-- other characters, each '**' sequence is converted to a single '*'. This new
+-- pattern is used for matching files and directories in a current directory. In
+-- matching directories, the searching continues with the tail of the file path,
+-- and in all directories (including the matching ones) the searching continues
+-- with the whole file path.
+
+
+{-- From examples/examples/ch08/Glob.hs modified according to the assignment --}
+import Data.List (sort, group)
+
+import Control.Monad (forM)
+import GlobRegex (matchesGlob)
+
+import System.Directory (doesDirectoryExist, doesFileExist,
+ getCurrentDirectory, getDirectoryContents)
+
+import System.FilePath ((</>), splitPath)
+
+
+find :: String -> IO [FilePath]
+find path
+ | not (isPattern path) = do
+ exists <- doesNameExist path
+ return (if exists then [path] else [])
+ | otherwise = do
+ (rootDir, rest) <- splitRootDir path
+ found' <- findPattern rootDir "" rest
+ -- Keep only one match of any single file.
+ --
+ -- In this implementation of the globstar matching, some matches can be
+ -- present multiple times in the result. For example, for file C/B/A and
+ -- pattern '**/A', the file can be found both as 'A' in directory C/B and
+ -- '*/A' in directory C.
+ let found = map head ((group.sort) found')
+ return found
+
+splitRootDir :: FilePath -> IO (FilePath, [FilePath])
+splitRootDir path = do
+ currentDir <- getCurrentDirectory
+ let split = splitPath path
+ let first = head split
+ return (if first == "/"
+ then (first, (tail split))
+ else (currentDir, split))
+
+-- Parameters:
+-- rootDir - the root directory where the searching started
+-- matchingRoot - part of the search path/pattern that matches. The path for
+-- listing directory content is rootDir/matchingRoot.
+-- path - the remaining part of the search path/pattern to be matched.
+findPattern :: FilePath -> FilePath -> [String] -> IO [FilePath]
+findPattern rootDir matchingRoot [] = do
+ return []
+findPattern rootDir matchingRoot path = do
+ let pat = head path
+ let rest = tail path
+ let isGlobstar = isPatternGlobstar pat
+ let dirOnly = ((last pat) == '/')
+ let hidden = ((head pat) == '.')
+ let patToMatch = getPatternForMatching pat
+
+ -- List regular files and directories separately for matching the current
+ -- search path/pattern component
+ files <- listDir (rootDir </> matchingRoot) doesFileExist hidden
+ dirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist hidden
+
+ -- For recursing in the case of globstar pattern, get all directories
+ -- regardless of whether they are hidden or not
+ globstarDirs <- listDir (rootDir </> matchingRoot) doesDirectoryExist False
+ let filesMatch = if not dirOnly
+ then filter (`matchesGlob` patToMatch) files
+ else []
+ let dirsMatch = filter (`matchesGlob` patToMatch) dirs
+ let allMatch = filesMatch ++ dirsMatch
+
+ -- For globstar, search all directories again with the same 'path' regardless
+ -- of whether they match or not
+ globstarFound <- if isGlobstar
+ then do
+ namesRecursive <- forM globstarDirs $ \dir -> do
+ findPattern rootDir (matchingRoot </> dir) path
+ -- Globstar also means no directory. Search directly in the
+ -- matching root.
+ namesCurrent <- findPattern rootDir matchingRoot rest
+ return ((concat namesRecursive) ++ namesCurrent)
+ else return []
+ found <- if null rest
+ -- This is the last component of the search path. Return matches.
+ then do
+ return (map ((</>) matchingRoot) allMatch)
+ -- Not the last component. Recurse into matching directories.
+ else do
+ names <- forM dirsMatch $ \dir -> do
+ findPattern rootDir (matchingRoot </> dir) rest
+ return (concat names)
+
+ -- The overall result consists of the normal matches and the globstar
+ -- recursive matches
+ return (found ++ globstarFound)
+
+listDir :: FilePath -> (FilePath -> IO Bool) -> Bool -> IO [FilePath]
+listDir rootDir existsFunc hidden = do
+ dirContents <- getDirectoryContents rootDir
+ files <- forM dirContents $ \file -> do
+ let path = (</>) rootDir file
+ exists <- existsFunc path
+ if exists
+ then return file
+ else return ""
+ -- Keep only files of wanted type
+ let filesWanted = filter (\f -> f /= "") files
+ let filesHidden = if hidden
+ then filter (\f -> isHidden f && f /= "." && f /= "..") filesWanted
+ else filter (not . isHidden) filesWanted
+ return filesHidden
+
+getPatternForMatching :: String -> String
+getPatternForMatching ('*':'*':rest) = '*':(getPatternForMatching rest)
+getPatternForMatching ('/':rest) = getPatternForMatching rest
+getPatternForMatching (p:rest) = p:(getPatternForMatching rest)
+getPatternForMatching p = p
+
+isPattern :: String -> Bool
+isPattern = any (`elem` "[*?")
+
+isPatternGlobstar :: String -> Bool
+isPatternGlobstar ('*':'*':_) = True
+isPatternGlobstar (_:rest) = isPatternGlobstar rest
+isPatternGlobstar _ = False
+
+isHidden :: FilePath -> Bool
+isHidden ('.':_) = True
+isHidden _ = False
+
+doesNameExist :: FilePath -> IO Bool
+doesNameExist name = do
+ fileExists <- doesFileExist name
+ if fileExists
+ then return True
+ else doesDirectoryExist name
+{-- End of code from examples --}
+
+
+-- The testing directory structure is
+--
+-- test-8_b_3/
+-- |-- A.c
+-- |-- dir1
+-- | |-- .hiddenC.c
+-- | |-- .hiddenD.c
+-- | |-- .hidden_dirG_c
+-- | | `-- A.c
+-- | |-- A.c
+-- | |-- B.c
+-- | |-- dirE_c
+-- | | `-- file
+-- | `-- dirF_c
+-- | `-- file
+-- `-- dir2
+-- `-- dir1
+-- `-- dir3
+-- |-- .hiddenC.c
+-- |-- .hiddenD.c
+-- |-- .hidden_dirG_c
+-- | `-- A.c
+-- |-- A.c
+-- |-- B.c
+-- |-- dirE_c
+-- | `-- file
+-- `-- dirF_c
+-- `-- file
+--
+-- 10 directories, 15 files
+
+
+-- ghci> :l 8_b_3.hs
+-- [1 of 2] Compiling GlobRegex ( GlobRegex.hs, interpreted )
+-- [2 of 2] Compiling Main ( 8_b_3.hs, interpreted )
+-- Ok, two modules loaded.
+
+-- ghci> find "test-8_b_3/dir1/nonexistent.c"
+-- []
+
+-- Not a pattern
+-- ghci> find "test-8_b_3/dir1/A.c"
+-- ["test-8_b_3/dir1/A.c"]
+
+
+-- Normal pattern for files and directories
+-- ghci> find "test-8_b_3/dir1/*c"
+-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
+
+-- Normal pattern for hidden files and directories
+-- ghci> find "test-8_b_3/dir1/.*c"
+-- ["test-8_b_3/dir1/.hiddenD.c","test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]
+
+-- Normal pattern for directories only
+-- ghci> find "test-8_b_3/dir1/*c/"
+-- ["test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
+
+
+-- Globstar also means 'zero directories' (searching in current directory)
+-- ghci> find "test-8_b_3/dir1/**/*.c"
+-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir1/B.c"]
+
+-- Globstar for files and directories
+-- ghci> find "test-8_b_3/**c"
+-- ["test-8_b_3/A.c","test-8_b_3/dir2/dir1/dir3/A.c","test-8_b_3/dir2/dir1/dir3/B.c", \
+-- "test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c","test-8_b_3/dir1/A.c", \
+-- "test-8_b_3/dir1/B.c","test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
+
+-- Globstar for hidden files and directories
+-- ghci> find "test-8_b_3/.**c"
+-- ["test-8_b_3/dir2/dir1/dir3/.hiddenD.c","test-8_b_3/dir2/dir1/dir3/.hiddenC.c", \
+-- "test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c","test-8_b_3/dir1/.hiddenD.c", \
+-- "test-8_b_3/dir1/.hiddenC.c","test-8_b_3/dir1/.hidden_dirG_c"]
+
+-- Globstar for directories only
+-- ghci> find "test-8_b_3/**c/"
+-- ["test-8_b_3/dir2/dir1/dir3/dirE_c","test-8_b_3/dir2/dir1/dir3/dirF_c", \
+-- "test-8_b_3/dir1/dirE_c","test-8_b_3/dir1/dirF_c"]
+
+-- Globstar matching hidden directories in the path
+-- Note that globstar also means 'zero directories'
+-- ghci> find "test-8_b_3/.**/A.c"
+-- ["test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c","test-8_b_3/dir2/dir1/dir3/A.c", \
+-- "test-8_b_3/dir1/.hidden_dirG_c/A.c","test-8_b_3/dir1/A.c","test-8_b_3/A.c"]
+
+-- Globstar in multiple occurrences
+-- ghci> find "test-8_b_3/**/dir1/**/A.c"
+-- ["test-8_b_3/dir1/A.c","test-8_b_3/dir2/dir1/dir3/A.c"]
+
+-- Globstar matching paths that are prefixes of other paths
+-- ghci> find "test-8_b_3/**/dir1"
+-- ["test-8_b_3/dir1","test-8_b_3/dir2/dir1"]
diff --git a/ch08/test-8_b_3/A.c b/ch08/test-8_b_3/A.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/A.c
diff --git a/ch08/test-8_b_3/dir1/.hiddenC.c b/ch08/test-8_b_3/dir1/.hiddenC.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/.hiddenC.c
diff --git a/ch08/test-8_b_3/dir1/.hiddenD.c b/ch08/test-8_b_3/dir1/.hiddenD.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/.hiddenD.c
diff --git a/ch08/test-8_b_3/dir1/.hidden_dirG_c/A.c b/ch08/test-8_b_3/dir1/.hidden_dirG_c/A.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/.hidden_dirG_c/A.c
diff --git a/ch08/test-8_b_3/dir1/A.c b/ch08/test-8_b_3/dir1/A.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/A.c
diff --git a/ch08/test-8_b_3/dir1/B.c b/ch08/test-8_b_3/dir1/B.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/B.c
diff --git a/ch08/test-8_b_3/dir1/dirE_c/file b/ch08/test-8_b_3/dir1/dirE_c/file
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/dirE_c/file
diff --git a/ch08/test-8_b_3/dir1/dirF_c/file b/ch08/test-8_b_3/dir1/dirF_c/file
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir1/dirF_c/file
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenC.c b/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenC.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenC.c
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenD.c b/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenD.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/.hiddenD.c
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c b/ch08/test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/.hidden_dirG_c/A.c
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/A.c b/ch08/test-8_b_3/dir2/dir1/dir3/A.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/A.c
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/B.c b/ch08/test-8_b_3/dir2/dir1/dir3/B.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/B.c
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/dirE_c/file b/ch08/test-8_b_3/dir2/dir1/dir3/dirE_c/file
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/dirE_c/file
diff --git a/ch08/test-8_b_3/dir2/dir1/dir3/dirF_c/file b/ch08/test-8_b_3/dir2/dir1/dir3/dirF_c/file
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ch08/test-8_b_3/dir2/dir1/dir3/dirF_c/file