aboutsummaryrefslogtreecommitdiff
path: root/ch05
diff options
context:
space:
mode:
Diffstat (limited to 'ch05')
-rw-r--r--ch05/5_a_1.hs317
-rw-r--r--ch05/Prettify.hs2
2 files changed, 318 insertions, 1 deletions
diff --git a/ch05/5_a_1.hs b/ch05/5_a_1.hs
new file mode 100644
index 0000000..b1793c3
--- /dev/null
+++ b/ch05/5_a_1.hs
@@ -0,0 +1,317 @@
+-- 1. Our current pretty printer is spartan so that it will fit within our space
+-- constraints, but there are a number of useful improvements we can make.
+--
+-- Write a function, fill, with the following type signature:
+--
+-- -- file: ch05/Prettify.hs
+-- fill :: Int -> Doc -> Doc
+--
+-- It should add spaces to a document until it is the given number of columns
+-- wide. If it is already wider than this value, it should add no spaces.
+--
+-- 2. Our pretty printer does not take nesting into account. Whenever we open
+-- parentheses, braces, or brackets, any lines that follow should be indented
+-- so that they are aligned with the opening character until a matching
+-- closing character is encountered.
+--
+-- Add support for nesting, with a controllable amount of indentation.
+--
+-- -- file: ch05/Prettify.hs
+-- fill :: Int -> Doc -> Doc
+
+-- The name of the function 'fill' in the exercise 2. is wrong. Example file
+-- Prettify.hs contains a placeholder function for solution of that exercise
+-- with the correct name, which is 'nest'.
+
+-- These exercises are very unclear. To me, this is caused by the intermediate
+-- Doc type. It introduces a non-trivial concept into the topic of
+-- pretty-printing.
+--
+-- Reading the first pages of
+-- http://belle.sourceforge.net/doc/hughes95design.pdf I found in a comment for
+-- the second exercise in the on-line version of the book helped me to get a
+-- little bit better idea about motivation for the Doc type.
+--
+-- Page 126 of the book says: "How can we do this if our Doc type doesn't
+-- contain any information about rendering?" The interface of the two functions
+-- in this exercise uses only the Doc type, but the assignment talks about
+-- width. Width is a property of a stage of printing after the Doc type, the one
+-- that converts Doc into String. That stage has a lot of freedom in how to
+-- format the output and this formatting information is not available to the Doc
+-- stage, so it doesn't make sense to talk about the overall width in the
+-- context of Doc type. This means that it is not possible to follow the
+-- assignments exactly as they are.
+
+-- To make the implementation more simple, I put some restrictions on the input
+-- and output. It is already more complicated than what could be expected from a
+-- beginner reader.
+
+-- For being able to implement the solution in a separate file, value
+-- constructors of the Doc type have been exported too so they can be use in
+-- pattern matching.
+
+
+-- For the first exercise, I decided to affect the formatting on a more general
+-- level. Instead of trying to achieve a specific width of the whole document,
+-- which can be changed in an unforeseen way when converting to a string, I
+-- decided to fill the document in a way so that the first character of every
+-- line starts at a specified column in every representation converted into
+-- String. It means, to pad the document from the left by spaces. If the padding
+-- is already wider, it's not shortened.
+--
+-- For simplicity, the implementation works according to the
+-- modified/complemented assignment only when all Lines are used in the
+-- pretty-printed output (i.e., for small requested width). To fix this, the
+-- padding would have to be added the Line elements, converting them to '(Concat
+-- Line (Text "<padding>"))', so it's used only when a line break is used.
+
+import Prettify
+import PrettyJSON
+import SimpleJSON
+
+-- A Doc document is a tree structure. For talking about a visual representation
+-- of the document it represents, we need to think about it in a serialized
+-- way. This serialization is what the pretty printing functions do; they
+-- extract data from the tree in a specific traversal of it.
+--
+-- To split this serialization from the other modifications of the document, it
+-- is serialized into a list of document marks. These marks are then used for
+-- reconstructing a Doc document.
+
+data DocMark = MEmpty
+ | MChar Char
+ | MText String
+ | MLine
+ | MConcat
+ | MUnion
+ deriving (Show,Eq)
+
+serialize :: Doc -> [DocMark]
+serialize Empty = [MEmpty]
+serialize (Char c) = [MChar c]
+serialize (Text s) = [MText s]
+serialize Line = [MLine]
+serialize (Concat a b) = [MConcat] ++ (serialize a) ++ (serialize b)
+serialize (Union a b) = [MUnion] ++ (serialize a) ++ (serialize b)
+
+deserialize :: [DocMark] -> Doc
+deserialize ms = snd (deserialize' ms)
+ where deserialize' :: [DocMark] -> ([DocMark], Doc)
+ deserialize' (MEmpty:ms) = (ms, Empty)
+ deserialize' ((MChar c):ms) = (ms, (Char c))
+ deserialize' ((MText s):ms) = (ms, (Text s))
+ deserialize' (MLine:ms) = (ms, Line)
+ -- The Concat and Union nodes have two subtrees. When reconstructing the
+ -- subtrees from the serialized form, it needs to be done sequentially
+ -- because the start of the right subtree marks depends on the left
+ -- subtree. When the left subtree is reconstructed first and its marks
+ -- removed from the list, the beginning of the list is where the right
+ -- subtree starts.
+ deserialize' (m:ms) = ms2 `seq` (ms3, doc)
+ where (ms2, a) = deserialize' ms
+ (ms3, b) = deserialize' ms2
+ doc = if m == MConcat
+ then (Concat a b)
+ else (Union a b)
+
+paddingLength :: String -> Int
+paddingLength s = length (takeWhile isOnlySpace s)
+ where isOnlySpace c = (c == ' ')
+
+continueMeasuring :: String -> Bool
+continueMeasuring s = (paddingLength s) == (length s)
+
+makePadding :: Int -> String
+makePadding width = take width (repeat '.') -- To make the added padding more
+ -- visible in the example output,
+ -- dots are used instead of spaces.
+
+getStringFromMark :: DocMark -> String
+getStringFromMark (MChar c) = [c]
+getStringFromMark (MText s) = s
+getStringFromMark _ = error "This mark does not contain any characters"
+
+fill :: Int -> Doc -> Doc
+fill width doc = deserialize (fill' width (serialize doc) True 0)
+
+fill' :: Int -> [DocMark] -> Bool -> Int -> [DocMark]
+fill' width ms@((MChar c):_) measure existingPadding = fill'pad width ms measure existingPadding
+fill' width ms@((MText s):_) measure existingPadding = fill'pad width ms measure existingPadding
+-- The Line resets measuring of an existing padding
+fill' width (MLine:ms) measure existingPadding = MLine:(fill' width ms True 0)
+--No action needed for MEmpty, MConcat, and MUnion
+fill' width (m:ms) measure existingPadding = m:(fill' width ms measure existingPadding)
+fill' _ [] _ _ = []
+
+fill'pad :: Int -> [DocMark] -> Bool -> Int -> [DocMark]
+fill'pad width (m:ms) measure existingPadding = paddingMarks ++ (m:(fill' width ms continueNext padding))
+ where str = getStringFromMark m
+ continue = continueMeasuring str -- If the string is not empty, no need to continue
+ -- measuring the leading spaces on the line
+ padding = existingPadding + paddingLength str -- Existing padding on the line
+ paddingMarks = if measure && (not continue) && (padding < width)
+ then MConcat:[MText (makePadding (width - padding))]
+ else []
+ continueNext = measure && continue -- Measure only the leftmost padding. No need to
+ -- measure the other sequences of space on the line
+
+-- Test input for the first exercise
+--
+-- The output of 'renderJValue (JArray [(JNumber 1), (JBool True), (JString "hello")])' was manually
+-- modified to contain paddings of different lengths. The pretty-printed document look like
+--
+-- [1.0,
+-- true,
+--"hello"
+-- ]
+testFill = Concat (Concat (Concat (Char ' ') (Char '['))
+ (Concat (Concat (Concat (Text "1.0") (Char ','))
+ (Union (Char ' ') Line))
+ (Concat (Concat (Concat (Concat (Text " ") (Text "true")) (Char ','))
+ (Union (Char ' ') Line))
+ (Concat (Concat (Concat (Char '"') (Concat (Char 'h') (Concat (Char 'e')
+ (Concat (Char 'l') (Concat (Char 'l') (Char 'o')))))) (Char '"'))
+ (Union (Char ' ') Line)))))
+ (Concat (Text " ") (Char ']'))
+
+-- ghci> :l 5_a_1.hs
+-- [1 of 4] Compiling Prettify ( Prettify.hs, interpreted )
+-- [2 of 4] Compiling SimpleJSON ( SimpleJSON.hs, interpreted )
+-- [3 of 4] Compiling PrettyJSON ( PrettyJSON.hs, interpreted )
+-- [4 of 4] Compiling Main ( 5_a_1.hs, interpreted )
+-- Ok, four modules loaded.
+--
+-- ghci> putStrLn ( pretty 1 ( fill 0 testFill ))
+-- [1.0,
+-- true,
+-- "hello"
+-- ]
+-- ghci> putStrLn ( pretty 1 ( fill 1 testFill ))
+-- [1.0,
+-- true,
+-- ."hello"
+-- ]
+-- ghci> putStrLn ( pretty 1 ( fill 2 testFill ))
+-- .[1.0,
+-- true,
+-- .."hello"
+-- ]
+-- ghci> putStrLn ( pretty 1 ( fill 5 testFill ))
+-- ....[1.0,
+-- true,
+-- ....."hello"
+-- .]
+-- ghci> putStrLn ( pretty 1 ( fill 9 testFill ))
+-- ........[1.0,
+-- .true,
+-- ........."hello"
+-- .....]
+
+
+-- For the second exercise, it is based on the solution of the first one. The
+-- definition of "align" from the Oxford Learner's Dictionaries is:
+--
+-- "align (something) (with something) to arrange something in the correct
+-- position, or to be in the correct position, in relation to something else,
+-- especially in a straight line"
+--
+-- This implementation aligns content of JSON objects and arrays in relation to
+-- opening braces and brackets, not in a straight line. Aligning in a straight
+-- line would require information from the later stage and this is too much out
+-- of the Doc context.
+--
+-- For simplicity, let's assume that '[', ']', '{', and '}' are used only to
+-- mark an array or an object and they are not included in other strings.
+
+-- The implementation is modified copy-pasted first exercise. To keep things
+-- simpler for the purpose of explaining, I didn't try to deduplicate code of
+-- the two implementations.
+
+nest :: Int -> Doc -> Doc
+nest width doc = deserialize (nest' 0 width (serialize doc) True 0)
+
+nest' :: Int -> Int -> [DocMark] -> Bool -> Int -> [DocMark]
+nest' width unit ms@((MChar c):_) measure existingPadding = nest'pad width unit ms measure existingPadding
+nest' width unit ms@((MText s):_) measure existingPadding = nest'pad width unit ms measure existingPadding
+nest' width unit (MLine:ms) measure existingPadding = MLine:(nest' width unit ms True 0)
+nest' width unit (m:ms) measure existingPadding = m:(nest' width unit ms measure existingPadding)
+nest' _ _ [] _ _ = []
+
+nest'width :: Int -> Int -> String -> (Int, Int)
+nest'width width unit str = if (str == "[") || (str == "{")
+ then (width, (width + unit)) -- Keep the expected padding width for the current
+ -- line, increase for the subsequent lines
+ else if (str == "]") || (str == "}")
+ then (width - unit, width - unit) -- Indent the ending bracket or brace width
+ -- the same width as the opening one
+ else (width, width) -- No change in indentation
+
+nest'pad :: Int -> Int -> [DocMark] -> Bool -> Int -> [DocMark]
+nest'pad width unit (m:ms) measure existingPadding =
+ paddingMarks ++ (m:(nest' nextWidth unit ms continueNext padding))
+ where
+ str = getStringFromMark m
+ (currentWidth, nextWidth) = nest'width width unit str
+ continue = continueMeasuring str
+ padding = existingPadding + paddingLength str
+ paddingMarks = if measure && (not continue) && (padding < currentWidth)
+ then MConcat:[MText (makePadding (currentWidth - padding))]
+ else []
+ continueNext = measure && continue
+
+
+testNext = renderJValue (JObject [("a", JNumber 1),
+ ("bc", JBool True),
+ ("defg", JArray [JNumber 1,
+ (JObject [("h", JBool False),
+ ("i", JNumber 3)
+ ]),
+ JString "hello"
+ ]),
+ ("z", JNumber 6)
+ ])
+
+-- ghci> putStrLn ( pretty 1 ( nest 0 testNext ))
+-- {"a": 1.0,
+-- "bc": true,
+-- "defg": [1.0,
+-- {"h": false,
+-- "i": 3.0
+-- },
+-- "hello"
+-- ],
+-- "z": 6.0
+-- }
+-- ghci> putStrLn ( pretty 1 ( nest 1 testNext ))
+-- {"a": 1.0,
+-- ."bc": true,
+-- ."defg": [1.0,
+-- ..{"h": false,
+-- ..."i": 3.0
+-- ..},
+-- .."hello"
+-- .],
+-- ."z": 6.0
+-- }
+-- ghci> putStrLn ( pretty 1 ( nest 4 testNext ))
+-- {"a": 1.0,
+-- ...."bc": true,
+-- ...."defg": [1.0,
+-- ........{"h": false,
+-- ............"i": 3.0
+-- ........},
+-- ........"hello"
+-- ....],
+-- ...."z": 6.0
+-- }
+-- ghci> putStrLn ( pretty 1 ( nest 7 testNext ))
+-- {"a": 1.0,
+-- ......."bc": true,
+-- ......."defg": [1.0,
+-- ..............{"h": false,
+-- ....................."i": 3.0
+-- ..............},
+-- .............."hello"
+-- .......],
+-- ......."z": 6.0
+-- }
diff --git a/ch05/Prettify.hs b/ch05/Prettify.hs
index c33a4ae..415d939 100644
--- a/ch05/Prettify.hs
+++ b/ch05/Prettify.hs
@@ -1,7 +1,7 @@
module Prettify
(
-- * Constructors
- Doc
+ Doc(..)
-- * Basic combinators
, (Prettify.<>)
, empty