aboutsummaryrefslogtreecommitdiff
path: root/ch16/16_a_2.hs
blob: ede341e16e317f5fa1f205a655857f9347191133 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
-- A popular denial-of-service attack against naive web servers is simply to
-- send unreasonably long headers. A single header might contain 10s or 100s of
-- megabytes of garbage text, causing a server to run out of memory.
-- Restructure the header parser so that it will fail if any line is longer than
-- 4096 characters. It must fail immediately when this occurs; it cannot wait
-- until the end of a line eventually shows up.


{-- From examples/examples/ch16/HttpRequestParser.hs and modified --}
import Control.Applicative
import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))
import Control.Monad (liftM4)
import Data.List

maxHeaderLineLength = 4096

checkLineLength :: CharParser st ()
checkLineLength = do
  i <- getInput
  if isTooLong i
    then do
      -- It is needed to consume something from the input for the p_headers
      -- parser to fail immediately. If nothing is consumed, 'manyTill' still
      -- tries its end parser (the 'crlf').
      _ <- anyChar
      -- Set source column to make the error message more helpful, for example
      -- 'Left (line 2, column 4097)')
      pos <- getPosition
      setPosition (setSourceColumn pos (maxHeaderLineLength + 1))
      fail "Header line too long"
    else return ()
  where
    isTooLong l =  lineLen l > maxHeaderLineLength
      where
        lineLen l = length $ takeWhileMax isNotLineEnd l (maxHeaderLineLength + 1)
        takeWhileMax p xs max = take max $ takeWhile p xs
        isNotLineEnd c = (c /= '\n' && c /= '\r')

p_headers :: CharParser st [(String, String)]
p_headers = header `manyTill` crlf
  where header = checkLineLength *> (liftA2 (,) fieldName (char ':' *> spaces *> contents))
        contents = liftA2 (++) (many1 notEOL <* crlf)
                               (checkLineLength *> (continuation <|> pure []))
        continuation = liftA2 (:) (' ' <$ many1 (oneOf " \t")) contents
        fieldName = (:) <$> letter <*> many fieldChar
        fieldChar = letter <|> digit <|> oneOf "-_"

crlf :: CharParser st ()
crlf = (() <$ string "\r\n") <|> (() <$ newline)

notEOL :: CharParser st Char
notEOL = noneOf "\r\n"

data Method = Get | Post
          deriving (Eq, Ord, Show)

data HttpRequest = HttpRequest {
      reqMethod :: Method
    , reqURL :: String
    , reqHeaders :: [(String, String)]
    , reqBody :: Maybe String
    } deriving (Eq, Show)

p_request :: CharParser () HttpRequest
p_request = q "GET" Get (pure Nothing)
        <|> q "POST" Post (Just <$> many anyChar)
  where q name ctor body = liftM4 HttpRequest req url p_headers body
            where req = ctor <$ string name <* char ' '
                  url = optional (char '/') *>
                        manyTill notEOL (try $ string " HTTP/1." <* oneOf "01")
                        <* crlf
{-- End of code from examples --}


testReqInfinitelyLongLine :: String
testReqInfinitelyLongLine = intercalate "\r\n" [
  "POST /index.html HTTP/1.1",
  "Host: book.realworldhaskell.org",
  "User-Agent: " ++ repeat 'u',
  "Accept: text/html",
  "",
  "Hello World!"
  ] ++ "\r\n"

testReq :: Int -> String
testReq len = intercalate "\r\n" [
  "POST /index.html HTTP/1.1",
  "Host: book.realworldhaskell.org",
  "User-Agent: " ++ take (len - 12) (repeat 'u'),
  "Accept: text/html",
  "",
  "Hello World!"
  ] ++ "\r\n"

testReqContinuation :: Int -> String
testReqContinuation len = intercalate "\r\n" [
  "POST /index.html HTTP/1.1",
  "Host: book.realworldhaskell.org",
  "User-Agent: Mozilla/5.0",
  "  continuation of user agent",
  " " ++ take (len - 1) (repeat 'u'),
  "Accept: text/html",
  "",
  "Hello World!"
  ] ++ "\r\n"


-- For brevity, longs strings are shortened by '...'

-- ghci> :l 16_a_2.hs
-- [1 of 2] Compiling Main             ( 16_a_2.hs, interpreted )
-- Ok, one module loaded.

-- ghci> runParser p_request () "" testReqInfinitelyLongLine
-- Left (line 3, column 4097):
-- Header line too long

-- ghci> runParser p_request () "" (testReq 20)
-- Right (HttpRequest {reqMethod = Post, reqURL = "index.html", reqHeaders = [("Host","book.realworldhaskell.org"),("User-Agent","uuuuuuuu"),("Accept","text/html")], reqBody = Just "Hello World!\r\n"})

-- ghci> runParser p_request () "" (testReq 4096)
-- Right (HttpRequest {reqMethod = Post, reqURL = "index.html", reqHeaders = [("Host","book.realworldhaskell.org"),("User-Agent","uuu...uuuuuuu"),("Accept","text/html")], reqBody = Just "Hello World!\r\n"})

-- ghci> runParser p_request () "" (testReq 4097)
-- Left (line 3, column 4097):
-- Header line too long

-- ghci> runParser p_request () "" (testReqContinuation 20)
-- Right (HttpRequest {reqMethod = Post, reqURL = "index.html", reqHeaders = [("Host","book.realworldhaskell.org"),("User-Agent","Mozilla/5.0 continuation of user agent uuuuuuuuuuuuuuuuuuu"),("Accept","text/html")], reqBody = Just "Hello World!\r\n"})

-- ghci> runParser p_request () "" (testReqContinuation 4096)
-- Right (HttpRequest {reqMethod = Post, reqURL = "index.html", reqHeaders = [("Host","book.realworldhaskell.org"),("User-Agent","Mozilla/5.0 continuation of user agent uuuuuu...uuuuuu"),("Accept","text/html")], reqBody = Just "Hello World!\r\n"})

-- ghci> runParser p_request () "" (testReqContinuation 4097)
-- Left (line 5, column 4097):
-- Header line too long