1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
{-- snippet all --}
module PodParser where
import PodTypes
import Text.XML.HaXml
import Text.XML.HaXml.Parse
import Text.XML.HaXml.Html.Generate(showattr)
import Data.Char
import Data.List
data Item = Item {itemtitle :: String,
enclosureurl :: String
}
deriving (Eq, Show, Read)
data Feed = Feed {channeltitle :: String,
items :: [Item]}
deriving (Eq, Show, Read)
{- | Given a podcast and an Item, produce an Episode -}
item2ep :: Podcast -> Item -> Episode
item2ep pc item =
Episode {epId = 0,
epCast = pc,
epURL = enclosureurl item,
epDone = False}
{- | Parse the data from a given string, with the given name to use
in error messages. -}
parse :: String -> String -> Feed
parse content name =
Feed {channeltitle = getTitle doc,
items = getEnclosures doc}
where parseResult = xmlParse name (stripUnicodeBOM content)
doc = getContent parseResult
getContent :: Document -> Content
getContent (Document _ _ e _) = CElem e
{- | Some Unicode documents begin with a binary sequence;
strip it off before processing. -}
stripUnicodeBOM :: String -> String
stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x
stripUnicodeBOM x = x
{- | Pull out the channel part of the document.
Note that HaXml defines CFilter as:
> type CFilter = Content -> [Content]
-}
channel :: CFilter
channel = tag "rss" /> tag "channel"
getTitle :: Content -> String
getTitle doc =
contentToStringDefault "Untitled Podcast"
(channel /> tag "title" /> txt $ doc)
getEnclosures :: Content -> [Item]
getEnclosures doc =
concatMap procItem $ getItems doc
where procItem :: Content -> [Item]
procItem item = concatMap (procEnclosure title) enclosure
where title = contentToStringDefault "Untitled Episode"
(keep /> tag "title" /> txt $ item)
enclosure = (keep /> tag "enclosure") item
getItems :: CFilter
getItems = channel /> tag "item"
procEnclosure :: String -> Content -> [Item]
procEnclosure title enclosure =
map makeItem (showattr "url" enclosure)
where makeItem :: Content -> Item
makeItem x = Item {itemtitle = title,
enclosureurl = contentToString [x]}
{- | Convert [Content] to a printable String, with a default if the
passed-in [Content] is [], signifying a lack of a match. -}
contentToStringDefault :: String -> [Content] -> String
contentToStringDefault msg [] = msg
contentToStringDefault _ x = contentToString x
{- | Convert [Content] to a printable string, taking care to unescape it.
An implementation without unescaping would simply be:
> contentToString = concatMap (show . content)
Because HaXml's unescaping only works on Elements, we must make sure that
whatever Content we have is wrapped in an Element, then use txt to
pull the insides back out. -}
contentToString :: [Content] -> String
contentToString =
concatMap procContent
where procContent x =
verbatim $ keep /> txt $ CElem (unesc (fakeElem x))
fakeElem :: Content -> Element
fakeElem x = Elem "fake" [] [x]
unesc :: Element -> Element
unesc = xmlUnEscape stdXmlEscaper
{-- /snippet all --}
|