aboutsummaryrefslogtreecommitdiff
path: root/ch23/23_a_1/PodParser.hs
diff options
context:
space:
mode:
Diffstat (limited to 'ch23/23_a_1/PodParser.hs')
-rw-r--r--ch23/23_a_1/PodParser.hs106
1 files changed, 106 insertions, 0 deletions
diff --git a/ch23/23_a_1/PodParser.hs b/ch23/23_a_1/PodParser.hs
new file mode 100644
index 0000000..2ec1a17
--- /dev/null
+++ b/ch23/23_a_1/PodParser.hs
@@ -0,0 +1,106 @@
+{-- snippet all --}
+module PodParser where
+
+import PodTypes
+import Text.XML.HaXml
+import Text.XML.HaXml.Parse
+import Text.XML.HaXml.Html.Generate(showattr)
+import Data.Char
+import Data.List
+
+data Item = Item {itemtitle :: String,
+ enclosureurl :: String
+ }
+ deriving (Eq, Show, Read)
+
+data Feed = Feed {channeltitle :: String,
+ items :: [Item]}
+ deriving (Eq, Show, Read)
+
+{- | Given a podcast and an Item, produce an Episode -}
+item2ep :: Podcast -> Item -> Episode
+item2ep pc item =
+ Episode {epId = 0,
+ epCast = pc,
+ epURL = enclosureurl item,
+ epDone = False}
+
+{- | Parse the data from a given string, with the given name to use
+in error messages. -}
+parse :: String -> String -> Feed
+parse content name =
+ Feed {channeltitle = getTitle doc,
+ items = getEnclosures doc}
+
+ where parseResult = xmlParse name (stripUnicodeBOM content)
+ doc = getContent parseResult
+
+ getContent :: Document -> Content
+ getContent (Document _ _ e _) = CElem e
+
+ {- | Some Unicode documents begin with a binary sequence;
+ strip it off before processing. -}
+ stripUnicodeBOM :: String -> String
+ stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x
+ stripUnicodeBOM x = x
+
+{- | Pull out the channel part of the document.
+
+Note that HaXml defines CFilter as:
+
+> type CFilter = Content -> [Content]
+-}
+channel :: CFilter
+channel = tag "rss" /> tag "channel"
+
+getTitle :: Content -> String
+getTitle doc =
+ contentToStringDefault "Untitled Podcast"
+ (channel /> tag "title" /> txt $ doc)
+
+getEnclosures :: Content -> [Item]
+getEnclosures doc =
+ concatMap procItem $ getItems doc
+ where procItem :: Content -> [Item]
+ procItem item = concatMap (procEnclosure title) enclosure
+ where title = contentToStringDefault "Untitled Episode"
+ (keep /> tag "title" /> txt $ item)
+ enclosure = (keep /> tag "enclosure") item
+
+ getItems :: CFilter
+ getItems = channel /> tag "item"
+
+ procEnclosure :: String -> Content -> [Item]
+ procEnclosure title enclosure =
+ map makeItem (showattr "url" enclosure)
+ where makeItem :: Content -> Item
+ makeItem x = Item {itemtitle = title,
+ enclosureurl = contentToString [x]}
+
+{- | Convert [Content] to a printable String, with a default if the
+passed-in [Content] is [], signifying a lack of a match. -}
+contentToStringDefault :: String -> [Content] -> String
+contentToStringDefault msg [] = msg
+contentToStringDefault _ x = contentToString x
+
+{- | Convert [Content] to a printable string, taking care to unescape it.
+
+An implementation without unescaping would simply be:
+
+> contentToString = concatMap (show . content)
+
+Because HaXml's unescaping only works on Elements, we must make sure that
+whatever Content we have is wrapped in an Element, then use txt to
+pull the insides back out. -}
+contentToString :: [Content] -> String
+contentToString =
+ concatMap procContent
+ where procContent x =
+ verbatim $ keep /> txt $ CElem (unesc (fakeElem x))
+
+ fakeElem :: Content -> Element
+ fakeElem x = Elem "fake" [] [x]
+
+ unesc :: Element -> Element
+ unesc = xmlUnEscape stdXmlEscaper
+{-- /snippet all --}