From 305837683782841b0d93dabc61e7e6e19df220b4 Mon Sep 17 00:00:00 2001 From: Jan Sucan Date: Fri, 12 Sep 2025 09:10:37 +0200 Subject: 23_a_1: Copy code of ch23 from the examples --- ch23/23_a_1/PodParser.hs | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 ch23/23_a_1/PodParser.hs (limited to 'ch23/23_a_1/PodParser.hs') diff --git a/ch23/23_a_1/PodParser.hs b/ch23/23_a_1/PodParser.hs new file mode 100644 index 0000000..2ec1a17 --- /dev/null +++ b/ch23/23_a_1/PodParser.hs @@ -0,0 +1,106 @@ +{-- snippet all --} +module PodParser where + +import PodTypes +import Text.XML.HaXml +import Text.XML.HaXml.Parse +import Text.XML.HaXml.Html.Generate(showattr) +import Data.Char +import Data.List + +data Item = Item {itemtitle :: String, + enclosureurl :: String + } + deriving (Eq, Show, Read) + +data Feed = Feed {channeltitle :: String, + items :: [Item]} + deriving (Eq, Show, Read) + +{- | Given a podcast and an Item, produce an Episode -} +item2ep :: Podcast -> Item -> Episode +item2ep pc item = + Episode {epId = 0, + epCast = pc, + epURL = enclosureurl item, + epDone = False} + +{- | Parse the data from a given string, with the given name to use +in error messages. -} +parse :: String -> String -> Feed +parse content name = + Feed {channeltitle = getTitle doc, + items = getEnclosures doc} + + where parseResult = xmlParse name (stripUnicodeBOM content) + doc = getContent parseResult + + getContent :: Document -> Content + getContent (Document _ _ e _) = CElem e + + {- | Some Unicode documents begin with a binary sequence; + strip it off before processing. -} + stripUnicodeBOM :: String -> String + stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x + stripUnicodeBOM x = x + +{- | Pull out the channel part of the document. + +Note that HaXml defines CFilter as: + +> type CFilter = Content -> [Content] +-} +channel :: CFilter +channel = tag "rss" /> tag "channel" + +getTitle :: Content -> String +getTitle doc = + contentToStringDefault "Untitled Podcast" + (channel /> tag "title" /> txt $ doc) + +getEnclosures :: Content -> [Item] +getEnclosures doc = + concatMap procItem $ getItems doc + where procItem :: Content -> [Item] + procItem item = concatMap (procEnclosure title) enclosure + where title = contentToStringDefault "Untitled Episode" + (keep /> tag "title" /> txt $ item) + enclosure = (keep /> tag "enclosure") item + + getItems :: CFilter + getItems = channel /> tag "item" + + procEnclosure :: String -> Content -> [Item] + procEnclosure title enclosure = + map makeItem (showattr "url" enclosure) + where makeItem :: Content -> Item + makeItem x = Item {itemtitle = title, + enclosureurl = contentToString [x]} + +{- | Convert [Content] to a printable String, with a default if the +passed-in [Content] is [], signifying a lack of a match. -} +contentToStringDefault :: String -> [Content] -> String +contentToStringDefault msg [] = msg +contentToStringDefault _ x = contentToString x + +{- | Convert [Content] to a printable string, taking care to unescape it. + +An implementation without unescaping would simply be: + +> contentToString = concatMap (show . content) + +Because HaXml's unescaping only works on Elements, we must make sure that +whatever Content we have is wrapped in an Element, then use txt to +pull the insides back out. -} +contentToString :: [Content] -> String +contentToString = + concatMap procContent + where procContent x = + verbatim $ keep /> txt $ CElem (unesc (fakeElem x)) + + fakeElem :: Content -> Element + fakeElem x = Elem "fake" [] [x] + + unesc :: Element -> Element + unesc = xmlUnEscape stdXmlEscaper +{-- /snippet all --} -- cgit v1.2.3