From 305837683782841b0d93dabc61e7e6e19df220b4 Mon Sep 17 00:00:00 2001
From: Jan Sucan <jan@jansucan.com>
Date: Fri, 12 Sep 2025 09:10:37 +0200
Subject: 23_a_1: Copy code of ch23 from the examples

---
 ch23/23_a_1/PodParser.hs | 106 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 ch23/23_a_1/PodParser.hs

(limited to 'ch23/23_a_1/PodParser.hs')

diff --git a/ch23/23_a_1/PodParser.hs b/ch23/23_a_1/PodParser.hs
new file mode 100644
index 0000000..2ec1a17
--- /dev/null
+++ b/ch23/23_a_1/PodParser.hs
@@ -0,0 +1,106 @@
+{-- snippet all --}
+module PodParser where
+
+import PodTypes
+import Text.XML.HaXml
+import Text.XML.HaXml.Parse
+import Text.XML.HaXml.Html.Generate(showattr)
+import Data.Char
+import Data.List
+
+data Item = Item {itemtitle :: String,
+                  enclosureurl :: String
+                  }
+          deriving (Eq, Show, Read)
+
+data Feed = Feed {channeltitle :: String,
+                  items :: [Item]}
+            deriving (Eq, Show, Read)
+
+{- | Given a podcast and an Item, produce an Episode -}
+item2ep :: Podcast -> Item -> Episode
+item2ep pc item =
+    Episode {epId = 0,
+             epCast = pc,
+             epURL = enclosureurl item,
+             epDone = False}
+
+{- | Parse the data from a given string, with the given name to use
+in error messages. -}
+parse :: String -> String -> Feed
+parse content name = 
+    Feed {channeltitle = getTitle doc,
+          items = getEnclosures doc}
+
+    where parseResult = xmlParse name (stripUnicodeBOM content)
+          doc = getContent parseResult
+
+          getContent :: Document -> Content
+          getContent (Document _ _ e _) = CElem e
+          
+          {- | Some Unicode documents begin with a binary sequence;
+             strip it off before processing. -}
+          stripUnicodeBOM :: String -> String
+          stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x
+          stripUnicodeBOM x = x
+
+{- | Pull out the channel part of the document.
+
+Note that HaXml defines CFilter as:
+
+> type CFilter = Content -> [Content]
+-}
+channel :: CFilter
+channel = tag "rss" /> tag "channel"
+
+getTitle :: Content -> String
+getTitle doc =
+    contentToStringDefault "Untitled Podcast" 
+        (channel /> tag "title" /> txt $ doc)
+
+getEnclosures :: Content -> [Item]
+getEnclosures doc =
+    concatMap procItem $ getItems doc
+    where procItem :: Content -> [Item]
+          procItem item = concatMap (procEnclosure title) enclosure
+              where title = contentToStringDefault "Untitled Episode"
+                               (keep /> tag "title" /> txt $ item)
+                    enclosure = (keep /> tag "enclosure") item
+
+          getItems :: CFilter
+          getItems = channel /> tag "item"
+
+          procEnclosure :: String -> Content -> [Item]
+          procEnclosure title enclosure =
+              map makeItem (showattr "url" enclosure)
+              where makeItem :: Content -> Item
+                    makeItem x = Item {itemtitle = title,
+                                       enclosureurl = contentToString [x]}
+
+{- | Convert [Content] to a printable String, with a default if the 
+passed-in [Content] is [], signifying a lack of a match. -}
+contentToStringDefault :: String -> [Content] -> String
+contentToStringDefault msg [] = msg
+contentToStringDefault _ x = contentToString x
+
+{- | Convert [Content] to a printable string, taking care to unescape it.
+
+An implementation without unescaping would simply be:
+
+> contentToString = concatMap (show . content)
+
+Because HaXml's unescaping only works on Elements, we must make sure that
+whatever Content we have is wrapped in an Element, then use txt to
+pull the insides back out. -}
+contentToString :: [Content] -> String
+contentToString = 
+    concatMap procContent
+    where procContent x = 
+              verbatim $ keep /> txt $ CElem (unesc (fakeElem x))
+
+          fakeElem :: Content -> Element
+          fakeElem x = Elem "fake" [] [x]
+
+          unesc :: Element -> Element
+          unesc = xmlUnEscape stdXmlEscaper
+{-- /snippet all --}
-- 
cgit v1.2.3