{-- snippet all --} module PodParser where import PodTypes import Text.XML.HaXml import Text.XML.HaXml.Parse import Text.XML.HaXml.Html.Generate(showattr) import Data.Char import Data.List data Item = Item {itemtitle :: String, enclosureurl :: String } deriving (Eq, Show, Read) data Feed = Feed {channeltitle :: String, items :: [Item]} deriving (Eq, Show, Read) {- | Given a podcast and an Item, produce an Episode -} item2ep :: Podcast -> Item -> Episode item2ep pc item = Episode {epId = 0, epCast = pc, epURL = enclosureurl item, epDone = False} {- | Parse the data from a given string, with the given name to use in error messages. -} parse :: String -> String -> Feed parse content name = Feed {channeltitle = getTitle doc, items = getEnclosures doc} where parseResult = xmlParse name (stripUnicodeBOM content) doc = getContent parseResult getContent :: Document -> Content getContent (Document _ _ e _) = CElem e {- | Some Unicode documents begin with a binary sequence; strip it off before processing. -} stripUnicodeBOM :: String -> String stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x stripUnicodeBOM x = x {- | Pull out the channel part of the document. Note that HaXml defines CFilter as: > type CFilter = Content -> [Content] -} channel :: CFilter channel = tag "rss" /> tag "channel" getTitle :: Content -> String getTitle doc = contentToStringDefault "Untitled Podcast" (channel /> tag "title" /> txt $ doc) getEnclosures :: Content -> [Item] getEnclosures doc = concatMap procItem $ getItems doc where procItem :: Content -> [Item] procItem item = concatMap (procEnclosure title) enclosure where title = contentToStringDefault "Untitled Episode" (keep /> tag "title" /> txt $ item) enclosure = (keep /> tag "enclosure") item getItems :: CFilter getItems = channel /> tag "item" procEnclosure :: String -> Content -> [Item] procEnclosure title enclosure = map makeItem (showattr "url" enclosure) where makeItem :: Content -> Item makeItem x = Item {itemtitle = title, enclosureurl = contentToString [x]} {- | Convert [Content] to a printable String, with a default if the passed-in [Content] is [], signifying a lack of a match. -} contentToStringDefault :: String -> [Content] -> String contentToStringDefault msg [] = msg contentToStringDefault _ x = contentToString x {- | Convert [Content] to a printable string, taking care to unescape it. An implementation without unescaping would simply be: > contentToString = concatMap (show . content) Because HaXml's unescaping only works on Elements, we must make sure that whatever Content we have is wrapped in an Element, then use txt to pull the insides back out. -} contentToString :: [Content] -> String contentToString = concatMap procContent where procContent x = verbatim $ keep /> txt $ CElem (unesc (fakeElem x)) fakeElem :: Content -> Element fakeElem x = Elem "fake" [] [x] unesc :: Element -> Element unesc = xmlUnEscape stdXmlEscaper {-- /snippet all --}