{-# LANGUAGE CPP #-}

-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Parser.TagSoup
   Copyright  : Copyright (C) 2005-2015 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   lazy HTML and simpe XML parser implemented with tagsoup
   parsing is done with a very simple monadic top down parser

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Parser.TagSoup
    ( parseHtmlTagSoup
    )
where

-- ------------------------------------------------------------

#if MIN_VERSION_base(4,8,0)
#else
import           Control.Applicative               (Applicative (..))
#endif

import           Control.Monad                     (ap, liftM)

import           Data.Char                         (toLower)
import           Data.Char.Properties.XMLCharProps (isXmlSpaceChar)
import           Data.Maybe

import           Text.HTML.TagSoup
import           Text.HTML.TagSoup.Entity          (lookupNumericEntity)
import           Text.XML.HXT.DOM.Interface        (NsEnv, QName, XmlTrees,
                                                    a_xml, a_xmlns, c_warn,
                                                    isWellformedQualifiedName,
                                                    mkName, newQName, newXName,
                                                    nullXName, toNsEnv,
                                                    xmlNamespace,
                                                    xmlnsNamespace)
import           Text.XML.HXT.DOM.XmlNode          (isElem, mkAttr', mkCmt',
                                                    mkElement, mkError',
                                                    mkText')
import           Text.XML.HXT.Parser.HtmlParsec    (closesHtmlTag,
                                                    isEmptyHtmlTag,
                                                    isInnerHtmlTagOf)
import           Text.XML.HXT.Parser.XhtmlEntities

-- ----------------------------------------

-- The name table contains the id map. All element and attribute names are stored
-- the first time they are ecountered, and later always this name is used,
-- not a string built by the parser.

type STag               = Tag String

type Tags               = [STag]

type Context            = ([String], NsEnv)

type State              = Tags

newtype Parser a        = P { forall a. Parser a -> State -> (a, State)
parse :: State -> (a, State)}

instance Functor Parser where
    fmap :: forall a b. (a -> b) -> Parser a -> Parser b
fmap = (a -> b) -> Parser a -> Parser b
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM

instance Applicative Parser where
    pure :: forall a. a -> Parser a
pure  = a -> Parser a
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return
    <*> :: forall a b. Parser (a -> b) -> Parser a -> Parser b
(<*>) = Parser (a -> b) -> Parser a -> Parser b
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap

instance Monad Parser where
    return :: forall a. a -> Parser a
return a
x    = (State -> (a, State)) -> Parser a
forall a. (State -> (a, State)) -> Parser a
P ((State -> (a, State)) -> Parser a)
-> (State -> (a, State)) -> Parser a
forall a b. (a -> b) -> a -> b
$ \ State
ts -> (a
x, State
ts)
    Parser a
p >>= :: forall a b. Parser a -> (a -> Parser b) -> Parser b
>>= a -> Parser b
f     = (State -> (b, State)) -> Parser b
forall a. (State -> (a, State)) -> Parser a
P ((State -> (b, State)) -> Parser b)
-> (State -> (b, State)) -> Parser b
forall a b. (a -> b) -> a -> b
$ \ State
ts -> let
                              (a
res, State
ts') = Parser a -> State -> (a, State)
forall a. Parser a -> State -> (a, State)
parse Parser a
p State
ts
                              in
                              Parser b -> State -> (b, State)
forall a. Parser a -> State -> (a, State)
parse (a -> Parser b
f a
res) State
ts'

runParser       :: Parser a -> Tags -> a
runParser :: forall a. Parser a -> State -> a
runParser Parser a
p State
ts  = (a, State) -> a
forall a b. (a, b) -> a
fst ((a, State) -> a) -> (State -> (a, State)) -> State -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser a -> State -> (a, State)
forall a. Parser a -> State -> (a, State)
parse Parser a
p (State -> a) -> State -> a
forall a b. (a -> b) -> a -> b
$ State
ts

-- ----------------------------------------

cond            :: Parser Bool -> Parser a -> Parser a -> Parser a
cond :: forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
c Parser a
t Parser a
e      = do
                  Bool
p <- Parser Bool
c
                  if Bool
p then Parser a
t else Parser a
e

lookAhead       :: (STag -> Bool) -> Parser Bool
lookAhead :: (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
p     = (State -> (Bool, State)) -> Parser Bool
forall a. (State -> (a, State)) -> Parser a
P ((State -> (Bool, State)) -> Parser Bool)
-> (State -> (Bool, State)) -> Parser Bool
forall a b. (a -> b) -> a -> b
$ \ State
s -> (Bool -> Bool
not (State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
s) Bool -> Bool -> Bool
&& STag -> Bool
p (State -> STag
forall a. HasCallStack => [a] -> a
head State
s), State
s)

-- ----------------------------------------
-- primitive look ahead tests

isEof           :: Parser Bool
isEof :: Parser Bool
isEof           = (State -> (Bool, State)) -> Parser Bool
forall a. (State -> (a, State)) -> Parser a
P ((State -> (Bool, State)) -> Parser Bool)
-> (State -> (Bool, State)) -> Parser Bool
forall a b. (a -> b) -> a -> b
$ \ State
s -> (State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
s, State
s)

isText          :: Parser Bool
isText :: Parser Bool
isText          = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagText str
_) = Bool
True
                  is Tag str
_           = Bool
False

isCmt           :: Parser Bool
isCmt :: Parser Bool
isCmt           = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagComment str
_) = Bool
True
                  is Tag str
_              = Bool
False

isWarn          :: Parser Bool
isWarn :: Parser Bool
isWarn          = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagWarning str
_) = Bool
True
                  is Tag str
_              = Bool
False

isPos           :: Parser Bool
isPos :: Parser Bool
isPos           = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagPosition Row
_ Row
_) = Bool
True
                  is Tag str
_           = Bool
False

isCls           :: Parser Bool
isCls :: Parser Bool
isCls           = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagClose str
_) = Bool
True
                  is Tag str
_            = Bool
False

isOpn           :: Parser Bool
isOpn :: Parser Bool
isOpn           = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
                  where
                  is :: Tag str -> Bool
is (TagOpen str
_ [Attribute str]
_) = Bool
True
                  is Tag str
_             = Bool
False

-- ----------------------------------------
-- primitive symbol parsers

getTag          :: Parser STag
getTag :: Parser STag
getTag          = (State -> (STag, State)) -> Parser STag
forall a. (State -> (a, State)) -> Parser a
P ((State -> (STag, State)) -> Parser STag)
-> (State -> (STag, State)) -> Parser STag
forall a b. (a -> b) -> a -> b
$ \ State
t -> (State -> STag
forall a. HasCallStack => [a] -> a
head State
t, State -> State
forall a. HasCallStack => [a] -> [a]
tail State
t)

getSym          :: (STag -> a) -> Parser a
getSym :: forall a. (STag -> a) -> Parser a
getSym STag -> a
f        = do
                  STag
t <- Parser STag
getTag
                  a -> Parser a
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (STag -> a
f STag
t)

getText         :: Parser String
getText :: Parser String
getText         = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
                  where
                  sym :: Tag str -> str
sym (TagText str
t)       = str
t
                  sym Tag str
_                 = str
forall a. HasCallStack => a
undefined

getCmt          :: Parser String
getCmt :: Parser String
getCmt          = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
                  where
                  sym :: Tag str -> str
sym (TagComment str
c)    = str
c
                  sym Tag str
_                 = str
forall a. HasCallStack => a
undefined

getWarn         :: Parser String
getWarn :: Parser String
getWarn         = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
                  where
                  sym :: Tag str -> str
sym (TagWarning str
w)    = str
w
                  sym Tag str
_                 = str
forall a. HasCallStack => a
undefined

getPos          :: Parser (Int, Int)
getPos :: Parser (Row, Row)
getPos          = (STag -> (Row, Row)) -> Parser (Row, Row)
forall a. (STag -> a) -> Parser a
getSym STag -> (Row, Row)
forall {str}. Tag str -> (Row, Row)
sym
                  where
                  sym :: Tag str -> (Row, Row)
sym (TagPosition Row
l Row
c) = (Row
l, Row
c)
                  sym Tag str
_                 = (Row, Row)
forall a. HasCallStack => a
undefined

getCls          :: Parser String
getCls :: Parser String
getCls          = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
                  where
                  sym :: Tag str -> str
sym (TagClose str
n)      = str
n
                  sym Tag str
_                 = str
forall a. HasCallStack => a
undefined

getOpn          :: Parser (String, [(String,String)])
getOpn :: Parser (String, [(String, String)])
getOpn          = (STag -> (String, [(String, String)]))
-> Parser (String, [(String, String)])
forall a. (STag -> a) -> Parser a
getSym STag -> (String, [(String, String)])
forall {a}. Tag a -> (a, [Attribute a])
sym
                  where
                  sym :: Tag a -> (a, [Attribute a])
sym (TagOpen a
n [Attribute a]
al)    = (a
n, [Attribute a]
al)
                  sym Tag a
_                 = (a, [Attribute a])
forall a. HasCallStack => a
undefined

-- ----------------------------------------
-- pushback parsers for inserting missing tags

pushBack        :: STag -> Parser ()
pushBack :: STag -> Parser ()
pushBack STag
t      = (State -> ((), State)) -> Parser ()
forall a. (State -> (a, State)) -> Parser a
P ((State -> ((), State)) -> Parser ())
-> (State -> ((), State)) -> Parser ()
forall a b. (a -> b) -> a -> b
$ \ State
ts -> ((), STag
tSTag -> State -> State
forall a. a -> [a] -> [a]
:State
ts)

insCls          :: String -> Parser ()
insCls :: String -> Parser ()
insCls String
n        = STag -> Parser ()
pushBack (String -> STag
forall str. str -> Tag str
TagClose String
n)

insOpn          :: String -> [(String, String)] -> Parser ()
insOpn :: String -> [(String, String)] -> Parser ()
insOpn String
n [(String, String)]
al     = STag -> Parser ()
pushBack (String -> [(String, String)] -> STag
forall str. str -> [Attribute str] -> Tag str
TagOpen String
n [(String, String)]
al)

-- ----------------------------------------

mkQN            :: Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN :: Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
isAttr NsEnv
env String
s
    | Bool
withNamespaces
        = QName -> Parser QName
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return QName
qn1
    | Bool
otherwise
        = QName -> Parser QName
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return QName
qn0
    where
    qn1 :: QName
qn1
        | Bool
isAttr Bool -> Bool -> Bool
&& Bool
isSimpleName        = QName
s'
        | Bool
isSimpleName                  = XName -> XName -> XName -> QName
newQName (String -> XName
newXName String
s) XName
nullXName (XName -> XName
nsUri XName
nullXName)
        | String -> Bool
isWellformedQualifiedName String
s   = XName -> XName -> XName -> QName
newQName XName
lp'          XName
px'       (XName -> XName
nsUri XName
px')
        | Bool
otherwise                     = QName
s'
    qn0 :: QName
qn0                                 = QName
s'

    nsUri :: XName -> XName
nsUri XName
x                             = XName -> Maybe XName -> XName
forall a. a -> Maybe a -> a
fromMaybe XName
nullXName (Maybe XName -> XName) -> (NsEnv -> Maybe XName) -> NsEnv -> XName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XName -> NsEnv -> Maybe XName
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup XName
x (NsEnv -> XName) -> NsEnv -> XName
forall a b. (a -> b) -> a -> b
$ NsEnv
env
    isSimpleName :: Bool
isSimpleName                        = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
s
    (String
px, (Char
_ : String
lp))                      = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span(Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
s
    px' :: XName
px'                                 = String -> XName
newXName String
px
    lp' :: XName
lp'                                 = String -> XName
newXName String
lp
    s' :: QName
s'                                  = String -> QName
mkName   String
s

extendNsEnv     :: Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv :: Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv Bool
withNamespaces [(String, String)]
al1 NsEnv
env
    | Bool
withNamespaces
        = [(String, String)] -> NsEnv
toNsEnv (((String, String) -> [(String, String)])
-> [(String, String)] -> [(String, String)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((String -> String -> [(String, String)])
-> (String, String) -> [(String, String)]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry String -> String -> [(String, String)]
forall {b}. String -> b -> [(String, b)]
addNs) [(String, String)]
al1) NsEnv -> NsEnv -> NsEnv
forall a. [a] -> [a] -> [a]
++ NsEnv
env
    | Bool
otherwise
        = NsEnv
env
    where
    addNs :: String -> b -> [(String, b)]
addNs String
n b
v
        | String
px String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
a_xmlns
          Bool -> Bool -> Bool
&&
          (String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
lp Bool -> Bool -> Bool
|| (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> Bool) -> (String -> String) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. HasCallStack => [a] -> [a]
tail (String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ String
lp))
            = [(Row -> String -> String
forall a. Row -> [a] -> [a]
drop Row
1 String
lp, b
v)]
        | Bool
otherwise
            = []
        where
        (String
px, String
lp) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
n

-- ----------------------------------------

-- own entity lookup to prevent problems with &amp; and tagsoup hack for IE

lookupEntity    :: Bool -> Bool -> (String, Bool) -> Tags
lookupEntity :: Bool -> Bool -> (String, Bool) -> State
lookupEntity Bool
withWarnings Bool
_asHtml (e0 :: String
e0@(Char
'#':String
e), Bool
withSemicolon)
    = case String -> Maybe String
lookupNumericEntity String
e of
      Just String
c  -> (String -> STag
forall str. str -> Tag str
TagText String
c)
                 STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
      Maybe String
Nothing -> ( String -> STag
forall str. str -> Tag str
TagText (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e0 String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
withSemicolon])
                 STag -> State -> State
forall a. a -> [a] -> [a]
: if Bool
withWarnings
                   then (String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"illegal char reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";")
                        STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
                   else []
    where
    missingSemi :: State
missingSemi
        | Bool
withWarnings
          Bool -> Bool -> Bool
&&
          Bool -> Bool
not Bool
withSemicolon = [String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"missing \";\" at end of char reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e]
        | Bool
otherwise         = []

lookupEntity Bool
withWarnings Bool
asHtml (String
e, Bool
withSemicolon)
    = case (String -> [(String, Row)] -> Maybe Row
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
e [(String, Row)]
entities) of
      Just Row
x  -> (String -> STag
forall str. str -> Tag str
TagText [Row -> Char
forall a. Enum a => Row -> a
toEnum Row
x])
                 STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
      Maybe Row
Nothing -> (String -> STag
forall str. str -> Tag str
TagText (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
withSemicolon])
                 STag -> State -> State
forall a. a -> [a] -> [a]
: if Bool
withWarnings
                   then (String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"Unknown entity reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";")
                        STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
                   else []
    where
    entities :: [(String, Row)]
entities
        | Bool
asHtml    = [(String, Row)]
xhtmlEntities
        | Bool
otherwise = [(String, Row)]
xhtmlEntities -- xmlEntities (TODO: xhtml is xml and html)
    missingSemi :: State
missingSemi
        | Bool
withWarnings
          Bool -> Bool -> Bool
&&
          Bool -> Bool
not Bool
withSemicolon = [String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"missing \";\" at end of entity reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e]
        | Bool
otherwise         = []

lookupEntityAttr        :: Bool -> Bool -> (String, Bool) -> (String, Tags)
lookupEntityAttr :: Bool -> Bool -> (String, Bool) -> (String, State)
lookupEntityAttr Bool
withWarnings Bool
asHtml (String
e, Bool
b)
    | State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
r    = (String
s,                   State
r)
    | Bool
otherwise = (String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
b], State
r)
    where
    (TagText String
s) : State
r = Bool -> Bool -> (String, Bool) -> State
lookupEntity Bool
withWarnings Bool
asHtml (String
e, Bool
b)

-- ----------------------------------------
{-
        entityData x = case lookupEntity y of
            Just y -> [TagText $ fromChar y]
            Nothing -> [TagText $ fromString $ "&" ++ y ++ ";"
                       ,TagWarning $ fromString $ "Unknown entity: " ++ y]
            where y = toString x

        entityAttrib (x,b) = case lookupEntity y of
            Just y -> (fromChar y, [])
            Nothing -> (fromString $ "&" ++ y ++ [';'|b], [TagWarning $ fromString $ "Unknown entity: " ++ y])
            where y = toString x
-}
-- ----------------------------------------

-- |
-- Turns all element and attribute names to lower case
-- even !DOCTYPE stuff. But this is discarded when parsing the tagsoup

lowerCaseNames :: Tags -> Tags
lowerCaseNames :: State -> State
lowerCaseNames
    = (STag -> STag) -> State -> State
forall a b. (a -> b) -> [a] -> [b]
map STag -> STag
f
    where
    f :: STag -> STag
f (TagOpen String
name [(String, String)]
attrs)
        = String -> [(String, String)] -> STag
forall str. str -> [Attribute str] -> Tag str
TagOpen (String -> String
nameToLower String
name) (((String, String) -> (String, String))
-> [(String, String)] -> [(String, String)]
forall a b. (a -> b) -> [a] -> [b]
map (String, String) -> (String, String)
forall {b}. (String, b) -> (String, b)
attrToLower [(String, String)]
attrs)
    f (TagClose String
name)
        = String -> STag
forall str. str -> Tag str
TagClose (String -> String
nameToLower String
name)
    f STag
a = STag
a
    nameToLower :: String -> String
nameToLower          = (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower
    attrToLower :: (String, b) -> (String, b)
attrToLower (String
an, b
av) = (String -> String
nameToLower String
an, b
av)

-- ----------------------------------------
-- the main parser

parseHtmlTagSoup        :: Bool -> Bool -> Bool -> Bool -> Bool -> String -> String -> XmlTrees
parseHtmlTagSoup :: Bool
-> Bool -> Bool -> Bool -> Bool -> String -> String -> XmlTrees
parseHtmlTagSoup Bool
withNamespaces Bool
withWarnings Bool
withComment Bool
removeWhiteSpace Bool
asHtml String
doc
    = ( XmlTrees -> XmlTrees
docRootElem
        (XmlTrees -> XmlTrees)
-> (String -> XmlTrees) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser XmlTrees -> State -> XmlTrees
forall a. Parser a -> State -> a
runParser (Context -> Parser XmlTrees
buildCont Context
forall {a}. ([a], NsEnv)
initContext)
        (State -> XmlTrees) -> (String -> State) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ( if Bool
asHtml
            then State -> State
lowerCaseNames
            else State -> State
forall a. a -> a
id
          )
        (State -> State) -> (String -> State) -> String -> State
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> State
tagsoupParse
      )
    where
    tagsoupParse        :: String -> Tags
    tagsoupParse :: String -> State
tagsoupParse        = ParseOptions String -> String -> State
forall str. StringLike str => ParseOptions str -> str -> [Tag str]
parseTagsOptions ParseOptions String
tagsoupOptions

    tagsoupOptions      :: ParseOptions String
    tagsoupOptions :: ParseOptions String
tagsoupOptions      = ParseOptions String
parseOptions' { optTagWarning :: Bool
optTagWarning   = Bool
withWarnings
                                        , optEntityData :: (String, Bool) -> State
optEntityData   = Bool -> Bool -> (String, Bool) -> State
lookupEntity     Bool
withWarnings Bool
asHtml
                                        , optEntityAttrib :: (String, Bool) -> (String, State)
optEntityAttrib = Bool -> Bool -> (String, Bool) -> (String, State)
lookupEntityAttr Bool
withWarnings Bool
asHtml
                                        }
                          where
                          parseOptions' :: ParseOptions String
                          parseOptions' :: ParseOptions String
parseOptions' = ParseOptions String
forall str. StringLike str => ParseOptions str
parseOptions

    -- This is essential for lazy parsing:
    -- the call of "take 1" stops parsing, when the first element is detected
    -- no check on end of input sequence is required to build this (0- or 1-element list)
    -- so eof is never checked unneccessarily

    docRootElem :: XmlTrees -> XmlTrees
docRootElem
        = Row -> XmlTrees -> XmlTrees
forall a. Row -> [a] -> [a]
take Row
1 (XmlTrees -> XmlTrees)
-> (XmlTrees -> XmlTrees) -> XmlTrees -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NTree XNode -> Bool) -> XmlTrees -> XmlTrees
forall a. (a -> Bool) -> [a] -> [a]
filter NTree XNode -> Bool
forall a. XmlNode a => a -> Bool
isElem

    initContext :: ([a], NsEnv)
initContext         = ( []
                          , [(String, String)] -> NsEnv
toNsEnv ([(String, String)] -> NsEnv) -> [(String, String)] -> NsEnv
forall a b. (a -> b) -> a -> b
$
                            [ (String
a_xml,   String
xmlNamespace)
                            , (String
a_xmlns, String
xmlnsNamespace)
                            ]
                          )

    wrap :: a -> [a]
wrap                = (a -> [a] -> [a]
forall a. a -> [a] -> [a]
:[])

    warn :: String -> XmlTrees
warn
        | Bool
withWarnings  = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Row -> String -> NTree XNode
mkError' Row
c_warn (String -> NTree XNode)
-> (String -> String) -> String -> NTree XNode
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. Show a => a -> String
show (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
doc String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++)
        | Bool
otherwise     = XmlTrees -> String -> XmlTrees
forall a b. a -> b -> a
const []
    cmt :: String -> XmlTrees
cmt
        | Bool
withComment   = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkCmt'
        | Bool
otherwise     = XmlTrees -> String -> XmlTrees
forall a b. a -> b -> a
const []
    txt :: String -> XmlTrees
txt
        | Bool
removeWhiteSpace
                        = \ String
t ->
                          if (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isXmlSpaceChar String
t
                          then []
                          else NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText' (String -> XmlTrees) -> String -> XmlTrees
forall a b. (a -> b) -> a -> b
$ String
t
        | Bool
otherwise     = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText'

    isEmptyElem :: String -> Bool
isEmptyElem
        | Bool
asHtml        = String -> Bool
isEmptyHtmlTag
        | Bool
otherwise     = Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False

    isInnerElem :: String -> String -> Bool
isInnerElem
        | Bool
asHtml        = String -> String -> Bool
isInnerHtmlTagOf
        | Bool
otherwise     = (String -> Bool) -> String -> String -> Bool
forall a b. a -> b -> a
const (Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False)

    closesElem :: [String] -> String -> Bool
closesElem
        | Bool
asHtml        = \ [String]
ns String
n1 ->
                          Bool -> Bool
not ([String] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
ns)
                          Bool -> Bool -> Bool
&&
                          String
n1 String -> String -> Bool
`closesHtmlTag` ([String] -> String
forall a. HasCallStack => [a] -> a
head [String]
ns)
        | Bool
otherwise     = (String -> Bool) -> [String] -> String -> Bool
forall a b. a -> b -> a
const (Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False)

    buildCont   :: Context -> Parser XmlTrees
    buildCont :: Context -> Parser XmlTrees
buildCont Context
ns
        = Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isText ( do
                        String
t <- Parser String
getText
                        XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
                        XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
txt String
t XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
                      )
          ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isOpn ( do
                         (String
n,[(String, String)]
al) <- Parser (String, [(String, String)])
getOpn
                         Context -> String -> [(String, String)] -> Parser XmlTrees
openTag Context
ns String
n [(String, String)]
al
                       )
            ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isCls ( do
                           String
n <- Parser String
getCls
                           Context -> String -> Parser XmlTrees
closeTag Context
ns String
n
                         )
              ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isCmt ( do
                             String
c <- Parser String
getCmt
                             XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
                             XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
cmt String
c XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
                           )
                ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isWarn ( do
                                String
w <- Parser String
getWarn
                                XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
                                XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
warn String
w XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
                              )
                  ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isPos ( do
                                 (Row, Row)
_ <- Parser (Row, Row)
getPos
                                 Context -> Parser XmlTrees
buildCont Context
ns
                               )
                    ( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isEof ( do
                                   Bool
_ <- Parser Bool
isEof
                                   Context -> Parser XmlTrees
closeAll Context
ns
                                 )
                      ( XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
warn String
"parse error in tagsoup tree construction")
                      )
                    )
                  )
                )
              )
            )
          )
        where
        closeTag                :: Context -> String -> Parser XmlTrees
        closeTag :: Context -> String -> Parser XmlTrees
closeTag ((String
n':[String]
_), NsEnv
_) String
n1
            | String
n' String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
n1          = XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return []                     -- a normal closing tag
                                                                -- all other cases try to repair wrong html
        closeTag ns' :: Context
ns'@((String
n':[String]
_), NsEnv
_) String
n1                             -- n1 closes n implicitly
            | String
n' String -> String -> Bool
`isInnerElem` String
n1                               -- e.g. <td>...</tr>
                                = do
                                  String -> Parser ()
insCls String
n1                     -- pushback </n1>
                                  String -> Parser ()
insCls String
n'                     -- insert a </n'>
                                  Context -> Parser XmlTrees
buildCont Context
ns'                 -- try again
        closeTag Context
ns' String
n1
            | String -> Bool
isEmptyElem String
n1    = Context -> Parser XmlTrees
buildCont Context
ns'                 -- ignore a redundant closing tag for empty element
        closeTag ns' :: Context
ns'@((String
n':[String]
ns1'), NsEnv
_) String
n1                          -- insert a missing closing tag
            | String
n1 String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
ns1'    = do
                                  String -> Parser ()
insCls String
n1
                                  String -> Parser ()
insCls String
n'
                                  XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns'
                                  XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> XmlTrees
warn (String
"closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n' String -> String -> String
forall a. [a] -> [a] -> [a]
++
                                                 String
" expected, but " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" found")
                                           XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl
                                         )
        closeTag Context
ns' String
n1                                         -- ignore a wrong closing tag
                                = do
                                  XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns'
                                  XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> XmlTrees
warn (String
"no opening tag for closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n1)
                                           XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl
                                         )

        openTag                 :: Context -> String -> [(String, String)] -> Parser XmlTrees
        openTag :: Context -> String -> [(String, String)] -> Parser XmlTrees
openTag cx' :: Context
cx'@([String]
ns',NsEnv
env') String
n1 [(String, String)]
al1
            | String -> Bool
isPiDT String
n1         = Context -> Parser XmlTrees
buildCont Context
cx'
            | String -> Bool
isEmptyElem String
n1
                                = do
                                  QName
qn <- NsEnv -> String -> Parser QName
mkElemQN NsEnv
nenv String
n1
                                  XmlTrees
al <- [(String, String)] -> Parser XmlTrees
mkAttrs [(String, String)]
al1
                                  XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
cx'
                                  XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTrees -> NTree XNode
mkElement QName
qn XmlTrees
al [] NTree XNode -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
: XmlTrees
rl)
            | [String] -> String -> Bool
closesElem [String]
ns' String
n1 = do
                                  String -> [(String, String)] -> Parser ()
insOpn String
n1 [(String, String)]
al1
                                  String -> Parser ()
insCls ([String] -> String
forall a. HasCallStack => [a] -> a
head [String]
ns')
                                  Context -> Parser XmlTrees
buildCont Context
cx'
            | Bool
otherwise         = do
                                  QName
qn <- NsEnv -> String -> Parser QName
mkElemQN NsEnv
nenv String
n1
                                  XmlTrees
al <- [(String, String)] -> Parser XmlTrees
mkAttrs [(String, String)]
al1
                                  XmlTrees
cs <- Context -> Parser XmlTrees
buildCont ((String
n1 String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
ns'), NsEnv
nenv)
                                  XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
cx'
                                  XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTrees -> NTree XNode
mkElement QName
qn XmlTrees
al XmlTrees
cs NTree XNode -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
: XmlTrees
rl)
            where
            nenv :: NsEnv
nenv                = Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv Bool
withNamespaces [(String, String)]
al1 NsEnv
env'
            mkElemQN :: NsEnv -> String -> Parser QName
mkElemQN            = Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
False
            mkAttrQN :: NsEnv -> String -> Parser QName
mkAttrQN            = Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
True
            isPiDT :: String -> Bool
isPiDT (Char
'?':String
_)      = Bool
True
            isPiDT (Char
'!':String
_)      = Bool
True
            isPiDT String
_            = Bool
False
            mkAttrs :: [(String, String)] -> Parser XmlTrees
mkAttrs             = ((String, String) -> Parser (NTree XNode))
-> [(String, String)] -> Parser XmlTrees
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ((String -> String -> Parser (NTree XNode))
-> (String, String) -> Parser (NTree XNode)
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry String -> String -> Parser (NTree XNode)
mkA)
            mkA :: String -> String -> Parser (NTree XNode)
mkA String
an String
av           = do
                                  QName
qan <- NsEnv -> String -> Parser QName
mkAttrQN NsEnv
nenv String
an
                                  NTree XNode -> Parser (NTree XNode)
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> NTree XNode
mkAttr' QName
qan (NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText' (String -> XmlTrees) -> String -> XmlTrees
forall a b. (a -> b) -> a -> b
$ String
av))

        closeAll                :: ([String], NsEnv) -> Parser XmlTrees
        closeAll :: Context -> Parser XmlTrees
closeAll ([String]
ns',NsEnv
_)        = XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ((String -> XmlTrees) -> [String] -> XmlTrees
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap String -> XmlTrees
wrn [String]
ns')
                                  where
                                  wrn :: String -> XmlTrees
wrn = String -> XmlTrees
warn (String -> XmlTrees) -> (String -> String) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"insert missing closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. Show a => a -> String
show

-- ------------------------------------------------------------