Overview

Packages

  • application
    • commands
    • components
      • actions
      • filters
      • leftWidget
      • permissions
      • sortableWidget
      • util
      • webupdater
      • x2flow
        • actions
        • triggers
      • X2GridView
      • X2Settings
    • controllers
    • models
      • embedded
    • modules
      • accounts
        • controllers
        • models
      • actions
        • controllers
        • models
      • calendar
        • controllers
        • models
      • charts
        • models
      • contacts
        • controllers
        • models
      • docs
        • components
        • controllers
        • models
      • groups
        • controllers
        • models
      • marketing
        • components
        • controllers
        • models
      • media
        • controllers
        • models
      • mobile
        • components
      • opportunities
        • controllers
        • models
      • products
        • controllers
        • models
      • quotes
        • controllers
        • models
      • services
        • controllers
        • models
      • template
        • models
      • users
        • controllers
        • models
      • workflow
        • controllers
        • models
      • x2Leads
        • controllers
        • models
  • Net
  • None
  • PHP
  • system
    • base
    • caching
      • dependencies
    • collections
    • console
    • db
      • ar
      • schema
        • cubrid
        • mssql
        • mysql
        • oci
        • pgsql
        • sqlite
    • i18n
      • gettext
    • logging
    • test
    • utils
    • validators
    • web
      • actions
      • auth
      • filters
      • form
      • helpers
      • renderers
      • services
      • widgets
        • captcha
        • pagers
  • Text
    • Highlighter
  • zii
    • behaviors
    • widgets
      • grid
      • jui

Classes

  • ActionFormModel
  • ArrayUtil
  • ArrayValidator
  • AssociatedMediaBehavior
  • AuxLib
  • Changelog
  • DetailView
  • EncryptUtilTmp
  • EventsWidgetFieldFormatter
  • FailedLogins
  • FieldFormatter
  • FieldFormatterBase
  • FieldInputRenderer
  • FileFieldBehavior
  • FiltersForm
  • FilterUtil
  • FineDiff
  • FineDiffCopyOp
  • FineDiffDeleteOp
  • FineDiffInsertOp
  • FineDiffOp
  • FineDiffOps
  • FineDiffReplaceOp
  • GlobalCSSFormModel
  • GlobalImportFormModel
  • GoogleAuthenticator
  • HTML5
  • HTML5TreeConstructer
  • HTMLPurifier
  • HTMLPurifier_Arborize
  • HTMLPurifier_AttrCollections
  • HTMLPurifier_AttrDef
  • HTMLPurifier_AttrDef_Clone
  • HTMLPurifier_AttrDef_CSS
  • HTMLPurifier_AttrDef_CSS_AlphaValue
  • HTMLPurifier_AttrDef_CSS_Background
  • HTMLPurifier_AttrDef_CSS_BackgroundPosition
  • HTMLPurifier_AttrDef_CSS_Border
  • HTMLPurifier_AttrDef_CSS_Color
  • HTMLPurifier_AttrDef_CSS_Composite
  • HTMLPurifier_AttrDef_CSS_DenyElementDecorator
  • HTMLPurifier_AttrDef_CSS_Filter
  • HTMLPurifier_AttrDef_CSS_Font
  • HTMLPurifier_AttrDef_CSS_FontFamily
  • HTMLPurifier_AttrDef_CSS_Ident
  • HTMLPurifier_AttrDef_CSS_ImportantDecorator
  • HTMLPurifier_AttrDef_CSS_Length
  • HTMLPurifier_AttrDef_CSS_ListStyle
  • HTMLPurifier_AttrDef_CSS_Multiple
  • HTMLPurifier_AttrDef_CSS_Number
  • HTMLPurifier_AttrDef_CSS_Percentage
  • HTMLPurifier_AttrDef_CSS_TextDecoration
  • HTMLPurifier_AttrDef_CSS_URI
  • HTMLPurifier_AttrDef_Enum
  • HTMLPurifier_AttrDef_HTML_Bool
  • HTMLPurifier_AttrDef_HTML_Class
  • HTMLPurifier_AttrDef_HTML_Color
  • HTMLPurifier_AttrDef_HTML_FrameTarget
  • HTMLPurifier_AttrDef_HTML_ID
  • HTMLPurifier_AttrDef_HTML_Length
  • HTMLPurifier_AttrDef_HTML_LinkTypes
  • HTMLPurifier_AttrDef_HTML_MultiLength
  • HTMLPurifier_AttrDef_HTML_Nmtokens
  • HTMLPurifier_AttrDef_HTML_Pixels
  • HTMLPurifier_AttrDef_Integer
  • HTMLPurifier_AttrDef_Lang
  • HTMLPurifier_AttrDef_Switch
  • HTMLPurifier_AttrDef_Text
  • HTMLPurifier_AttrDef_URI
  • HTMLPurifier_AttrDef_URI_Email
  • HTMLPurifier_AttrDef_URI_Email_SimpleCheck
  • HTMLPurifier_AttrDef_URI_Host
  • HTMLPurifier_AttrDef_URI_IPv4
  • HTMLPurifier_AttrDef_URI_IPv6
  • HTMLPurifier_AttrTransform
  • HTMLPurifier_AttrTransform_Background
  • HTMLPurifier_AttrTransform_BdoDir
  • HTMLPurifier_AttrTransform_BgColor
  • HTMLPurifier_AttrTransform_BoolToCSS
  • HTMLPurifier_AttrTransform_Border
  • HTMLPurifier_AttrTransform_EnumToCSS
  • HTMLPurifier_AttrTransform_ImgRequired
  • HTMLPurifier_AttrTransform_ImgSpace
  • HTMLPurifier_AttrTransform_Input
  • HTMLPurifier_AttrTransform_Lang
  • HTMLPurifier_AttrTransform_Length
  • HTMLPurifier_AttrTransform_Name
  • HTMLPurifier_AttrTransform_NameSync
  • HTMLPurifier_AttrTransform_Nofollow
  • HTMLPurifier_AttrTransform_SafeEmbed
  • HTMLPurifier_AttrTransform_SafeObject
  • HTMLPurifier_AttrTransform_SafeParam
  • HTMLPurifier_AttrTransform_ScriptRequired
  • HTMLPurifier_AttrTransform_TargetBlank
  • HTMLPurifier_AttrTransform_Textarea
  • HTMLPurifier_AttrTypes
  • HTMLPurifier_AttrValidator
  • HTMLPurifier_Bootstrap
  • HTMLPurifier_ChildDef
  • HTMLPurifier_ChildDef_Chameleon
  • HTMLPurifier_ChildDef_Custom
  • HTMLPurifier_ChildDef_Empty
  • HTMLPurifier_ChildDef_List
  • HTMLPurifier_ChildDef_Optional
  • HTMLPurifier_ChildDef_Required
  • HTMLPurifier_ChildDef_StrictBlockquote
  • HTMLPurifier_ChildDef_Table
  • HTMLPurifier_Config
  • HTMLPurifier_ConfigSchema
  • HTMLPurifier_ConfigSchema_Builder_ConfigSchema
  • HTMLPurifier_ConfigSchema_Builder_Xml
  • HTMLPurifier_ConfigSchema_Interchange
  • HTMLPurifier_ConfigSchema_Interchange_Directive
  • HTMLPurifier_ConfigSchema_Interchange_Id
  • HTMLPurifier_ConfigSchema_InterchangeBuilder
  • HTMLPurifier_ConfigSchema_Validator
  • HTMLPurifier_ConfigSchema_ValidatorAtom
  • HTMLPurifier_ContentSets
  • HTMLPurifier_Context
  • HTMLPurifier_CSSDefinition
  • HTMLPurifier_Definition
  • HTMLPurifier_DefinitionCache
  • HTMLPurifier_DefinitionCache_Decorator
  • HTMLPurifier_DefinitionCache_Decorator_Cleanup
  • HTMLPurifier_DefinitionCache_Decorator_Memory
  • HTMLPurifier_DefinitionCache_Null
  • HTMLPurifier_DefinitionCache_Serializer
  • HTMLPurifier_DefinitionCacheFactory
  • HTMLPurifier_Doctype
  • HTMLPurifier_DoctypeRegistry
  • HTMLPurifier_ElementDef
  • HTMLPurifier_Encoder
  • HTMLPurifier_EntityLookup
  • HTMLPurifier_EntityParser
  • HTMLPurifier_ErrorCollector
  • HTMLPurifier_ErrorStruct
  • HTMLPurifier_Filter
  • HTMLPurifier_Filter_ExtractStyleBlocks
  • HTMLPurifier_Filter_YouTube
  • HTMLPurifier_Generator
  • HTMLPurifier_HTMLDefinition
  • HTMLPurifier_HTMLModule
  • HTMLPurifier_HTMLModule_Bdo
  • HTMLPurifier_HTMLModule_CommonAttributes
  • HTMLPurifier_HTMLModule_Edit
  • HTMLPurifier_HTMLModule_Forms
  • HTMLPurifier_HTMLModule_Hypertext
  • HTMLPurifier_HTMLModule_Iframe
  • HTMLPurifier_HTMLModule_Image
  • HTMLPurifier_HTMLModule_Legacy
  • HTMLPurifier_HTMLModule_List
  • HTMLPurifier_HTMLModule_Name
  • HTMLPurifier_HTMLModule_Nofollow
  • HTMLPurifier_HTMLModule_NonXMLCommonAttributes
  • HTMLPurifier_HTMLModule_Object
  • HTMLPurifier_HTMLModule_Presentation
  • HTMLPurifier_HTMLModule_Proprietary
  • HTMLPurifier_HTMLModule_Ruby
  • HTMLPurifier_HTMLModule_SafeEmbed
  • HTMLPurifier_HTMLModule_SafeObject
  • HTMLPurifier_HTMLModule_SafeScripting
  • HTMLPurifier_HTMLModule_Scripting
  • HTMLPurifier_HTMLModule_StyleAttribute
  • HTMLPurifier_HTMLModule_Tables
  • HTMLPurifier_HTMLModule_Target
  • HTMLPurifier_HTMLModule_TargetBlank
  • HTMLPurifier_HTMLModule_Text
  • HTMLPurifier_HTMLModule_Tidy
  • HTMLPurifier_HTMLModule_Tidy_Name
  • HTMLPurifier_HTMLModule_Tidy_Proprietary
  • HTMLPurifier_HTMLModule_Tidy_Strict
  • HTMLPurifier_HTMLModule_Tidy_Transitional
  • HTMLPurifier_HTMLModule_Tidy_XHTML
  • HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
  • HTMLPurifier_HTMLModule_XMLCommonAttributes
  • HTMLPurifier_HTMLModuleManager
  • HTMLPurifier_IDAccumulator
  • HTMLPurifier_Injector
  • HTMLPurifier_Injector_AutoParagraph
  • HTMLPurifier_Injector_DisplayLinkURI
  • HTMLPurifier_Injector_Linkify
  • HTMLPurifier_Injector_PurifierLinkify
  • HTMLPurifier_Injector_RemoveEmpty
  • HTMLPurifier_Injector_RemoveSpansWithoutAttributes
  • HTMLPurifier_Injector_SafeObject
  • HTMLPurifier_Language
  • HTMLPurifier_Language_en_x_test
  • HTMLPurifier_LanguageFactory
  • HTMLPurifier_Length
  • HTMLPurifier_Lexer
  • HTMLPurifier_Lexer_DirectLex
  • HTMLPurifier_Lexer_DOMLex
  • HTMLPurifier_Lexer_PH5P
  • HTMLPurifier_Node
  • HTMLPurifier_Node_Comment
  • HTMLPurifier_Node_Element
  • HTMLPurifier_Node_Text
  • HTMLPurifier_PercentEncoder
  • HTMLPurifier_Printer
  • HTMLPurifier_Printer_ConfigForm
  • HTMLPurifier_Printer_ConfigForm_bool
  • HTMLPurifier_Printer_ConfigForm_default
  • HTMLPurifier_Printer_ConfigForm_NullDecorator
  • HTMLPurifier_Printer_CSSDefinition
  • HTMLPurifier_Printer_HTMLDefinition
  • HTMLPurifier_PropertyList
  • HTMLPurifier_PropertyListIterator
  • HTMLPurifier_Queue
  • HTMLPurifier_Strategy
  • HTMLPurifier_Strategy_Composite
  • HTMLPurifier_Strategy_Core
  • HTMLPurifier_Strategy_FixNesting
  • HTMLPurifier_Strategy_MakeWellFormed
  • HTMLPurifier_Strategy_RemoveForeignElements
  • HTMLPurifier_Strategy_ValidateAttributes
  • HTMLPurifier_StringHash
  • HTMLPurifier_StringHashParser
  • HTMLPurifier_TagTransform
  • HTMLPurifier_TagTransform_Font
  • HTMLPurifier_TagTransform_Simple
  • HTMLPurifier_Token
  • HTMLPurifier_Token_Comment
  • HTMLPurifier_Token_Empty
  • HTMLPurifier_Token_End
  • HTMLPurifier_Token_Start
  • HTMLPurifier_Token_Tag
  • HTMLPurifier_Token_Text
  • HTMLPurifier_TokenFactory
  • HTMLPurifier_UnitConverter
  • HTMLPurifier_URI
  • HTMLPurifier_URIDefinition
  • HTMLPurifier_URIFilter
  • HTMLPurifier_URIFilter_DisableExternal
  • HTMLPurifier_URIFilter_DisableExternalResources
  • HTMLPurifier_URIFilter_DisableResources
  • HTMLPurifier_URIFilter_HostBlacklist
  • HTMLPurifier_URIFilter_MakeAbsolute
  • HTMLPurifier_URIFilter_Munge
  • HTMLPurifier_URIFilter_SafeIframe
  • HTMLPurifier_URIParser
  • HTMLPurifier_URIScheme
  • HTMLPurifier_URIScheme_data
  • HTMLPurifier_URIScheme_file
  • HTMLPurifier_URIScheme_ftp
  • HTMLPurifier_URIScheme_http
  • HTMLPurifier_URIScheme_https
  • HTMLPurifier_URIScheme_mailto
  • HTMLPurifier_URIScheme_news
  • HTMLPurifier_URIScheme_nntp
  • HTMLPurifier_URISchemeRegistry
  • HTMLPurifier_VarParser
  • HTMLPurifier_VarParser_Flexible
  • HTMLPurifier_VarParser_Native
  • HTMLPurifier_Zipper
  • JSONFieldsBehavior
  • JSONResponse
  • Markdown_Parser
  • MarkdownExtra_Parser
  • MediaFieldFormatter
  • MediaSelector
  • MobileActiveRecordFieldFormatter
  • MobileActivityFeed
  • MobileChartDashboard
  • MobileFieldFormatter
  • MobileFieldInputRenderer
  • ModuleModelNameValidator
  • MultiChildNode
  • MultiTypeAutocomplete
  • PasswordUtil
  • ProductFeature
  • ProfileWidgetLayout
  • QueryParamGenerator
  • RecordLimitBehavior
  • RecordView
  • RecordViewWidgetLayout
  • RelationshipsGridModel
  • RelationshipsJoin
  • RepairUserDataCommand
  • RequestUtil
  • RequiredIfNotSetValidator
  • ResponseUtil
  • RunMigrationScriptCommand
  • ServiceWebFormDesigner
  • Settings
  • StringUtil
  • TestEmailAction
  • TestEmailActionForm
  • ThemeGenerator
  • TimerUtil
  • TopicsFieldFormatter
  • TopicsWidgetLayout
  • TransactionalViewFieldFormatter
  • UrlUtil
  • ValidLinkValidator
  • WebFormDesigner
  • WebLeadFormDesigner
  • X2ActiveRecordBehavior
  • X2ActiveRecordFieldFormatter
  • X2ButtonColumn
  • X2ConditionList
  • X2ConsoleCommand
  • X2ControllerBehavior
  • X2DataColumn
  • X2DuplicateBehavior
  • X2Flashes
  • X2GridViewFieldFormatter
  • X2IPAddress
  • X2LeadsDataColumn
  • X2MergeableBehavior
  • X2MessageSource
  • X2MobileControllerBehavior
  • X2MobileProfileControllerBehavior
  • X2MobileQuotesControllerBehavior
  • X2MobileSiteControllerBehavior
  • X2MobileTopicsControllerBehavior
  • X2ModelConversionBehavior
  • X2ModelConversionWidget
  • X2ModelForeignKeyValidator
  • X2ModelUniqueIndexValidator
  • X2NonWebUser
  • X2StaticDropdown
  • X2StaticField
  • X2StaticFieldsBehavior
  • X2UrlManager
  • X2Validator
  • X2WidgetBehavior

Interfaces

  • AdminOwnedCredentials

Exceptions

  • CampaignMailingException
  • CodeExchangeException
  • GetCredentialsException
  • HTMLPurifier_ConfigSchema_Exception
  • HTMLPurifier_Exception
  • HTMLPurifier_VarParserException
  • Net_IDNA2_Exception
  • Net_IDNA2_Exception_Nameprep
  • NoRefreshTokenException
  • NoUserIdException
  • StringUtilException

Functions

  • checkCurrency
  • checkDNS
  • checkServerVar
  • checkTimezone
  • decodeQuotes
  • echoIcons
  • encodeQuotes
  • exceptionForError
  • getField
  • getLanguageName
  • getModuleTitle
  • handleReqError
  • handleReqException
  • htmlpurifier_filter_extractstyleblocks_muteerrorhandler
  • installer_t
  • installer_tr
  • isAllowedDir
  • mediaMigrationRrmdir
  • migrateMediaDir
  • printGraph
  • printR
  • renderFields
  • reqShutdown
  • RIP
  • translateOptions
  • tryGetRemote
  • Overview
  • Package
  • Class
  • Tree
   1: <?php
   2: 
   3: /**
   4:  * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
   5:  * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
   6:  * 
   7:  * @note
   8:  *    Recent changes to PHP's DOM extension have resulted in some fatal
   9:  *    error conditions with the original version of PH5P. Pending changes,
  10:  *    this lexer will punt to DirectLex if DOM throws an exception.
  11:  */
  12: 
  13: class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
  14:     
  15:     public function tokenizeHTML($html, $config, $context) {
  16:         $new_html = $this->normalize($html, $config, $context);
  17:         $new_html = $this->wrapHTML($new_html, $config, $context);
  18:         try {
  19:             $parser = new HTML5($new_html);
  20:             $doc = $parser->save();
  21:         } catch (DOMException $e) {
  22:             // Uh oh, it failed. Punt to DirectLex.
  23:             $lexer = new HTMLPurifier_Lexer_DirectLex();
  24:             $context->register('PH5PError', $e); // save the error, so we can detect it
  25:             return $lexer->tokenizeHTML($html, $config, $context); // use original HTML
  26:         }
  27:         $tokens = array();
  28:         $this->tokenizeDOM(
  29:             $doc->getElementsByTagName('html')->item(0)-> // <html>
  30:                   getElementsByTagName('body')->item(0)-> //   <body>
  31:                   getElementsByTagName('div')->item(0)    //     <div>
  32:             , $tokens);
  33:         return $tokens;
  34:     }
  35:     
  36: }
  37: 
  38: /*
  39: 
  40: Copyright 2007 Jeroen van der Meer <http://jero.net/> 
  41: 
  42: Permission is hereby granted, free of charge, to any person obtaining a 
  43: copy of this software and associated documentation files (the 
  44: "Software"), to deal in the Software without restriction, including 
  45: without limitation the rights to use, copy, modify, merge, publish, 
  46: distribute, sublicense, and/or sell copies of the Software, and to 
  47: permit persons to whom the Software is furnished to do so, subject to 
  48: the following conditions: 
  49: 
  50: The above copyright notice and this permission notice shall be included 
  51: in all copies or substantial portions of the Software. 
  52: 
  53: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
  54: OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  55: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  56: IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  57: CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  58: TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  59: SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  60: 
  61: */
  62: 
  63: class HTML5 {
  64:     private $data;
  65:     private $char;
  66:     private $EOF;
  67:     private $state;
  68:     private $tree;
  69:     private $token;
  70:     private $content_model;
  71:     private $escape = false;
  72:     private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
  73:     'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
  74:     'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
  75:     'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
  76:     'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
  77:     'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
  78:     'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
  79:     'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
  80:     'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
  81:     'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
  82:     'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
  83:     'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
  84:     'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
  85:     'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
  86:     'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
  87:     'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
  88:     'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
  89:     'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
  90:     'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
  91:     'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
  92:     'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
  93:     'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
  94:     'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
  95:     'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
  96:     'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
  97:     'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
  98:     'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
  99:     'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
 100:     'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
 101:     'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
 102:     'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
 103:     'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
 104:     'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
 105:     'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
 106:     'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
 107:     'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
 108:     'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
 109:     'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
 110:     'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
 111:     'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
 112:     'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
 113:     'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
 114: 
 115:     const PCDATA    = 0;
 116:     const RCDATA    = 1;
 117:     const CDATA     = 2;
 118:     const PLAINTEXT = 3;
 119: 
 120:     const DOCTYPE  = 0;
 121:     const STARTTAG = 1;
 122:     const ENDTAG   = 2;
 123:     const COMMENT  = 3;
 124:     const CHARACTR = 4;
 125:     const EOF      = 5;
 126: 
 127:     public function __construct($data) {
 128: 
 129:         $this->data = $data;
 130:         $this->char = -1;
 131:         $this->EOF  = strlen($data);
 132:         $this->tree = new HTML5TreeConstructer;
 133:         $this->content_model = self::PCDATA;
 134: 
 135:         $this->state = 'data';
 136: 
 137:         while($this->state !== null) {
 138:             $this->{$this->state.'State'}();
 139:         }
 140:     }
 141: 
 142:     public function save() {
 143:         return $this->tree->save();
 144:     }
 145: 
 146:     private function char() {
 147:         return ($this->char < $this->EOF)
 148:             ? $this->data[$this->char]
 149:             : false;
 150:     }
 151: 
 152:     private function character($s, $l = 0) {
 153:         if($s + $l < $this->EOF) {
 154:             if($l === 0) {
 155:                 return $this->data[$s];
 156:             } else {
 157:                 return substr($this->data, $s, $l);
 158:             }
 159:         }
 160:     }
 161: 
 162:     private function characters($char_class, $start) {
 163:         return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
 164:     }
 165: 
 166:     private function dataState() {
 167:         // Consume the next input character
 168:         $this->char++;
 169:         $char = $this->char();
 170: 
 171:         if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
 172:             /* U+0026 AMPERSAND (&)
 173:             When the content model flag is set to one of the PCDATA or RCDATA
 174:             states: switch to the entity data state. Otherwise: treat it as per
 175:             the "anything else"    entry below. */
 176:             $this->state = 'entityData';
 177: 
 178:         } elseif($char === '-') {
 179:             /* If the content model flag is set to either the RCDATA state or
 180:             the CDATA state, and the escape flag is false, and there are at
 181:             least three characters before this one in the input stream, and the
 182:             last four characters in the input stream, including this one, are
 183:             U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
 184:             and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
 185:             if(($this->content_model === self::RCDATA || $this->content_model ===
 186:             self::CDATA) && $this->escape === false &&
 187:             $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') {
 188:                 $this->escape = true;
 189:             }
 190: 
 191:             /* In any case, emit the input character as a character token. Stay
 192:             in the data state. */
 193:             $this->emitToken(array(
 194:                 'type' => self::CHARACTR,
 195:                 'data' => $char
 196:             ));
 197: 
 198:         /* U+003C LESS-THAN SIGN (<) */
 199:         } elseif($char === '<' && ($this->content_model === self::PCDATA ||
 200:         (($this->content_model === self::RCDATA ||
 201:         $this->content_model === self::CDATA) && $this->escape === false))) {
 202:             /* When the content model flag is set to the PCDATA state: switch
 203:             to the tag open state.
 204: 
 205:             When the content model flag is set to either the RCDATA state or
 206:             the CDATA state and the escape flag is false: switch to the tag
 207:             open state.
 208: 
 209:             Otherwise: treat it as per the "anything else" entry below. */
 210:             $this->state = 'tagOpen';
 211: 
 212:         /* U+003E GREATER-THAN SIGN (>) */
 213:         } elseif($char === '>') {
 214:             /* If the content model flag is set to either the RCDATA state or
 215:             the CDATA state, and the escape flag is true, and the last three
 216:             characters in the input stream including this one are U+002D
 217:             HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
 218:             set the escape flag to false. */
 219:             if(($this->content_model === self::RCDATA ||
 220:             $this->content_model === self::CDATA) && $this->escape === true &&
 221:             $this->character($this->char, 3) === '-->') {
 222:                 $this->escape = false;
 223:             }
 224: 
 225:             /* In any case, emit the input character as a character token.
 226:             Stay in the data state. */
 227:             $this->emitToken(array(
 228:                 'type' => self::CHARACTR,
 229:                 'data' => $char
 230:             ));
 231: 
 232:         } elseif($this->char === $this->EOF) {
 233:             /* EOF
 234:             Emit an end-of-file token. */
 235:             $this->EOF();
 236: 
 237:         } elseif($this->content_model === self::PLAINTEXT) {
 238:             /* When the content model flag is set to the PLAINTEXT state
 239:             THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
 240:             the text and emit it as a character token. */
 241:             $this->emitToken(array(
 242:                 'type' => self::CHARACTR,
 243:                 'data' => substr($this->data, $this->char)
 244:             ));
 245: 
 246:             $this->EOF();
 247: 
 248:         } else {
 249:             /* Anything else
 250:             THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
 251:             otherwise would also be treated as a character token and emit it
 252:             as a single character token. Stay in the data state. */
 253:             $len  = strcspn($this->data, '<&', $this->char);
 254:             $char = substr($this->data, $this->char, $len);
 255:             $this->char += $len - 1;
 256: 
 257:             $this->emitToken(array(
 258:                 'type' => self::CHARACTR,
 259:                 'data' => $char
 260:             ));
 261: 
 262:             $this->state = 'data';
 263:         }
 264:     }
 265: 
 266:     private function entityDataState() {
 267:         // Attempt to consume an entity.
 268:         $entity = $this->entity();
 269: 
 270:         // If nothing is returned, emit a U+0026 AMPERSAND character token.
 271:         // Otherwise, emit the character token that was returned.
 272:         $char = (!$entity) ? '&' : $entity;
 273:         $this->emitToken(array(
 274:             'type' => self::CHARACTR,
 275:             'data' => $char
 276:         ));
 277: 
 278:         // Finally, switch to the data state.
 279:         $this->state = 'data';
 280:     }
 281: 
 282:     private function tagOpenState() {
 283:         switch($this->content_model) {
 284:             case self::RCDATA:
 285:             case self::CDATA:
 286:                 /* If the next input character is a U+002F SOLIDUS (/) character,
 287:                 consume it and switch to the close tag open state. If the next
 288:                 input character is not a U+002F SOLIDUS (/) character, emit a
 289:                 U+003C LESS-THAN SIGN character token and switch to the data
 290:                 state to process the next input character. */
 291:                 if($this->character($this->char + 1) === '/') {
 292:                     $this->char++;
 293:                     $this->state = 'closeTagOpen';
 294: 
 295:                 } else {
 296:                     $this->emitToken(array(
 297:                         'type' => self::CHARACTR,
 298:                         'data' => '<'
 299:                     ));
 300: 
 301:                     $this->state = 'data';
 302:                 }
 303:             break;
 304: 
 305:             case self::PCDATA:
 306:                 // If the content model flag is set to the PCDATA state
 307:                 // Consume the next input character:
 308:                 $this->char++;
 309:                 $char = $this->char();
 310: 
 311:                 if($char === '!') {
 312:                     /* U+0021 EXCLAMATION MARK (!)
 313:                     Switch to the markup declaration open state. */
 314:                     $this->state = 'markupDeclarationOpen';
 315: 
 316:                 } elseif($char === '/') {
 317:                     /* U+002F SOLIDUS (/)
 318:                     Switch to the close tag open state. */
 319:                     $this->state = 'closeTagOpen';
 320: 
 321:                 } elseif(preg_match('/^[A-Za-z]$/', $char)) {
 322:                     /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
 323:                     Create a new start tag token, set its tag name to the lowercase
 324:                     version of the input character (add 0x0020 to the character's code
 325:                     point), then switch to the tag name state. (Don't emit the token
 326:                     yet; further details will be filled in before it is emitted.) */
 327:                     $this->token = array(
 328:                         'name'  => strtolower($char),
 329:                         'type'  => self::STARTTAG,
 330:                         'attr'  => array()
 331:                     );
 332: 
 333:                     $this->state = 'tagName';
 334: 
 335:                 } elseif($char === '>') {
 336:                     /* U+003E GREATER-THAN SIGN (>)
 337:                     Parse error. Emit a U+003C LESS-THAN SIGN character token and a
 338:                     U+003E GREATER-THAN SIGN character token. Switch to the data state. */
 339:                     $this->emitToken(array(
 340:                         'type' => self::CHARACTR,
 341:                         'data' => '<>'
 342:                     ));
 343: 
 344:                     $this->state = 'data';
 345: 
 346:                 } elseif($char === '?') {
 347:                     /* U+003F QUESTION MARK (?)
 348:                     Parse error. Switch to the bogus comment state. */
 349:                     $this->state = 'bogusComment';
 350: 
 351:                 } else {
 352:                     /* Anything else
 353:                     Parse error. Emit a U+003C LESS-THAN SIGN character token and
 354:                     reconsume the current input character in the data state. */
 355:                     $this->emitToken(array(
 356:                         'type' => self::CHARACTR,
 357:                         'data' => '<'
 358:                     ));
 359: 
 360:                     $this->char--;
 361:                     $this->state = 'data';
 362:                 }
 363:             break;
 364:         }
 365:     }
 366: 
 367:     private function closeTagOpenState() {
 368:         $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
 369:         $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
 370: 
 371:         if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
 372:         (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
 373:         $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
 374:             /* If the content model flag is set to the RCDATA or CDATA states then
 375:             examine the next few characters. If they do not match the tag name of
 376:             the last start tag token emitted (case insensitively), or if they do but
 377:             they are not immediately followed by one of the following characters:
 378:                 * U+0009 CHARACTER TABULATION
 379:                 * U+000A LINE FEED (LF)
 380:                 * U+000B LINE TABULATION
 381:                 * U+000C FORM FEED (FF)
 382:                 * U+0020 SPACE
 383:                 * U+003E GREATER-THAN SIGN (>)
 384:                 * U+002F SOLIDUS (/)
 385:                 * EOF
 386:             ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
 387:             token, a U+002F SOLIDUS character token, and switch to the data state
 388:             to process the next input character. */
 389:             $this->emitToken(array(
 390:                 'type' => self::CHARACTR,
 391:                 'data' => '</'
 392:             ));
 393: 
 394:             $this->state = 'data';
 395: 
 396:         } else {
 397:             /* Otherwise, if the content model flag is set to the PCDATA state,
 398:             or if the next few characters do match that tag name, consume the
 399:             next input character: */
 400:             $this->char++;
 401:             $char = $this->char();
 402: 
 403:             if(preg_match('/^[A-Za-z]$/', $char)) {
 404:                 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
 405:                 Create a new end tag token, set its tag name to the lowercase version
 406:                 of the input character (add 0x0020 to the character's code point), then
 407:                 switch to the tag name state. (Don't emit the token yet; further details
 408:                 will be filled in before it is emitted.) */
 409:                 $this->token = array(
 410:                     'name'  => strtolower($char),
 411:                     'type'  => self::ENDTAG
 412:                 );
 413: 
 414:                 $this->state = 'tagName';
 415: 
 416:             } elseif($char === '>') {
 417:                 /* U+003E GREATER-THAN SIGN (>)
 418:                 Parse error. Switch to the data state. */
 419:                 $this->state = 'data';
 420: 
 421:             } elseif($this->char === $this->EOF) {
 422:                 /* EOF
 423:                 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
 424:                 SOLIDUS character token. Reconsume the EOF character in the data state. */
 425:                 $this->emitToken(array(
 426:                     'type' => self::CHARACTR,
 427:                     'data' => '</'
 428:                 ));
 429: 
 430:                 $this->char--;
 431:                 $this->state = 'data';
 432: 
 433:             } else {
 434:                 /* Parse error. Switch to the bogus comment state. */
 435:                 $this->state = 'bogusComment';
 436:             }
 437:         }
 438:     }
 439: 
 440:     private function tagNameState() {
 441:         // Consume the next input character:
 442:         $this->char++;
 443:         $char = $this->character($this->char);
 444: 
 445:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 446:             /* U+0009 CHARACTER TABULATION
 447:             U+000A LINE FEED (LF)
 448:             U+000B LINE TABULATION
 449:             U+000C FORM FEED (FF)
 450:             U+0020 SPACE
 451:             Switch to the before attribute name state. */
 452:             $this->state = 'beforeAttributeName';
 453: 
 454:         } elseif($char === '>') {
 455:             /* U+003E GREATER-THAN SIGN (>)
 456:             Emit the current tag token. Switch to the data state. */
 457:             $this->emitToken($this->token);
 458:             $this->state = 'data';
 459: 
 460:         } elseif($this->char === $this->EOF) {
 461:             /* EOF
 462:             Parse error. Emit the current tag token. Reconsume the EOF
 463:             character in the data state. */
 464:             $this->emitToken($this->token);
 465: 
 466:             $this->char--;
 467:             $this->state = 'data';
 468: 
 469:         } elseif($char === '/') {
 470:             /* U+002F SOLIDUS (/)
 471:             Parse error unless this is a permitted slash. Switch to the before
 472:             attribute name state. */
 473:             $this->state = 'beforeAttributeName';
 474: 
 475:         } else {
 476:             /* Anything else
 477:             Append the current input character to the current tag token's tag name.
 478:             Stay in the tag name state. */
 479:             $this->token['name'] .= strtolower($char);
 480:             $this->state = 'tagName';
 481:         }
 482:     }
 483: 
 484:     private function beforeAttributeNameState() {
 485:         // Consume the next input character:
 486:         $this->char++;
 487:         $char = $this->character($this->char);
 488: 
 489:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 490:             /* U+0009 CHARACTER TABULATION
 491:             U+000A LINE FEED (LF)
 492:             U+000B LINE TABULATION
 493:             U+000C FORM FEED (FF)
 494:             U+0020 SPACE
 495:             Stay in the before attribute name state. */
 496:             $this->state = 'beforeAttributeName';
 497: 
 498:         } elseif($char === '>') {
 499:             /* U+003E GREATER-THAN SIGN (>)
 500:             Emit the current tag token. Switch to the data state. */
 501:             $this->emitToken($this->token);
 502:             $this->state = 'data';
 503: 
 504:         } elseif($char === '/') {
 505:             /* U+002F SOLIDUS (/)
 506:             Parse error unless this is a permitted slash. Stay in the before
 507:             attribute name state. */
 508:             $this->state = 'beforeAttributeName';
 509: 
 510:         } elseif($this->char === $this->EOF) {
 511:             /* EOF
 512:             Parse error. Emit the current tag token. Reconsume the EOF
 513:             character in the data state. */
 514:             $this->emitToken($this->token);
 515: 
 516:             $this->char--;
 517:             $this->state = 'data';
 518: 
 519:         } else {
 520:             /* Anything else
 521:             Start a new attribute in the current tag token. Set that attribute's
 522:             name to the current input character, and its value to the empty string.
 523:             Switch to the attribute name state. */
 524:             $this->token['attr'][] = array(
 525:                 'name'  => strtolower($char),
 526:                 'value' => null
 527:             );
 528: 
 529:             $this->state = 'attributeName';
 530:         }
 531:     }
 532: 
 533:     private function attributeNameState() {
 534:         // Consume the next input character:
 535:         $this->char++;
 536:         $char = $this->character($this->char);
 537: 
 538:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 539:             /* U+0009 CHARACTER TABULATION
 540:             U+000A LINE FEED (LF)
 541:             U+000B LINE TABULATION
 542:             U+000C FORM FEED (FF)
 543:             U+0020 SPACE
 544:             Stay in the before attribute name state. */
 545:             $this->state = 'afterAttributeName';
 546: 
 547:         } elseif($char === '=') {
 548:             /* U+003D EQUALS SIGN (=)
 549:             Switch to the before attribute value state. */
 550:             $this->state = 'beforeAttributeValue';
 551: 
 552:         } elseif($char === '>') {
 553:             /* U+003E GREATER-THAN SIGN (>)
 554:             Emit the current tag token. Switch to the data state. */
 555:             $this->emitToken($this->token);
 556:             $this->state = 'data';
 557: 
 558:         } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
 559:             /* U+002F SOLIDUS (/)
 560:             Parse error unless this is a permitted slash. Switch to the before
 561:             attribute name state. */
 562:             $this->state = 'beforeAttributeName';
 563: 
 564:         } elseif($this->char === $this->EOF) {
 565:             /* EOF
 566:             Parse error. Emit the current tag token. Reconsume the EOF
 567:             character in the data state. */
 568:             $this->emitToken($this->token);
 569: 
 570:             $this->char--;
 571:             $this->state = 'data';
 572: 
 573:         } else {
 574:             /* Anything else
 575:             Append the current input character to the current attribute's name.
 576:             Stay in the attribute name state. */
 577:             $last = count($this->token['attr']) - 1;
 578:             $this->token['attr'][$last]['name'] .= strtolower($char);
 579: 
 580:             $this->state = 'attributeName';
 581:         }
 582:     }
 583: 
 584:     private function afterAttributeNameState() {
 585:         // Consume the next input character:
 586:         $this->char++;
 587:         $char = $this->character($this->char);
 588: 
 589:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 590:             /* U+0009 CHARACTER TABULATION
 591:             U+000A LINE FEED (LF)
 592:             U+000B LINE TABULATION
 593:             U+000C FORM FEED (FF)
 594:             U+0020 SPACE
 595:             Stay in the after attribute name state. */
 596:             $this->state = 'afterAttributeName';
 597: 
 598:         } elseif($char === '=') {
 599:             /* U+003D EQUALS SIGN (=)
 600:             Switch to the before attribute value state. */
 601:             $this->state = 'beforeAttributeValue';
 602: 
 603:         } elseif($char === '>') {
 604:             /* U+003E GREATER-THAN SIGN (>)
 605:             Emit the current tag token. Switch to the data state. */
 606:             $this->emitToken($this->token);
 607:             $this->state = 'data';
 608: 
 609:         } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
 610:             /* U+002F SOLIDUS (/)
 611:             Parse error unless this is a permitted slash. Switch to the
 612:             before attribute name state. */
 613:             $this->state = 'beforeAttributeName';
 614: 
 615:         } elseif($this->char === $this->EOF) {
 616:             /* EOF
 617:             Parse error. Emit the current tag token. Reconsume the EOF
 618:             character in the data state. */
 619:             $this->emitToken($this->token);
 620: 
 621:             $this->char--;
 622:             $this->state = 'data';
 623: 
 624:         } else {
 625:             /* Anything else
 626:             Start a new attribute in the current tag token. Set that attribute's
 627:             name to the current input character, and its value to the empty string.
 628:             Switch to the attribute name state. */
 629:             $this->token['attr'][] = array(
 630:                 'name'  => strtolower($char),
 631:                 'value' => null
 632:             );
 633: 
 634:             $this->state = 'attributeName';
 635:         }
 636:     }
 637: 
 638:     private function beforeAttributeValueState() {
 639:         // Consume the next input character:
 640:         $this->char++;
 641:         $char = $this->character($this->char);
 642: 
 643:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 644:             /* U+0009 CHARACTER TABULATION
 645:             U+000A LINE FEED (LF)
 646:             U+000B LINE TABULATION
 647:             U+000C FORM FEED (FF)
 648:             U+0020 SPACE
 649:             Stay in the before attribute value state. */
 650:             $this->state = 'beforeAttributeValue';
 651: 
 652:         } elseif($char === '"') {
 653:             /* U+0022 QUOTATION MARK (")
 654:             Switch to the attribute value (double-quoted) state. */
 655:             $this->state = 'attributeValueDoubleQuoted';
 656: 
 657:         } elseif($char === '&') {
 658:             /* U+0026 AMPERSAND (&)
 659:             Switch to the attribute value (unquoted) state and reconsume
 660:             this input character. */
 661:             $this->char--;
 662:             $this->state = 'attributeValueUnquoted';
 663: 
 664:         } elseif($char === '\'') {
 665:             /* U+0027 APOSTROPHE (')
 666:             Switch to the attribute value (single-quoted) state. */
 667:             $this->state = 'attributeValueSingleQuoted';
 668: 
 669:         } elseif($char === '>') {
 670:             /* U+003E GREATER-THAN SIGN (>)
 671:             Emit the current tag token. Switch to the data state. */
 672:             $this->emitToken($this->token);
 673:             $this->state = 'data';
 674: 
 675:         } else {
 676:             /* Anything else
 677:             Append the current input character to the current attribute's value.
 678:             Switch to the attribute value (unquoted) state. */
 679:             $last = count($this->token['attr']) - 1;
 680:             $this->token['attr'][$last]['value'] .= $char;
 681: 
 682:             $this->state = 'attributeValueUnquoted';
 683:         }
 684:     }
 685: 
 686:     private function attributeValueDoubleQuotedState() {
 687:         // Consume the next input character:
 688:         $this->char++;
 689:         $char = $this->character($this->char);
 690: 
 691:         if($char === '"') {
 692:             /* U+0022 QUOTATION MARK (")
 693:             Switch to the before attribute name state. */
 694:             $this->state = 'beforeAttributeName';
 695: 
 696:         } elseif($char === '&') {
 697:             /* U+0026 AMPERSAND (&)
 698:             Switch to the entity in attribute value state. */
 699:             $this->entityInAttributeValueState('double');
 700: 
 701:         } elseif($this->char === $this->EOF) {
 702:             /* EOF
 703:             Parse error. Emit the current tag token. Reconsume the character
 704:             in the data state. */
 705:             $this->emitToken($this->token);
 706: 
 707:             $this->char--;
 708:             $this->state = 'data';
 709: 
 710:         } else {
 711:             /* Anything else
 712:             Append the current input character to the current attribute's value.
 713:             Stay in the attribute value (double-quoted) state. */
 714:             $last = count($this->token['attr']) - 1;
 715:             $this->token['attr'][$last]['value'] .= $char;
 716: 
 717:             $this->state = 'attributeValueDoubleQuoted';
 718:         }
 719:     }
 720: 
 721:     private function attributeValueSingleQuotedState() {
 722:         // Consume the next input character:
 723:         $this->char++;
 724:         $char = $this->character($this->char);
 725: 
 726:         if($char === '\'') {
 727:             /* U+0022 QUOTATION MARK (')
 728:             Switch to the before attribute name state. */
 729:             $this->state = 'beforeAttributeName';
 730: 
 731:         } elseif($char === '&') {
 732:             /* U+0026 AMPERSAND (&)
 733:             Switch to the entity in attribute value state. */
 734:             $this->entityInAttributeValueState('single');
 735: 
 736:         } elseif($this->char === $this->EOF) {
 737:             /* EOF
 738:             Parse error. Emit the current tag token. Reconsume the character
 739:             in the data state. */
 740:             $this->emitToken($this->token);
 741: 
 742:             $this->char--;
 743:             $this->state = 'data';
 744: 
 745:         } else {
 746:             /* Anything else
 747:             Append the current input character to the current attribute's value.
 748:             Stay in the attribute value (single-quoted) state. */
 749:             $last = count($this->token['attr']) - 1;
 750:             $this->token['attr'][$last]['value'] .= $char;
 751: 
 752:             $this->state = 'attributeValueSingleQuoted';
 753:         }
 754:     }
 755: 
 756:     private function attributeValueUnquotedState() {
 757:         // Consume the next input character:
 758:         $this->char++;
 759:         $char = $this->character($this->char);
 760: 
 761:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 762:             /* U+0009 CHARACTER TABULATION
 763:             U+000A LINE FEED (LF)
 764:             U+000B LINE TABULATION
 765:             U+000C FORM FEED (FF)
 766:             U+0020 SPACE
 767:             Switch to the before attribute name state. */
 768:             $this->state = 'beforeAttributeName';
 769: 
 770:         } elseif($char === '&') {
 771:             /* U+0026 AMPERSAND (&)
 772:             Switch to the entity in attribute value state. */
 773:             $this->entityInAttributeValueState();
 774: 
 775:         } elseif($char === '>') {
 776:             /* U+003E GREATER-THAN SIGN (>)
 777:             Emit the current tag token. Switch to the data state. */
 778:             $this->emitToken($this->token);
 779:             $this->state = 'data';
 780: 
 781:         } else {
 782:             /* Anything else
 783:             Append the current input character to the current attribute's value.
 784:             Stay in the attribute value (unquoted) state. */
 785:             $last = count($this->token['attr']) - 1;
 786:             $this->token['attr'][$last]['value'] .= $char;
 787: 
 788:             $this->state = 'attributeValueUnquoted';
 789:         }
 790:     }
 791: 
 792:     private function entityInAttributeValueState() {
 793:         // Attempt to consume an entity.
 794:         $entity = $this->entity();
 795: 
 796:         // If nothing is returned, append a U+0026 AMPERSAND character to the
 797:         // current attribute's value. Otherwise, emit the character token that
 798:         // was returned.
 799:         $char = (!$entity)
 800:             ? '&'
 801:             : $entity;
 802: 
 803:         $last = count($this->token['attr']) - 1;
 804:         $this->token['attr'][$last]['value'] .= $char;
 805:     }
 806: 
 807:     private function bogusCommentState() {
 808:         /* Consume every character up to the first U+003E GREATER-THAN SIGN
 809:         character (>) or the end of the file (EOF), whichever comes first. Emit
 810:         a comment token whose data is the concatenation of all the characters
 811:         starting from and including the character that caused the state machine
 812:         to switch into the bogus comment state, up to and including the last
 813:         consumed character before the U+003E character, if any, or up to the
 814:         end of the file otherwise. (If the comment was started by the end of
 815:         the file (EOF), the token is empty.) */
 816:         $data = $this->characters('^>', $this->char);
 817:         $this->emitToken(array(
 818:             'data' => $data,
 819:             'type' => self::COMMENT
 820:         ));
 821: 
 822:         $this->char += strlen($data);
 823: 
 824:         /* Switch to the data state. */
 825:         $this->state = 'data';
 826: 
 827:         /* If the end of the file was reached, reconsume the EOF character. */
 828:         if($this->char === $this->EOF) {
 829:             $this->char = $this->EOF - 1;
 830:         }
 831:     }
 832: 
 833:     private function markupDeclarationOpenState() {
 834:         /* If the next two characters are both U+002D HYPHEN-MINUS (-)
 835:         characters, consume those two characters, create a comment token whose
 836:         data is the empty string, and switch to the comment state. */
 837:         if($this->character($this->char + 1, 2) === '--') {
 838:             $this->char += 2;
 839:             $this->state = 'comment';
 840:             $this->token = array(
 841:                 'data' => null,
 842:                 'type' => self::COMMENT
 843:             );
 844: 
 845:         /* Otherwise if the next seven chacacters are a case-insensitive match
 846:         for the word "DOCTYPE", then consume those characters and switch to the
 847:         DOCTYPE state. */
 848:         } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
 849:             $this->char += 7;
 850:             $this->state = 'doctype';
 851: 
 852:         /* Otherwise, is is a parse error. Switch to the bogus comment state.
 853:         The next character that is consumed, if any, is the first character
 854:         that will be in the comment. */
 855:         } else {
 856:             $this->char++;
 857:             $this->state = 'bogusComment';
 858:         }
 859:     }
 860: 
 861:     private function commentState() {
 862:         /* Consume the next input character: */
 863:         $this->char++;
 864:         $char = $this->char();
 865: 
 866:         /* U+002D HYPHEN-MINUS (-) */
 867:         if($char === '-') {
 868:             /* Switch to the comment dash state  */
 869:             $this->state = 'commentDash';
 870: 
 871:         /* EOF */
 872:         } elseif($this->char === $this->EOF) {
 873:             /* Parse error. Emit the comment token. Reconsume the EOF character
 874:             in the data state. */
 875:             $this->emitToken($this->token);
 876:             $this->char--;
 877:             $this->state = 'data';
 878: 
 879:         /* Anything else */
 880:         } else {
 881:             /* Append the input character to the comment token's data. Stay in
 882:             the comment state. */
 883:             $this->token['data'] .= $char;
 884:         }
 885:     }
 886: 
 887:     private function commentDashState() {
 888:         /* Consume the next input character: */
 889:         $this->char++;
 890:         $char = $this->char();
 891: 
 892:         /* U+002D HYPHEN-MINUS (-) */
 893:         if($char === '-') {
 894:             /* Switch to the comment end state  */
 895:             $this->state = 'commentEnd';
 896: 
 897:         /* EOF */
 898:         } elseif($this->char === $this->EOF) {
 899:             /* Parse error. Emit the comment token. Reconsume the EOF character
 900:             in the data state. */
 901:             $this->emitToken($this->token);
 902:             $this->char--;
 903:             $this->state = 'data';
 904: 
 905:         /* Anything else */
 906:         } else {
 907:             /* Append a U+002D HYPHEN-MINUS (-) character and the input
 908:             character to the comment token's data. Switch to the comment state. */
 909:             $this->token['data'] .= '-'.$char;
 910:             $this->state = 'comment';
 911:         }
 912:     }
 913: 
 914:     private function commentEndState() {
 915:         /* Consume the next input character: */
 916:         $this->char++;
 917:         $char = $this->char();
 918: 
 919:         if($char === '>') {
 920:             $this->emitToken($this->token);
 921:             $this->state = 'data';
 922: 
 923:         } elseif($char === '-') {
 924:             $this->token['data'] .= '-';
 925: 
 926:         } elseif($this->char === $this->EOF) {
 927:             $this->emitToken($this->token);
 928:             $this->char--;
 929:             $this->state = 'data';
 930: 
 931:         } else {
 932:             $this->token['data'] .= '--'.$char;
 933:             $this->state = 'comment';
 934:         }
 935:     }
 936: 
 937:     private function doctypeState() {
 938:         /* Consume the next input character: */
 939:         $this->char++;
 940:         $char = $this->char();
 941: 
 942:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 943:             $this->state = 'beforeDoctypeName';
 944: 
 945:         } else {
 946:             $this->char--;
 947:             $this->state = 'beforeDoctypeName';
 948:         }
 949:     }
 950: 
 951:     private function beforeDoctypeNameState() {
 952:         /* Consume the next input character: */
 953:         $this->char++;
 954:         $char = $this->char();
 955: 
 956:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 957:             // Stay in the before DOCTYPE name state.
 958: 
 959:         } elseif(preg_match('/^[a-z]$/', $char)) {
 960:             $this->token = array(
 961:                 'name' => strtoupper($char),
 962:                 'type' => self::DOCTYPE,
 963:                 'error' => true
 964:             );
 965: 
 966:             $this->state = 'doctypeName';
 967: 
 968:         } elseif($char === '>') {
 969:             $this->emitToken(array(
 970:                 'name' => null,
 971:                 'type' => self::DOCTYPE,
 972:                 'error' => true
 973:             ));
 974: 
 975:             $this->state = 'data';
 976: 
 977:         } elseif($this->char === $this->EOF) {
 978:             $this->emitToken(array(
 979:                 'name' => null,
 980:                 'type' => self::DOCTYPE,
 981:                 'error' => true
 982:             ));
 983: 
 984:             $this->char--;
 985:             $this->state = 'data';
 986: 
 987:         } else {
 988:             $this->token = array(
 989:                 'name' => $char,
 990:                 'type' => self::DOCTYPE,
 991:                 'error' => true
 992:             );
 993: 
 994:             $this->state = 'doctypeName';
 995:         }
 996:     }
 997: 
 998:     private function doctypeNameState() {
 999:         /* Consume the next input character: */
1000:         $this->char++;
1001:         $char = $this->char();
1002: 
1003:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1004:             $this->state = 'AfterDoctypeName';
1005: 
1006:         } elseif($char === '>') {
1007:             $this->emitToken($this->token);
1008:             $this->state = 'data';
1009: 
1010:         } elseif(preg_match('/^[a-z]$/', $char)) {
1011:             $this->token['name'] .= strtoupper($char);
1012: 
1013:         } elseif($this->char === $this->EOF) {
1014:             $this->emitToken($this->token);
1015:             $this->char--;
1016:             $this->state = 'data';
1017: 
1018:         } else {
1019:             $this->token['name'] .= $char;
1020:         }
1021: 
1022:         $this->token['error'] = ($this->token['name'] === 'HTML')
1023:             ? false
1024:             : true;
1025:     }
1026: 
1027:     private function afterDoctypeNameState() {
1028:         /* Consume the next input character: */
1029:         $this->char++;
1030:         $char = $this->char();
1031: 
1032:         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1033:             // Stay in the DOCTYPE name state.
1034: 
1035:         } elseif($char === '>') {
1036:             $this->emitToken($this->token);
1037:             $this->state = 'data';
1038: 
1039:         } elseif($this->char === $this->EOF) {
1040:             $this->emitToken($this->token);
1041:             $this->char--;
1042:             $this->state = 'data';
1043: 
1044:         } else {
1045:             $this->token['error'] = true;
1046:             $this->state = 'bogusDoctype';
1047:         }
1048:     }
1049: 
1050:     private function bogusDoctypeState() {
1051:         /* Consume the next input character: */
1052:         $this->char++;
1053:         $char = $this->char();
1054: 
1055:         if($char === '>') {
1056:             $this->emitToken($this->token);
1057:             $this->state = 'data';
1058: 
1059:         } elseif($this->char === $this->EOF) {
1060:             $this->emitToken($this->token);
1061:             $this->char--;
1062:             $this->state = 'data';
1063: 
1064:         } else {
1065:             // Stay in the bogus DOCTYPE state.
1066:         }
1067:     }
1068: 
1069:     private function entity() {
1070:         $start = $this->char;
1071: 
1072:         // This section defines how to consume an entity. This definition is
1073:         // used when parsing entities in text and in attributes.
1074: 
1075:         // The behaviour depends on the identity of the next character (the
1076:         // one immediately after the U+0026 AMPERSAND character): 
1077: 
1078:         switch($this->character($this->char + 1)) {
1079:             // U+0023 NUMBER SIGN (#)
1080:             case '#':
1081: 
1082:                 // The behaviour further depends on the character after the
1083:                 // U+0023 NUMBER SIGN:
1084:                 switch($this->character($this->char + 1)) {
1085:                     // U+0078 LATIN SMALL LETTER X
1086:                     // U+0058 LATIN CAPITAL LETTER X
1087:                     case 'x':
1088:                     case 'X':
1089:                         // Follow the steps below, but using the range of
1090:                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1091:                         // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1092:                         // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1093:                         // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1094:                         // words, 0-9, A-F, a-f).
1095:                         $char = 1;
1096:                         $char_class = '0-9A-Fa-f';
1097:                     break;
1098: 
1099:                     // Anything else
1100:                     default:
1101:                         // Follow the steps below, but using the range of
1102:                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1103:                         // NINE (i.e. just 0-9).
1104:                         $char = 0;
1105:                         $char_class = '0-9';
1106:                     break;
1107:                 }
1108: 
1109:                 // Consume as many characters as match the range of characters
1110:                 // given above.
1111:                 $this->char++;
1112:                 $e_name = $this->characters($char_class, $this->char + $char + 1);
1113:                 $entity = $this->character($start, $this->char);
1114:                 $cond = strlen($e_name) > 0;
1115: 
1116:                 // The rest of the parsing happens bellow.
1117:             break;
1118: 
1119:             // Anything else
1120:             default:
1121:                 // Consume the maximum number of characters possible, with the
1122:                 // consumed characters case-sensitively matching one of the
1123:                 // identifiers in the first column of the entities table.
1124:                 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1125:                 $len = strlen($e_name);
1126: 
1127:                 for($c = 1; $c <= $len; $c++) {
1128:                     $id = substr($e_name, 0, $c);
1129:                     $this->char++;
1130: 
1131:                     if(in_array($id, $this->entities)) {
1132:                         if ($e_name[$c-1] !== ';') {
1133:                             if ($c < $len && $e_name[$c] == ';') {
1134:                                 $this->char++; // consume extra semicolon
1135:                             }
1136:                         }
1137:                         $entity = $id;
1138:                         break;
1139:                     }
1140:                 }
1141: 
1142:                 $cond = isset($entity);
1143:                 // The rest of the parsing happens bellow.
1144:             break;
1145:         }
1146: 
1147:         if(!$cond) {
1148:             // If no match can be made, then this is a parse error. No
1149:             // characters are consumed, and nothing is returned.
1150:             $this->char = $start;
1151:             return false;
1152:         }
1153: 
1154:         // Return a character token for the character corresponding to the
1155:         // entity name (as given by the second column of the entities table).
1156:         return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
1157:     }
1158: 
1159:     private function emitToken($token) {
1160:         $emit = $this->tree->emitToken($token);
1161: 
1162:         if(is_int($emit)) {
1163:             $this->content_model = $emit;
1164: 
1165:         } elseif($token['type'] === self::ENDTAG) {
1166:             $this->content_model = self::PCDATA;
1167:         }
1168:     }
1169: 
1170:     private function EOF() {
1171:         $this->state = null;
1172:         $this->tree->emitToken(array(
1173:             'type' => self::EOF
1174:         ));
1175:     }
1176: }
1177: 
1178: class HTML5TreeConstructer {
1179:     public $stack = array();
1180: 
1181:     private $phase;
1182:     private $mode;
1183:     private $dom;
1184:     private $foster_parent = null;
1185:     private $a_formatting  = array();
1186: 
1187:     private $head_pointer = null;
1188:     private $form_pointer = null;
1189: 
1190:     private $scoping = array('button','caption','html','marquee','object','table','td','th');
1191:     private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
1192:     private $special = array('address','area','base','basefont','bgsound',
1193:     'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
1194:     'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
1195:     'h6','head','hr','iframe','image','img','input','isindex','li','link',
1196:     'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
1197:     'option','p','param','plaintext','pre','script','select','spacer','style',
1198:     'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
1199: 
1200:     // The different phases.
1201:     const INIT_PHASE = 0;
1202:     const ROOT_PHASE = 1;
1203:     const MAIN_PHASE = 2;
1204:     const END_PHASE  = 3;
1205: 
1206:     // The different insertion modes for the main phase.
1207:     const BEFOR_HEAD = 0;
1208:     const IN_HEAD    = 1;
1209:     const AFTER_HEAD = 2;
1210:     const IN_BODY    = 3;
1211:     const IN_TABLE   = 4;
1212:     const IN_CAPTION = 5;
1213:     const IN_CGROUP  = 6;
1214:     const IN_TBODY   = 7;
1215:     const IN_ROW     = 8;
1216:     const IN_CELL    = 9;
1217:     const IN_SELECT  = 10;
1218:     const AFTER_BODY = 11;
1219:     const IN_FRAME   = 12;
1220:     const AFTR_FRAME = 13;
1221: 
1222:     // The different types of elements.
1223:     const SPECIAL    = 0;
1224:     const SCOPING    = 1;
1225:     const FORMATTING = 2;
1226:     const PHRASING   = 3;
1227: 
1228:     const MARKER     = 0;
1229: 
1230:     public function __construct() {
1231:         $this->phase = self::INIT_PHASE;
1232:         $this->mode = self::BEFOR_HEAD;
1233:         $this->dom = new DOMDocument;
1234: 
1235:         $this->dom->encoding = 'UTF-8';
1236:         $this->dom->preserveWhiteSpace = true;
1237:         $this->dom->substituteEntities = true;
1238:         $this->dom->strictErrorChecking = false;
1239:     }
1240: 
1241:     // Process tag tokens
1242:     public function emitToken($token) {
1243:         switch($this->phase) {
1244:             case self::INIT_PHASE: return $this->initPhase($token); break;
1245:             case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
1246:             case self::MAIN_PHASE: return $this->mainPhase($token); break;
1247:             case self::END_PHASE : return $this->trailingEndPhase($token); break;
1248:         }
1249:     }
1250: 
1251:     private function initPhase($token) {
1252:         /* Initially, the tree construction stage must handle each token
1253:         emitted from the tokenisation stage as follows: */
1254: 
1255:         /* A DOCTYPE token that is marked as being in error
1256:         A comment token
1257:         A start tag token
1258:         An end tag token
1259:         A character token that is not one of one of U+0009 CHARACTER TABULATION,
1260:             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1261:             or U+0020 SPACE
1262:         An end-of-file token */
1263:         if((isset($token['error']) && $token['error']) ||
1264:         $token['type'] === HTML5::COMMENT ||
1265:         $token['type'] === HTML5::STARTTAG ||
1266:         $token['type'] === HTML5::ENDTAG ||
1267:         $token['type'] === HTML5::EOF ||
1268:         ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
1269:         !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
1270:             /* This specification does not define how to handle this case. In
1271:             particular, user agents may ignore the entirety of this specification
1272:             altogether for such documents, and instead invoke special parse modes
1273:             with a greater emphasis on backwards compatibility. */
1274: 
1275:             $this->phase = self::ROOT_PHASE;
1276:             return $this->rootElementPhase($token);
1277: 
1278:         /* A DOCTYPE token marked as being correct */
1279:         } elseif(isset($token['error']) && !$token['error']) {
1280:             /* Append a DocumentType node to the Document  node, with the name
1281:             attribute set to the name given in the DOCTYPE token (which will be
1282:             "HTML"), and the other attributes specific to DocumentType objects
1283:             set to null, empty lists, or the empty string as appropriate. */
1284:             $doctype = new DOMDocumentType(null, null, 'HTML');
1285: 
1286:             /* Then, switch to the root element phase of the tree construction
1287:             stage. */
1288:             $this->phase = self::ROOT_PHASE;
1289: 
1290:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1291:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1292:         or U+0020 SPACE */
1293:         } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
1294:         $token['data'])) {
1295:             /* Append that character  to the Document node. */
1296:             $text = $this->dom->createTextNode($token['data']);
1297:             $this->dom->appendChild($text);
1298:         }
1299:     }
1300: 
1301:     private function rootElementPhase($token) {
1302:         /* After the initial phase, as each token is emitted from the tokenisation
1303:         stage, it must be processed as described in this section. */
1304: 
1305:         /* A DOCTYPE token */
1306:         if($token['type'] === HTML5::DOCTYPE) {
1307:             // Parse error. Ignore the token.
1308: 
1309:         /* A comment token */
1310:         } elseif($token['type'] === HTML5::COMMENT) {
1311:             /* Append a Comment node to the Document object with the data
1312:             attribute set to the data given in the comment token. */
1313:             $comment = $this->dom->createComment($token['data']);
1314:             $this->dom->appendChild($comment);
1315: 
1316:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1317:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1318:         or U+0020 SPACE */
1319:         } elseif($token['type'] === HTML5::CHARACTR &&
1320:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
1321:             /* Append that character  to the Document node. */
1322:             $text = $this->dom->createTextNode($token['data']);
1323:             $this->dom->appendChild($text);
1324: 
1325:         /* A character token that is not one of U+0009 CHARACTER TABULATION,
1326:             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
1327:             (FF), or U+0020 SPACE
1328:         A start tag token
1329:         An end tag token
1330:         An end-of-file token */
1331:         } elseif(($token['type'] === HTML5::CHARACTR &&
1332:         !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
1333:         $token['type'] === HTML5::STARTTAG ||
1334:         $token['type'] === HTML5::ENDTAG ||
1335:         $token['type'] === HTML5::EOF) {
1336:             /* Create an HTMLElement node with the tag name html, in the HTML
1337:             namespace. Append it to the Document object. Switch to the main
1338:             phase and reprocess the current token. */
1339:             $html = $this->dom->createElement('html');
1340:             $this->dom->appendChild($html);
1341:             $this->stack[] = $html;
1342: 
1343:             $this->phase = self::MAIN_PHASE;
1344:             return $this->mainPhase($token);
1345:         }
1346:     }
1347: 
1348:     private function mainPhase($token) {
1349:         /* Tokens in the main phase must be handled as follows: */
1350: 
1351:         /* A DOCTYPE token */
1352:         if($token['type'] === HTML5::DOCTYPE) {
1353:             // Parse error. Ignore the token.
1354: 
1355:         /* A start tag token with the tag name "html" */
1356:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
1357:             /* If this start tag token was not the first start tag token, then
1358:             it is a parse error. */
1359: 
1360:             /* For each attribute on the token, check to see if the attribute
1361:             is already present on the top element of the stack of open elements.
1362:             If it is not, add the attribute and its corresponding value to that
1363:             element. */
1364:             foreach($token['attr'] as $attr) {
1365:                 if(!$this->stack[0]->hasAttribute($attr['name'])) {
1366:                     $this->stack[0]->setAttribute($attr['name'], $attr['value']);
1367:                 }
1368:             }
1369: 
1370:         /* An end-of-file token */
1371:         } elseif($token['type'] === HTML5::EOF) {
1372:             /* Generate implied end tags. */
1373:             $this->generateImpliedEndTags();
1374: 
1375:         /* Anything else. */
1376:         } else {
1377:             /* Depends on the insertion mode: */
1378:             switch($this->mode) {
1379:                 case self::BEFOR_HEAD: return $this->beforeHead($token); break;
1380:                 case self::IN_HEAD:    return $this->inHead($token); break;
1381:                 case self::AFTER_HEAD: return $this->afterHead($token); break;
1382:                 case self::IN_BODY:    return $this->inBody($token); break;
1383:                 case self::IN_TABLE:   return $this->inTable($token); break;
1384:                 case self::IN_CAPTION: return $this->inCaption($token); break;
1385:                 case self::IN_CGROUP:  return $this->inColumnGroup($token); break;
1386:                 case self::IN_TBODY:   return $this->inTableBody($token); break;
1387:                 case self::IN_ROW:     return $this->inRow($token); break;
1388:                 case self::IN_CELL:    return $this->inCell($token); break;
1389:                 case self::IN_SELECT:  return $this->inSelect($token); break;
1390:                 case self::AFTER_BODY: return $this->afterBody($token); break;
1391:                 case self::IN_FRAME:   return $this->inFrameset($token); break;
1392:                 case self::AFTR_FRAME: return $this->afterFrameset($token); break;
1393:                 case self::END_PHASE:  return $this->trailingEndPhase($token); break;
1394:             }
1395:         }
1396:     }
1397: 
1398:     private function beforeHead($token) {
1399:         /* Handle the token as follows: */
1400: 
1401:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1402:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1403:         or U+0020 SPACE */
1404:         if($token['type'] === HTML5::CHARACTR &&
1405:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
1406:             /* Append the character to the current node. */
1407:             $this->insertText($token['data']);
1408: 
1409:         /* A comment token */
1410:         } elseif($token['type'] === HTML5::COMMENT) {
1411:             /* Append a Comment node to the current node with the data attribute
1412:             set to the data given in the comment token. */
1413:             $this->insertComment($token['data']);
1414: 
1415:         /* A start tag token with the tag name "head" */
1416:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
1417:             /* Create an element for the token, append the new element to the
1418:             current node and push it onto the stack of open elements. */
1419:             $element = $this->insertElement($token);
1420: 
1421:             /* Set the head element pointer to this new element node. */
1422:             $this->head_pointer = $element;
1423: 
1424:             /* Change the insertion mode to "in head". */
1425:             $this->mode = self::IN_HEAD;
1426: 
1427:         /* A start tag token whose tag name is one of: "base", "link", "meta",
1428:         "script", "style", "title". Or an end tag with the tag name "html".
1429:         Or a character token that is not one of U+0009 CHARACTER TABULATION,
1430:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1431:         or U+0020 SPACE. Or any other start tag token */
1432:         } elseif($token['type'] === HTML5::STARTTAG ||
1433:         ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
1434:         ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
1435:         $token['data']))) {
1436:             /* Act as if a start tag token with the tag name "head" and no
1437:             attributes had been seen, then reprocess the current token. */
1438:             $this->beforeHead(array(
1439:                 'name' => 'head',
1440:                 'type' => HTML5::STARTTAG,
1441:                 'attr' => array()
1442:             ));
1443: 
1444:             return $this->inHead($token);
1445: 
1446:         /* Any other end tag */
1447:         } elseif($token['type'] === HTML5::ENDTAG) {
1448:             /* Parse error. Ignore the token. */
1449:         }
1450:     }
1451: 
1452:     private function inHead($token) {
1453:         /* Handle the token as follows: */
1454: 
1455:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1456:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1457:         or U+0020 SPACE.
1458: 
1459:         THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
1460:         or script element, append the character to the current node regardless
1461:         of its content. */
1462:         if(($token['type'] === HTML5::CHARACTR &&
1463:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
1464:         $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
1465:         array('title', 'style', 'script')))) {
1466:             /* Append the character to the current node. */
1467:             $this->insertText($token['data']);
1468: 
1469:         /* A comment token */
1470:         } elseif($token['type'] === HTML5::COMMENT) {
1471:             /* Append a Comment node to the current node with the data attribute
1472:             set to the data given in the comment token. */
1473:             $this->insertComment($token['data']);
1474: 
1475:         } elseif($token['type'] === HTML5::ENDTAG &&
1476:         in_array($token['name'], array('title', 'style', 'script'))) {
1477:             array_pop($this->stack);
1478:             return HTML5::PCDATA;
1479: 
1480:         /* A start tag with the tag name "title" */
1481:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
1482:             /* Create an element for the token and append the new element to the
1483:             node pointed to by the head element pointer, or, if that is null
1484:             (innerHTML case), to the current node. */
1485:             if($this->head_pointer !== null) {
1486:                 $element = $this->insertElement($token, false);
1487:                 $this->head_pointer->appendChild($element);
1488: 
1489:             } else {
1490:                 $element = $this->insertElement($token);
1491:             }
1492: 
1493:             /* Switch the tokeniser's content model flag  to the RCDATA state. */
1494:             return HTML5::RCDATA;
1495: 
1496:         /* A start tag with the tag name "style" */
1497:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
1498:             /* Create an element for the token and append the new element to the
1499:             node pointed to by the head element pointer, or, if that is null
1500:             (innerHTML case), to the current node. */
1501:             if($this->head_pointer !== null) {
1502:                 $element = $this->insertElement($token, false);
1503:                 $this->head_pointer->appendChild($element);
1504: 
1505:             } else {
1506:                 $this->insertElement($token);
1507:             }
1508: 
1509:             /* Switch the tokeniser's content model flag  to the CDATA state. */
1510:             return HTML5::CDATA;
1511: 
1512:         /* A start tag with the tag name "script" */
1513:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
1514:             /* Create an element for the token. */
1515:             $element = $this->insertElement($token, false);
1516:             $this->head_pointer->appendChild($element);
1517: 
1518:             /* Switch the tokeniser's content model flag  to the CDATA state. */
1519:             return HTML5::CDATA;
1520: 
1521:         /* A start tag with the tag name "base", "link", or "meta" */
1522:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
1523:         array('base', 'link', 'meta'))) {
1524:             /* Create an element for the token and append the new element to the
1525:             node pointed to by the head element pointer, or, if that is null
1526:             (innerHTML case), to the current node. */
1527:             if($this->head_pointer !== null) {
1528:                 $element = $this->insertElement($token, false);
1529:                 $this->head_pointer->appendChild($element);
1530:                 array_pop($this->stack);
1531: 
1532:             } else {
1533:                 $this->insertElement($token);
1534:             }
1535: 
1536:         /* An end tag with the tag name "head" */
1537:         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
1538:             /* If the current node is a head element, pop the current node off
1539:             the stack of open elements. */
1540:             if($this->head_pointer->isSameNode(end($this->stack))) {
1541:                 array_pop($this->stack);
1542: 
1543:             /* Otherwise, this is a parse error. */
1544:             } else {
1545:                 // k
1546:             }
1547: 
1548:             /* Change the insertion mode to "after head". */
1549:             $this->mode = self::AFTER_HEAD;
1550: 
1551:         /* A start tag with the tag name "head" or an end tag except "html". */
1552:         } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
1553:         ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
1554:             // Parse error. Ignore the token.
1555: 
1556:         /* Anything else */
1557:         } else {
1558:             /* If the current node is a head element, act as if an end tag
1559:             token with the tag name "head" had been seen. */
1560:             if($this->head_pointer->isSameNode(end($this->stack))) {
1561:                 $this->inHead(array(
1562:                     'name' => 'head',
1563:                     'type' => HTML5::ENDTAG
1564:                 ));
1565: 
1566:             /* Otherwise, change the insertion mode to "after head". */
1567:             } else {
1568:                 $this->mode = self::AFTER_HEAD;
1569:             }
1570: 
1571:             /* Then, reprocess the current token. */
1572:             return $this->afterHead($token);
1573:         }
1574:     }
1575: 
1576:     private function afterHead($token) {
1577:         /* Handle the token as follows: */
1578: 
1579:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1580:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1581:         or U+0020 SPACE */
1582:         if($token['type'] === HTML5::CHARACTR &&
1583:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
1584:             /* Append the character to the current node. */
1585:             $this->insertText($token['data']);
1586: 
1587:         /* A comment token */
1588:         } elseif($token['type'] === HTML5::COMMENT) {
1589:             /* Append a Comment node to the current node with the data attribute
1590:             set to the data given in the comment token. */
1591:             $this->insertComment($token['data']);
1592: 
1593:         /* A start tag token with the tag name "body" */
1594:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
1595:             /* Insert a body element for the token. */
1596:             $this->insertElement($token);
1597: 
1598:             /* Change the insertion mode to "in body". */
1599:             $this->mode = self::IN_BODY;
1600: 
1601:         /* A start tag token with the tag name "frameset" */
1602:         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
1603:             /* Insert a frameset element for the token. */
1604:             $this->insertElement($token);
1605: 
1606:             /* Change the insertion mode to "in frameset". */
1607:             $this->mode = self::IN_FRAME;
1608: 
1609:         /* A start tag token whose tag name is one of: "base", "link", "meta",
1610:         "script", "style", "title" */
1611:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
1612:         array('base', 'link', 'meta', 'script', 'style', 'title'))) {
1613:             /* Parse error. Switch the insertion mode back to "in head" and
1614:             reprocess the token. */
1615:             $this->mode = self::IN_HEAD;
1616:             return $this->inHead($token);
1617: 
1618:         /* Anything else */
1619:         } else {
1620:             /* Act as if a start tag token with the tag name "body" and no
1621:             attributes had been seen, and then reprocess the current token. */
1622:             $this->afterHead(array(
1623:                 'name' => 'body',
1624:                 'type' => HTML5::STARTTAG,
1625:                 'attr' => array()
1626:             ));
1627: 
1628:             return $this->inBody($token);
1629:         }
1630:     }
1631: 
1632:     private function inBody($token) {
1633:         /* Handle the token as follows: */
1634: 
1635:         switch($token['type']) {
1636:             /* A character token */
1637:             case HTML5::CHARACTR:
1638:                 /* Reconstruct the active formatting elements, if any. */
1639:                 $this->reconstructActiveFormattingElements();
1640: 
1641:                 /* Append the token's character to the current node. */
1642:                 $this->insertText($token['data']);
1643:             break;
1644: 
1645:             /* A comment token */
1646:             case HTML5::COMMENT:
1647:                 /* Append a Comment node to the current node with the data
1648:                 attribute set to the data given in the comment token. */
1649:                 $this->insertComment($token['data']);
1650:             break;
1651: 
1652:             case HTML5::STARTTAG:
1653:             switch($token['name']) {
1654:                 /* A start tag token whose tag name is one of: "script",
1655:                 "style" */
1656:                 case 'script': case 'style':
1657:                     /* Process the token as if the insertion mode had been "in
1658:                     head". */
1659:                     return $this->inHead($token);
1660:                 break;
1661: 
1662:                 /* A start tag token whose tag name is one of: "base", "link",
1663:                 "meta", "title" */
1664:                 case 'base': case 'link': case 'meta': case 'title':
1665:                     /* Parse error. Process the token as if the insertion mode
1666:                     had    been "in head". */
1667:                     return $this->inHead($token);
1668:                 break;
1669: 
1670:                 /* A start tag token with the tag name "body" */
1671:                 case 'body':
1672:                     /* Parse error. If the second element on the stack of open
1673:                     elements is not a body element, or, if the stack of open
1674:                     elements has only one node on it, then ignore the token.
1675:                     (innerHTML case) */
1676:                     if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
1677:                         // Ignore
1678: 
1679:                     /* Otherwise, for each attribute on the token, check to see
1680:                     if the attribute is already present on the body element (the
1681:                     second element)    on the stack of open elements. If it is not,
1682:                     add the attribute and its corresponding value to that
1683:                     element. */
1684:                     } else {
1685:                         foreach($token['attr'] as $attr) {
1686:                             if(!$this->stack[1]->hasAttribute($attr['name'])) {
1687:                                 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
1688:                             }
1689:                         }
1690:                     }
1691:                 break;
1692: 
1693:                 /* A start tag whose tag name is one of: "address",
1694:                 "blockquote", "center", "dir", "div", "dl", "fieldset",
1695:                 "listing", "menu", "ol", "p", "ul" */
1696:                 case 'address': case 'blockquote': case 'center': case 'dir':
1697:                 case 'div': case 'dl': case 'fieldset': case 'listing':
1698:                 case 'menu': case 'ol': case 'p': case 'ul':
1699:                     /* If the stack of open elements has a p element in scope,
1700:                     then act as if an end tag with the tag name p had been
1701:                     seen. */
1702:                     if($this->elementInScope('p')) {
1703:                         $this->emitToken(array(
1704:                             'name' => 'p',
1705:                             'type' => HTML5::ENDTAG
1706:                         ));
1707:                     }
1708: 
1709:                     /* Insert an HTML element for the token. */
1710:                     $this->insertElement($token);
1711:                 break;
1712: 
1713:                 /* A start tag whose tag name is "form" */
1714:                 case 'form':
1715:                     /* If the form element pointer is not null, ignore the
1716:                     token with a parse error. */
1717:                     if($this->form_pointer !== null) {
1718:                         // Ignore.
1719: 
1720:                     /* Otherwise: */
1721:                     } else {
1722:                         /* If the stack of open elements has a p element in
1723:                         scope, then act as if an end tag with the tag name p
1724:                         had been seen. */
1725:                         if($this->elementInScope('p')) {
1726:                             $this->emitToken(array(
1727:                                 'name' => 'p',
1728:                                 'type' => HTML5::ENDTAG
1729:                             ));
1730:                         }
1731: 
1732:                         /* Insert an HTML element for the token, and set the
1733:                         form element pointer to point to the element created. */
1734:                         $element = $this->insertElement($token);
1735:                         $this->form_pointer = $element;
1736:                     }
1737:                 break;
1738: 
1739:                 /* A start tag whose tag name is "li", "dd" or "dt" */
1740:                 case 'li': case 'dd': case 'dt':
1741:                     /* If the stack of open elements has a p  element in scope,
1742:                     then act as if an end tag with the tag name p had been
1743:                     seen. */
1744:                     if($this->elementInScope('p')) {
1745:                         $this->emitToken(array(
1746:                             'name' => 'p',
1747:                             'type' => HTML5::ENDTAG
1748:                         ));
1749:                     }
1750: 
1751:                     $stack_length = count($this->stack) - 1;
1752: 
1753:                     for($n = $stack_length; 0 <= $n; $n--) {
1754:                         /* 1. Initialise node to be the current node (the
1755:                         bottommost node of the stack). */
1756:                         $stop = false;
1757:                         $node = $this->stack[$n];
1758:                         $cat  = $this->getElementCategory($node->tagName);
1759: 
1760:                         /* 2. If node is an li, dd or dt element, then pop all
1761:                         the    nodes from the current node up to node, including
1762:                         node, then stop this algorithm. */
1763:                         if($token['name'] === $node->tagName ||    ($token['name'] !== 'li'
1764:                         && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
1765:                             for($x = $stack_length; $x >= $n ; $x--) {
1766:                                 array_pop($this->stack);
1767:                             }
1768: 
1769:                             break;
1770:                         }
1771: 
1772:                         /* 3. If node is not in the formatting category, and is
1773:                         not    in the phrasing category, and is not an address or
1774:                         div element, then stop this algorithm. */
1775:                         if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
1776:                         $node->tagName !== 'address' && $node->tagName !== 'div') {
1777:                             break;
1778:                         }
1779:                     }
1780: 
1781:                     /* Finally, insert an HTML element with the same tag
1782:                     name as the    token's. */
1783:                     $this->insertElement($token);
1784:                 break;
1785: 
1786:                 /* A start tag token whose tag name is "plaintext" */
1787:                 case 'plaintext':
1788:                     /* If the stack of open elements has a p  element in scope,
1789:                     then act as if an end tag with the tag name p had been
1790:                     seen. */
1791:                     if($this->elementInScope('p')) {
1792:                         $this->emitToken(array(
1793:                             'name' => 'p',
1794:                             'type' => HTML5::ENDTAG
1795:                         ));
1796:                     }
1797: 
1798:                     /* Insert an HTML element for the token. */
1799:                     $this->insertElement($token);
1800: 
1801:                     return HTML5::PLAINTEXT;
1802:                 break;
1803: 
1804:                 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
1805:                 "h5", "h6" */
1806:                 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1807:                     /* If the stack of open elements has a p  element in scope,
1808:                     then act as if an end tag with the tag name p had been seen. */
1809:                     if($this->elementInScope('p')) {
1810:                         $this->emitToken(array(
1811:                             'name' => 'p',
1812:                             'type' => HTML5::ENDTAG
1813:                         ));
1814:                     }
1815: 
1816:                     /* If the stack of open elements has in scope an element whose
1817:                     tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1818:                     this is a parse error; pop elements from the stack until an
1819:                     element with one of those tag names has been popped from the
1820:                     stack. */
1821:                     while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
1822:                         array_pop($this->stack);
1823:                     }
1824: 
1825:                     /* Insert an HTML element for the token. */
1826:                     $this->insertElement($token);
1827:                 break;
1828: 
1829:                 /* A start tag whose tag name is "a" */
1830:                 case 'a':
1831:                     /* If the list of active formatting elements contains
1832:                     an element whose tag name is "a" between the end of the
1833:                     list and the last marker on the list (or the start of
1834:                     the list if there is no marker on the list), then this
1835:                     is a parse error; act as if an end tag with the tag name
1836:                     "a" had been seen, then remove that element from the list
1837:                     of active formatting elements and the stack of open
1838:                     elements if the end tag didn't already remove it (it
1839:                     might not have if the element is not in table scope). */
1840:                     $leng = count($this->a_formatting);
1841: 
1842:                     for($n = $leng - 1; $n >= 0; $n--) {
1843:                         if($this->a_formatting[$n] === self::MARKER) {
1844:                             break;
1845: 
1846:                         } elseif($this->a_formatting[$n]->nodeName === 'a') {
1847:                             $this->emitToken(array(
1848:                                 'name' => 'a',
1849:                                 'type' => HTML5::ENDTAG
1850:                             ));
1851:                             break;
1852:                         }
1853:                     }
1854: 
1855:                     /* Reconstruct the active formatting elements, if any. */
1856:                     $this->reconstructActiveFormattingElements();
1857: 
1858:                     /* Insert an HTML element for the token. */
1859:                     $el = $this->insertElement($token);
1860: 
1861:                     /* Add that element to the list of active formatting
1862:                     elements. */
1863:                     $this->a_formatting[] = $el;
1864:                 break;
1865: 
1866:                 /* A start tag whose tag name is one of: "b", "big", "em", "font",
1867:                 "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1868:                 case 'b': case 'big': case 'em': case 'font': case 'i':
1869:                 case 'nobr': case 's': case 'small': case 'strike':
1870:                 case 'strong': case 'tt': case 'u':
1871:                     /* Reconstruct the active formatting elements, if any. */
1872:                     $this->reconstructActiveFormattingElements();
1873: 
1874:                     /* Insert an HTML element for the token. */
1875:                     $el = $this->insertElement($token);
1876: 
1877:                     /* Add that element to the list of active formatting
1878:                     elements. */
1879:                     $this->a_formatting[] = $el;
1880:                 break;
1881: 
1882:                 /* A start tag token whose tag name is "button" */
1883:                 case 'button':
1884:                     /* If the stack of open elements has a button element in scope,
1885:                     then this is a parse error; act as if an end tag with the tag
1886:                     name "button" had been seen, then reprocess the token. (We don't
1887:                     do that. Unnecessary.) */
1888:                     if($this->elementInScope('button')) {
1889:                         $this->inBody(array(
1890:                             'name' => 'button',
1891:                             'type' => HTML5::ENDTAG
1892:                         ));
1893:                     }
1894: 
1895:                     /* Reconstruct the active formatting elements, if any. */
1896:                     $this->reconstructActiveFormattingElements();
1897: 
1898:                     /* Insert an HTML element for the token. */
1899:                     $this->insertElement($token);
1900: 
1901:                     /* Insert a marker at the end of the list of active
1902:                     formatting elements. */
1903:                     $this->a_formatting[] = self::MARKER;
1904:                 break;
1905: 
1906:                 /* A start tag token whose tag name is one of: "marquee", "object" */
1907:                 case 'marquee': case 'object':
1908:                     /* Reconstruct the active formatting elements, if any. */
1909:                     $this->reconstructActiveFormattingElements();
1910: 
1911:                     /* Insert an HTML element for the token. */
1912:                     $this->insertElement($token);
1913: 
1914:                     /* Insert a marker at the end of the list of active
1915:                     formatting elements. */
1916:                     $this->a_formatting[] = self::MARKER;
1917:                 break;
1918: 
1919:                 /* A start tag token whose tag name is "xmp" */
1920:                 case 'xmp':
1921:                     /* Reconstruct the active formatting elements, if any. */
1922:                     $this->reconstructActiveFormattingElements();
1923: 
1924:                     /* Insert an HTML element for the token. */
1925:                     $this->insertElement($token);
1926: 
1927:                     /* Switch the content model flag to the CDATA state. */
1928:                     return HTML5::CDATA;
1929:                 break;
1930: 
1931:                 /* A start tag whose tag name is "table" */
1932:                 case 'table':
1933:                     /* If the stack of open elements has a p element in scope,
1934:                     then act as if an end tag with the tag name p had been seen. */
1935:                     if($this->elementInScope('p')) {
1936:                         $this->emitToken(array(
1937:                             'name' => 'p',
1938:                             'type' => HTML5::ENDTAG
1939:                         ));
1940:                     }
1941: 
1942:                     /* Insert an HTML element for the token. */
1943:                     $this->insertElement($token);
1944: 
1945:                     /* Change the insertion mode to "in table". */
1946:                     $this->mode = self::IN_TABLE;
1947:                 break;
1948: 
1949:                 /* A start tag whose tag name is one of: "area", "basefont",
1950:                 "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
1951:                 case 'area': case 'basefont': case 'bgsound': case 'br':
1952:                 case 'embed': case 'img': case 'param': case 'spacer':
1953:                 case 'wbr':
1954:                     /* Reconstruct the active formatting elements, if any. */
1955:                     $this->reconstructActiveFormattingElements();
1956: 
1957:                     /* Insert an HTML element for the token. */
1958:                     $this->insertElement($token);
1959: 
1960:                     /* Immediately pop the current node off the stack of open elements. */
1961:                     array_pop($this->stack);
1962:                 break;
1963: 
1964:                 /* A start tag whose tag name is "hr" */
1965:                 case 'hr':
1966:                     /* If the stack of open elements has a p element in scope,
1967:                     then act as if an end tag with the tag name p had been seen. */
1968:                     if($this->elementInScope('p')) {
1969:                         $this->emitToken(array(
1970:                             'name' => 'p',
1971:                             'type' => HTML5::ENDTAG
1972:                         ));
1973:                     }
1974: 
1975:                     /* Insert an HTML element for the token. */
1976:                     $this->insertElement($token);
1977: 
1978:                     /* Immediately pop the current node off the stack of open elements. */
1979:                     array_pop($this->stack);
1980:                 break;
1981: 
1982:                 /* A start tag whose tag name is "image" */
1983:                 case 'image':
1984:                     /* Parse error. Change the token's tag name to "img" and
1985:                     reprocess it. (Don't ask.) */
1986:                     $token['name'] = 'img';
1987:                     return $this->inBody($token);
1988:                 break;
1989: 
1990:                 /* A start tag whose tag name is "input" */
1991:                 case 'input':
1992:                     /* Reconstruct the active formatting elements, if any. */
1993:                     $this->reconstructActiveFormattingElements();
1994: 
1995:                     /* Insert an input element for the token. */
1996:                     $element = $this->insertElement($token, false);
1997: 
1998:                     /* If the form element pointer is not null, then associate the
1999:                     input element with the form element pointed to by the form
2000:                     element pointer. */
2001:                     $this->form_pointer !== null
2002:                         ? $this->form_pointer->appendChild($element)
2003:                         : end($this->stack)->appendChild($element);
2004: 
2005:                     /* Pop that input element off the stack of open elements. */
2006:                     array_pop($this->stack);
2007:                 break;
2008: 
2009:                 /* A start tag whose tag name is "isindex" */
2010:                 case 'isindex':
2011:                     /* Parse error. */
2012:                     // w/e
2013: 
2014:                     /* If the form element pointer is not null,
2015:                     then ignore the token. */
2016:                     if($this->form_pointer === null) {
2017:                         /* Act as if a start tag token with the tag name "form" had
2018:                         been seen. NOTE(review): the token below is named 'body', not "form" - confirm. */
2019:                         $this->inBody(array(
2020:                             'name' => 'body',
2021:                             'type' => HTML5::STARTTAG,
2022:                             'attr' => array()
2023:                         ));
2024: 
2025:                         /* Act as if a start tag token with the tag name "hr" had
2026:                         been seen. */
2027:                         $this->inBody(array(
2028:                             'name' => 'hr',
2029:                             'type' => HTML5::STARTTAG,
2030:                             'attr' => array()
2031:                         ));
2032: 
2033:                         /* Act as if a start tag token with the tag name "p" had
2034:                         been seen. */
2035:                         $this->inBody(array(
2036:                             'name' => 'p',
2037:                             'type' => HTML5::STARTTAG,
2038:                             'attr' => array()
2039:                         ));
2040: 
2041:                         /* Act as if a start tag token with the tag name "label"
2042:                         had been seen. */
2043:                         $this->inBody(array(
2044:                             'name' => 'label',
2045:                             'type' => HTML5::STARTTAG,
2046:                             'attr' => array()
2047:                         ));
2048: 
2049:                         /* Act as if a stream of character tokens had been seen. */
2050:                         $this->insertText('This is a searchable index. '.
2051:                         'Insert your search keywords here: ');
2052: 
2053:                         /* Act as if a start tag token with the tag name "input"
2054:                         had been seen, with all the attributes from the "isindex"
2055:                         token, except with the "name" attribute set to the value
2056:                         "isindex" (ignoring any explicit "name" attribute). */
2057:                         $attr = $token['attr'];
2058:                         $attr[] = array('name' => 'name', 'value' => 'isindex');
2059: 
2060:                         $this->inBody(array(
2061:                             'name' => 'input',
2062:                             'type' => HTML5::STARTTAG,
2063:                             'attr' => $attr
2064:                         ));
2065: 
2066:                         /* Act as if a stream of character tokens had been seen
2067:                         (see below for what they should say). */
2068:                         $this->insertText('This is a searchable index. '.
2069:                         'Insert your search keywords here: ');
2070: 
2071:                         /* Act as if an end tag token with the tag name "label"
2072:                         had been seen. */
2073:                         $this->inBody(array(
2074:                             'name' => 'label',
2075:                             'type' => HTML5::ENDTAG
2076:                         ));
2077: 
2078:                         /* Act as if an end tag token with the tag name "p" had
2079:                         been seen. */
2080:                         $this->inBody(array(
2081:                             'name' => 'p',
2082:                             'type' => HTML5::ENDTAG
2083:                         ));
2084: 
2085:                         /* Act as if a start tag token with the tag name "hr" had
2086:                         been seen. NOTE(review): the token below is typed ENDTAG, not STARTTAG - confirm. */
2087:                         $this->inBody(array(
2088:                             'name' => 'hr',
2089:                             'type' => HTML5::ENDTAG
2090:                         ));
2091: 
2092:                         /* Act as if an end tag token with the tag name "form" had
2093:                         been seen. */
2094:                         $this->inBody(array(
2095:                             'name' => 'form',
2096:                             'type' => HTML5::ENDTAG
2097:                         ));
2098:                     }
2099:                 break;
2100: 
2101:                 /* A start tag whose tag name is "textarea" */
2102:                 case 'textarea':
2103:                     $this->insertElement($token);
2104: 
2105:                     /* Switch the tokeniser's content model flag to the
2106:                     RCDATA state. */
2107:                     return HTML5::RCDATA;
2108:                 break;
2109: 
2110:                 /* A start tag whose tag name is one of: "iframe", "noembed",
2111:                 "noframes" */
2112:                 case 'iframe': case 'noembed': case 'noframes':
2113:                     $this->insertElement($token);
2114: 
2115:                     /* Switch the tokeniser's content model flag to the CDATA state. */
2116:                     return HTML5::CDATA;
2117:                 break;
2118: 
2119:                 /* A start tag whose tag name is "select" */
2120:                 case 'select':
2121:                     /* Reconstruct the active formatting elements, if any. */
2122:                     $this->reconstructActiveFormattingElements();
2123: 
2124:                     /* Insert an HTML element for the token. */
2125:                     $this->insertElement($token);
2126: 
2127:                     /* Change the insertion mode to "in select". */
2128:                     $this->mode = self::IN_SELECT;
2129:                 break;
2130: 
2131:                 /* A start or end tag whose tag name is one of: "caption", "col",
2132:                 "colgroup", "frame", "frameset", "head", "option", "optgroup",
2133:                 "tbody", "td", "tfoot", "th", "thead", "tr". */
2134:                 case 'caption': case 'col': case 'colgroup': case 'frame':
2135:                 case 'frameset': case 'head': case 'option': case 'optgroup':
2136:                 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
2137:                 case 'tr':
2138:                     // Parse error. Ignore the token.
2139:                 break;
2140: 
2141:                 /* A start or end tag whose tag name is one of: "event-source",
2142:                 "section", "nav", "article", "aside", "header", "footer",
2143:                 "datagrid", "command" */
2144:                 case 'event-source': case 'section': case 'nav': case 'article':
2145:                 case 'aside': case 'header': case 'footer': case 'datagrid':
2146:                 case 'command':
2147:                     // Work in progress!
2148:                 break;
2149: 
2150:                 /* A start tag token not covered by the previous entries */
2151:                 default:
2152:                     /* Reconstruct the active formatting elements, if any. */
2153:                     $this->reconstructActiveFormattingElements();
2154: 
2155:                     $this->insertElement($token, true, true);
2156:                 break;
2157:             }
2158:             break;
2159: 
2160:             case HTML5::ENDTAG:
2161:             switch($token['name']) {
2162:                 /* An end tag with the tag name "body" */
2163:                 case 'body':
2164:                     /* If the second element in the stack of open elements is
2165:                     not a body element, this is a parse error. Ignore the token.
2166:                     (innerHTML case) */
2167:                     if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
2168:                         // Ignore.
2169: 
2170:                     /* If the current node is not the body element, then this
2171:                     is a parse error. */
2172:                     } elseif(end($this->stack)->nodeName !== 'body') {
2173:                         // Parse error.
2174:                     }
2175: 
2176:                     /* Change the insertion mode to "after body". NOTE(review): this also runs when the token was meant to be ignored above - confirm. */
2177:                     $this->mode = self::AFTER_BODY;
2178:                 break;
2179: 
2180:                 /* An end tag with the tag name "html" */
2181:                 case 'html':
2182:                     /* Act as if an end tag with tag name "body" had been seen,
2183:                     then, if that token wasn't ignored, reprocess the current
2184:                     token. */
2185:                     $this->inBody(array(
2186:                         'name' => 'body',
2187:                         'type' => HTML5::ENDTAG
2188:                     ));
2189: 
2190:                     return $this->afterBody($token);
2191:                 break;
2192: 
2193:                 /* An end tag whose tag name is one of: "address", "blockquote",
2194:                 "center", "dir", "div", "dl", "fieldset", "listing", "menu",
2195:                 "ol", "pre", "ul" */
2196:                 case 'address': case 'blockquote': case 'center': case 'dir':
2197:                 case 'div': case 'dl': case 'fieldset': case 'listing':
2198:                 case 'menu': case 'ol': case 'pre': case 'ul':
2199:                     /* If the stack of open elements has an element in scope
2200:                     with the same tag name as that of the token, then generate
2201:                     implied end tags. */
2202:                     if($this->elementInScope($token['name'])) {
2203:                         $this->generateImpliedEndTags();
2204: 
2205:                         /* Now, if the current node is not an element with
2206:                         the same tag name as that of the token, then this
2207:                         is a parse error. */
2208:                         // w/e
2209: 
2210:                         /* If the stack of open elements has an element in
2211:                         scope with the same tag name as that of the token,
2212:                         then pop elements from this stack until an element
2213:                         with that tag name has been popped from the stack. */
2214:                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
2215:                             if($this->stack[$n]->nodeName === $token['name']) {
2216:                                 $n = -1;
2217:                             }
2218: 
2219:                             array_pop($this->stack);
2220:                         }
2221:                     }
2222:                 break;
2223: 
2224:                 /* An end tag whose tag name is "form" */
2225:                 case 'form':
2226:                     /* If the stack of open elements has an element in scope
2227:                     with the same tag name as that of the token, then generate
2228:                     implied end tags. */
2229:                     if($this->elementInScope($token['name'])) {
2230:                         $this->generateImpliedEndTags();
2231: 
2232:                     } 
2233: 
2234:                     if(end($this->stack)->nodeName !== $token['name']) {
2235:                         /* Now, if the current node is not an element with the
2236:                         same tag name as that of the token, then this is a parse
2237:                         error. */
2238:                         // w/e
2239: 
2240:                     } else {
2241:                         /* Otherwise, if the current node is an element with
2242:                         the same tag name as that of the token pop that element
2243:                         from the stack. */
2244:                         array_pop($this->stack);
2245:                     }
2246: 
2247:                     /* In any case, set the form element pointer to null. */
2248:                     $this->form_pointer = null;
2249:                 break;
2250: 
2251:                 /* An end tag whose tag name is "p" */
2252:                 case 'p':
2253:                     /* If the stack of open elements has a p element in scope,
2254:                     then generate implied end tags, except for p elements. */
2255:                     if($this->elementInScope('p')) {
2256:                         $this->generateImpliedEndTags(array('p'));
2257: 
2258:                         /* If the current node is not a p element, then this is
2259:                         a parse error. */
2260:                         // k
2261: 
2262:                         /* If the stack of open elements has a p element in
2263:                         scope, then pop elements from this stack until the stack
2264:                         no longer has a p element in scope. */
2265:                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
2266:                             if($this->elementInScope('p')) {
2267:                                 array_pop($this->stack);
2268: 
2269:                             } else {
2270:                                 break;
2271:                             }
2272:                         }
2273:                     }
2274:                 break;
2275: 
2276:                 /* An end tag whose tag name is "dd", "dt", or "li" */
2277:                 case 'dd': case 'dt': case 'li':
2278:                     /* If the stack of open elements has an element in scope
2279:                     whose tag name matches the tag name of the token, then
2280:                     generate implied end tags, except for elements with the
2281:                     same tag name as the token. */
2282:                     if($this->elementInScope($token['name'])) {
2283:                         $this->generateImpliedEndTags(array($token['name']));
2284: 
2285:                         /* If the current node is not an element with the same
2286:                         tag name as the token, then this is a parse error. */
2287:                         // w/e
2288: 
2289:                         /* If the stack of open elements has an element in scope
2290:                         whose tag name matches the tag name of the token, then
2291:                         pop elements from this stack until an element with that
2292:                         tag name has been popped from the stack. */
2293:                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
2294:                             if($this->stack[$n]->nodeName === $token['name']) {
2295:                                 $n = -1;
2296:                             }
2297: 
2298:                             array_pop($this->stack);
2299:                         }
2300:                     }
2301:                 break;
2302: 
2303:                 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
2304:                 "h5", "h6" */
2305:                 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
2306:                     $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
2307: 
2308:                     /* If the stack of open elements has in scope an element whose
2309:                     tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2310:                     generate implied end tags. */
2311:                     if($this->elementInScope($elements)) {
2312:                         $this->generateImpliedEndTags();
2313: 
2314:                         /* Now, if the current node is not an element with the same
2315:                         tag name as that of the token, then this is a parse error. */
2316:                         // w/e
2317: 
2318:                         /* If the stack of open elements has in scope an element
2319:                         whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
2320:                         "h6", then pop elements from the stack until an element
2321:                         with one of those tag names has been popped from the stack. */
2322:                         while($this->elementInScope($elements)) {
2323:                             array_pop($this->stack);
2324:                         }
2325:                     }
2326:                 break;
2327: 
2328:                 /* An end tag whose tag name is one of: "a", "b", "big", "em",
2329:                 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2330:                 case 'a': case 'b': case 'big': case 'em': case 'font':
2331:                 case 'i': case 'nobr': case 's': case 'small': case 'strike':
2332:                 case 'strong': case 'tt': case 'u':
2333:                     /* 1. Let the formatting element be the last element in
2334:                     the list of active formatting elements that:
2335:                         * is between the end of the list and the last scope
2336:                         marker in the list, if any, or the start of the list
2337:                         otherwise, and
2338:                         * has the same tag name as the token.
2339:                     */
2340:                     while(true) {
2341:                         for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
2342:                             if($this->a_formatting[$a] === self::MARKER) {
2343:                                 break;
2344: 
2345:                             } elseif($this->a_formatting[$a]->tagName === $token['name']) {
2346:                                 $formatting_element = $this->a_formatting[$a];
2347:                                 $in_stack = in_array($formatting_element, $this->stack, true);
2348:                                 $fe_af_pos = $a;
2349:                                 break;
2350:                             }
2351:                         }
2352: 
2353:                         /* If there is no such node, or, if that node is
2354:                         also in the stack of open elements but the element
2355:                         is not in scope, then this is a parse error. Abort
2356:                         these steps. The token is ignored. */
2357:                         if(!isset($formatting_element) || ($in_stack &&
2358:                         !$this->elementInScope($token['name']))) {
2359:                             break;
2360: 
2361:                         /* Otherwise, if there is such a node, but that node
2362:                         is not in the stack of open elements, then this is a
2363:                         parse error; remove the element from the list, and
2364:                         abort these steps. */
2365:                         } elseif(isset($formatting_element) && !$in_stack) {
2366:                             unset($this->a_formatting[$fe_af_pos]);
2367:                             $this->a_formatting = array_merge($this->a_formatting);
2368:                             break;
2369:                         }
2370: 
2371:                         /* 2. Let the furthest block be the topmost node in the
2372:                         stack of open elements that is lower in the stack
2373:                         than the formatting element, and is not an element in
2374:                         the phrasing or formatting categories. There might
2375:                         not be one. */
2376:                         $fe_s_pos = array_search($formatting_element, $this->stack, true);
2377:                         $length = count($this->stack);
2378: 
2379:                         for($s = $fe_s_pos + 1; $s < $length; $s++) {
2380:                             $category = $this->getElementCategory($this->stack[$s]->nodeName);
2381: 
2382:                             if($category !== self::PHRASING && $category !== self::FORMATTING) {
2383:                                 $furthest_block = $this->stack[$s];
2384:                             }
2385:                         }
2386: 
2387:                         /* 3. If there is no furthest block, then the UA must
2388:                         skip the subsequent steps and instead just pop all
2389:                         the nodes from the bottom of the stack of open
2390:                         elements, from the current node up to the formatting
2391:                         element, and remove the formatting element from the
2392:                         list of active formatting elements. */
2393:                         if(!isset($furthest_block)) {
2394:                             for($n = $length - 1; $n >= $fe_s_pos; $n--) {
2395:                                 array_pop($this->stack);
2396:                             }
2397: 
2398:                             unset($this->a_formatting[$fe_af_pos]);
2399:                             $this->a_formatting = array_merge($this->a_formatting);
2400:                             break;
2401:                         }
2402: 
2403:                         /* 4. Let the common ancestor be the element
2404:                         immediately above the formatting element in the stack
2405:                         of open elements. */
2406:                         $common_ancestor = $this->stack[$fe_s_pos - 1];
2407: 
2408:                         /* 5. If the furthest block has a parent node, then
2409:                         remove the furthest block from its parent node. */
2410:                         if($furthest_block->parentNode !== null) {
2411:                             $furthest_block->parentNode->removeChild($furthest_block);
2412:                         }
2413: 
2414:                         /* 6. Let a bookmark note the position of the
2415:                         formatting element in the list of active formatting
2416:                         elements relative to the elements on either side
2417:                         of it in the list. */
2418:                         $bookmark = $fe_af_pos;
2419: 
2420:                         /* 7. Let node and last node  be the furthest block.
2421:                         Follow these steps: */
2422:                         $node = $furthest_block;
2423:                         $last_node = $furthest_block;
2424: 
2425:                         while(true) {
2426:                             for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
2427:                                 /* 7.1 Let node be the element immediately
2428:                                 prior to node in the stack of open elements. */
2429:                                 $node = $this->stack[$n];
2430: 
2431:                                 /* 7.2 If node is not in the list of active
2432:                                 formatting elements, then remove node from
2433:                                 the stack of open elements and then go back
2434:                                 to step 1. */
2435:                                 if(!in_array($node, $this->a_formatting, true)) {
2436:                                     unset($this->stack[$n]);
2437:                                     $this->stack = array_merge($this->stack);
2438: 
2439:                                 } else {
2440:                                     break;
2441:                                 }
2442:                             }
2443: 
2444:                             /* 7.3 Otherwise, if node is the formatting
2445:                             element, then go to the next step in the overall
2446:                             algorithm. */
2447:                             if($node === $formatting_element) {
2448:                                 break;
2449: 
2450:                             /* 7.4 Otherwise, if last node is the furthest
2451:                             block, then move the aforementioned bookmark to
2452:                             be immediately after the node in the list of
2453:                             active formatting elements. */
2454:                             } elseif($last_node === $furthest_block) {
2455:                                 $bookmark = array_search($node, $this->a_formatting, true) + 1;
2456:                             }
2457: 
2458:                             /* 7.5 If node has any children, perform a
2459:                             shallow clone of node, replace the entry for
2460:                             node in the list of active formatting elements
2461:                             with an entry for the clone, replace the entry
2462:                             for node in the stack of open elements with an
2463:                             entry for the clone, and let node be the clone. */
2464:                             if($node->hasChildNodes()) {
2465:                                 $clone = $node->cloneNode();
2466:                                 $s_pos = array_search($node, $this->stack, true);
2467:                                 $a_pos = array_search($node, $this->a_formatting, true);
2468: 
2469:                                 $this->stack[$s_pos] = $clone;
2470:                                 $this->a_formatting[$a_pos] = $clone;
2471:                                 $node = $clone;
2472:                             }
2473: 
2474:                             /* 7.6 Insert last node into node, first removing
2475:                             it from its previous parent node if any. */
2476:                             if($last_node->parentNode !== null) {
2477:                                 $last_node->parentNode->removeChild($last_node);
2478:                             }
2479: 
2480:                             $node->appendChild($last_node);
2481: 
2482:                             /* 7.7 Let last node be node. */
2483:                             $last_node = $node;
2484:                         }
2485: 
2486:                         /* 8. Insert whatever last node ended up being in
2487:                         the previous step into the common ancestor node,
2488:                         first removing it from its previous parent node if
2489:                         any. */
2490:                         if($last_node->parentNode !== null) {
2491:                             $last_node->parentNode->removeChild($last_node);
2492:                         }
2493: 
2494:                         $common_ancestor->appendChild($last_node);
2495: 
2496:                         /* 9. Perform a shallow clone of the formatting
2497:                         element. */
2498:                         $clone = $formatting_element->cloneNode();
2499: 
2500:                         /* 10. Take all of the child nodes of the furthest
2501:                         block and append them to the clone created in the
2502:                         last step. */
2503:                         while($furthest_block->hasChildNodes()) {
2504:                             $child = $furthest_block->firstChild;
2505:                             $furthest_block->removeChild($child);
2506:                             $clone->appendChild($child);
2507:                         }
2508: 
2509:                         /* 11. Append that clone to the furthest block. */
2510:                         $furthest_block->appendChild($clone);
2511: 
2512:                         /* 12. Remove the formatting element from the list
2513:                         of active formatting elements, and insert the clone
2514:                         into the list of active formatting elements at the
2515:                         position of the aforementioned bookmark. */
2516:                         $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
2517:                         unset($this->a_formatting[$fe_af_pos]);
2518:                         $this->a_formatting = array_merge($this->a_formatting);
2519: 
2520:                         $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
2521:                         $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
2522:                         $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
2523: 
2524:                         /* 13. Remove the formatting element from the stack
2525:                         of open elements, and insert the clone into the stack
2526:                         of open elements immediately after (i.e. in a more
2527:                         deeply nested position than) the position of the
2528:                         furthest block in that stack. */
2529:                         $fe_s_pos = array_search($formatting_element, $this->stack, true);
2530:                         $fb_s_pos = array_search($furthest_block, $this->stack, true);
2531:                         unset($this->stack[$fe_s_pos]);
2532: 
2533:                         $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
2534:                         $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
2535:                         $this->stack = array_merge($s_part1, array($clone), $s_part2);
2536: 
2537:                         /* 14. Jump back to step 1 in this series of steps. */
2538:                         unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
2539:                     }
2540:                 break;
2541: 
2542:                 /* An end tag token whose tag name is one of: "button",
2543:                 "marquee", "object" */
2544:                 case 'button': case 'marquee': case 'object':
2545:                     /* If the stack of open elements has an element in scope whose
2546:                     tag name matches the tag name of the token, then generate implied
2547:                     tags. */
2548:                     if($this->elementInScope($token['name'])) {
2549:                         $this->generateImpliedEndTags();
2550: 
2551:                         /* Now, if the current node is not an element with the same
2552:                         tag name as the token, then this is a parse error. */
2553:                         // k
2554: 
2555:                         /* Now, if the stack of open elements has an element in scope
2556:                         whose tag name matches the tag name of the token, then pop
2557:                         elements from the stack until that element has been popped from
2558:                         the stack, and clear the list of active formatting elements up
2559:                         to the last marker. */
2560:                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
2561:                             if($this->stack[$n]->nodeName === $token['name']) {
2562:                                 $n = -1;
2563:                             }
2564: 
2565:                             array_pop($this->stack);
2566:                         }
2567: 
2568:                         $marker = end(array_keys($this->a_formatting, self::MARKER, true));
2569: 
2570:                         for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
2571:                             array_pop($this->a_formatting);
2572:                         }
2573:                     }
2574:                 break;
2575: 
2576:                 /* Or an end tag whose tag name is one of: "area", "basefont",
2577:                 "bgsound", "br", "embed", "hr", "iframe", "image", "img",
2578:                 "input", "isindex", "noembed", "noframes", "param", "select",
2579:                 "spacer", "table", "textarea", "wbr" */
2580:                 case 'area': case 'basefont': case 'bgsound': case 'br':
2581:                 case 'embed': case 'hr': case 'iframe': case 'image':
2582:                 case 'img': case 'input': case 'isindex': case 'noembed':
2583:                 case 'noframes': case 'param': case 'select': case 'spacer':
2584:                 case 'table': case 'textarea': case 'wbr':
2585:                     // Parse error. Ignore the token.
2586:                 break;
2587: 
2588:                 /* An end tag token not covered by the previous entries */
2589:                 default:
2590:                     for($n = count($this->stack) - 1; $n >= 0; $n--) {
2591:                         /* Initialise node to be the current node (the bottommost
2592:                         node of the stack). */
2593:                         $node = end($this->stack);
2594: 
2595:                         /* If node has the same tag name as the end tag token,
2596:                         then: */
2597:                         if($token['name'] === $node->nodeName) {
2598:                             /* Generate implied end tags. */
2599:                             $this->generateImpliedEndTags();
2600: 
2601:                             /* If the tag name of the end tag token does not
2602:                             match the tag name of the current node, this is a
2603:                             parse error. */
2604:                             // k
2605: 
2606:                             /* Pop all the nodes from the current node up to
2607:                             node, including node, then stop this algorithm. */
2608:                             for($x = count($this->stack) - $n; $x >= $n; $x--) {
2609:                                 array_pop($this->stack);
2610:                             }
2611:                                     
2612:                         } else {
2613:                             $category = $this->getElementCategory($node);
2614: 
2615:                             if($category !== self::SPECIAL && $category !== self::SCOPING) {
2616:                                 /* Otherwise, if node is in neither the formatting
2617:                                 category nor the phrasing category, then this is a
2618:                                 parse error. Stop this algorithm. The end tag token
2619:                                 is ignored. */
2620:                                 return false;
2621:                             }
2622:                         }
2623:                     }
2624:                 break;
2625:             }
2626:             break;
2627:         }
2628:     }
2629: 
2630:     /**
2631:      * Processes one token while the insertion mode is "in table".
2632:      *
2633:      * @param array $token Token array with 'type' plus 'data' (character and
2634:      *                     comment tokens) or 'name' (tag tokens).
2635:      * @return mixed false when a "table" end tag (real or implied) is
2636:      *               ignored; the result of inTableBody()/mainPhase() when
2637:      *               the token is reprocessed there; null otherwise.
2638:      */
2639:     private function inTable($token) {
2640:         $clear = array('html', 'table');
2641: 
2642:         /* A character token that is one of U+0009 CHARACTER TABULATION,
2643:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2644:         or U+0020 SPACE */
2645:         if($token['type'] === HTML5::CHARACTR &&
2646:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
2647:             /* Append the character to the current node. */
2648:             $text = $this->dom->createTextNode($token['data']);
2649:             end($this->stack)->appendChild($text);
2650: 
2651:         /* A comment token */
2652:         } elseif($token['type'] === HTML5::COMMENT) {
2653:             /* Append a Comment node to the current node with the data
2654:             attribute set to the data given in the comment token. */
2655:             $comment = $this->dom->createComment($token['data']);
2656:             end($this->stack)->appendChild($comment);
2657: 
2658:         /* A start tag whose tag name is "caption" */
2659:         } elseif($token['type'] === HTML5::STARTTAG &&
2660:         $token['name'] === 'caption') {
2661:             /* Clear the stack back to a table context. */
2662:             $this->clearStackToTableContext($clear);
2663: 
2664:             /* Insert a marker at the end of the list of active
2665:             formatting elements. */
2666:             $this->a_formatting[] = self::MARKER;
2667: 
2668:             /* Insert an HTML element for the token, then switch the
2669:             insertion mode to "in caption". */
2670:             $this->insertElement($token);
2671:             $this->mode = self::IN_CAPTION;
2672: 
2673:         /* A start tag whose tag name is "colgroup" */
2674:         } elseif($token['type'] === HTML5::STARTTAG &&
2675:         $token['name'] === 'colgroup') {
2676:             /* Clear the stack back to a table context. */
2677:             $this->clearStackToTableContext($clear);
2678: 
2679:             /* Insert an HTML element for the token, then switch the
2680:             insertion mode to "in column group". */
2681:             $this->insertElement($token);
2682:             $this->mode = self::IN_CGROUP;
2683: 
2684:         /* A start tag whose tag name is "col" */
2685:         } elseif($token['type'] === HTML5::STARTTAG &&
2686:         $token['name'] === 'col') {
2687:             /* Act as if a start tag token with the tag name "colgroup" had
2688:             been seen, then reprocess the current token. */
2689:             $this->inTable(array(
2690:                 'name' => 'colgroup',
2691:                 'type' => HTML5::STARTTAG,
2692:                 'attr' => array()
2693:             ));
2694: 
2695:             $this->inColumnGroup($token);
2696: 
2697:         /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2698:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
2699:         array('tbody', 'tfoot', 'thead'))) {
2700:             /* Clear the stack back to a table context. */
2701:             $this->clearStackToTableContext($clear);
2702: 
2703:             /* Insert an HTML element for the token, then switch the insertion
2704:             mode to "in table body". */
2705:             $this->insertElement($token);
2706:             $this->mode = self::IN_TBODY;
2707: 
2708:         /* A start tag whose tag name is one of: "td", "th", "tr" */
2709:         } elseif($token['type'] === HTML5::STARTTAG &&
2710:         in_array($token['name'], array('td', 'th', 'tr'))) {
2711:             /* Act as if a start tag token with the tag name "tbody" had been
2712:             seen, then reprocess the current token. */
2713:             $this->inTable(array(
2714:                 'name' => 'tbody',
2715:                 'type' => HTML5::STARTTAG,
2716:                 'attr' => array()
2717:             ));
2718: 
2719:             return $this->inTableBody($token);
2720: 
2721:         /* A start tag whose tag name is "table" */
2722:         } elseif($token['type'] === HTML5::STARTTAG &&
2723:         $token['name'] === 'table') {
2724:             /* Parse error. Act as if an end tag token with the tag name "table"
2725:             had been seen, then, if that token wasn't ignored, reprocess the
2726:             current token. */
2727:             $closed = $this->inTable(array(
2728:                 'name' => 'table',
2729:                 'type' => HTML5::ENDTAG
2730:             ));
2731: 
2732:             /* The end tag is ignored (false) when no table is in table scope;
2733:             the current token must then be dropped, not reprocessed. */
2734:             if($closed === false) {
2735:                 return false;
2736:             }
2737: 
2738:             return $this->mainPhase($token);
2739: 
2740:         /* An end tag whose tag name is "table" */
2741:         } elseif($token['type'] === HTML5::ENDTAG &&
2742:         $token['name'] === 'table') {
2743:             /* If the stack of open elements does not have an element in table
2744:             scope with the same tag name as the token, this is a parse error.
2745:             Ignore the token. (innerHTML case) */
2746:             if(!$this->elementInScope($token['name'], true)) {
2747:                 return false;
2748:             }
2749: 
2750:             /* Generate implied end tags. A current node that is not a table
2751:             element at this point is a parse error, which is not reported. */
2752:             $this->generateImpliedEndTags();
2753: 
2754:             /* Pop elements from this stack until a table element has been
2755:             popped from the stack. */
2756:             do {
2757:                 $popped = array_pop($this->stack);
2758:             } while($popped->nodeName !== 'table');
2759: 
2760:             /* Reset the insertion mode appropriately. */
2761:             $this->resetInsertionMode();
2762: 
2763:         /* An end tag whose tag name is one of: "body", "caption", "col",
2764:         "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2765:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
2766:         array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2767:         'tfoot', 'th', 'thead', 'tr'))) {
2768:             // Parse error. Ignore the token.
2769: 
2770:         /* Anything else */
2771:         } else {
2772:             /* Parse error. Process the token as if the insertion mode was "in
2773:             body", except that while the current node is a table, tbody, tfoot,
2774:             thead, or tr element, nodes that would be inserted into the current
2775:             node must be inserted into the foster parent element instead. */
2776:             if(in_array(end($this->stack)->nodeName,
2777:             array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2778:                 /* Locate the last table element in the stack of open elements.
2779:                 The foster parent element is that table's parent node when it
2780:                 has one; the first element in the stack (the html element)
2781:                 when there is no table (innerHTML case); and the element
2782:                 before the table in the stack when the table has no parent.
2783:                 NOTE(review): a non-element parent is taken by the first test. */
2784:                 for($n = count($this->stack) - 1; $n >= 0; $n--) {
2785:                     if($this->stack[$n]->nodeName === 'table') {
2786:                         $table = $this->stack[$n];
2787:                         break;
2788:                     }
2789:                 }
2790: 
2791:                 if(isset($table) && $table->parentNode !== null) {
2792:                     $this->foster_parent = $table->parentNode;
2793: 
2794:                 } elseif(!isset($table)) {
2795:                     $this->foster_parent = $this->stack[0];
2796: 
2797:                 } elseif(isset($table) && ($table->parentNode === null ||
2798:                 $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
2799:                     $this->foster_parent = $this->stack[$n - 1];
2800:                 }
2801:             }
2802: 
2803:             $this->inBody($token);
2804:         }
2805:     }
2806: 
2807:     /**
2808:      * Processes one token while the insertion mode is "in caption".
2809:      *
2810:      * Tokens not handled here are delegated to inTable() or inBody().
2811:      *
2812:      * @param array $token Token array with 'type' and, for tags, 'name'.
2813:      * @return mixed inTable() result when the token is reprocessed there,
2814:      *               null otherwise.
2815:      */
2816:     private function inCaption($token) {
2817:         /* An end tag whose tag name is "caption" */
2818:         if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
2819:             /* Only act when the stack of open elements has an element in
2820:             table scope with the token's tag name; otherwise this is a parse
2821:             error and the token is ignored. (innerHTML case) */
2822:             if($this->elementInScope($token['name'], true)) {
2823:                 /* Generate implied end tags. A current node that is not a
2824:                 caption element at this point is a parse error, which is
2825:                 not reported. */
2826:                 $this->generateImpliedEndTags();
2827: 
2828:                 /* Pop elements from this stack until a caption element has
2829:                 been popped from the stack. */
2830:                 do {
2831:                     $popped = array_pop($this->stack);
2832:                 } while($popped->nodeName !== 'caption');
2833: 
2834:                 /* Clear the list of active formatting elements up to the last
2835:                 marker. */
2836:                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2837: 
2838:                 /* Switch the insertion mode to "in table". */
2839:                 $this->mode = self::IN_TABLE;
2840:             }
2841: 
2842:         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2843:         "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2844:         name is "table" */
2845:         } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
2846:         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2847:         'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
2848:         $token['name'] === 'table')) {
2849:             /* Parse error. Act as if an end tag with the tag name "caption"
2850:             had been seen, then reprocess the current token.
2851:             NOTE(review): the token is reprocessed even when that end tag
2852:             was ignored -- confirm whether that is intended. */
2853:             $this->inCaption(array(
2854:                 'name' => 'caption',
2855:                 'type' => HTML5::ENDTAG
2856:             ));
2857: 
2858:             return $this->inTable($token);
2859: 
2860:         /* An end tag whose tag name is one of: "body", "col", "colgroup",
2861:         "html", "tbody", "td", "tfoot", "th", "thead", "tr". "td" has to be
2862:         listed, or a stray </td> would be handled by the "in body" rules. */
2863:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
2864:         array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
2865:         'thead', 'tr'))) {
2866:             // Parse error. Ignore the token.
2867: 
2868:         /* Anything else */
2869:         } else {
2870:             /* Process the token as if the insertion mode was "in body". */
2871:             $this->inBody($token);
2872:         }
2873:     }
2874: 
2875:     /**
2876:      * Tree-construction handler for the "in column group" insertion mode.
2877:      *
2878:      * @param array $token Token array: 'type' plus 'data' or 'name'.
2879:      * @return mixed inTable() result for a reprocessed token, else null.
2880:      */
2881:     private function inColumnGroup($token) {
2882:         $type = $token['type'];
2883: 
2884:         /* Character data made up solely of U+0009, U+000A, U+000B, U+000C
2885:         or U+0020: append it to the current node as a text node. */
2886:         if($type === HTML5::CHARACTR &&
2887:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
2888:             $whitespace = $this->dom->createTextNode($token['data']);
2889:             end($this->stack)->appendChild($whitespace);
2890:             return;
2891:         }
2892: 
2893:         /* Comment token: append a Comment node holding the token's data to
2894:         the current node. */
2895:         if($type === HTML5::COMMENT) {
2896:             $commentNode = $this->dom->createComment($token['data']);
2897:             end($this->stack)->appendChild($commentNode);
2898:             return;
2899:         }
2900: 
2901:         /* Start tag "col": insert a col element for the token, then
2902:         immediately pop the current node off the stack of open elements. */
2903:         if($type === HTML5::STARTTAG && $token['name'] === 'col') {
2904:             $this->insertElement($token);
2905:             array_pop($this->stack);
2906:             return;
2907:         }
2908: 
2909:         /* End tag "colgroup": ignored (parse error, innerHTML case) while the
2910:         current node is the root html element; otherwise pop the current
2911:         node and switch the insertion mode to "in table". */
2912:         if($type === HTML5::ENDTAG && $token['name'] === 'colgroup') {
2913:             if(end($this->stack)->nodeName !== 'html') {
2914:                 array_pop($this->stack);
2915:                 $this->mode = self::IN_TABLE;
2916:             }
2917:             return;
2918:         }
2919: 
2920:         /* End tag "col": parse error, the token is ignored. */
2921:         if($type === HTML5::ENDTAG && $token['name'] === 'col') {
2922:             return;
2923:         }
2924: 
2925:         /* Anything else: act as if an end tag named "colgroup" had been seen,
2926:         then hand the current token to the "in table" handler. */
2927:         $this->inColumnGroup(array('name' => 'colgroup', 'type' => HTML5::ENDTAG));
2928: 
2929:         return $this->inTable($token);
2930:     }
2931: 
2932:     /**
2933:      * Processes one token while the insertion mode is "in table body".
2934:      *
2935:      * @param array $token Token array with 'type' and, for tags, 'name'.
2936:      * @return mixed inRow()/mainPhase() result when reprocessed, else null.
2937:      */
2938:     private function inTableBody($token) {
2939:         $clear = array('tbody', 'tfoot', 'thead', 'html');
2940: 
2941:         /* A start tag whose tag name is "tr" */
2942:         if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
2943:             /* Clear the stack back to a table body context. */
2944:             $this->clearStackToTableContext($clear);
2945: 
2946:             /* Insert a tr element for the token, then switch the insertion
2947:             mode to "in row". */
2948:             $this->insertElement($token);
2949:             $this->mode = self::IN_ROW;
2950: 
2951:         /* A start tag whose tag name is one of: "th", "td" */
2952:         } elseif($token['type'] === HTML5::STARTTAG &&
2953:         ($token['name'] === 'th' || $token['name'] === 'td')) {
2954:             /* Parse error. Act as if a start tag with the tag name "tr" had
2955:             been seen, then reprocess the current token. */
2956:             $this->inTableBody(array(
2957:                 'name' => 'tr',
2958:                 'type' => HTML5::STARTTAG,
2959:                 'attr' => array()
2960:             ));
2961: 
2962:             return $this->inRow($token);
2963: 
2964:         /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2965:         } elseif($token['type'] === HTML5::ENDTAG &&
2966:         in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2967:             /* Only act when the stack of open elements has an element in
2968:             table scope with the token's tag name; otherwise this is a parse
2969:             error and the token is ignored. */
2970:             if($this->elementInScope($token['name'], true)) {
2971:                 /* Clear the stack back to a table body context. */
2972:                 $this->clearStackToTableContext($clear);
2973: 
2974:                 /* Pop the current node from the stack of open elements. Switch
2975:                 the insertion mode to "in table". */
2976:                 array_pop($this->stack);
2977:                 $this->mode = self::IN_TABLE;
2978:             }
2979: 
2980:         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2981:         "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
2982:         Tokens that fail this test end up in the "anything else" branch, so
2983:         the "tfoot" spelling and the ENDTAG type check both matter here. */
2984:         } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
2985:         array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2986:         ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
2987:             /* Only act when the stack of open elements has a tbody, thead, or
2988:             tfoot element in table scope; otherwise this is a parse error and
2989:             the token is ignored. (innerHTML case) */
2990:             if($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
2991:                 /* Clear the stack back to a table body context. */
2992:                 $this->clearStackToTableContext($clear);
2993: 
2994:                 /* Act as if an end tag with the same tag name as the current
2995:                 node ("tbody", "tfoot", or "thead") had been seen, then
2996:                 reprocess the current token. */
2997:                 $this->inTableBody(array(
2998:                     'name' => end($this->stack)->nodeName,
2999:                     'type' => HTML5::ENDTAG
3000:                 ));
3001: 
3002:                 return $this->mainPhase($token);
3003:             }
3004: 
3005:         /* An end tag whose tag name is one of: "body", "caption", "col",
3006:         "colgroup", "html", "td", "th", "tr" */
3007:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
3008:         array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
3009:             /* Parse error. Ignore the token. */
3010: 
3011:         /* Anything else */
3012:         } else {
3013:             /* Process the token as if the insertion mode was "in table". */
3014:             $this->inTable($token);
3015:         }
3016:     }
3017: 
3018:     private function inRow($token) {
3019:         $clear = array('tr', 'html');
3020: 
3021:         /* A start tag whose tag name is one of: "th", "td" */
3022:         if($token['type'] === HTML5::STARTTAG &&
3023:         ($token['name'] === 'th' || $token['name'] === 'td')) {
3024:             /* Clear the stack back to a table row context. */
3025:             $this->clearStackToTableContext($clear);
3026: 
3027:             /* Insert an HTML element for the token, then switch the insertion
3028:             mode to "in cell". */
3029:             $this->insertElement($token);
3030:             $this->mode = self::IN_CELL;
3031: 
3032:             /* Insert a marker at the end of the list of active formatting
3033:             elements. */
3034:             $this->a_formatting[] = self::MARKER;
3035: 
3036:         /* An end tag whose tag name is "tr" */
3037:         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
3038:             /* If the stack of open elements does not have an element in table
3039:             scope with the same tag name as the token, this is a parse error.
3040:             Ignore the token. (innerHTML case) */
3041:             if(!$this->elementInScope($token['name'], true)) {
3042:                 // Ignore.
3043: 
3044:             /* Otherwise: */
3045:             } else {
3046:                 /* Clear the stack back to a table row context. */
3047:                 $this->clearStackToTableContext($clear);
3048: 
3049:                 /* Pop the current node (which will be a tr element) from the
3050:                 stack of open elements. Switch the insertion mode to "in table
3051:                 body". */
3052:                 array_pop($this->stack);
3053:                 $this->mode = self::IN_TBODY;
3054:             }
3055: 
3056:         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3057:         "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
3058:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
3059:         array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) {
3060:             /* Act as if an end tag with the tag name "tr" had been seen, then,
3061:             if that token wasn't ignored, reprocess the current token. */
3062:             $this->inRow(array(
3063:                 'name' => 'tr',
3064:                 'type' => HTML5::ENDTAG
3065:             ));
3066: 
3067:             return $this->inCell($token);
3068: 
3069:         /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
3070:         } elseif($token['type'] === HTML5::ENDTAG &&
3071:         in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
3072:             /* If the stack of open elements does not have an element in table
3073:             scope with the same tag name as the token, this is a parse error.
3074:             Ignore the token. */
3075:             if(!$this->elementInScope($token['name'], true)) {
3076:                 // Ignore.
3077: 
3078:             /* Otherwise: */
3079:             } else {
3080:                 /* Otherwise, act as if an end tag with the tag name "tr" had
3081:                 been seen, then reprocess the current token. */
3082:                 $this->inRow(array(
3083:                     'name' => 'tr',
3084:                     'type' => HTML5::ENDTAG
3085:                 ));
3086: 
3087:                 return $this->inCell($token);
3088:             }
3089: 
3090:         /* An end tag whose tag name is one of: "body", "caption", "col",
3091:         "colgroup", "html", "td", "th" */
3092:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
3093:         array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
3094:             /* Parse error. Ignore the token. */
3095: 
3096:         /* Anything else */
3097:         } else {
3098:             /* Process the token as if the insertion mode was "in table". */
3099:             $this->inTable($token);
3100:         }
3101:     }
3102: 
3103:     private function inCell($token) {
3104:         /* An end tag whose tag name is one of: "td", "th" */
3105:         if($token['type'] === HTML5::ENDTAG &&
3106:         ($token['name'] === 'td' || $token['name'] === 'th')) {
3107:             /* If the stack of open elements does not have an element in table
3108:             scope with the same tag name as that of the token, then this is a
3109:             parse error and the token must be ignored. */
3110:             if(!$this->elementInScope($token['name'], true)) {
3111:                 // Ignore.
3112: 
3113:             /* Otherwise: */
3114:             } else {
3115:                 /* Generate implied end tags, except for elements with the same
3116:                 tag name as the token. */
3117:                 $this->generateImpliedEndTags(array($token['name']));
3118: 
3119:                 /* Now, if the current node is not an element with the same tag
3120:                 name as the token, then this is a parse error. */
3121:                 // k
3122: 
3123:                 /* Pop elements from this stack until an element with the same
3124:                 tag name as the token has been popped from the stack. */
3125:                 while(true) {
3126:                     $node = end($this->stack)->nodeName;
3127:                     array_pop($this->stack);
3128: 
3129:                     if($node === $token['name']) {
3130:                         break;
3131:                     }
3132:                 }
3133: 
3134:                 /* Clear the list of active formatting elements up to the last
3135:                 marker. */
3136:                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3137: 
3138:                 /* Switch the insertion mode to "in row". (The current node
3139:                 will be a tr element at this point.) */
3140:                 $this->mode = self::IN_ROW;
3141:             }
3142: 
3143:         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3144:         "tbody", "td", "tfoot", "th", "thead", "tr" */
3145:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
3146:         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
3147:         'thead', 'tr'))) {
3148:             /* If the stack of open elements does not have a td or th element
3149:             in table scope, then this is a parse error; ignore the token.
3150:             (innerHTML case) */
3151:             if(!$this->elementInScope(array('td', 'th'), true)) {
3152:                 // Ignore.
3153: 
3154:             /* Otherwise, close the cell (see below) and reprocess the current
3155:             token. */
3156:             } else {
3157:                 $this->closeCell();
3158:                 return $this->inRow($token);
3159:             }
3160: 
3161:         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3162:         "tbody", "td", "tfoot", "th", "thead", "tr" */
3163:         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
3164:         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
3165:         'thead', 'tr'))) {
3166:             /* If the stack of open elements does not have a td or th element
3167:             in table scope, then this is a parse error; ignore the token.
3168:             (innerHTML case) */
3169:             if(!$this->elementInScope(array('td', 'th'), true)) {
3170:                 // Ignore.
3171: 
3172:             /* Otherwise, close the cell (see below) and reprocess the current
3173:             token. */
3174:             } else {
3175:                 $this->closeCell();
3176:                 return $this->inRow($token);
3177:             }
3178: 
3179:         /* An end tag whose tag name is one of: "body", "caption", "col",
3180:         "colgroup", "html" */
3181:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
3182:         array('body', 'caption', 'col', 'colgroup', 'html'))) {
3183:             /* Parse error. Ignore the token. */
3184: 
3185:         /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
3186:         "thead", "tr" */
3187:         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
3188:         array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
3189:             /* If the stack of open elements does not have an element in table
3190:             scope with the same tag name as that of the token (which can only
3191:             happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
3192:             then this is a parse error and the token must be ignored. */
3193:             if(!$this->elementInScope($token['name'], true)) {
3194:                 // Ignore.
3195: 
3196:             /* Otherwise, close the cell (see below) and reprocess the current
3197:             token. */
3198:             } else {
3199:                 $this->closeCell();
3200:                 return $this->inRow($token);
3201:             }
3202: 
3203:         /* Anything else */
3204:         } else {
3205:             /* Process the token as if the insertion mode was "in body". */
3206:             $this->inBody($token);
3207:         }
3208:     }
3209: 
3210:     private function inSelect($token) {
3211:         /* Handle the token as follows: */
3212: 
3213:         /* A character token */
3214:         if($token['type'] === HTML5::CHARACTR) {
3215:             /* Append the token's character to the current node. */
3216:             $this->insertText($token['data']);
3217: 
3218:         /* A comment token */
3219:         } elseif($token['type'] === HTML5::COMMENT) {
3220:             /* Append a Comment node to the current node with the data
3221:             attribute set to the data given in the comment token. */
3222:             $this->insertComment($token['data']);
3223: 
3224:         /* A start tag token whose tag name is "option" */
3225:         } elseif($token['type'] === HTML5::STARTTAG &&
3226:         $token['name'] === 'option') {
3227:             /* If the current node is an option element, act as if an end tag
3228:             with the tag name "option" had been seen. */
3229:             if(end($this->stack)->nodeName === 'option') {
3230:                 $this->inSelect(array(
3231:                     'name' => 'option',
3232:                     'type' => HTML5::ENDTAG
3233:                 ));
3234:             }
3235: 
3236:             /* Insert an HTML element for the token. */
3237:             $this->insertElement($token);
3238: 
3239:         /* A start tag token whose tag name is "optgroup" */
3240:         } elseif($token['type'] === HTML5::STARTTAG &&
3241:         $token['name'] === 'optgroup') {
3242:             /* If the current node is an option element, act as if an end tag
3243:             with the tag name "option" had been seen. */
3244:             if(end($this->stack)->nodeName === 'option') {
3245:                 $this->inSelect(array(
3246:                     'name' => 'option',
3247:                     'type' => HTML5::ENDTAG
3248:                 ));
3249:             }
3250: 
3251:             /* If the current node is an optgroup element, act as if an end tag
3252:             with the tag name "optgroup" had been seen. */
3253:             if(end($this->stack)->nodeName === 'optgroup') {
3254:                 $this->inSelect(array(
3255:                     'name' => 'optgroup',
3256:                     'type' => HTML5::ENDTAG
3257:                 ));
3258:             }
3259: 
3260:             /* Insert an HTML element for the token. */
3261:             $this->insertElement($token);
3262: 
3263:         /* An end tag token whose tag name is "optgroup" */
3264:         } elseif($token['type'] === HTML5::ENDTAG &&
3265:         $token['name'] === 'optgroup') {
3266:             /* First, if the current node is an option element, and the node
3267:             immediately before it in the stack of open elements is an optgroup
3268:             element, then act as if an end tag with the tag name "option" had
3269:             been seen. */
3270:             $elements_in_stack = count($this->stack);
3271: 
3272:             if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
3273:             $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
3274:                 $this->inSelect(array(
3275:                     'name' => 'option',
3276:                     'type' => HTML5::ENDTAG
3277:                 ));
3278:             }
3279: 
3280:             /* If the current node is an optgroup element, then pop that node
3281:             from the stack of open elements. Otherwise, this is a parse error,
3282:             ignore the token. */
3283:             if($this->stack[$elements_in_stack - 1] === 'optgroup') {
3284:                 array_pop($this->stack);
3285:             }
3286: 
3287:         /* An end tag token whose tag name is "option" */
3288:         } elseif($token['type'] === HTML5::ENDTAG &&
3289:         $token['name'] === 'option') {
3290:             /* If the current node is an option element, then pop that node
3291:             from the stack of open elements. Otherwise, this is a parse error,
3292:             ignore the token. */
3293:             if(end($this->stack)->nodeName === 'option') {
3294:                 array_pop($this->stack);
3295:             }
3296: 
3297:         /* An end tag whose tag name is "select" */
3298:         } elseif($token['type'] === HTML5::ENDTAG &&
3299:         $token['name'] === 'select') {
3300:             /* If the stack of open elements does not have an element in table
3301:             scope with the same tag name as the token, this is a parse error.
3302:             Ignore the token. (innerHTML case) */
3303:             if(!$this->elementInScope($token['name'], true)) {
3304:                 // w/e
3305: 
3306:             /* Otherwise: */
3307:             } else {
3308:                 /* Pop elements from the stack of open elements until a select
3309:                 element has been popped from the stack. */
3310:                 while(true) {
3311:                     $current = end($this->stack)->nodeName;
3312:                     array_pop($this->stack);
3313: 
3314:                     if($current === 'select') {
3315:                         break;
3316:                     }
3317:                 }
3318: 
3319:                 /* Reset the insertion mode appropriately. */
3320:                 $this->resetInsertionMode();
3321:             }
3322: 
3323:         /* A start tag whose tag name is "select" */
3324:         } elseif($token['name'] === 'select' &&
3325:         $token['type'] === HTML5::STARTTAG) {
3326:             /* Parse error. Act as if the token had been an end tag with the
3327:             tag name "select" instead. */
3328:             $this->inSelect(array(
3329:                 'name' => 'select',
3330:                 'type' => HTML5::ENDTAG
3331:             ));
3332: 
3333:         /* An end tag whose tag name is one of: "caption", "table", "tbody",
3334:         "tfoot", "thead", "tr", "td", "th" */
3335:         } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
3336:         'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
3337:             /* Parse error. */
3338:             // w/e
3339: 
3340:             /* If the stack of open elements has an element in table scope with
3341:             the same tag name as that of the token, then act as if an end tag
3342:             with the tag name "select" had been seen, and reprocess the token.
3343:             Otherwise, ignore the token. */
3344:             if($this->elementInScope($token['name'], true)) {
3345:                 $this->inSelect(array(
3346:                     'name' => 'select',
3347:                     'type' => HTML5::ENDTAG
3348:                 ));
3349: 
3350:                 $this->mainPhase($token);
3351:             }
3352: 
3353:         /* Anything else */
3354:         } else {
3355:             /* Parse error. Ignore the token. */
3356:         }
3357:     }
3358: 
3359:     private function afterBody($token) {
3360:         /* Handle the token as follows: */
3361: 
3362:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3363:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3364:         or U+0020 SPACE */
3365:         if($token['type'] === HTML5::CHARACTR &&
3366:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
3367:             /* Process the token as it would be processed if the insertion mode
3368:             was "in body". */
3369:             $this->inBody($token);
3370: 
3371:         /* A comment token */
3372:         } elseif($token['type'] === HTML5::COMMENT) {
3373:             /* Append a Comment node to the first element in the stack of open
3374:             elements (the html element), with the data attribute set to the
3375:             data given in the comment token. */
3376:             $comment = $this->dom->createComment($token['data']);
3377:             $this->stack[0]->appendChild($comment);
3378: 
3379:         /* An end tag with the tag name "html" */
3380:         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
3381:             /* If the parser was originally created in order to handle the
3382:             setting of an element's innerHTML attribute, this is a parse error;
3383:             ignore the token. (The element will be an html element in this
3384:             case.) (innerHTML case) */
3385: 
3386:             /* Otherwise, switch to the trailing end phase. */
3387:             $this->phase = self::END_PHASE;
3388: 
3389:         /* Anything else */
3390:         } else {
3391:             /* Parse error. Set the insertion mode to "in body" and reprocess
3392:             the token. */
3393:             $this->mode = self::IN_BODY;
3394:             return $this->inBody($token);
3395:         }
3396:     }
3397: 
3398:     private function inFrameset($token) {
3399:         /* Handle the token as follows: */
3400: 
3401:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3402:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3403:         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3404:         if($token['type'] === HTML5::CHARACTR &&
3405:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
3406:             /* Append the character to the current node. */
3407:             $this->insertText($token['data']);
3408: 
3409:         /* A comment token */
3410:         } elseif($token['type'] === HTML5::COMMENT) {
3411:             /* Append a Comment node to the current node with the data
3412:             attribute set to the data given in the comment token. */
3413:             $this->insertComment($token['data']);
3414: 
3415:         /* A start tag with the tag name "frameset" */
3416:         } elseif($token['name'] === 'frameset' &&
3417:         $token['type'] === HTML5::STARTTAG) {
3418:             $this->insertElement($token);
3419: 
3420:         /* An end tag with the tag name "frameset" */
3421:         } elseif($token['name'] === 'frameset' &&
3422:         $token['type'] === HTML5::ENDTAG) {
3423:             /* If the current node is the root html element, then this is a
3424:             parse error; ignore the token. (innerHTML case) */
3425:             if(end($this->stack)->nodeName === 'html') {
3426:                 // Ignore
3427: 
3428:             } else {
3429:                 /* Otherwise, pop the current node from the stack of open
3430:                 elements. */
3431:                 array_pop($this->stack);
3432: 
3433:                 /* If the parser was not originally created in order to handle
3434:                 the setting of an element's innerHTML attribute (innerHTML case),
3435:                 and the current node is no longer a frameset element, then change
3436:                 the insertion mode to "after frameset". */
3437:                 $this->mode = self::AFTR_FRAME;
3438:             }
3439: 
3440:         /* A start tag with the tag name "frame" */
3441:         } elseif($token['name'] === 'frame' &&
3442:         $token['type'] === HTML5::STARTTAG) {
3443:             /* Insert an HTML element for the token. */
3444:             $this->insertElement($token);
3445: 
3446:             /* Immediately pop the current node off the stack of open elements. */
3447:             array_pop($this->stack);
3448: 
3449:         /* A start tag with the tag name "noframes" */
3450:         } elseif($token['name'] === 'noframes' &&
3451:         $token['type'] === HTML5::STARTTAG) {
3452:             /* Process the token as if the insertion mode had been "in body". */
3453:             $this->inBody($token);
3454: 
3455:         /* Anything else */
3456:         } else {
3457:             /* Parse error. Ignore the token. */
3458:         }
3459:     }
3460: 
3461:     private function afterFrameset($token) {
3462:         /* Handle the token as follows: */
3463: 
3464:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3465:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3466:         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3467:         if($token['type'] === HTML5::CHARACTR &&
3468:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
3469:             /* Append the character to the current node. */
3470:             $this->insertText($token['data']);
3471: 
3472:         /* A comment token */
3473:         } elseif($token['type'] === HTML5::COMMENT) {
3474:             /* Append a Comment node to the current node with the data
3475:             attribute set to the data given in the comment token. */
3476:             $this->insertComment($token['data']);
3477: 
3478:         /* An end tag with the tag name "html" */
3479:         } elseif($token['name'] === 'html' &&
3480:         $token['type'] === HTML5::ENDTAG) {
3481:             /* Switch to the trailing end phase. */
3482:             $this->phase = self::END_PHASE;
3483: 
3484:         /* A start tag with the tag name "noframes" */
3485:         } elseif($token['name'] === 'noframes' &&
3486:         $token['type'] === HTML5::STARTTAG) {
3487:             /* Process the token as if the insertion mode had been "in body". */
3488:             $this->inBody($token);
3489: 
3490:         /* Anything else */
3491:         } else {
3492:             /* Parse error. Ignore the token. */
3493:         }
3494:     }
3495: 
3496:     private function trailingEndPhase($token) {
3497:         /* After the main phase, as each token is emitted from the tokenisation
3498:         stage, it must be processed as described in this section. */
3499: 
3500:         /* A DOCTYPE token */
3501:         if($token['type'] === HTML5::DOCTYPE) {
3502:             // Parse error. Ignore the token.
3503: 
3504:         /* A comment token */
3505:         } elseif($token['type'] === HTML5::COMMENT) {
3506:             /* Append a Comment node to the Document object with the data
3507:             attribute set to the data given in the comment token. */
3508:             $comment = $this->dom->createComment($token['data']);
3509:             $this->dom->appendChild($comment);
3510: 
3511:         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3512:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3513:         or U+0020 SPACE */
3514:         } elseif($token['type'] === HTML5::CHARACTR &&
3515:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
3516:             /* Process the token as it would be processed in the main phase. */
3517:             $this->mainPhase($token);
3518: 
3519:         /* A character token that is not one of U+0009 CHARACTER TABULATION,
3520:         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3521:         or U+0020 SPACE. Or a start tag token. Or an end tag token. */
3522:         } elseif(($token['type'] === HTML5::CHARACTR &&
3523:         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
3524:         $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
3525:             /* Parse error. Switch back to the main phase and reprocess the
3526:             token. */
3527:             $this->phase = self::MAIN_PHASE;
3528:             return $this->mainPhase($token);
3529: 
3530:         /* An end-of-file token */
3531:         } elseif($token['type'] === HTML5::EOF) {
3532:             /* OMG DONE!! */
3533:         }
3534:     }
3535: 
3536:     private function insertElement($token, $append = true, $check = false) {
3537:         // Proprietary workaround for libxml2's limitations with tag names
3538:         if ($check) {
3539:             // Slightly modified HTML5 tag-name modification,
3540:             // removing anything that's not an ASCII letter, digit, or hyphen
3541:             $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
3542:             // Remove leading hyphens and numbers
3543:             $token['name'] = ltrim($token['name'], '-0..9');
3544:             // In theory, this should ever be needed, but just in case
3545:             if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
3546:         }
3547:         
3548:         $el = $this->dom->createElement($token['name']);
3549: 
3550:         foreach($token['attr'] as $attr) {
3551:             if(!$el->hasAttribute($attr['name'])) {
3552:                 $el->setAttribute($attr['name'], $attr['value']);
3553:             }
3554:         }
3555: 
3556:         $this->appendToRealParent($el);
3557:         $this->stack[] = $el;
3558: 
3559:         return $el;
3560:     }
3561: 
3562:     private function insertText($data) {
3563:         $text = $this->dom->createTextNode($data);
3564:         $this->appendToRealParent($text);
3565:     }
3566: 
3567:     private function insertComment($data) {
3568:         $comment = $this->dom->createComment($data);
3569:         $this->appendToRealParent($comment);
3570:     }
3571: 
3572:     private function appendToRealParent($node) {
3573:         if($this->foster_parent === null) {
3574:             end($this->stack)->appendChild($node);
3575: 
3576:         } elseif($this->foster_parent !== null) {
3577:             /* If the foster parent element is the parent element of the
3578:             last table element in the stack of open elements, then the new
3579:             node must be inserted immediately before the last table element
3580:             in the stack of open elements in the foster parent element;
3581:             otherwise, the new node must be appended to the foster parent
3582:             element. */
3583:             for($n = count($this->stack) - 1; $n >= 0; $n--) {
3584:                 if($this->stack[$n]->nodeName === 'table' &&
3585:                 $this->stack[$n]->parentNode !== null) {
3586:                     $table = $this->stack[$n];
3587:                     break;
3588:                 }
3589:             }
3590: 
3591:             if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
3592:                 $this->foster_parent->insertBefore($node, $table);
3593:             else
3594:                 $this->foster_parent->appendChild($node);
3595: 
3596:             $this->foster_parent = null;
3597:         }
3598:     }
3599: 
3600:     private function elementInScope($el, $table = false) {
3601:         if(is_array($el)) {
3602:             foreach($el as $element) {
3603:                 if($this->elementInScope($element, $table)) {
3604:                     return true;
3605:                 }
3606:             }
3607: 
3608:             return false;
3609:         }
3610: 
3611:         $leng = count($this->stack);
3612: 
3613:         for($n = 0; $n < $leng; $n++) {
3614:             /* 1. Initialise node to be the current node (the bottommost node of
3615:             the stack). */
3616:             $node = $this->stack[$leng - 1 - $n];
3617: 
3618:             if($node->tagName === $el) {
3619:                 /* 2. If node is the target node, terminate in a match state. */
3620:                 return true;
3621: 
3622:             } elseif($node->tagName === 'table') {
3623:                 /* 3. Otherwise, if node is a table element, terminate in a failure
3624:                 state. */
3625:                 return false;
3626: 
3627:             } elseif($table === true && in_array($node->tagName, array('caption', 'td',
3628:             'th', 'button', 'marquee', 'object'))) {
3629:                 /* 4. Otherwise, if the algorithm is the "has an element in scope"
3630:                 variant (rather than the "has an element in table scope" variant),
3631:                 and node is one of the following, terminate in a failure state. */
3632:                 return false;
3633: 
3634:             } elseif($node === $node->ownerDocument->documentElement) {
3635:                 /* 5. Otherwise, if node is an html element (root element), terminate
3636:                 in a failure state. (This can only happen if the node is the topmost
3637:                 node of the    stack of open elements, and prevents the next step from
3638:                 being invoked if there are no more elements in the stack.) */
3639:                 return false;
3640:             }
3641: 
3642:             /* Otherwise, set node to the previous entry in the stack of open
3643:             elements and return to step 2. (This will never fail, since the loop
3644:             will always terminate in the previous step if the top of the stack
3645:             is reached.) */
3646:         }
3647:     }
3648: 
3649:     private function reconstructActiveFormattingElements() {
3650:         /* 1. If there are no entries in the list of active formatting elements,
3651:         then there is nothing to reconstruct; stop this algorithm. */
3652:         $formatting_elements = count($this->a_formatting);
3653: 
3654:         if($formatting_elements === 0) {
3655:             return false;
3656:         }
3657: 
3658:         /* 3. Let entry be the last (most recently added) element in the list
3659:         of active formatting elements. */
3660:         $entry = end($this->a_formatting);
3661: 
3662:         /* 2. If the last (most recently added) entry in the list of active
3663:         formatting elements is a marker, or if it is an element that is in the
3664:         stack of open elements, then there is nothing to reconstruct; stop this
3665:         algorithm. */
3666:         if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
3667:             return false;
3668:         }
3669: 
3670:         for($a = $formatting_elements - 1; $a >= 0; true) {
3671:             /* 4. If there are no entries before entry in the list of active
3672:             formatting elements, then jump to step 8. */
3673:             if($a === 0) {
3674:                 $step_seven = false;
3675:                 break;
3676:             }
3677: 
3678:             /* 5. Let entry be the entry one earlier than entry in the list of
3679:             active formatting elements. */
3680:             $a--;
3681:             $entry = $this->a_formatting[$a];
3682: 
3683:             /* 6. If entry is neither a marker nor an element that is also in
3684:             thetack of open elements, go to step 4. */
3685:             if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
3686:                 break;
3687:             }
3688:         }
3689: 
3690:         while(true) {
3691:             /* 7. Let entry be the element one later than entry in the list of
3692:             active formatting elements. */
3693:             if(isset($step_seven) && $step_seven === true) {
3694:                 $a++;
3695:                 $entry = $this->a_formatting[$a];
3696:             }
3697: 
3698:             /* 8. Perform a shallow clone of the element entry to obtain clone. */
3699:             $clone = $entry->cloneNode();
3700: 
3701:             /* 9. Append clone to the current node and push it onto the stack
3702:             of open elements  so that it is the new current node. */
3703:             end($this->stack)->appendChild($clone);
3704:             $this->stack[] = $clone;
3705: 
3706:             /* 10. Replace the entry for entry in the list with an entry for
3707:             clone. */
3708:             $this->a_formatting[$a] = $clone;
3709: 
3710:             /* 11. If the entry for clone in the list of active formatting
3711:             elements is not the last entry in the list, return to step 7. */
3712:             if(end($this->a_formatting) !== $clone) {
3713:                 $step_seven = true;
3714:             } else {
3715:                 break;
3716:             }
3717:         }
3718:     }
3719: 
3720:     private function clearTheActiveFormattingElementsUpToTheLastMarker() {
3721:         /* When the steps below require the UA to clear the list of active
3722:         formatting elements up to the last marker, the UA must perform the
3723:         following steps: */
3724: 
3725:         while(true) {
3726:             /* 1. Let entry be the last (most recently added) entry in the list
3727:             of active formatting elements. */
3728:             $entry = end($this->a_formatting);
3729: 
3730:             /* 2. Remove entry from the list of active formatting elements. */
3731:             array_pop($this->a_formatting);
3732: 
3733:             /* 3. If entry was a marker, then stop the algorithm at this point.
3734:             The list has been cleared up to the last marker. */
3735:             if($entry === self::MARKER) {
3736:                 break;
3737:             }
3738:         }
3739:     }
3740: 
3741:     private function generateImpliedEndTags($exclude = array()) {
3742:         /* When the steps below require the UA to generate implied end tags,
3743:         then, if the current node is a dd element, a dt element, an li element,
3744:         a p element, a td element, a th  element, or a tr element, the UA must
3745:         act as if an end tag with the respective tag name had been seen and
3746:         then generate implied end tags again. */
3747:         $node = end($this->stack);
3748:         $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
3749: 
3750:         while(in_array(end($this->stack)->nodeName, $elements)) {
3751:             array_pop($this->stack);
3752:         }
3753:     }
3754: 
3755:     private function getElementCategory($node) {
3756:         $name = $node->tagName;
3757:         if(in_array($name, $this->special))
3758:             return self::SPECIAL;
3759: 
3760:         elseif(in_array($name, $this->scoping))
3761:             return self::SCOPING;
3762: 
3763:         elseif(in_array($name, $this->formatting))
3764:             return self::FORMATTING;
3765: 
3766:         else
3767:             return self::PHRASING;
3768:     }
3769: 
3770:     private function clearStackToTableContext($elements) {
3771:         /* When the steps above require the UA to clear the stack back to a
3772:         table context, it means that the UA must, while the current node is not
3773:         a table element or an html element, pop elements from the stack of open
3774:         elements. If this causes any elements to be popped from the stack, then
3775:         this is a parse error. */
3776:         while(true) {
3777:             $node = end($this->stack)->nodeName;
3778: 
3779:             if(in_array($node, $elements)) {
3780:                 break;
3781:             } else {
3782:                 array_pop($this->stack);
3783:             }
3784:         }
3785:     }
3786: 
3787:     private function resetInsertionMode() {
3788:         /* 1. Let last be false. */
3789:         $last = false;
3790:         $leng = count($this->stack);
3791: 
3792:         for($n = $leng - 1; $n >= 0; $n--) {
3793:             /* 2. Let node be the last node in the stack of open elements. */
3794:             $node = $this->stack[$n];
3795: 
3796:             /* 3. If node is the first node in the stack of open elements, then
3797:             set last to true. If the element whose innerHTML  attribute is being
3798:             set is neither a td  element nor a th element, then set node to the
3799:             element whose innerHTML  attribute is being set. (innerHTML  case) */
3800:             if($this->stack[0]->isSameNode($node)) {
3801:                 $last = true;
3802:             }
3803: 
3804:             /* 4. If node is a select element, then switch the insertion mode to
3805:             "in select" and abort these steps. (innerHTML case) */
3806:             if($node->nodeName === 'select') {
3807:                 $this->mode = self::IN_SELECT;
3808:                 break;
3809: 
3810:             /* 5. If node is a td or th element, then switch the insertion mode
3811:             to "in cell" and abort these steps. */
3812:             } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
3813:                 $this->mode = self::IN_CELL;
3814:                 break;
3815: 
3816:             /* 6. If node is a tr element, then switch the insertion mode to
3817:             "in    row" and abort these steps. */
3818:             } elseif($node->nodeName === 'tr') {
3819:                 $this->mode = self::IN_ROW;
3820:                 break;
3821: 
3822:             /* 7. If node is a tbody, thead, or tfoot element, then switch the
3823:             insertion mode to "in table body" and abort these steps. */
3824:             } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
3825:                 $this->mode = self::IN_TBODY;
3826:                 break;
3827: 
3828:             /* 8. If node is a caption element, then switch the insertion mode
3829:             to "in caption" and abort these steps. */
3830:             } elseif($node->nodeName === 'caption') {
3831:                 $this->mode = self::IN_CAPTION;
3832:                 break;
3833: 
3834:             /* 9. If node is a colgroup element, then switch the insertion mode
3835:             to "in column group" and abort these steps. (innerHTML case) */
3836:             } elseif($node->nodeName === 'colgroup') {
3837:                 $this->mode = self::IN_CGROUP;
3838:                 break;
3839: 
3840:             /* 10. If node is a table element, then switch the insertion mode
3841:             to "in table" and abort these steps. */
3842:             } elseif($node->nodeName === 'table') {
3843:                 $this->mode = self::IN_TABLE;
3844:                 break;
3845: 
3846:             /* 11. If node is a head element, then switch the insertion mode
3847:             to "in body" ("in body"! not "in head"!) and abort these steps.
3848:             (innerHTML case) */
3849:             } elseif($node->nodeName === 'head') {
3850:                 $this->mode = self::IN_BODY;
3851:                 break;
3852: 
3853:             /* 12. If node is a body element, then switch the insertion mode to
3854:             "in body" and abort these steps. */
3855:             } elseif($node->nodeName === 'body') {
3856:                 $this->mode = self::IN_BODY;
3857:                 break;
3858: 
3859:             /* 13. If node is a frameset element, then switch the insertion
3860:             mode to "in frameset" and abort these steps. (innerHTML case) */
3861:             } elseif($node->nodeName === 'frameset') {
3862:                 $this->mode = self::IN_FRAME;
3863:                 break;
3864: 
3865:             /* 14. If node is an html element, then: if the head element
3866:             pointer is null, switch the insertion mode to "before head",
3867:             otherwise, switch the insertion mode to "after head". In either
3868:             case, abort these steps. (innerHTML case) */
3869:             } elseif($node->nodeName === 'html') {
3870:                 $this->mode = ($this->head_pointer === null)
3871:                     ? self::BEFOR_HEAD
3872:                     : self::AFTER_HEAD;
3873: 
3874:                 break;
3875: 
3876:             /* 15. If last is true, then set the insertion mode to "in body"
3877:             and    abort these steps. (innerHTML case) */
3878:             } elseif($last) {
3879:                 $this->mode = self::IN_BODY;
3880:                 break;
3881:             }
3882:         }
3883:     }
3884: 
3885:     private function closeCell() {
3886:         /* If the stack of open elements has a td or th element in table scope,
3887:         then act as if an end tag token with that tag name had been seen. */
3888:         foreach(array('td', 'th') as $cell) {
3889:             if($this->elementInScope($cell, true)) {
3890:                 $this->inCell(array(
3891:                     'name' => $cell,
3892:                     'type' => HTML5::ENDTAG
3893:                 ));
3894: 
3895:                 break;
3896:             }
3897:         }
3898:     }
3899: 
3900:     public function save() {
3901:         return $this->dom;
3902:     }
3903: }
3904: ?>
3905: 
API documentation generated by ApiGen 2.8.0