Overview

Packages

  • application
    • commands
    • components
      • actions
      • filters
      • leftWidget
      • permissions
      • sortableWidget
      • util
      • webupdater
      • x2flow
        • actions
        • triggers
      • X2GridView
      • X2Settings
    • controllers
    • models
      • embedded
    • modules
      • accounts
        • controllers
        • models
      • actions
        • controllers
        • models
      • calendar
        • controllers
        • models
      • charts
        • models
      • contacts
        • controllers
        • models
      • docs
        • components
        • controllers
        • models
      • groups
        • controllers
        • models
      • marketing
        • components
        • controllers
        • models
      • media
        • controllers
        • models
      • mobile
        • components
      • opportunities
        • controllers
        • models
      • products
        • controllers
        • models
      • quotes
        • controllers
        • models
      • services
        • controllers
        • models
      • template
        • models
      • users
        • controllers
        • models
      • workflow
        • controllers
        • models
      • x2Leads
        • controllers
        • models
  • Net
  • None
  • PHP
  • system
    • base
    • caching
      • dependencies
    • collections
    • console
    • db
      • ar
      • schema
        • cubrid
        • mssql
        • mysql
        • oci
        • pgsql
        • sqlite
    • i18n
      • gettext
    • logging
    • test
    • utils
    • validators
    • web
      • actions
      • auth
      • filters
      • form
      • helpers
      • renderers
      • services
      • widgets
        • captcha
        • pagers
  • Text
    • Highlighter
  • zii
    • behaviors
    • widgets
      • grid
      • jui

Classes

  • ActionFormModel
  • ArrayUtil
  • ArrayValidator
  • AssociatedMediaBehavior
  • AuxLib
  • Changelog
  • DetailView
  • EncryptUtilTmp
  • EventsWidgetFieldFormatter
  • FailedLogins
  • FieldFormatter
  • FieldFormatterBase
  • FieldInputRenderer
  • FileFieldBehavior
  • FiltersForm
  • FilterUtil
  • FineDiff
  • FineDiffCopyOp
  • FineDiffDeleteOp
  • FineDiffInsertOp
  • FineDiffOp
  • FineDiffOps
  • FineDiffReplaceOp
  • GlobalCSSFormModel
  • GlobalImportFormModel
  • GoogleAuthenticator
  • HTML5
  • HTML5TreeConstructer
  • HTMLPurifier
  • HTMLPurifier_Arborize
  • HTMLPurifier_AttrCollections
  • HTMLPurifier_AttrDef
  • HTMLPurifier_AttrDef_Clone
  • HTMLPurifier_AttrDef_CSS
  • HTMLPurifier_AttrDef_CSS_AlphaValue
  • HTMLPurifier_AttrDef_CSS_Background
  • HTMLPurifier_AttrDef_CSS_BackgroundPosition
  • HTMLPurifier_AttrDef_CSS_Border
  • HTMLPurifier_AttrDef_CSS_Color
  • HTMLPurifier_AttrDef_CSS_Composite
  • HTMLPurifier_AttrDef_CSS_DenyElementDecorator
  • HTMLPurifier_AttrDef_CSS_Filter
  • HTMLPurifier_AttrDef_CSS_Font
  • HTMLPurifier_AttrDef_CSS_FontFamily
  • HTMLPurifier_AttrDef_CSS_Ident
  • HTMLPurifier_AttrDef_CSS_ImportantDecorator
  • HTMLPurifier_AttrDef_CSS_Length
  • HTMLPurifier_AttrDef_CSS_ListStyle
  • HTMLPurifier_AttrDef_CSS_Multiple
  • HTMLPurifier_AttrDef_CSS_Number
  • HTMLPurifier_AttrDef_CSS_Percentage
  • HTMLPurifier_AttrDef_CSS_TextDecoration
  • HTMLPurifier_AttrDef_CSS_URI
  • HTMLPurifier_AttrDef_Enum
  • HTMLPurifier_AttrDef_HTML_Bool
  • HTMLPurifier_AttrDef_HTML_Class
  • HTMLPurifier_AttrDef_HTML_Color
  • HTMLPurifier_AttrDef_HTML_FrameTarget
  • HTMLPurifier_AttrDef_HTML_ID
  • HTMLPurifier_AttrDef_HTML_Length
  • HTMLPurifier_AttrDef_HTML_LinkTypes
  • HTMLPurifier_AttrDef_HTML_MultiLength
  • HTMLPurifier_AttrDef_HTML_Nmtokens
  • HTMLPurifier_AttrDef_HTML_Pixels
  • HTMLPurifier_AttrDef_Integer
  • HTMLPurifier_AttrDef_Lang
  • HTMLPurifier_AttrDef_Switch
  • HTMLPurifier_AttrDef_Text
  • HTMLPurifier_AttrDef_URI
  • HTMLPurifier_AttrDef_URI_Email
  • HTMLPurifier_AttrDef_URI_Email_SimpleCheck
  • HTMLPurifier_AttrDef_URI_Host
  • HTMLPurifier_AttrDef_URI_IPv4
  • HTMLPurifier_AttrDef_URI_IPv6
  • HTMLPurifier_AttrTransform
  • HTMLPurifier_AttrTransform_Background
  • HTMLPurifier_AttrTransform_BdoDir
  • HTMLPurifier_AttrTransform_BgColor
  • HTMLPurifier_AttrTransform_BoolToCSS
  • HTMLPurifier_AttrTransform_Border
  • HTMLPurifier_AttrTransform_EnumToCSS
  • HTMLPurifier_AttrTransform_ImgRequired
  • HTMLPurifier_AttrTransform_ImgSpace
  • HTMLPurifier_AttrTransform_Input
  • HTMLPurifier_AttrTransform_Lang
  • HTMLPurifier_AttrTransform_Length
  • HTMLPurifier_AttrTransform_Name
  • HTMLPurifier_AttrTransform_NameSync
  • HTMLPurifier_AttrTransform_Nofollow
  • HTMLPurifier_AttrTransform_SafeEmbed
  • HTMLPurifier_AttrTransform_SafeObject
  • HTMLPurifier_AttrTransform_SafeParam
  • HTMLPurifier_AttrTransform_ScriptRequired
  • HTMLPurifier_AttrTransform_TargetBlank
  • HTMLPurifier_AttrTransform_Textarea
  • HTMLPurifier_AttrTypes
  • HTMLPurifier_AttrValidator
  • HTMLPurifier_Bootstrap
  • HTMLPurifier_ChildDef
  • HTMLPurifier_ChildDef_Chameleon
  • HTMLPurifier_ChildDef_Custom
  • HTMLPurifier_ChildDef_Empty
  • HTMLPurifier_ChildDef_List
  • HTMLPurifier_ChildDef_Optional
  • HTMLPurifier_ChildDef_Required
  • HTMLPurifier_ChildDef_StrictBlockquote
  • HTMLPurifier_ChildDef_Table
  • HTMLPurifier_Config
  • HTMLPurifier_ConfigSchema
  • HTMLPurifier_ConfigSchema_Builder_ConfigSchema
  • HTMLPurifier_ConfigSchema_Builder_Xml
  • HTMLPurifier_ConfigSchema_Interchange
  • HTMLPurifier_ConfigSchema_Interchange_Directive
  • HTMLPurifier_ConfigSchema_Interchange_Id
  • HTMLPurifier_ConfigSchema_InterchangeBuilder
  • HTMLPurifier_ConfigSchema_Validator
  • HTMLPurifier_ConfigSchema_ValidatorAtom
  • HTMLPurifier_ContentSets
  • HTMLPurifier_Context
  • HTMLPurifier_CSSDefinition
  • HTMLPurifier_Definition
  • HTMLPurifier_DefinitionCache
  • HTMLPurifier_DefinitionCache_Decorator
  • HTMLPurifier_DefinitionCache_Decorator_Cleanup
  • HTMLPurifier_DefinitionCache_Decorator_Memory
  • HTMLPurifier_DefinitionCache_Null
  • HTMLPurifier_DefinitionCache_Serializer
  • HTMLPurifier_DefinitionCacheFactory
  • HTMLPurifier_Doctype
  • HTMLPurifier_DoctypeRegistry
  • HTMLPurifier_ElementDef
  • HTMLPurifier_Encoder
  • HTMLPurifier_EntityLookup
  • HTMLPurifier_EntityParser
  • HTMLPurifier_ErrorCollector
  • HTMLPurifier_ErrorStruct
  • HTMLPurifier_Filter
  • HTMLPurifier_Filter_ExtractStyleBlocks
  • HTMLPurifier_Filter_YouTube
  • HTMLPurifier_Generator
  • HTMLPurifier_HTMLDefinition
  • HTMLPurifier_HTMLModule
  • HTMLPurifier_HTMLModule_Bdo
  • HTMLPurifier_HTMLModule_CommonAttributes
  • HTMLPurifier_HTMLModule_Edit
  • HTMLPurifier_HTMLModule_Forms
  • HTMLPurifier_HTMLModule_Hypertext
  • HTMLPurifier_HTMLModule_Iframe
  • HTMLPurifier_HTMLModule_Image
  • HTMLPurifier_HTMLModule_Legacy
  • HTMLPurifier_HTMLModule_List
  • HTMLPurifier_HTMLModule_Name
  • HTMLPurifier_HTMLModule_Nofollow
  • HTMLPurifier_HTMLModule_NonXMLCommonAttributes
  • HTMLPurifier_HTMLModule_Object
  • HTMLPurifier_HTMLModule_Presentation
  • HTMLPurifier_HTMLModule_Proprietary
  • HTMLPurifier_HTMLModule_Ruby
  • HTMLPurifier_HTMLModule_SafeEmbed
  • HTMLPurifier_HTMLModule_SafeObject
  • HTMLPurifier_HTMLModule_SafeScripting
  • HTMLPurifier_HTMLModule_Scripting
  • HTMLPurifier_HTMLModule_StyleAttribute
  • HTMLPurifier_HTMLModule_Tables
  • HTMLPurifier_HTMLModule_Target
  • HTMLPurifier_HTMLModule_TargetBlank
  • HTMLPurifier_HTMLModule_Text
  • HTMLPurifier_HTMLModule_Tidy
  • HTMLPurifier_HTMLModule_Tidy_Name
  • HTMLPurifier_HTMLModule_Tidy_Proprietary
  • HTMLPurifier_HTMLModule_Tidy_Strict
  • HTMLPurifier_HTMLModule_Tidy_Transitional
  • HTMLPurifier_HTMLModule_Tidy_XHTML
  • HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
  • HTMLPurifier_HTMLModule_XMLCommonAttributes
  • HTMLPurifier_HTMLModuleManager
  • HTMLPurifier_IDAccumulator
  • HTMLPurifier_Injector
  • HTMLPurifier_Injector_AutoParagraph
  • HTMLPurifier_Injector_DisplayLinkURI
  • HTMLPurifier_Injector_Linkify
  • HTMLPurifier_Injector_PurifierLinkify
  • HTMLPurifier_Injector_RemoveEmpty
  • HTMLPurifier_Injector_RemoveSpansWithoutAttributes
  • HTMLPurifier_Injector_SafeObject
  • HTMLPurifier_Language
  • HTMLPurifier_Language_en_x_test
  • HTMLPurifier_LanguageFactory
  • HTMLPurifier_Length
  • HTMLPurifier_Lexer
  • HTMLPurifier_Lexer_DirectLex
  • HTMLPurifier_Lexer_DOMLex
  • HTMLPurifier_Lexer_PH5P
  • HTMLPurifier_Node
  • HTMLPurifier_Node_Comment
  • HTMLPurifier_Node_Element
  • HTMLPurifier_Node_Text
  • HTMLPurifier_PercentEncoder
  • HTMLPurifier_Printer
  • HTMLPurifier_Printer_ConfigForm
  • HTMLPurifier_Printer_ConfigForm_bool
  • HTMLPurifier_Printer_ConfigForm_default
  • HTMLPurifier_Printer_ConfigForm_NullDecorator
  • HTMLPurifier_Printer_CSSDefinition
  • HTMLPurifier_Printer_HTMLDefinition
  • HTMLPurifier_PropertyList
  • HTMLPurifier_PropertyListIterator
  • HTMLPurifier_Queue
  • HTMLPurifier_Strategy
  • HTMLPurifier_Strategy_Composite
  • HTMLPurifier_Strategy_Core
  • HTMLPurifier_Strategy_FixNesting
  • HTMLPurifier_Strategy_MakeWellFormed
  • HTMLPurifier_Strategy_RemoveForeignElements
  • HTMLPurifier_Strategy_ValidateAttributes
  • HTMLPurifier_StringHash
  • HTMLPurifier_StringHashParser
  • HTMLPurifier_TagTransform
  • HTMLPurifier_TagTransform_Font
  • HTMLPurifier_TagTransform_Simple
  • HTMLPurifier_Token
  • HTMLPurifier_Token_Comment
  • HTMLPurifier_Token_Empty
  • HTMLPurifier_Token_End
  • HTMLPurifier_Token_Start
  • HTMLPurifier_Token_Tag
  • HTMLPurifier_Token_Text
  • HTMLPurifier_TokenFactory
  • HTMLPurifier_UnitConverter
  • HTMLPurifier_URI
  • HTMLPurifier_URIDefinition
  • HTMLPurifier_URIFilter
  • HTMLPurifier_URIFilter_DisableExternal
  • HTMLPurifier_URIFilter_DisableExternalResources
  • HTMLPurifier_URIFilter_DisableResources
  • HTMLPurifier_URIFilter_HostBlacklist
  • HTMLPurifier_URIFilter_MakeAbsolute
  • HTMLPurifier_URIFilter_Munge
  • HTMLPurifier_URIFilter_SafeIframe
  • HTMLPurifier_URIParser
  • HTMLPurifier_URIScheme
  • HTMLPurifier_URIScheme_data
  • HTMLPurifier_URIScheme_file
  • HTMLPurifier_URIScheme_ftp
  • HTMLPurifier_URIScheme_http
  • HTMLPurifier_URIScheme_https
  • HTMLPurifier_URIScheme_mailto
  • HTMLPurifier_URIScheme_news
  • HTMLPurifier_URIScheme_nntp
  • HTMLPurifier_URISchemeRegistry
  • HTMLPurifier_VarParser
  • HTMLPurifier_VarParser_Flexible
  • HTMLPurifier_VarParser_Native
  • HTMLPurifier_Zipper
  • JSONFieldsBehavior
  • JSONResponse
  • Markdown_Parser
  • MarkdownExtra_Parser
  • MediaFieldFormatter
  • MediaSelector
  • MobileActiveRecordFieldFormatter
  • MobileActivityFeed
  • MobileChartDashboard
  • MobileFieldFormatter
  • MobileFieldInputRenderer
  • ModuleModelNameValidator
  • MultiChildNode
  • MultiTypeAutocomplete
  • PasswordUtil
  • ProductFeature
  • ProfileWidgetLayout
  • QueryParamGenerator
  • RecordLimitBehavior
  • RecordView
  • RecordViewWidgetLayout
  • RelationshipsGridModel
  • RelationshipsJoin
  • RepairUserDataCommand
  • RequestUtil
  • RequiredIfNotSetValidator
  • ResponseUtil
  • RunMigrationScriptCommand
  • ServiceWebFormDesigner
  • Settings
  • StringUtil
  • TestEmailAction
  • TestEmailActionForm
  • ThemeGenerator
  • TimerUtil
  • TopicsFieldFormatter
  • TopicsWidgetLayout
  • TransactionalViewFieldFormatter
  • UrlUtil
  • ValidLinkValidator
  • WebFormDesigner
  • WebLeadFormDesigner
  • X2ActiveRecordBehavior
  • X2ActiveRecordFieldFormatter
  • X2ButtonColumn
  • X2ConditionList
  • X2ConsoleCommand
  • X2ControllerBehavior
  • X2DataColumn
  • X2DuplicateBehavior
  • X2Flashes
  • X2GridViewFieldFormatter
  • X2IPAddress
  • X2LeadsDataColumn
  • X2MergeableBehavior
  • X2MessageSource
  • X2MobileControllerBehavior
  • X2MobileProfileControllerBehavior
  • X2MobileQuotesControllerBehavior
  • X2MobileSiteControllerBehavior
  • X2MobileTopicsControllerBehavior
  • X2ModelConversionBehavior
  • X2ModelConversionWidget
  • X2ModelForeignKeyValidator
  • X2ModelUniqueIndexValidator
  • X2NonWebUser
  • X2StaticDropdown
  • X2StaticField
  • X2StaticFieldsBehavior
  • X2UrlManager
  • X2Validator
  • X2WidgetBehavior

Interfaces

  • AdminOwnedCredentials

Exceptions

  • CampaignMailingException
  • CodeExchangeException
  • GetCredentialsException
  • HTMLPurifier_ConfigSchema_Exception
  • HTMLPurifier_Exception
  • HTMLPurifier_VarParserException
  • Net_IDNA2_Exception
  • Net_IDNA2_Exception_Nameprep
  • NoRefreshTokenException
  • NoUserIdException
  • StringUtilException

Functions

  • checkCurrency
  • checkDNS
  • checkServerVar
  • checkTimezone
  • decodeQuotes
  • echoIcons
  • encodeQuotes
  • exceptionForError
  • getField
  • getLanguageName
  • getModuleTitle
  • handleReqError
  • handleReqException
  • htmlpurifier_filter_extractstyleblocks_muteerrorhandler
  • installer_t
  • installer_tr
  • isAllowedDir
  • mediaMigrationRrmdir
  • migrateMediaDir
  • printGraph
  • printR
  • renderFields
  • reqShutdown
  • RIP
  • translateOptions
  • tryGetRemote
  • Overview
  • Package
  • Class
  • Tree
    1: <?php
    2: 
    3: /**
    4:  * @file
    5:  * This file was auto-generated by generate-includes.php and includes all of
    6:  * the core files required by HTML Purifier. Use this if performance is a
    7:  * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
    8:  * FILE, changes will be overwritten the next time the script is run.
    9:  *
   10:  * @version 4.6.0
   11:  *
   12:  * @warning
   13:  *      You must *not* include any other HTML Purifier files before this file,
   14:  *      because 'require' not 'require_once' is used.
   15:  *
   16:  * @warning
   17:  *      This file requires that the include path contains the HTML Purifier
   18:  *      library directory; this is not auto-set.
   19:  */
   20: 
   21: 
   22: 
   23: /*! @mainpage
   24:  *
   25:  * HTML Purifier is an HTML filter that will take an arbitrary snippet of
   26:  * HTML and rigorously test, validate and filter it into a version that
   27:  * is safe for output onto webpages. It achieves this by:
   28:  *
   29:  *  -# Lexing (parsing into tokens) the document,
   30:  *  -# Executing various strategies on the tokens:
   31:  *      -# Removing all elements not in the whitelist,
   32:  *      -# Making the tokens well-formed,
   33:  *      -# Fixing the nesting of the nodes, and
   34:  *      -# Validating attributes of the nodes; and
   35:  *  -# Generating HTML from the purified tokens.
   36:  *
   37:  * However, most users will only need to interface with the HTMLPurifier
   38:  * and HTMLPurifier_Config classes.
   39:  */
   40: 
   41: /*
   42:     HTML Purifier 4.6.0 - Standards Compliant HTML Filtering
   43:     Copyright (C) 2006-2008 Edward Z. Yang
   44: 
   45:     This library is free software; you can redistribute it and/or
   46:     modify it under the terms of the GNU Lesser General Public
   47:     License as published by the Free Software Foundation; either
   48:     version 2.1 of the License, or (at your option) any later version.
   49: 
   50:     This library is distributed in the hope that it will be useful,
   51:     but WITHOUT ANY WARRANTY; without even the implied warranty of
   52:     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   53:     Lesser General Public License for more details.
   54: 
   55:     You should have received a copy of the GNU Lesser General Public
   56:     License along with this library; if not, write to the Free Software
   57:     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
   58:  */
   59: 
   60: /**
   61:  * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
   62:  *
   63:  * @note There are several points in which configuration can be specified
   64:  *       for HTML Purifier.  The precedence of these (from lowest to
   65:  *       highest) is as follows:
   66:  *          -# Instance: new HTMLPurifier($config)
   67:  *          -# Invocation: purify($html, $config)
   68:  *       These configurations are entirely independent of each other and
   69:  *       are *not* merged (this behavior may change in the future).
   70:  *
   71:  * @todo We need an easier way to inject strategies using the configuration
   72:  *       object.
   73:  */
   74: class HTMLPurifier
   75: {
   76: 
   77:     /**
   78:      * Version of HTML Purifier.
   79:      * @type string
   80:      */
   81:     public $version = '4.6.0';
   82: 
   83:     /**
   84:      * Constant with version of HTML Purifier.
   85:      */
   86:     const VERSION = '4.6.0';
   87: 
   88:     /**
   89:      * Default configuration, used by purify() when no per-call config is given.
   90:      * @type HTMLPurifier_Config
   91:      */
   92:     public $config;
   93: 
   94:     /**
   95:      * Array of extra filter objects to run on HTML, populated by the
   96:      * deprecated addFilter(); kept for backwards compatibility.
   97:      * @type HTMLPurifier_Filter[]
   98:      */
   99:     private $filters = array();
  100: 
  101:     /**
  102:      * Single instance of HTML Purifier, managed by instance().
  103:      * @type HTMLPurifier
  104:      */
  105:     private static $instance;
  106: 
  107:     /**
  108:      * @type HTMLPurifier_Strategy_Core
  109:      */
  110:     protected $strategy;
  111: 
  112:     /**
  113:      * @type HTMLPurifier_Generator
  114:      */
  115:     protected $generator;
  116: 
  117:     /**
  118:      * Resultant context of last run purification.
  119:      * Is an array of contexts if the last called method was purifyArray().
  120:      * @type HTMLPurifier_Context|HTMLPurifier_Context[]
  121:      */
  122:     public $context;
  123: 
  124:     /**
  125:      * Initializes the purifier.
  126:      *
  127:      * @param HTMLPurifier_Config|null $config Optional HTMLPurifier_Config object
  128:      *                for all instances of the purifier, if omitted, a default
  129:      *                configuration is supplied (which can be overridden on a
  130:      *                per-use basis).
  131:      *                The parameter can also be any type that
  132:      *                HTMLPurifier_Config::create() supports.
  133:      */
  134:     public function __construct($config = null)
  135:     {
  136:         $this->config = HTMLPurifier_Config::create($config);
  137:         $this->strategy = new HTMLPurifier_Strategy_Core();
  138:     }
  139: 
  140:     /**
  141:      * Adds a filter to process the output. First come, first served.
  142:      * @deprecated Use configuration directives in the Filter namespace or Filter.Custom.
  143:      * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
  144:      */
  145:     public function addFilter($filter)
  146:     {
  147:         trigger_error(
  148:             'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
  149:             ' in the Filter namespace or Filter.Custom',
  150:             E_USER_WARNING
  151:         );
  152:         $this->filters[] = $filter; // still registered after the warning
  153:     }
  154: 
  155:     /**
  156:      * Filters an HTML snippet/document to be XSS-free and standards-compliant.
  157:      *
  158:      * @param string $html String of HTML to purify
  159:      * @param HTMLPurifier_Config|null $config Config object for this operation,
  160:      *                if omitted, defaults to the config object specified during this
  161:      *                object's construction. The parameter can also be any type
  162:      *                that HTMLPurifier_Config::create() supports.
  163:      *
  164:      * @return string Purified HTML
  165:      */
  166:     public function purify($html, $config = null)
  167:     {
  168:         // :TODO: make the config merge in, instead of replace
  169:         $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
  170: 
  171:         // implementation is partially environment dependent, partially
  172:         // configuration dependent
  173:         $lexer = HTMLPurifier_Lexer::create($config);
  174: 
  175:         $context = new HTMLPurifier_Context();
  176: 
  177:         // setup HTML generator
  178:         $this->generator = new HTMLPurifier_Generator($config, $context);
  179:         $context->register('Generator', $this->generator);
  180: 
  181:         // set up global context variables
  182:         if ($config->get('Core.CollectErrors')) {
  183:             // may get moved out if other facilities use it
  184:             $language_factory = HTMLPurifier_LanguageFactory::instance();
  185:             $language = $language_factory->create($config, $context);
  186:             $context->register('Locale', $language);
  187: 
  188:             $error_collector = new HTMLPurifier_ErrorCollector($context);
  189:             $context->register('ErrorCollector', $error_collector);
  190:         }
  191: 
  192:         // setup id_accumulator context, necessary due to the fact that
  193:         // AttrValidator can be called from many places
  194:         $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
  195:         $context->register('IDAccumulator', $id_accumulator);
  196: 
  197:         $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
  198: 
  199:         // setup filters
  200:         $filter_flags = $config->getBatch('Filter');
  201:         $custom_filters = $filter_flags['Custom']; // entries are used as filter objects as-is (see loop below)
  202:         unset($filter_flags['Custom']);
  203:         $filters = array();
  204:         foreach ($filter_flags as $filter => $flag) {
  205:             if (!$flag) {
  206:                 continue;
  207:             }
  208:             if (strpos($filter, '.') !== false) { // keys containing '.' are never instantiated as filters
  209:                 continue;
  210:             }
  211:             $class = "HTMLPurifier_Filter_$filter";
  212:             $filters[] = new $class;
  213:         }
  214:         foreach ($custom_filters as $filter) {
  215:             // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
  216:             $filters[] = $filter;
  217:         }
  218:         $filters = array_merge($filters, $this->filters); // filters added via addFilter() go last
  219:         // maybe prepare(), but later
  220: 
  221:         for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { // pre-filters run in list order
  222:             $html = $filters[$i]->preFilter($html, $config, $context);
  223:         }
  224: 
  225:         // purified HTML
  226:         $html =
  227:             $this->generator->generateFromTokens(
  228:                 // list of tokens
  229:                 $this->strategy->execute(
  230:                     // list of un-purified tokens
  231:                     $lexer->tokenizeHTML(
  232:                         // un-purified HTML
  233:                         $html,
  234:                         $config,
  235:                         $context
  236:                     ),
  237:                     $config,
  238:                     $context
  239:                 )
  240:             );
  241: 
  242:         for ($i = $filter_size - 1; $i >= 0; $i--) { // post-filters run in reverse list order
  243:             $html = $filters[$i]->postFilter($html, $config, $context);
  244:         }
  245: 
  246:         $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
  247:         $this->context =& $context; // keep this run's context available to callers
  248:         return $html;
  249:     }
  250: 
  251:     /**
  252:      * Filters an array of HTML snippets
  253:      *
  254:      * @param string[] $array_of_html Array of html snippets
  255:      * @param HTMLPurifier_Config|null $config Optional config object for this operation.
  256:      *                See HTMLPurifier::purify() for more details.
  257:      *
  258:      * @return string[] Array of purified HTML, keyed like the input array
  259:      */
  260:     public function purifyArray($array_of_html, $config = null)
  261:     {
  262:         $context_array = array();
  263:         foreach ($array_of_html as $key => $html) {
  264:             $array_of_html[$key] = $this->purify($html, $config);
  265:             $context_array[$key] = $this->context; // context set by the purify() call above
  266:         }
  267:         $this->context = $context_array; // replaces the single context with the per-key array
  268:         return $array_of_html;
  269:     }
  270: 
  271:     /**
  272:      * Singleton for enforcing just one HTML Purifier in your system
  273:      *
  274:      * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
  275:      *                   HTMLPurifier instance to overload singleton with,
  276:      *                   or HTMLPurifier_Config instance to configure the
  277:      *                   generated version with.
  278:      *
  279:      * @return HTMLPurifier
  280:      */
  281:     public static function instance($prototype = null)
  282:     {
  283:         if (!self::$instance || $prototype) { // (re)build when unset or whenever a prototype is given
  284:             if ($prototype instanceof HTMLPurifier) {
  285:                 self::$instance = $prototype;
  286:             } elseif ($prototype) { // any other truthy value is passed to the constructor as config
  287:                 self::$instance = new HTMLPurifier($prototype);
  288:             } else {
  289:                 self::$instance = new HTMLPurifier();
  290:             }
  291:         }
  292:         return self::$instance;
  293:     }
  294: 
  295:     /**
  296:      * Alias of instance(), kept for backwards compatibility.
  297:      *
  298:      * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
  299:      *                   HTMLPurifier instance to overload singleton with,
  300:      *                   or HTMLPurifier_Config instance to configure the
  301:      *                   generated version with.
  302:      *
  303:      * @return HTMLPurifier
  304:      * @note Backwards compatibility, see instance()
  305:      */
  306:     public static function getInstance($prototype = null)
  307:     {
  308:         return HTMLPurifier::instance($prototype);
  309:     }
  310: }
  311: 
  312: 
  313: 
  314: 
  315: 
  316: /**
  317:  * Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
  318:  * and back again.
  319:  *
  320:  * @note This transformation is not an equivalence.  We mutate the input
  321:  * token stream to make it so; see all [MUT] markers in code.
  322:  */
  323: class HTMLPurifier_Arborize
  324: {
  325:     public static function arborize($tokens, $config, $context) { // token list -> single root node
  326:         $definition = $config->getHTMLDefinition();
  327:         $parent = new HTMLPurifier_Token_Start($definition->info_parent); // synthetic start token for the root
  328:         $stack = array($parent->toNode()); // stack of currently open nodes; bottom entry is the root
  329:         foreach ($tokens as $token) {
  330:             $token->skip = null; // [MUT]
  331:             $token->carryover = null; // [MUT]
  332:             if ($token instanceof HTMLPurifier_Token_End) {
  333:                 $token->start = null; // [MUT]
  334:                 $r = array_pop($stack); // end token closes the innermost open node
  335:                 assert($r->name === $token->name);
  336:                 assert(empty($token->attr));
  337:                 $r->endCol = $token->col;
  338:                 $r->endLine = $token->line;
  339:                 $r->endArmor = $token->armor;
  340:                 continue;
  341:             }
  342:             $node = $token->toNode();
  343:             $stack[count($stack)-1]->children[] = $node; // attach to the innermost open node
  344:             if ($token instanceof HTMLPurifier_Token_Start) {
  345:                 $stack[] = $node; // start token: this node is now the innermost open one
  346:             }
  347:         }
  348:         assert(count($stack) == 1); // only the root should remain open
  349:         return $stack[0];
  350:     }
  351: 
  352:     public static function flatten($node, $config, $context) { // root node -> flat token list
  353:         $level = 0;
  354:         $nodes = array($level => new HTMLPurifier_Queue(array($node))); // one queue of pending nodes per depth
  355:         $closingTokens = array(); // pending end tokens, grouped by depth
  356:         $tokens = array();
  357:         do {
  358:             while (!$nodes[$level]->isEmpty()) {
  359:                 $node = $nodes[$level]->shift(); // FIFO
  360:                 list($start, $end) = $node->toTokenPair();
  361:                 if ($level > 0) { // level 0 holds only the node passed in; its start token is not emitted
  362:                     $tokens[] = $start;
  363:                 }
  364:                 if ($end !== NULL) { // only nodes that yield an end token queue one
  365:                     $closingTokens[$level][] = $end;
  366:                 }
  367:                 if ($node instanceof HTMLPurifier_Node_Element) {
  368:                     $level++; // descend: children are processed before the remaining siblings
  369:                     $nodes[$level] = new HTMLPurifier_Queue();
  370:                     foreach ($node->children as $childNode) {
  371:                         $nodes[$level]->push($childNode);
  372:                     }
  373:                 }
  374:             }
  375:             $level--;
  376:             if ($level && isset($closingTokens[$level])) { // level 0 end tokens are likewise not emitted
  377:                 while ($token = array_pop($closingTokens[$level])) {
  378:                     $tokens[] = $token;
  379:                 }
  380:             }
  381:         } while ($level > 0);
  382:         return $tokens;
  383:     }
  384: }
  385: 
  386: 
  387: 
  388: /**
  389:  * Defines common attribute collections that modules reference
  390:  */
  391: 
  392: class HTMLPurifier_AttrCollections
  393: {
  394: 
  395:     /**
  396:      * Associative array of attribute collections, indexed by name.
  397:      * @type array
  398:      */
  399:     public $info = array();
  400: 
  401:     /**
  402:      * Performs all expansions on internal data for use by other inclusions
  403:      * It also collects all attribute collection extensions from
  404:      * modules
  405:      * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
  406:      * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
  407:      */
  408:     public function __construct($attr_types, $modules)
  409:     {
  410:         // load extensions from the modules
  411:         foreach ($modules as $module) {
  412:             foreach ($module->attr_collections as $coll_i => $coll) {
  413:                 if (!isset($this->info[$coll_i])) {
  414:                     $this->info[$coll_i] = array(); // first module to mention this collection creates it
  415:                 }
  416:                 foreach ($coll as $attr_i => $attr) {
  417:                     if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { // index 0 is the inclusion list
  418:                         // merge in includes
  419:                         $this->info[$coll_i][$attr_i] = array_merge(
  420:                             $this->info[$coll_i][$attr_i],
  421:                             $attr
  422:                         );
  423:                         continue;
  424:                     }
  425:                     $this->info[$coll_i][$attr_i] = $attr; // a later module overwrites an earlier entry with the same key
  426:                 }
  427:             }
  428:         }
  429:         // perform internal expansions and inclusions
  430:         foreach ($this->info as $name => $attr) { // $attr itself is unused; the live entry is passed by reference below
  431:             // merge attribute collections that include others
  432:             $this->performInclusions($this->info[$name]);
  433:             // replace string identifiers with actual attribute objects
  434:             $this->expandIdentifiers($this->info[$name], $attr_types);
  435:         }
  436:     }
  437: 
  438:     /**
  439:      * Takes a reference to an attribute associative array and performs
  440:      * all inclusions specified by the zero index.
  441:      * @param array &$attr Reference to attribute array
  442:      */
  443:     public function performInclusions(&$attr)
  444:     {
  445:         if (!isset($attr[0])) { // no inclusion list: nothing to do
  446:             return;
  447:         }
  448:         $merge = $attr[0]; // work list of collection names to include; may grow below
  449:         $seen  = array(); // recursion guard
  450:         // loop through all the inclusions
  451:         for ($i = 0; isset($merge[$i]); $i++) { // isset() is re-checked each pass, so appended names are visited too
  452:             if (isset($seen[$merge[$i]])) {
  453:                 continue;
  454:             }
  455:             $seen[$merge[$i]] = true;
  456:             // foreach attribute of the inclusion, copy it over
  457:             if (!isset($this->info[$merge[$i]])) { // unknown collection name: nothing to copy
  458:                 continue;
  459:             }
  460:             foreach ($this->info[$merge[$i]] as $key => $value) {
  461:                 if (isset($attr[$key])) { // entries already present in $attr win over included ones
  462:                     continue;
  463:                 } // also catches more inclusions
  464:                 $attr[$key] = $value;
  465:             }
  466:             if (isset($this->info[$merge[$i]][0])) {
  467:                 // recursion
  468:                 $merge = array_merge($merge, $this->info[$merge[$i]][0]); // queue the included collection's own inclusions
  469:             }
  470:         }
  471:         unset($attr[0]); // inclusion list has been consumed
  472:     }
  473: 
  474:     /**
  475:      * Expands all string identifiers in an attribute array by replacing
  476:      * them with the appropriate values inside HTMLPurifier_AttrTypes
  477:      * @param array &$attr Reference to attribute array
  478:      * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
  479:      */
  480:     public function expandIdentifiers(&$attr, $attr_types)
  481:     {
  482:         // because foreach will process new elements we add, make sure we
  483:         // skip duplicates
  484:         $processed = array();
  485: 
  486:         foreach ($attr as $def_i => $def) {
  487:             // skip inclusions
  488:             if ($def_i === 0) {
  489:                 continue;
  490:             }
  491: 
  492:             if (isset($processed[$def_i])) {
  493:                 continue;
  494:             }
  495: 
  496:             // determine whether or not attribute is required
  497:             if ($required = (strpos($def_i, '*') !== false)) {
  498:                 // rename the definition
  499:                 unset($attr[$def_i]);
  500:                 $def_i = trim($def_i, '*');
  501:                 $attr[$def_i] = $def;
  502:             }
  503: 
  504:             $processed[$def_i] = true;
  505: 
  506:             // if we've already got a literal object, move on
  507:             if (is_object($def)) {
  508:                 // preserve previous required
  509:                 $attr[$def_i]->required = ($required || $attr[$def_i]->required);
  510:                 continue;
  511:             }
  512: 
  513:             if ($def === false) {
  514:                 unset($attr[$def_i]);
  515:                 continue;
  516:             }
  517: 
  518:             if ($t = $attr_types->get($def)) {
  519:                 $attr[$def_i] = $t;
  520:                 $attr[$def_i]->required = $required;
  521:             } else {
  522:                 unset($attr[$def_i]);
  523:             }
  524:         }
  525:     }
  526: }
  527: 
  528: 
  529: 
  530: 
  531: 
  532: /**
  533:  * Base class for all validating attribute definitions.
  534:  *
  535:  * This family of classes forms the core for not only HTML attribute validation,
  536:  * but also any sort of string that needs to be validated or cleaned (which
  537:  * means CSS properties and composite definitions are defined here too).
  538:  * Besides defining (through code) what precisely makes the string valid,
  539:  * subclasses are also responsible for cleaning the code if possible.
  540:  */
  541: 
  542: abstract class HTMLPurifier_AttrDef
  543: {
  544: 
  545:     /**
  546:      * Tells us whether or not an HTML attribute is minimized.
  547:      * Has no meaning in other contexts.
  548:      * @type bool
  549:      */
  550:     public $minimized = false;
  551: 
  552:     /**
  553:      * Tells us whether or not an HTML attribute is required.
  554:      * Has no meaning in other contexts
  555:      * @type bool
  556:      */
  557:     public $required = false;
  558: 
  559:     /**
  560:      * Validates and cleans passed string according to a definition.
  561:      *
  562:      * @param string $string String to be validated and cleaned.
  563:      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
  564:      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
  565:      */
  566:     abstract public function validate($string, $config, $context);
  567: 
  568:     /**
  569:      * Convenience method that parses a string as if it were CDATA.
  570:      *
  571:      * This method process a string in the manner specified at
  572:      * <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
  573:      * leading and trailing whitespace, ignoring line feeds, and replacing
  574:      * carriage returns and tabs with spaces.  While most useful for HTML
  575:      * attributes specified as CDATA, it can also be applied to most CSS
  576:      * values.
  577:      *
  578:      * @note This method is not entirely standards compliant, as trim() removes
  579:      *       more types of whitespace than specified in the spec. In practice,
  580:      *       this is rarely a problem, as those extra characters usually have
  581:      *       already been removed by HTMLPurifier_Encoder.
  582:      *
  583:      * @warning This processing is inconsistent with XML's whitespace handling
  584:      *          as specified by section 3.3.3 and referenced XHTML 1.0 section
  585:      *          4.7.  However, note that we are NOT necessarily
  586:      *          parsing XML, thus, this behavior may still be correct. We
  587:      *          assume that newlines have been normalized.
  588:      */
  589:     public function parseCDATA($string)
  590:     {
  591:         $string = trim($string);
  592:         $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
  593:         return $string;
  594:     }
  595: 
  596:     /**
  597:      * Factory method for creating this class from a string.
  598:      * @param string $string String construction info
  599:      * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
  600:      */
  601:     public function make($string)
  602:     {
  603:         // default implementation, return a flyweight of this object.
  604:         // If $string has an effect on the returned object (i.e. you
  605:         // need to overload this method), it is best
  606:         // to clone or instantiate new copies. (Instantiation is safer.)
  607:         return $this;
  608:     }
  609: 
  610:     /**
  611:      * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
  612:      * properly. THIS IS A HACK!
  613:      * @param string $string a CSS colour definition
  614:      * @return string
  615:      */
  616:     protected function mungeRgb($string)
  617:     {
  618:         return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
  619:     }
  620: 
  621:     /**
  622:      * Parses a possibly escaped CSS string and returns the "pure"
  623:      * version of it.
  624:      */
  625:     protected function expandCSSEscape($string)
  626:     {
  627:         // flexibly parse it
  628:         $ret = '';
  629:         for ($i = 0, $c = strlen($string); $i < $c; $i++) {
  630:             if ($string[$i] === '\\') {
  631:                 $i++;
  632:                 if ($i >= $c) {
  633:                     $ret .= '\\';
  634:                     break;
  635:                 }
  636:                 if (ctype_xdigit($string[$i])) {
  637:                     $code = $string[$i];
  638:                     for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
  639:                         if (!ctype_xdigit($string[$i])) {
  640:                             break;
  641:                         }
  642:                         $code .= $string[$i];
  643:                     }
  644:                     // We have to be extremely careful when adding
  645:                     // new characters, to make sure we're not breaking
  646:                     // the encoding.
  647:                     $char = HTMLPurifier_Encoder::unichr(hexdec($code));
  648:                     if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
  649:                         continue;
  650:                     }
  651:                     $ret .= $char;
  652:                     if ($i < $c && trim($string[$i]) !== '') {
  653:                         $i--;
  654:                     }
  655:                     continue;
  656:                 }
  657:                 if ($string[$i] === "\n") {
  658:                     continue;
  659:                 }
  660:             }
  661:             $ret .= $string[$i];
  662:         }
  663:         return $ret;
  664:     }
  665: }
  666: 
  667: 
  668: 
  669: 
  670: 
  671: /**
  672:  * Processes an entire attribute array for corrections needing multiple values.
  673:  *
  674:  * Occasionally, a certain attribute will need to be removed and popped onto
  675:  * another value.  Instead of creating a complex return syntax for
  676:  * HTMLPurifier_AttrDef, we just pass the whole attribute array to a
  677:  * specialized object and have that do the special work.  That is the
  678:  * family of HTMLPurifier_AttrTransform.
  679:  *
  680:  * An attribute transformation can be assigned to run before or after
  681:  * HTMLPurifier_AttrDef validation.  See HTMLPurifier_HTMLDefinition for
  682:  * more details.
  683:  */
  684: 
  685: abstract class HTMLPurifier_AttrTransform
  686: {
  687: 
  688:     /**
  689:      * Abstract: makes changes to the attributes dependent on multiple values.
  690:      *
  691:      * @param array $attr Assoc array of attributes, usually from
  692:      *              HTMLPurifier_Token_Tag::$attr
  693:      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
  694:      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
  695:      * @return array Processed attribute array.
  696:      */
  697:     abstract public function transform($attr, $config, $context);
  698: 
  699:     /**
  700:      * Prepends CSS properties to the style attribute, creating the
  701:      * attribute if it doesn't exist.
  702:      * @param array &$attr Attribute array to process (passed by reference)
  703:      * @param string $css CSS to prepend
  704:      */
  705:     public function prependCSS(&$attr, $css)
  706:     {
  707:         $attr['style'] = isset($attr['style']) ? $attr['style'] : '';
  708:         $attr['style'] = $css . $attr['style'];
  709:     }
  710: 
  711:     /**
  712:      * Retrieves and removes an attribute
  713:      * @param array &$attr Attribute array to process (passed by reference)
  714:      * @param mixed $key Key of attribute to confiscate
  715:      * @return mixed
  716:      */
  717:     public function confiscateAttr(&$attr, $key)
  718:     {
  719:         if (!isset($attr[$key])) {
  720:             return null;
  721:         }
  722:         $value = $attr[$key];
  723:         unset($attr[$key]);
  724:         return $value;
  725:     }
  726: }
  727: 
  728: 
  729: 
  730: 
  731: 
  732: /**
  733:  * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
  734:  */
  735: class HTMLPurifier_AttrTypes
  736: {
  737:     /**
  738:      * Lookup array of attribute string identifiers to concrete implementations.
  739:      * @type HTMLPurifier_AttrDef[]
  740:      */
  741:     protected $info = array();
  742: 
  743:     /**
  744:      * Constructs the info array, supplying default implementations for attribute
  745:      * types.
  746:      */
  747:     public function __construct()
  748:     {
  749:         // XXX This is kind of poor, since we don't actually /clone/
  750:         // instances; instead, we use the supplied make() attribute. So,
  751:         // the underlying class must know how to deal with arguments.
  752:         // With the old implementation of Enum, that ignored its
  753:         // arguments when handling a make dispatch, the IAlign
  754:         // definition wouldn't work.
  755: 
  756:         // pseudo-types, must be instantiated via shorthand
  757:         $this->info['Enum']    = new HTMLPurifier_AttrDef_Enum();
  758:         $this->info['Bool']    = new HTMLPurifier_AttrDef_HTML_Bool();
  759: 
  760:         $this->info['CDATA']    = new HTMLPurifier_AttrDef_Text();
  761:         $this->info['ID']       = new HTMLPurifier_AttrDef_HTML_ID();
  762:         $this->info['Length']   = new HTMLPurifier_AttrDef_HTML_Length();
  763:         $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
  764:         $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
  765:         $this->info['Pixels']   = new HTMLPurifier_AttrDef_HTML_Pixels();
  766:         $this->info['Text']     = new HTMLPurifier_AttrDef_Text();
  767:         $this->info['URI']      = new HTMLPurifier_AttrDef_URI();
  768:         $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
  769:         $this->info['Color']    = new HTMLPurifier_AttrDef_HTML_Color();
  770:         $this->info['IAlign']   = self::makeEnum('top,middle,bottom,left,right');
  771:         $this->info['LAlign']   = self::makeEnum('top,bottom,left,right');
  772:         $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
  773: 
  774:         // unimplemented aliases
  775:         $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
  776:         $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
  777:         $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
  778:         $this->info['Character'] = new HTMLPurifier_AttrDef_Text();
  779: 
  780:         // "proprietary" types
  781:         $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
  782: 
  783:         // number is really a positive integer (one or more digits)
  784:         // FIXME: ^^ not always, see start and value of list items
  785:         $this->info['Number']   = new HTMLPurifier_AttrDef_Integer(false, false, true);
  786:     }
  787: 
  788:     private static function makeEnum($in)
  789:     {
  790:         return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
  791:     }
  792: 
  793:     /**
  794:      * Retrieves a type
  795:      * @param string $type String type name
  796:      * @return HTMLPurifier_AttrDef Object AttrDef for type
  797:      */
  798:     public function get($type)
  799:     {
  800:         // determine if there is any extra info tacked on
  801:         if (strpos($type, '#') !== false) {
  802:             list($type, $string) = explode('#', $type, 2);
  803:         } else {
  804:             $string = '';
  805:         }
  806: 
  807:         if (!isset($this->info[$type])) {
  808:             trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
  809:             return;
  810:         }
  811:         return $this->info[$type]->make($string);
  812:     }
  813: 
  814:     /**
  815:      * Sets a new implementation for a type
  816:      * @param string $type String type name
  817:      * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
  818:      */
  819:     public function set($type, $impl)
  820:     {
  821:         $this->info[$type] = $impl;
  822:     }
  823: }
  824: 
  825: 
  826: 
  827: 
  828: 
  829: /**
  830:  * Validates the attributes of a token. Doesn't manage required attributes
  831:  * very well. The only reason we factored this out was because RemoveForeignElements
  832:  * also needed it besides ValidateAttributes.
  833:  */
  834: class HTMLPurifier_AttrValidator
  835: {
  836: 
  837:     /**
  838:      * Validates the attributes of a token, mutating it as necessary.
  839:      * that has valid tokens
  840:      * @param HTMLPurifier_Token $token Token to validate.
  841:      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
  842:      * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
  843:      */
  844:     public function validateToken($token, $config, $context)
  845:     {
  846:         $definition = $config->getHTMLDefinition();
  847:         $e =& $context->get('ErrorCollector', true);
  848: 
  849:         // initialize IDAccumulator if necessary
  850:         $ok =& $context->get('IDAccumulator', true);
  851:         if (!$ok) {
  852:             $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
  853:             $context->register('IDAccumulator', $id_accumulator);
  854:         }
  855: 
  856:         // initialize CurrentToken if necessary
  857:         $current_token =& $context->get('CurrentToken', true);
  858:         if (!$current_token) {
  859:             $context->register('CurrentToken', $token);
  860:         }
  861: 
  862:         if (!$token instanceof HTMLPurifier_Token_Start &&
  863:             !$token instanceof HTMLPurifier_Token_Empty
  864:         ) {
  865:             return;
  866:         }
  867: 
  868:         // create alias to global definition array, see also $defs
  869:         // DEFINITION CALL
  870:         $d_defs = $definition->info_global_attr;
  871: 
  872:         // don't update token until the very end, to ensure an atomic update
  873:         $attr = $token->attr;
  874: 
  875:         // do global transformations (pre)
  876:         // nothing currently utilizes this
  877:         foreach ($definition->info_attr_transform_pre as $transform) {
  878:             $attr = $transform->transform($o = $attr, $config, $context);
  879:             if ($e) {
  880:                 if ($attr != $o) {
  881:                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  882:                 }
  883:             }
  884:         }
  885: 
  886:         // do local transformations only applicable to this element (pre)
  887:         // ex. <p align="right"> to <p style="text-align:right;">
  888:         foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
  889:             $attr = $transform->transform($o = $attr, $config, $context);
  890:             if ($e) {
  891:                 if ($attr != $o) {
  892:                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  893:                 }
  894:             }
  895:         }
  896: 
  897:         // create alias to this element's attribute definition array, see
  898:         // also $d_defs (global attribute definition array)
  899:         // DEFINITION CALL
  900:         $defs = $definition->info[$token->name]->attr;
  901: 
  902:         $attr_key = false;
  903:         $context->register('CurrentAttr', $attr_key);
  904: 
  905:         // iterate through all the attribute keypairs
  906:         // Watch out for name collisions: $key has previously been used
  907:         foreach ($attr as $attr_key => $value) {
  908: 
  909:             // call the definition
  910:             if (isset($defs[$attr_key])) {
  911:                 // there is a local definition defined
  912:                 if ($defs[$attr_key] === false) {
  913:                     // We've explicitly been told not to allow this element.
  914:                     // This is usually when there's a global definition
  915:                     // that must be overridden.
  916:                     // Theoretically speaking, we could have a
  917:                     // AttrDef_DenyAll, but this is faster!
  918:                     $result = false;
  919:                 } else {
  920:                     // validate according to the element's definition
  921:                     $result = $defs[$attr_key]->validate(
  922:                         $value,
  923:                         $config,
  924:                         $context
  925:                     );
  926:                 }
  927:             } elseif (isset($d_defs[$attr_key])) {
  928:                 // there is a global definition defined, validate according
  929:                 // to the global definition
  930:                 $result = $d_defs[$attr_key]->validate(
  931:                     $value,
  932:                     $config,
  933:                     $context
  934:                 );
  935:             } else {
  936:                 // system never heard of the attribute? DELETE!
  937:                 $result = false;
  938:             }
  939: 
  940:             // put the results into effect
  941:             if ($result === false || $result === null) {
  942:                 // this is a generic error message that should replaced
  943:                 // with more specific ones when possible
  944:                 if ($e) {
  945:                     $e->send(E_ERROR, 'AttrValidator: Attribute removed');
  946:                 }
  947: 
  948:                 // remove the attribute
  949:                 unset($attr[$attr_key]);
  950:             } elseif (is_string($result)) {
  951:                 // generally, if a substitution is happening, there
  952:                 // was some sort of implicit correction going on. We'll
  953:                 // delegate it to the attribute classes to say exactly what.
  954: 
  955:                 // simple substitution
  956:                 $attr[$attr_key] = $result;
  957:             } else {
  958:                 // nothing happens
  959:             }
  960: 
  961:             // we'd also want slightly more complicated substitution
  962:             // involving an array as the return value,
  963:             // although we're not sure how colliding attributes would
  964:             // resolve (certain ones would be completely overriden,
  965:             // others would prepend themselves).
  966:         }
  967: 
  968:         $context->destroy('CurrentAttr');
  969: 
  970:         // post transforms
  971: 
  972:         // global (error reporting untested)
  973:         foreach ($definition->info_attr_transform_post as $transform) {
  974:             $attr = $transform->transform($o = $attr, $config, $context);
  975:             if ($e) {
  976:                 if ($attr != $o) {
  977:                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  978:                 }
  979:             }
  980:         }
  981: 
  982:         // local (error reporting untested)
  983:         foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
  984:             $attr = $transform->transform($o = $attr, $config, $context);
  985:             if ($e) {
  986:                 if ($attr != $o) {
  987:                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  988:                 }
  989:             }
  990:         }
  991: 
  992:         $token->attr = $attr;
  993: 
  994:         // destroy CurrentToken if we made it ourselves
  995:         if (!$current_token) {
  996:             $context->destroy('CurrentToken');
  997:         }
  998: 
  999:     }
 1000: 
 1001: 
 1002: }
 1003: 
 1004: 
 1005: 
 1006: 
 1007: 
 1008: // constants are slow, so we use as few as possible
 1009: if (!defined('HTMLPURIFIER_PREFIX')) {
 1010:     define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
 1011:     set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
 1012: }
 1013: 
 1014: // accomodations for versions earlier than 5.0.2
 1015: // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
 1016: if (!defined('PHP_EOL')) {
 1017:     switch (strtoupper(substr(PHP_OS, 0, 3))) {
 1018:         case 'WIN':
 1019:             define('PHP_EOL', "\r\n");
 1020:             break;
 1021:         case 'DAR':
 1022:             define('PHP_EOL', "\r");
 1023:             break;
 1024:         default:
 1025:             define('PHP_EOL', "\n");
 1026:     }
 1027: }
 1028: 
 1029: /**
 1030:  * Bootstrap class that contains meta-functionality for HTML Purifier such as
 1031:  * the autoload function.
 1032:  *
 1033:  * @note
 1034:  *      This class may be used without any other files from HTML Purifier.
 1035:  */
 1036: class HTMLPurifier_Bootstrap
 1037: {
 1038: 
 1039:     /**
 1040:      * Autoload function for HTML Purifier
 1041:      * @param string $class Class to load
 1042:      * @return bool
 1043:      */
 1044:     public static function autoload($class)
 1045:     {
 1046:         $file = HTMLPurifier_Bootstrap::getPath($class);
 1047:         if (!$file) {
 1048:             return false;
 1049:         }
 1050:         // Technically speaking, it should be ok and more efficient to
 1051:         // just do 'require', but Antonio Parraga reports that with
 1052:         // Zend extensions such as Zend debugger and APC, this invariant
 1053:         // may be broken.  Since we have efficient alternatives, pay
 1054:         // the cost here and avoid the bug.
 1055:         require_once HTMLPURIFIER_PREFIX . '/' . $file;
 1056:         return true;
 1057:     }
 1058: 
 1059:     /**
 1060:      * Returns the path for a specific class.
 1061:      * @param string $class Class path to get
 1062:      * @return string
 1063:      */
 1064:     public static function getPath($class)
 1065:     {
 1066:         if (strncmp('HTMLPurifier', $class, 12) !== 0) {
 1067:             return false;
 1068:         }
 1069:         // Custom implementations
 1070:         if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
 1071:             $code = str_replace('_', '-', substr($class, 22));
 1072:             $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
 1073:         } else {
 1074:             $file = str_replace('_', '/', $class) . '.php';
 1075:         }
 1076:         if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) {
 1077:             return false;
 1078:         }
 1079:         return $file;
 1080:     }
 1081: 
 1082:     /**
 1083:      * "Pre-registers" our autoloader on the SPL stack.
 1084:      */
 1085:     public static function registerAutoload()
 1086:     {
 1087:         $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
 1088:         if (($funcs = spl_autoload_functions()) === false) {
 1089:             spl_autoload_register($autoload);
 1090:         } elseif (function_exists('spl_autoload_unregister')) {
 1091:             if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
 1092:                 // prepend flag exists, no need for shenanigans
 1093:                 spl_autoload_register($autoload, true, true);
 1094:             } else {
 1095:                 $buggy  = version_compare(PHP_VERSION, '5.2.11', '<');
 1096:                 $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
 1097:                           version_compare(PHP_VERSION, '5.1.0', '>=');
 1098:                 foreach ($funcs as $func) {
 1099:                     if ($buggy && is_array($func)) {
 1100:                         // :TRICKY: There are some compatibility issues and some
 1101:                         // places where we need to error out
 1102:                         $reflector = new ReflectionMethod($func[0], $func[1]);
 1103:                         if (!$reflector->isStatic()) {
 1104:                             throw new Exception(
 1105:                                 'HTML Purifier autoloader registrar is not compatible
 1106:                                 with non-static object methods due to PHP Bug #44144;
 1107:                                 Please do not use HTMLPurifier.autoload.php (or any
 1108:                                 file that includes this file); instead, place the code:
 1109:                                 spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
 1110:                                 after your own autoloaders.'
 1111:                             );
 1112:                         }
 1113:                         // Suprisingly, spl_autoload_register supports the
 1114:                         // Class::staticMethod callback format, although call_user_func doesn't
 1115:                         if ($compat) {
 1116:                             $func = implode('::', $func);
 1117:                         }
 1118:                     }
 1119:                     spl_autoload_unregister($func);
 1120:                 }
 1121:                 spl_autoload_register($autoload);
 1122:                 foreach ($funcs as $func) {
 1123:                     spl_autoload_register($func);
 1124:                 }
 1125:             }
 1126:         }
 1127:     }
 1128: }
 1129: 
 1130: 
 1131: 
 1132: 
 1133: 
 1134: /**
 1135:  * Super-class for definition datatype objects, implements serialization
 1136:  * functions for the class.
 1137:  */
 1138: abstract class HTMLPurifier_Definition
 1139: {
 1140: 
 1141:     /**
 1142:      * Has setup() been called yet?
 1143:      * @type bool
 1144:      */
 1145:     public $setup = false;
 1146: 
 1147:     /**
 1148:      * If true, write out the final definition object to the cache after
 1149:      * setup.  This will be true only if all invocations to get a raw
 1150:      * definition object are also optimized.  This does not cause file
 1151:      * system thrashing because on subsequent calls the cached object
 1152:      * is used and any writes to the raw definition object are short
 1153:      * circuited.  See enduser-customize.html for the high-level
 1154:      * picture.
 1155:      * @type bool
 1156:      */
 1157:     public $optimized = null;
 1158: 
 1159:     /**
 1160:      * What type of definition is it?
 1161:      * @type string
 1162:      */
 1163:     public $type;
 1164: 
 1165:     /**
 1166:      * Sets up the definition object into the final form, something
 1167:      * not done by the constructor
 1168:      * @param HTMLPurifier_Config $config
 1169:      */
 1170:     abstract protected function doSetup($config);
 1171: 
 1172:     /**
 1173:      * Setup function that aborts if already setup
 1174:      * @param HTMLPurifier_Config $config
 1175:      */
 1176:     public function setup($config)
 1177:     {
 1178:         if ($this->setup) {
 1179:             return;
 1180:         }
 1181:         $this->setup = true;
 1182:         $this->doSetup($config);
 1183:     }
 1184: }
 1185: 
 1186: 
 1187: 
 1188: 
 1189: 
 1190: /**
 1191:  * Defines allowed CSS attributes and what their values are.
 1192:  * @see HTMLPurifier_HTMLDefinition
 1193:  */
 1194: class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
 1195: {
 1196: 
 1197:     public $type = 'CSS';
 1198: 
 1199:     /**
 1200:      * Assoc array of attribute name to definition object.
 1201:      * @type HTMLPurifier_AttrDef[]
 1202:      */
 1203:     public $info = array();
 1204: 
 1205:     /**
 1206:      * Constructs the info array.  The meat of this class.
 1207:      * @param HTMLPurifier_Config $config
 1208:      */
 1209:     protected function doSetup($config)
 1210:     {
 1211:         $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
 1212:             array('left', 'right', 'center', 'justify'),
 1213:             false
 1214:         );
 1215: 
 1216:         $border_style =
 1217:             $this->info['border-bottom-style'] =
 1218:             $this->info['border-right-style'] =
 1219:             $this->info['border-left-style'] =
 1220:             $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
 1221:                 array(
 1222:                     'none',
 1223:                     'hidden',
 1224:                     'dotted',
 1225:                     'dashed',
 1226:                     'solid',
 1227:                     'double',
 1228:                     'groove',
 1229:                     'ridge',
 1230:                     'inset',
 1231:                     'outset'
 1232:                 ),
 1233:                 false
 1234:             );
 1235: 
 1236:         $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
 1237: 
 1238:         $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
 1239:             array('none', 'left', 'right', 'both'),
 1240:             false
 1241:         );
 1242:         $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
 1243:             array('none', 'left', 'right'),
 1244:             false
 1245:         );
 1246:         $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
 1247:             array('normal', 'italic', 'oblique'),
 1248:             false
 1249:         );
 1250:         $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
 1251:             array('normal', 'small-caps'),
 1252:             false
 1253:         );
 1254: 
 1255:         $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
 1256:             array(
 1257:                 new HTMLPurifier_AttrDef_Enum(array('none')),
 1258:                 new HTMLPurifier_AttrDef_CSS_URI()
 1259:             )
 1260:         );
 1261: 
 1262:         $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
 1263:             array('inside', 'outside'),
 1264:             false
 1265:         );
 1266:         $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
 1267:             array(
 1268:                 'disc',
 1269:                 'circle',
 1270:                 'square',
 1271:                 'decimal',
 1272:                 'lower-roman',
 1273:                 'upper-roman',
 1274:                 'lower-alpha',
 1275:                 'upper-alpha',
 1276:                 'none'
 1277:             ),
 1278:             false
 1279:         );
 1280:         $this->info['list-style-image'] = $uri_or_none;
 1281: 
 1282:         $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
 1283: 
 1284:         $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
 1285:             array('capitalize', 'uppercase', 'lowercase', 'none'),
 1286:             false
 1287:         );
 1288:         $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1289: 
 1290:         $this->info['background-image'] = $uri_or_none;
 1291:         $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
 1292:             array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
 1293:         );
 1294:         $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
 1295:             array('scroll', 'fixed')
 1296:         );
 1297:         $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
 1298: 
 1299:         $border_color =
 1300:             $this->info['border-top-color'] =
 1301:             $this->info['border-bottom-color'] =
 1302:             $this->info['border-left-color'] =
 1303:             $this->info['border-right-color'] =
 1304:             $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1305:                 array(
 1306:                     new HTMLPurifier_AttrDef_Enum(array('transparent')),
 1307:                     new HTMLPurifier_AttrDef_CSS_Color()
 1308:                 )
 1309:             );
 1310: 
 1311:         $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
 1312: 
 1313:         $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
 1314: 
 1315:         $border_width =
 1316:             $this->info['border-top-width'] =
 1317:             $this->info['border-bottom-width'] =
 1318:             $this->info['border-left-width'] =
 1319:             $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1320:                 array(
 1321:                     new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
 1322:                     new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
 1323:                 )
 1324:             );
 1325: 
 1326:         $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
 1327: 
 1328:         $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1329:             array(
 1330:                 new HTMLPurifier_AttrDef_Enum(array('normal')),
 1331:                 new HTMLPurifier_AttrDef_CSS_Length()
 1332:             )
 1333:         );
 1334: 
 1335:         $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1336:             array(
 1337:                 new HTMLPurifier_AttrDef_Enum(array('normal')),
 1338:                 new HTMLPurifier_AttrDef_CSS_Length()
 1339:             )
 1340:         );
 1341: 
 1342:         $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1343:             array(
 1344:                 new HTMLPurifier_AttrDef_Enum(
 1345:                     array(
 1346:                         'xx-small',
 1347:                         'x-small',
 1348:                         'small',
 1349:                         'medium',
 1350:                         'large',
 1351:                         'x-large',
 1352:                         'xx-large',
 1353:                         'larger',
 1354:                         'smaller'
 1355:                     )
 1356:                 ),
 1357:                 new HTMLPurifier_AttrDef_CSS_Percentage(),
 1358:                 new HTMLPurifier_AttrDef_CSS_Length()
 1359:             )
 1360:         );
 1361: 
 1362:         $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1363:             array(
 1364:                 new HTMLPurifier_AttrDef_Enum(array('normal')),
 1365:                 new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
 1366:                 new HTMLPurifier_AttrDef_CSS_Length('0'),
 1367:                 new HTMLPurifier_AttrDef_CSS_Percentage(true)
 1368:             )
 1369:         );
 1370: 
 1371:         $margin =
 1372:             $this->info['margin-top'] =
 1373:             $this->info['margin-bottom'] =
 1374:             $this->info['margin-left'] =
 1375:             $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1376:                 array(
 1377:                     new HTMLPurifier_AttrDef_CSS_Length(),
 1378:                     new HTMLPurifier_AttrDef_CSS_Percentage(),
 1379:                     new HTMLPurifier_AttrDef_Enum(array('auto'))
 1380:                 )
 1381:             );
 1382: 
 1383:         $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
 1384: 
 1385:         // non-negative
 1386:         $padding =
 1387:             $this->info['padding-top'] =
 1388:             $this->info['padding-bottom'] =
 1389:             $this->info['padding-left'] =
 1390:             $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1391:                 array(
 1392:                     new HTMLPurifier_AttrDef_CSS_Length('0'),
 1393:                     new HTMLPurifier_AttrDef_CSS_Percentage(true)
 1394:                 )
 1395:             );
 1396: 
 1397:         $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
 1398: 
 1399:         $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1400:             array(
 1401:                 new HTMLPurifier_AttrDef_CSS_Length(),
 1402:                 new HTMLPurifier_AttrDef_CSS_Percentage()
 1403:             )
 1404:         );
 1405: 
 1406:         $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
 1407:             array(
 1408:                 new HTMLPurifier_AttrDef_CSS_Length('0'),
 1409:                 new HTMLPurifier_AttrDef_CSS_Percentage(true),
 1410:                 new HTMLPurifier_AttrDef_Enum(array('auto'))
 1411:             )
 1412:         );
 1413:         $max = $config->get('CSS.MaxImgLength');
 1414: 
 1415:         $this->info['width'] =
 1416:         $this->info['height'] =
 1417:             $max === null ?
 1418:                 $trusted_wh :
 1419:                 new HTMLPurifier_AttrDef_Switch(
 1420:                     'img',
 1421:                     // For img tags:
 1422:                     new HTMLPurifier_AttrDef_CSS_Composite(
 1423:                         array(
 1424:                             new HTMLPurifier_AttrDef_CSS_Length('0', $max),
 1425:                             new HTMLPurifier_AttrDef_Enum(array('auto'))
 1426:                         )
 1427:                     ),
 1428:                     // For everyone else:
 1429:                     $trusted_wh
 1430:                 );
 1431: 
 1432:         $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
 1433: 
 1434:         $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
 1435: 
 1436:         // this could use specialized code
 1437:         $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
 1438:             array(
 1439:                 'normal',
 1440:                 'bold',
 1441:                 'bolder',
 1442:                 'lighter',
 1443:                 '100',
 1444:                 '200',
 1445:                 '300',
 1446:                 '400',
 1447:                 '500',
 1448:                 '600',
 1449:                 '700',
 1450:                 '800',
 1451:                 '900'
 1452:             ),
 1453:             false
 1454:         );
 1455: 
 1456:         // MUST be called after other font properties, as it references
 1457:         // a CSSDefinition object
 1458:         $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
 1459: 
 1460:         // same here
 1461:         $this->info['border'] =
 1462:         $this->info['border-bottom'] =
 1463:         $this->info['border-top'] =
 1464:         $this->info['border-left'] =
 1465:         $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
 1466: 
 1467:         $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
 1468:             array('collapse', 'separate')
 1469:         );
 1470: 
 1471:         $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
 1472:             array('top', 'bottom')
 1473:         );
 1474: 
 1475:         $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
 1476:             array('auto', 'fixed')
 1477:         );
 1478: 
 1479:         $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1480:             array(
 1481:                 new HTMLPurifier_AttrDef_Enum(
 1482:                     array(
 1483:                         'baseline',
 1484:                         'sub',
 1485:                         'super',
 1486:                         'top',
 1487:                         'text-top',
 1488:                         'middle',
 1489:                         'bottom',
 1490:                         'text-bottom'
 1491:                     )
 1492:                 ),
 1493:                 new HTMLPurifier_AttrDef_CSS_Length(),
 1494:                 new HTMLPurifier_AttrDef_CSS_Percentage()
 1495:             )
 1496:         );
 1497: 
 1498:         $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
 1499: 
 1500:         // These CSS properties don't work on many browsers, but we live
 1501:         // in THE FUTURE!
 1502:         $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
 1503:             array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
 1504:         );
 1505: 
 1506:         if ($config->get('CSS.Proprietary')) {
 1507:             $this->doSetupProprietary($config);
 1508:         }
 1509: 
 1510:         if ($config->get('CSS.AllowTricky')) {
 1511:             $this->doSetupTricky($config);
 1512:         }
 1513: 
 1514:         if ($config->get('CSS.Trusted')) {
 1515:             $this->doSetupTrusted($config);
 1516:         }
 1517: 
 1518:         $allow_important = $config->get('CSS.AllowImportant');
 1519:         // wrap all attr-defs with decorator that handles !important
 1520:         foreach ($this->info as $k => $v) {
 1521:             $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
 1522:         }
 1523: 
 1524:         $this->setupConfigStuff($config);
 1525:     }
 1526: 
 1527:     /**
 1528:      * @param HTMLPurifier_Config $config
 1529:      */
 1530:     protected function doSetupProprietary($config)
 1531:     {
 1532:         // Internet Explorer only scrollbar colors
 1533:         $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1534:         $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1535:         $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1536:         $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1537:         $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1538:         $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1539: 
 1540:         // technically not proprietary, but CSS3, and no one supports it
 1541:         $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1542:         $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1543:         $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1544: 
 1545:         // only opacity, for now
 1546:         $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
 1547: 
 1548:         // more CSS3
 1549:         $this->info['page-break-after'] =
 1550:         $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(
 1551:             array(
 1552:                 'auto',
 1553:                 'always',
 1554:                 'avoid',
 1555:                 'left',
 1556:                 'right'
 1557:             )
 1558:         );
 1559:         $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
 1560: 
 1561:     }
 1562: 
 1563:     /**
 1564:      * @param HTMLPurifier_Config $config
 1565:      */
 1566:     protected function doSetupTricky($config)
 1567:     {
 1568:         $this->info['display'] = new HTMLPurifier_AttrDef_Enum(
 1569:             array(
 1570:                 'inline',
 1571:                 'block',
 1572:                 'list-item',
 1573:                 'run-in',
 1574:                 'compact',
 1575:                 'marker',
 1576:                 'table',
 1577:                 'inline-block',
 1578:                 'inline-table',
 1579:                 'table-row-group',
 1580:                 'table-header-group',
 1581:                 'table-footer-group',
 1582:                 'table-row',
 1583:                 'table-column-group',
 1584:                 'table-column',
 1585:                 'table-cell',
 1586:                 'table-caption',
 1587:                 'none'
 1588:             )
 1589:         );
 1590:         $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(
 1591:             array('visible', 'hidden', 'collapse')
 1592:         );
 1593:         $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
 1594:     }
 1595: 
 1596:     /**
 1597:      * @param HTMLPurifier_Config $config
 1598:      */
 1599:     protected function doSetupTrusted($config)
 1600:     {
 1601:         $this->info['position'] = new HTMLPurifier_AttrDef_Enum(
 1602:             array('static', 'relative', 'absolute', 'fixed')
 1603:         );
 1604:         $this->info['top'] =
 1605:         $this->info['left'] =
 1606:         $this->info['right'] =
 1607:         $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1608:             array(
 1609:                 new HTMLPurifier_AttrDef_CSS_Length(),
 1610:                 new HTMLPurifier_AttrDef_CSS_Percentage(),
 1611:                 new HTMLPurifier_AttrDef_Enum(array('auto')),
 1612:             )
 1613:         );
 1614:         $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1615:             array(
 1616:                 new HTMLPurifier_AttrDef_Integer(),
 1617:                 new HTMLPurifier_AttrDef_Enum(array('auto')),
 1618:             )
 1619:         );
 1620:     }
 1621: 
 1622:     /**
 1623:      * Performs extra config-based processing. Based off of
 1624:      * HTMLPurifier_HTMLDefinition.
 1625:      * @param HTMLPurifier_Config $config
 1626:      * @todo Refactor duplicate elements into common class (probably using
 1627:      *       composition, not inheritance).
 1628:      */
 1629:     protected function setupConfigStuff($config)
 1630:     {
 1631:         // setup allowed elements
 1632:         $support = "(for information on implementing this, see the " .
 1633:             "support forums) ";
 1634:         $allowed_properties = $config->get('CSS.AllowedProperties');
 1635:         if ($allowed_properties !== null) {
 1636:             foreach ($this->info as $name => $d) {
 1637:                 if (!isset($allowed_properties[$name])) {
 1638:                     unset($this->info[$name]);
 1639:                 }
 1640:                 unset($allowed_properties[$name]);
 1641:             }
 1642:             // emit errors
 1643:             foreach ($allowed_properties as $name => $d) {
 1644:                 // :TODO: Is this htmlspecialchars() call really necessary?
 1645:                 $name = htmlspecialchars($name);
 1646:                 trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
 1647:             }
 1648:         }
 1649: 
 1650:         $forbidden_properties = $config->get('CSS.ForbiddenProperties');
 1651:         if ($forbidden_properties !== null) {
 1652:             foreach ($this->info as $name => $d) {
 1653:                 if (isset($forbidden_properties[$name])) {
 1654:                     unset($this->info[$name]);
 1655:                 }
 1656:             }
 1657:         }
 1658:     }
 1659: }
 1660: 
 1661: 
 1662: 
 1663: 
 1664: 
 1665: /**
 1666:  * Defines allowed child nodes and validates nodes against it.
 1667:  */
 1668: abstract class HTMLPurifier_ChildDef
 1669: {
 1670:     /**
 1671:      * Type of child definition, usually right-most part of class name lowercase.
 1672:      * Used occasionally in terms of context.
 1673:      * @type string
 1674:      */
 1675:     public $type;
 1676: 
 1677:     /**
 1678:      * Indicates whether or not an empty array of children is okay.
 1679:      *
 1680:      * This is necessary for redundant checking when changes affecting
 1681:      * a child node may cause a parent node to now be disallowed.
 1682:      * @type bool
 1683:      */
 1684:     public $allow_empty;
 1685: 
 1686:     /**
 1687:      * Lookup array of all elements that this definition could possibly allow.
 1688:      * @type array
 1689:      */
 1690:     public $elements = array();
 1691: 
 1692:     /**
 1693:      * Get lookup of tag names that should not close this element automatically.
 1694:      * All other elements will do so.
 1695:      * @param HTMLPurifier_Config $config HTMLPurifier_Config object
 1696:      * @return array
 1697:      */
 1698:     public function getAllowedElements($config)
 1699:     {
 1700:         return $this->elements;
 1701:     }
 1702: 
 1703:     /**
 1704:      * Validates nodes according to definition and returns modification.
 1705:      *
 1706:      * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
 1707:      * @param HTMLPurifier_Config $config HTMLPurifier_Config object
 1708:      * @param HTMLPurifier_Context $context HTMLPurifier_Context object
 1709:      * @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children
 1710:      */
 1711:     abstract public function validateChildren($children, $config, $context);
 1712: }
 1713: 
 1714: 
 1715: 
 1716: 
 1717: 
 1718: /**
 1719:  * Configuration object that triggers customizable behavior.
 1720:  *
 1721:  * @warning This class is strongly defined: that means that the class
 1722:  *          will fail if an undefined directive is retrieved or set.
 1723:  *
 1724:  * @note Many classes that could (although many times don't) use the
 1725:  *       configuration object make it a mandatory parameter.  This is
 1726:  *       because a configuration object should always be forwarded,
 1727:  *       otherwise, you run the risk of missing a parameter and then
 1728:  *       being stumped when a configuration directive doesn't work.
 1729:  *
 1730:  * @todo Reconsider some of the public member variables
 1731:  */
 1732: class HTMLPurifier_Config
 1733: {
 1734: 
 1735:     /**
 1736:      * HTML Purifier's version
 1737:      * @type string
 1738:      */
 1739:     public $version = '4.6.0';
 1740: 
 1741:     /**
 1742:      * Whether or not to automatically finalize
 1743:      * the object if a read operation is done.
 1744:      * @type bool
 1745:      */
 1746:     public $autoFinalize = true;
 1747: 
 1748:     // protected member variables
 1749: 
 1750:     /**
 1751:      * Namespace indexed array of serials for specific namespaces.
 1752:      * @see getSerial() for more info.
 1753:      * @type string[]
 1754:      */
 1755:     protected $serials = array();
 1756: 
 1757:     /**
 1758:      * Serial for entire configuration object.
 1759:      * @type string
 1760:      */
 1761:     protected $serial;
 1762: 
 1763:     /**
 1764:      * Parser for variables.
 1765:      * @type HTMLPurifier_VarParser_Flexible
 1766:      */
 1767:     protected $parser = null;
 1768: 
 1769:     /**
 1770:      * Reference HTMLPurifier_ConfigSchema for value checking.
 1771:      * @type HTMLPurifier_ConfigSchema
 1772:      * @note This is public for introspective purposes. Please don't
 1773:      *       abuse!
 1774:      */
 1775:     public $def;
 1776: 
 1777:     /**
 1778:      * Indexed array of definitions.
 1779:      * @type HTMLPurifier_Definition[]
 1780:      */
 1781:     protected $definitions;
 1782: 
 1783:     /**
 1784:      * Whether or not config is finalized.
 1785:      * @type bool
 1786:      */
 1787:     protected $finalized = false;
 1788: 
 1789:     /**
 1790:      * Property list containing configuration directives.
 1791:      * @type array
 1792:      */
 1793:     protected $plist;
 1794: 
 1795:     /**
 1796:      * Whether or not a set is taking place due to an alias lookup.
 1797:      * @type bool
 1798:      */
 1799:     private $aliasMode;
 1800: 
 1801:     /**
 1802:      * Set to false if you do not want line and file numbers in errors.
 1803:      * (useful when unit testing).  This will also compress some errors
 1804:      * and exceptions.
 1805:      * @type bool
 1806:      */
 1807:     public $chatty = true;
 1808: 
 1809:     /**
 1810:      * Current lock; only gets to this namespace are allowed.
 1811:      * @type string
 1812:      */
 1813:     private $lock;
 1814: 
 1815:     /**
 1816:      * Constructor
 1817:      * @param HTMLPurifier_ConfigSchema $definition ConfigSchema that defines
 1818:      * what directives are allowed.
 1819:      * @param HTMLPurifier_PropertyList $parent
 1820:      */
 1821:     public function __construct($definition, $parent = null)
 1822:     {
 1823:         $parent = $parent ? $parent : $definition->defaultPlist;
 1824:         $this->plist = new HTMLPurifier_PropertyList($parent);
 1825:         $this->def = $definition; // keep a copy around for checking
 1826:         $this->parser = new HTMLPurifier_VarParser_Flexible();
 1827:     }
 1828: 
 1829:     /**
 1830:      * Convenience constructor that creates a config object based on a mixed var
 1831:      * @param mixed $config Variable that defines the state of the config
 1832:      *                      object. Can be: a HTMLPurifier_Config() object,
 1833:      *                      an array of directives based on loadArray(),
 1834:      *                      or a string filename of an ini file.
 1835:      * @param HTMLPurifier_ConfigSchema $schema Schema object
 1836:      * @return HTMLPurifier_Config Configured object
 1837:      */
 1838:     public static function create($config, $schema = null)
 1839:     {
 1840:         if ($config instanceof HTMLPurifier_Config) {
 1841:             // pass-through
 1842:             return $config;
 1843:         }
 1844:         if (!$schema) {
 1845:             $ret = HTMLPurifier_Config::createDefault();
 1846:         } else {
 1847:             $ret = new HTMLPurifier_Config($schema);
 1848:         }
 1849:         if (is_string($config)) {
 1850:             $ret->loadIni($config);
 1851:         } elseif (is_array($config)) $ret->loadArray($config);
 1852:         return $ret;
 1853:     }
 1854: 
 1855:     /**
 1856:      * Creates a new config object that inherits from a previous one.
 1857:      * @param HTMLPurifier_Config $config Configuration object to inherit from.
 1858:      * @return HTMLPurifier_Config object with $config as its parent.
 1859:      */
 1860:     public static function inherit(HTMLPurifier_Config $config)
 1861:     {
 1862:         return new HTMLPurifier_Config($config->def, $config->plist);
 1863:     }
 1864: 
 1865:     /**
 1866:      * Convenience constructor that creates a default configuration object.
 1867:      * @return HTMLPurifier_Config default object.
 1868:      */
 1869:     public static function createDefault()
 1870:     {
 1871:         $definition = HTMLPurifier_ConfigSchema::instance();
 1872:         $config = new HTMLPurifier_Config($definition);
 1873:         return $config;
 1874:     }
 1875: 
 1876:     /**
 1877:      * Retrieves a value from the configuration.
 1878:      *
 1879:      * @param string $key String key
 1880:      * @param mixed $a
 1881:      *
 1882:      * @return mixed
 1883:      */
 1884:     public function get($key, $a = null)
 1885:     {
 1886:         if ($a !== null) {
 1887:             $this->triggerError(
 1888:                 "Using deprecated API: use \$config->get('$key.$a') instead",
 1889:                 E_USER_WARNING
 1890:             );
 1891:             $key = "$key.$a";
 1892:         }
 1893:         if (!$this->finalized) {
 1894:             $this->autoFinalize();
 1895:         }
 1896:         if (!isset($this->def->info[$key])) {
 1897:             // can't add % due to SimpleTest bug
 1898:             $this->triggerError(
 1899:                 'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
 1900:                 E_USER_WARNING
 1901:             );
 1902:             return;
 1903:         }
 1904:         if (isset($this->def->info[$key]->isAlias)) {
 1905:             $d = $this->def->info[$key];
 1906:             $this->triggerError(
 1907:                 'Cannot get value from aliased directive, use real name ' . $d->key,
 1908:                 E_USER_ERROR
 1909:             );
 1910:             return;
 1911:         }
 1912:         if ($this->lock) {
 1913:             list($ns) = explode('.', $key);
 1914:             if ($ns !== $this->lock) {
 1915:                 $this->triggerError(
 1916:                     'Cannot get value of namespace ' . $ns . ' when lock for ' .
 1917:                     $this->lock .
 1918:                     ' is active, this probably indicates a Definition setup method ' .
 1919:                     'is accessing directives that are not within its namespace',
 1920:                     E_USER_ERROR
 1921:                 );
 1922:                 return;
 1923:             }
 1924:         }
 1925:         return $this->plist->get($key);
 1926:     }
 1927: 
 1928:     /**
 1929:      * Retrieves an array of directives to values from a given namespace
 1930:      *
 1931:      * @param string $namespace String namespace
 1932:      *
 1933:      * @return array
 1934:      */
 1935:     public function getBatch($namespace)
 1936:     {
 1937:         if (!$this->finalized) {
 1938:             $this->autoFinalize();
 1939:         }
 1940:         $full = $this->getAll();
 1941:         if (!isset($full[$namespace])) {
 1942:             $this->triggerError(
 1943:                 'Cannot retrieve undefined namespace ' .
 1944:                 htmlspecialchars($namespace),
 1945:                 E_USER_WARNING
 1946:             );
 1947:             return;
 1948:         }
 1949:         return $full[$namespace];
 1950:     }
 1951: 
 1952:     /**
 1953:      * Returns a SHA-1 signature of a segment of the configuration object
 1954:      * that uniquely identifies that particular configuration
 1955:      *
 1956:      * @param string $namespace Namespace to get serial for
 1957:      *
 1958:      * @return string
 1959:      * @note Revision is handled specially and is removed from the batch
 1960:      *       before processing!
 1961:      */
 1962:     public function getBatchSerial($namespace)
 1963:     {
 1964:         if (empty($this->serials[$namespace])) {
 1965:             $batch = $this->getBatch($namespace);
 1966:             unset($batch['DefinitionRev']);
 1967:             $this->serials[$namespace] = sha1(serialize($batch));
 1968:         }
 1969:         return $this->serials[$namespace];
 1970:     }
 1971: 
 1972:     /**
 1973:      * Returns a SHA-1 signature for the entire configuration object
 1974:      * that uniquely identifies that particular configuration
 1975:      *
 1976:      * @return string
 1977:      */
 1978:     public function getSerial()
 1979:     {
 1980:         if (empty($this->serial)) {
 1981:             $this->serial = sha1(serialize($this->getAll()));
 1982:         }
 1983:         return $this->serial;
 1984:     }
 1985: 
 1986:     /**
 1987:      * Retrieves all directives, organized by namespace
 1988:      *
 1989:      * @warning This is a pretty inefficient function, avoid if you can
 1990:      */
 1991:     public function getAll()
 1992:     {
 1993:         if (!$this->finalized) {
 1994:             $this->autoFinalize();
 1995:         }
 1996:         $ret = array();
 1997:         foreach ($this->plist->squash() as $name => $value) {
 1998:             list($ns, $key) = explode('.', $name, 2);
 1999:             $ret[$ns][$key] = $value;
 2000:         }
 2001:         return $ret;
 2002:     }
 2003: 
 2004:     /**
 2005:      * Sets a value to configuration.
 2006:      *
 2007:      * @param string $key key
 2008:      * @param mixed $value value
 2009:      * @param mixed $a
 2010:      */
 2011:     public function set($key, $value, $a = null)
 2012:     {
 2013:         if (strpos($key, '.') === false) {
 2014:             $namespace = $key;
 2015:             $directive = $value;
 2016:             $value = $a;
 2017:             $key = "$key.$directive";
 2018:             $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
 2019:         } else {
 2020:             list($namespace) = explode('.', $key);
 2021:         }
 2022:         if ($this->isFinalized('Cannot set directive after finalization')) {
 2023:             return;
 2024:         }
 2025:         if (!isset($this->def->info[$key])) {
 2026:             $this->triggerError(
 2027:                 'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
 2028:                 E_USER_WARNING
 2029:             );
 2030:             return;
 2031:         }
 2032:         $def = $this->def->info[$key];
 2033: 
 2034:         if (isset($def->isAlias)) {
 2035:             if ($this->aliasMode) {
 2036:                 $this->triggerError(
 2037:                     'Double-aliases not allowed, please fix '.
 2038:                     'ConfigSchema bug with' . $key,
 2039:                     E_USER_ERROR
 2040:                 );
 2041:                 return;
 2042:             }
 2043:             $this->aliasMode = true;
 2044:             $this->set($def->key, $value);
 2045:             $this->aliasMode = false;
 2046:             $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
 2047:             return;
 2048:         }
 2049: 
 2050:         // Raw type might be negative when using the fully optimized form
 2051:         // of stdclass, which indicates allow_null == true
 2052:         $rtype = is_int($def) ? $def : $def->type;
 2053:         if ($rtype < 0) {
 2054:             $type = -$rtype;
 2055:             $allow_null = true;
 2056:         } else {
 2057:             $type = $rtype;
 2058:             $allow_null = isset($def->allow_null);
 2059:         }
 2060: 
 2061:         try {
 2062:             $value = $this->parser->parse($value, $type, $allow_null);
 2063:         } catch (HTMLPurifier_VarParserException $e) {
 2064:             $this->triggerError(
 2065:                 'Value for ' . $key . ' is of invalid type, should be ' .
 2066:                 HTMLPurifier_VarParser::getTypeName($type),
 2067:                 E_USER_WARNING
 2068:             );
 2069:             return;
 2070:         }
 2071:         if (is_string($value) && is_object($def)) {
 2072:             // resolve value alias if defined
 2073:             if (isset($def->aliases[$value])) {
 2074:                 $value = $def->aliases[$value];
 2075:             }
 2076:             // check to see if the value is allowed
 2077:             if (isset($def->allowed) && !isset($def->allowed[$value])) {
 2078:                 $this->triggerError(
 2079:                     'Value not supported, valid values are: ' .
 2080:                     $this->_listify($def->allowed),
 2081:                     E_USER_WARNING
 2082:                 );
 2083:                 return;
 2084:             }
 2085:         }
 2086:         $this->plist->set($key, $value);
 2087: 
 2088:         // reset definitions if the directives they depend on changed
 2089:         // this is a very costly process, so it's discouraged
 2090:         // with finalization
 2091:         if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
 2092:             $this->definitions[$namespace] = null;
 2093:         }
 2094: 
 2095:         $this->serials[$namespace] = false;
 2096:     }
 2097: 
 2098:     /**
 2099:      * Convenience function for error reporting
 2100:      *
 2101:      * @param array $lookup
 2102:      *
 2103:      * @return string
 2104:      */
 2105:     private function _listify($lookup)
 2106:     {
 2107:         $list = array();
 2108:         foreach ($lookup as $name => $b) {
 2109:             $list[] = $name;
 2110:         }
 2111:         return implode(', ', $list);
 2112:     }
 2113: 
 2114:     /**
 2115:      * Retrieves object reference to the HTML definition.
 2116:      *
 2117:      * @param bool $raw Return a copy that has not been setup yet. Must be
 2118:      *             called before it's been setup, otherwise won't work.
 2119:      * @param bool $optimized If true, this method may return null, to
 2120:      *             indicate that a cached version of the modified
 2121:      *             definition object is available and no further edits
 2122:      *             are necessary.  Consider using
 2123:      *             maybeGetRawHTMLDefinition, which is more explicitly
 2124:      *             named, instead.
 2125:      *
 2126:      * @return HTMLPurifier_HTMLDefinition
 2127:      */
 2128:     public function getHTMLDefinition($raw = false, $optimized = false)
 2129:     {
 2130:         return $this->getDefinition('HTML', $raw, $optimized);
 2131:     }
 2132: 
 2133:     /**
 2134:      * Retrieves object reference to the CSS definition
 2135:      *
 2136:      * @param bool $raw Return a copy that has not been setup yet. Must be
 2137:      *             called before it's been setup, otherwise won't work.
 2138:      * @param bool $optimized If true, this method may return null, to
 2139:      *             indicate that a cached version of the modified
 2140:      *             definition object is available and no further edits
 2141:      *             are necessary.  Consider using
 2142:      *             maybeGetRawCSSDefinition, which is more explicitly
 2143:      *             named, instead.
 2144:      *
 2145:      * @return HTMLPurifier_CSSDefinition
 2146:      */
 2147:     public function getCSSDefinition($raw = false, $optimized = false)
 2148:     {
 2149:         return $this->getDefinition('CSS', $raw, $optimized);
 2150:     }
 2151: 
 2152:     /**
 2153:      * Retrieves object reference to the URI definition
 2154:      *
 2155:      * @param bool $raw Return a copy that has not been setup yet. Must be
 2156:      *             called before it's been setup, otherwise won't work.
 2157:      * @param bool $optimized If true, this method may return null, to
 2158:      *             indicate that a cached version of the modified
 2159:      *             definition object is available and no further edits
 2160:      *             are necessary.  Consider using
 2161:      *             maybeGetRawURIDefinition, which is more explicitly
 2162:      *             named, instead.
 2163:      *
 2164:      * @return HTMLPurifier_URIDefinition
 2165:      */
 2166:     public function getURIDefinition($raw = false, $optimized = false)
 2167:     {
 2168:         return $this->getDefinition('URI', $raw, $optimized);
 2169:     }
 2170: 
 2171:     /**
 2172:      * Retrieves a definition
 2173:      *
 2174:      * @param string $type Type of definition: HTML, CSS, etc
 2175:      * @param bool $raw Whether or not definition should be returned raw
 2176:      * @param bool $optimized Only has an effect when $raw is true.  Whether
 2177:      *        or not to return null if the result is already present in
 2178:      *        the cache.  This is off by default for backwards
 2179:      *        compatibility reasons, but you need to do things this
 2180:      *        way in order to ensure that caching is done properly.
 2181:      *        Check out enduser-customize.html for more details.
 2182:      *        We probably won't ever change this default, as much as the
 2183:      *        maybe semantics is the "right thing to do."
 2184:      *
 2185:      * @throws HTMLPurifier_Exception
 2186:      * @return HTMLPurifier_Definition
 2187:      */
 2188:     public function getDefinition($type, $raw = false, $optimized = false)
 2189:     {
 2190:         if ($optimized && !$raw) {
 2191:             throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
 2192:         }
 2193:         if (!$this->finalized) {
 2194:             $this->autoFinalize();
 2195:         }
 2196:         // temporarily suspend locks, so we can handle recursive definition calls
 2197:         $lock = $this->lock;
 2198:         $this->lock = null;
 2199:         $factory = HTMLPurifier_DefinitionCacheFactory::instance();
 2200:         $cache = $factory->create($type, $this);
 2201:         $this->lock = $lock;
 2202:         if (!$raw) {
 2203:             // full definition
 2204:             // ---------------
 2205:             // check if definition is in memory
 2206:             if (!empty($this->definitions[$type])) {
 2207:                 $def = $this->definitions[$type];
 2208:                 // check if the definition is setup
 2209:                 if ($def->setup) {
 2210:                     return $def;
 2211:                 } else {
 2212:                     $def->setup($this);
 2213:                     if ($def->optimized) {
 2214:                         $cache->add($def, $this);
 2215:                     }
 2216:                     return $def;
 2217:                 }
 2218:             }
 2219:             // check if definition is in cache
 2220:             $def = $cache->get($this);
 2221:             if ($def) {
 2222:                 // definition in cache, save to memory and return it
 2223:                 $this->definitions[$type] = $def;
 2224:                 return $def;
 2225:             }
 2226:             // initialize it
 2227:             $def = $this->initDefinition($type);
 2228:             // set it up
 2229:             $this->lock = $type;
 2230:             $def->setup($this);
 2231:             $this->lock = null;
 2232:             // save in cache
 2233:             $cache->add($def, $this);
 2234:             // return it
 2235:             return $def;
 2236:         } else {
 2237:             // raw definition
 2238:             // --------------
 2239:             // check preconditions
 2240:             $def = null;
 2241:             if ($optimized) {
 2242:                 if (is_null($this->get($type . '.DefinitionID'))) {
 2243:                     // fatally error out if definition ID not set
 2244:                     throw new HTMLPurifier_Exception(
 2245:                         "Cannot retrieve raw version without specifying %$type.DefinitionID"
 2246:                     );
 2247:                 }
 2248:             }
 2249:             if (!empty($this->definitions[$type])) {
 2250:                 $def = $this->definitions[$type];
 2251:                 if ($def->setup && !$optimized) {
 2252:                     $extra = $this->chatty ?
 2253:                         " (try moving this code block earlier in your initialization)" :
 2254:                         "";
 2255:                     throw new HTMLPurifier_Exception(
 2256:                         "Cannot retrieve raw definition after it has already been setup" .
 2257:                         $extra
 2258:                     );
 2259:                 }
 2260:                 if ($def->optimized === null) {
 2261:                     $extra = $this->chatty ? " (try flushing your cache)" : "";
 2262:                     throw new HTMLPurifier_Exception(
 2263:                         "Optimization status of definition is unknown" . $extra
 2264:                     );
 2265:                 }
 2266:                 if ($def->optimized !== $optimized) {
 2267:                     $msg = $optimized ? "optimized" : "unoptimized";
 2268:                     $extra = $this->chatty ?
 2269:                         " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
 2270:                         : "";
 2271:                     throw new HTMLPurifier_Exception(
 2272:                         "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
 2273:                     );
 2274:                 }
 2275:             }
 2276:             // check if definition was in memory
 2277:             if ($def) {
 2278:                 if ($def->setup) {
 2279:                     // invariant: $optimized === true (checked above)
 2280:                     return null;
 2281:                 } else {
 2282:                     return $def;
 2283:                 }
 2284:             }
 2285:             // if optimized, check if definition was in cache
 2286:             // (because we do the memory check first, this formulation
 2287:             // is prone to cache slamming, but I think
 2288:             // guaranteeing that either /all/ of the raw
 2289:             // setup code or /none/ of it is run is more important.)
 2290:             if ($optimized) {
 2291:                 // This code path only gets run once; once we put
 2292:                 // something in $definitions (which is guaranteed by the
 2293:                 // trailing code), we always short-circuit above.
 2294:                 $def = $cache->get($this);
 2295:                 if ($def) {
 2296:                     // save the full definition for later, but don't
 2297:                     // return it yet
 2298:                     $this->definitions[$type] = $def;
 2299:                     return null;
 2300:                 }
 2301:             }
 2302:             // check invariants for creation
 2303:             if (!$optimized) {
 2304:                 if (!is_null($this->get($type . '.DefinitionID'))) {
 2305:                     if ($this->chatty) {
 2306:                         $this->triggerError(
 2307:                             'Due to a documentation error in previous version of HTML Purifier, your ' .
 2308:                             'definitions are not being cached.  If this is OK, you can remove the ' .
 2309:                             '%$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, ' .
 2310:                             'modify your code to use maybeGetRawDefinition, and test if the returned ' .
 2311:                             'value is null before making any edits (if it is null, that means that a ' .
 2312:                             'cached version is available, and no raw operations are necessary).  See ' .
 2313:                             '<a href="http://htmlpurifier.org/docs/enduser-customize.html#optimized">' .
 2314:                             'Customize</a> for more details',
 2315:                             E_USER_WARNING
 2316:                         );
 2317:                     } else {
 2318:                         $this->triggerError(
 2319:                             "Useless DefinitionID declaration",
 2320:                             E_USER_WARNING
 2321:                         );
 2322:                     }
 2323:                 }
 2324:             }
 2325:             // initialize it
 2326:             $def = $this->initDefinition($type);
 2327:             $def->optimized = $optimized;
 2328:             return $def;
 2329:         }
 2330:         throw new HTMLPurifier_Exception("The impossible happened!");
 2331:     }
 2332: 
 2333:     /**
 2334:      * Initialise definition
 2335:      *
 2336:      * @param string $type What type of definition to create
 2337:      *
 2338:      * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
 2339:      * @throws HTMLPurifier_Exception
 2340:      */
 2341:     private function initDefinition($type)
 2342:     {
 2343:         // quick checks failed, let's create the object
 2344:         if ($type == 'HTML') {
 2345:             $def = new HTMLPurifier_HTMLDefinition();
 2346:         } elseif ($type == 'CSS') {
 2347:             $def = new HTMLPurifier_CSSDefinition();
 2348:         } elseif ($type == 'URI') {
 2349:             $def = new HTMLPurifier_URIDefinition();
 2350:         } else {
 2351:             throw new HTMLPurifier_Exception(
 2352:                 "Definition of $type type not supported"
 2353:             );
 2354:         }
 2355:         $this->definitions[$type] = $def;
 2356:         return $def;
 2357:     }
 2358: 
 2359:     public function maybeGetRawDefinition($name)
 2360:     {
 2361:         return $this->getDefinition($name, true, true);
 2362:     }
 2363: 
 2364:     public function maybeGetRawHTMLDefinition()
 2365:     {
 2366:         return $this->getDefinition('HTML', true, true);
 2367:     }
 2368: 
 2369:     public function maybeGetRawCSSDefinition()
 2370:     {
 2371:         return $this->getDefinition('CSS', true, true);
 2372:     }
 2373: 
 2374:     public function maybeGetRawURIDefinition()
 2375:     {
 2376:         return $this->getDefinition('URI', true, true);
 2377:     }
 2378: 
 2379:     /**
 2380:      * Loads configuration values from an array with the following structure:
 2381:      * Namespace.Directive => Value
 2382:      *
 2383:      * @param array $config_array Configuration associative array
 2384:      */
 2385:     public function loadArray($config_array)
 2386:     {
 2387:         if ($this->isFinalized('Cannot load directives after finalization')) {
 2388:             return;
 2389:         }
 2390:         foreach ($config_array as $key => $value) {
 2391:             $key = str_replace('_', '.', $key);
 2392:             if (strpos($key, '.') !== false) {
 2393:                 $this->set($key, $value);
 2394:             } else {
 2395:                 $namespace = $key;
 2396:                 $namespace_values = $value;
 2397:                 foreach ($namespace_values as $directive => $value2) {
 2398:                     $this->set($namespace .'.'. $directive, $value2);
 2399:                 }
 2400:             }
 2401:         }
 2402:     }
 2403: 
 2404:     /**
 2405:      * Returns a list of array(namespace, directive) for all directives
 2406:      * that are allowed in a web-form context as per an allowed
 2407:      * namespaces/directives list.
 2408:      *
 2409:      * @param array $allowed List of allowed namespaces/directives
 2410:      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2411:      *
 2412:      * @return array
 2413:      */
 2414:     public static function getAllowedDirectivesForForm($allowed, $schema = null)
 2415:     {
 2416:         if (!$schema) {
 2417:             $schema = HTMLPurifier_ConfigSchema::instance();
 2418:         }
 2419:         if ($allowed !== true) {
 2420:             if (is_string($allowed)) {
 2421:                 $allowed = array($allowed);
 2422:             }
 2423:             $allowed_ns = array();
 2424:             $allowed_directives = array();
 2425:             $blacklisted_directives = array();
 2426:             foreach ($allowed as $ns_or_directive) {
 2427:                 if (strpos($ns_or_directive, '.') !== false) {
 2428:                     // directive
 2429:                     if ($ns_or_directive[0] == '-') {
 2430:                         $blacklisted_directives[substr($ns_or_directive, 1)] = true;
 2431:                     } else {
 2432:                         $allowed_directives[$ns_or_directive] = true;
 2433:                     }
 2434:                 } else {
 2435:                     // namespace
 2436:                     $allowed_ns[$ns_or_directive] = true;
 2437:                 }
 2438:             }
 2439:         }
 2440:         $ret = array();
 2441:         foreach ($schema->info as $key => $def) {
 2442:             list($ns, $directive) = explode('.', $key, 2);
 2443:             if ($allowed !== true) {
 2444:                 if (isset($blacklisted_directives["$ns.$directive"])) {
 2445:                     continue;
 2446:                 }
 2447:                 if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) {
 2448:                     continue;
 2449:                 }
 2450:             }
 2451:             if (isset($def->isAlias)) {
 2452:                 continue;
 2453:             }
 2454:             if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
 2455:                 continue;
 2456:             }
 2457:             $ret[] = array($ns, $directive);
 2458:         }
 2459:         return $ret;
 2460:     }
 2461: 
 2462:     /**
 2463:      * Loads configuration values from $_GET/$_POST that were posted
 2464:      * via ConfigForm
 2465:      *
 2466:      * @param array $array $_GET or $_POST array to import
 2467:      * @param string|bool $index Index/name that the config variables are in
 2468:      * @param array|bool $allowed List of allowed namespaces/directives
 2469:      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2470:      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2471:      *
 2472:      * @return mixed
 2473:      */
 2474:     public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
 2475:     {
 2476:         $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
 2477:         $config = HTMLPurifier_Config::create($ret, $schema);
 2478:         return $config;
 2479:     }
 2480: 
 2481:     /**
 2482:      * Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
 2483:      *
 2484:      * @param array $array $_GET or $_POST array to import
 2485:      * @param string|bool $index Index/name that the config variables are in
 2486:      * @param array|bool $allowed List of allowed namespaces/directives
 2487:      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2488:      */
 2489:     public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
 2490:     {
 2491:          $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
 2492:          $this->loadArray($ret);
 2493:     }
 2494: 
 2495:     /**
 2496:      * Prepares an array from a form into something usable for the more
 2497:      * strict parts of HTMLPurifier_Config
 2498:      *
 2499:      * @param array $array $_GET or $_POST array to import
 2500:      * @param string|bool $index Index/name that the config variables are in
 2501:      * @param array|bool $allowed List of allowed namespaces/directives
 2502:      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2503:      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2504:      *
 2505:      * @return array
 2506:      */
 2507:     public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
 2508:     {
 2509:         if ($index !== false) {
 2510:             $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
 2511:         }
 2512:         $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
 2513: 
 2514:         $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
 2515:         $ret = array();
 2516:         foreach ($allowed as $key) {
 2517:             list($ns, $directive) = $key;
 2518:             $skey = "$ns.$directive";
 2519:             if (!empty($array["Null_$skey"])) {
 2520:                 $ret[$ns][$directive] = null;
 2521:                 continue;
 2522:             }
 2523:             if (!isset($array[$skey])) {
 2524:                 continue;
 2525:             }
 2526:             $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
 2527:             $ret[$ns][$directive] = $value;
 2528:         }
 2529:         return $ret;
 2530:     }
 2531: 
 2532:     /**
 2533:      * Loads configuration values from an ini file
 2534:      *
 2535:      * @param string $filename Name of ini file
 2536:      */
 2537:     public function loadIni($filename)
 2538:     {
 2539:         if ($this->isFinalized('Cannot load directives after finalization')) {
 2540:             return;
 2541:         }
 2542:         $array = parse_ini_file($filename, true);
 2543:         $this->loadArray($array);
 2544:     }
 2545: 
 2546:     /**
 2547:      * Checks whether or not the configuration object is finalized.
 2548:      *
 2549:      * @param string|bool $error String error message, or false for no error
 2550:      *
 2551:      * @return bool
 2552:      */
 2553:     public function isFinalized($error = false)
 2554:     {
 2555:         if ($this->finalized && $error) {
 2556:             $this->triggerError($error, E_USER_ERROR);
 2557:         }
 2558:         return $this->finalized;
 2559:     }
 2560: 
 2561:     /**
 2562:      * Finalizes configuration only if auto finalize is on and not
 2563:      * already finalized
 2564:      */
 2565:     public function autoFinalize()
 2566:     {
 2567:         if ($this->autoFinalize) {
 2568:             $this->finalize();
 2569:         } else {
 2570:             $this->plist->squash(true);
 2571:         }
 2572:     }
 2573: 
 2574:     /**
 2575:      * Finalizes a configuration object, prohibiting further change
 2576:      */
 2577:     public function finalize()
 2578:     {
 2579:         $this->finalized = true;
 2580:         $this->parser = null;
 2581:     }
 2582: 
 2583:     /**
 2584:      * Produces a nicely formatted error message by supplying the
 2585:      * stack frame information OUTSIDE of HTMLPurifier_Config.
 2586:      *
 2587:      * @param string $msg An error message
 2588:      * @param int $no An error number
 2589:      */
 2590:     protected function triggerError($msg, $no)
 2591:     {
 2592:         // determine previous stack frame
 2593:         $extra = '';
 2594:         if ($this->chatty) {
 2595:             $trace = debug_backtrace();
 2596:             // zip(tail(trace), trace) -- but PHP is not Haskell har har
 2597:             for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
 2598:                 // XXX this is not correct on some versions of HTML Purifier
 2599:                 if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
 2600:                     continue;
 2601:                 }
 2602:                 $frame = $trace[$i];
 2603:                 $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
 2604:                 break;
 2605:             }
 2606:         }
 2607:         trigger_error($msg . $extra, $no);
 2608:     }
 2609: 
 2610:     /**
 2611:      * Returns a serialized form of the configuration object that can
 2612:      * be reconstituted.
 2613:      *
 2614:      * @return string
 2615:      */
 2616:     public function serialize()
 2617:     {
 2618:         $this->getDefinition('HTML');
 2619:         $this->getDefinition('CSS');
 2620:         $this->getDefinition('URI');
 2621:         return serialize($this);
 2622:     }
 2623: 
 2624: }
 2625: 
 2626: 
 2627: 
 2628: 
 2629: 
 2630: /**
 2631:  * Configuration definition, defines directives and their defaults.
 2632:  */
 2633: class HTMLPurifier_ConfigSchema
 2634: {
 2635:     /**
 2636:      * Defaults of the directives and namespaces.
 2637:      * @type array
 2638:      * @note This shares the exact same structure as HTMLPurifier_Config::$conf
 2639:      */
 2640:     public $defaults = array();
 2641: 
 2642:     /**
 2643:      * The default property list. Do not edit this property list.
 2644:      * @type array
 2645:      */
 2646:     public $defaultPlist;
 2647: 
 2648:     /**
 2649:      * Definition of the directives.
 2650:      * The structure of this is:
 2651:      *
 2652:      *  array(
 2653:      *      'Namespace' => array(
 2654:      *          'Directive' => new stdclass(),
 2655:      *      )
 2656:      *  )
 2657:      *
 2658:      * The stdclass may have the following properties:
 2659:      *
 2660:      *  - If isAlias isn't set:
 2661:      *      - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
 2662:      *      - allow_null: If set, this directive allows null values
 2663:      *      - aliases: If set, an associative array of value aliases to real values
 2664:      *      - allowed: If set, a lookup array of allowed (string) values
 2665:      *  - If isAlias is set:
 2666:      *      - namespace: Namespace this directive aliases to
 2667:      *      - name: Directive name this directive aliases to
 2668:      *
 2669:      * In certain degenerate cases, stdclass will actually be an integer. In
 2670:      * that case, the value is equivalent to an stdclass with the type
 2671:      * property set to the integer. If the integer is negative, type is
 2672:      * equal to the absolute value of integer, and allow_null is true.
 2673:      *
 2674:      * This class is friendly with HTMLPurifier_Config. If you need introspection
 2675:      * about the schema, you're better of using the ConfigSchema_Interchange,
 2676:      * which uses more memory but has much richer information.
 2677:      * @type array
 2678:      */
 2679:     public $info = array();
 2680: 
 2681:     /**
 2682:      * Application-wide singleton
 2683:      * @type HTMLPurifier_ConfigSchema
 2684:      */
 2685:     protected static $singleton;
 2686: 
 2687:     public function __construct()
 2688:     {
 2689:         $this->defaultPlist = new HTMLPurifier_PropertyList();
 2690:     }
 2691: 
 2692:     /**
 2693:      * Unserializes the default ConfigSchema.
 2694:      * @return HTMLPurifier_ConfigSchema
 2695:      */
 2696:     public static function makeFromSerial()
 2697:     {
 2698:         $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
 2699:         $r = unserialize($contents);
 2700:         if (!$r) {
 2701:             $hash = sha1($contents);
 2702:             trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
 2703:         }
 2704:         return $r;
 2705:     }
 2706: 
 2707:     /**
 2708:      * Retrieves an instance of the application-wide configuration definition.
 2709:      * @param HTMLPurifier_ConfigSchema $prototype
 2710:      * @return HTMLPurifier_ConfigSchema
 2711:      */
 2712:     public static function instance($prototype = null)
 2713:     {
 2714:         if ($prototype !== null) {
 2715:             HTMLPurifier_ConfigSchema::$singleton = $prototype;
 2716:         } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
 2717:             HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
 2718:         }
 2719:         return HTMLPurifier_ConfigSchema::$singleton;
 2720:     }
 2721: 
 2722:     /**
 2723:      * Defines a directive for configuration
 2724:      * @warning Will fail of directive's namespace is defined.
 2725:      * @warning This method's signature is slightly different from the legacy
 2726:      *          define() static method! Beware!
 2727:      * @param string $key Name of directive
 2728:      * @param mixed $default Default value of directive
 2729:      * @param string $type Allowed type of the directive. See
 2730:      *      HTMLPurifier_DirectiveDef::$type for allowed values
 2731:      * @param bool $allow_null Whether or not to allow null values
 2732:      */
 2733:     public function add($key, $default, $type, $allow_null)
 2734:     {
 2735:         $obj = new stdclass();
 2736:         $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
 2737:         if ($allow_null) {
 2738:             $obj->allow_null = true;
 2739:         }
 2740:         $this->info[$key] = $obj;
 2741:         $this->defaults[$key] = $default;
 2742:         $this->defaultPlist->set($key, $default);
 2743:     }
 2744: 
 2745:     /**
 2746:      * Defines a directive value alias.
 2747:      *
 2748:      * Directive value aliases are convenient for developers because it lets
 2749:      * them set a directive to several values and get the same result.
 2750:      * @param string $key Name of Directive
 2751:      * @param array $aliases Hash of aliased values to the real alias
 2752:      */
 2753:     public function addValueAliases($key, $aliases)
 2754:     {
 2755:         if (!isset($this->info[$key]->aliases)) {
 2756:             $this->info[$key]->aliases = array();
 2757:         }
 2758:         foreach ($aliases as $alias => $real) {
 2759:             $this->info[$key]->aliases[$alias] = $real;
 2760:         }
 2761:     }
 2762: 
 2763:     /**
 2764:      * Defines a set of allowed values for a directive.
 2765:      * @warning This is slightly different from the corresponding static
 2766:      *          method definition.
 2767:      * @param string $key Name of directive
 2768:      * @param array $allowed Lookup array of allowed values
 2769:      */
 2770:     public function addAllowedValues($key, $allowed)
 2771:     {
 2772:         $this->info[$key]->allowed = $allowed;
 2773:     }
 2774: 
 2775:     /**
 2776:      * Defines a directive alias for backwards compatibility
 2777:      * @param string $key Directive that will be aliased
 2778:      * @param string $new_key Directive that the alias will be to
 2779:      */
 2780:     public function addAlias($key, $new_key)
 2781:     {
 2782:         $obj = new stdclass;
 2783:         $obj->key = $new_key;
 2784:         $obj->isAlias = true;
 2785:         $this->info[$key] = $obj;
 2786:     }
 2787: 
 2788:     /**
 2789:      * Replaces any stdclass that only has the type property with type integer.
 2790:      */
 2791:     public function postProcess()
 2792:     {
 2793:         foreach ($this->info as $key => $v) {
 2794:             if (count((array) $v) == 1) {
 2795:                 $this->info[$key] = $v->type;
 2796:             } elseif (count((array) $v) == 2 && isset($v->allow_null)) {
 2797:                 $this->info[$key] = -$v->type;
 2798:             }
 2799:         }
 2800:     }
 2801: }
 2802: 
 2803: 
 2804: 
 2805: 
 2806: 
 2807: /**
 2808:  * @todo Unit test
 2809:  */
 2810: class HTMLPurifier_ContentSets
 2811: {
 2812: 
 2813:     /**
 2814:      * List of content set strings (pipe separators) indexed by name.
 2815:      * @type array
 2816:      */
 2817:     public $info = array();
 2818: 
 2819:     /**
 2820:      * List of content set lookups (element => true) indexed by name.
 2821:      * @type array
 2822:      * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
 2823:      */
 2824:     public $lookup = array();
 2825: 
 2826:     /**
 2827:      * Synchronized list of defined content sets (keys of info).
 2828:      * @type array
 2829:      */
 2830:     protected $keys = array();
 2831:     /**
 2832:      * Synchronized list of defined content values (values of info).
 2833:      * @type array
 2834:      */
 2835:     protected $values = array();
 2836: 
 2837:     /**
 2838:      * Merges in module's content sets, expands identifiers in the content
 2839:      * sets and populates the keys, values and lookup member variables.
 2840:      * @param HTMLPurifier_HTMLModule[] $modules List of HTMLPurifier_HTMLModule
 2841:      */
 2842:     public function __construct($modules)
 2843:     {
 2844:         if (!is_array($modules)) {
 2845:             $modules = array($modules);
 2846:         }
 2847:         // populate content_sets based on module hints
 2848:         // sorry, no way of overloading
 2849:         foreach ($modules as $module) {
 2850:             foreach ($module->content_sets as $key => $value) {
 2851:                 $temp = $this->convertToLookup($value);
 2852:                 if (isset($this->lookup[$key])) {
 2853:                     // add it into the existing content set
 2854:                     $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
 2855:                 } else {
 2856:                     $this->lookup[$key] = $temp;
 2857:                 }
 2858:             }
 2859:         }
 2860:         $old_lookup = false;
 2861:         while ($old_lookup !== $this->lookup) {
 2862:             $old_lookup = $this->lookup;
 2863:             foreach ($this->lookup as $i => $set) {
 2864:                 $add = array();
 2865:                 foreach ($set as $element => $x) {
 2866:                     if (isset($this->lookup[$element])) {
 2867:                         $add += $this->lookup[$element];
 2868:                         unset($this->lookup[$i][$element]);
 2869:                     }
 2870:                 }
 2871:                 $this->lookup[$i] += $add;
 2872:             }
 2873:         }
 2874: 
 2875:         foreach ($this->lookup as $key => $lookup) {
 2876:             $this->info[$key] = implode(' | ', array_keys($lookup));
 2877:         }
 2878:         $this->keys   = array_keys($this->info);
 2879:         $this->values = array_values($this->info);
 2880:     }
 2881: 
 2882:     /**
 2883:      * Accepts a definition; generates and assigns a ChildDef for it
 2884:      * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef reference
 2885:      * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
 2886:      */
 2887:     public function generateChildDef(&$def, $module)
 2888:     {
 2889:         if (!empty($def->child)) { // already done!
 2890:             return;
 2891:         }
 2892:         $content_model = $def->content_model;
 2893:         if (is_string($content_model)) {
 2894:             // Assume that $this->keys is alphanumeric
 2895:             $def->content_model = preg_replace_callback(
 2896:                 '/\b(' . implode('|', $this->keys) . ')\b/',
 2897:                 array($this, 'generateChildDefCallback'),
 2898:                 $content_model
 2899:             );
 2900:             //$def->content_model = str_replace(
 2901:             //    $this->keys, $this->values, $content_model);
 2902:         }
 2903:         $def->child = $this->getChildDef($def, $module);
 2904:     }
 2905: 
 2906:     public function generateChildDefCallback($matches)
 2907:     {
 2908:         return $this->info[$matches[0]];
 2909:     }
 2910: 
 2911:     /**
 2912:      * Instantiates a ChildDef based on content_model and content_model_type
 2913:      * member variables in HTMLPurifier_ElementDef
 2914:      * @note This will also defer to modules for custom HTMLPurifier_ChildDef
 2915:      *       subclasses that need content set expansion
 2916:      * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef to have ChildDef extracted
 2917:      * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
 2918:      * @return HTMLPurifier_ChildDef corresponding to ElementDef
 2919:      */
 2920:     public function getChildDef($def, $module)
 2921:     {
 2922:         $value = $def->content_model;
 2923:         if (is_object($value)) {
 2924:             trigger_error(
 2925:                 'Literal object child definitions should be stored in '.
 2926:                 'ElementDef->child not ElementDef->content_model',
 2927:                 E_USER_NOTICE
 2928:             );
 2929:             return $value;
 2930:         }
 2931:         switch ($def->content_model_type) {
 2932:             case 'required':
 2933:                 return new HTMLPurifier_ChildDef_Required($value);
 2934:             case 'optional':
 2935:                 return new HTMLPurifier_ChildDef_Optional($value);
 2936:             case 'empty':
 2937:                 return new HTMLPurifier_ChildDef_Empty();
 2938:             case 'custom':
 2939:                 return new HTMLPurifier_ChildDef_Custom($value);
 2940:         }
 2941:         // defer to its module
 2942:         $return = false;
 2943:         if ($module->defines_child_def) { // save a func call
 2944:             $return = $module->getChildDef($def);
 2945:         }
 2946:         if ($return !== false) {
 2947:             return $return;
 2948:         }
 2949:         // error-out
 2950:         trigger_error(
 2951:             'Could not determine which ChildDef class to instantiate',
 2952:             E_USER_ERROR
 2953:         );
 2954:         return false;
 2955:     }
 2956: 
 2957:     /**
 2958:      * Converts a string list of elements separated by pipes into
 2959:      * a lookup array.
 2960:      * @param string $string List of elements
 2961:      * @return array Lookup array of elements
 2962:      */
 2963:     protected function convertToLookup($string)
 2964:     {
 2965:         $array = explode('|', str_replace(' ', '', $string));
 2966:         $ret = array();
 2967:         foreach ($array as $k) {
 2968:             $ret[$k] = true;
 2969:         }
 2970:         return $ret;
 2971:     }
 2972: }
 2973: 
 2974: 
 2975: 
 2976: 
 2977: 
 2978: /**
 2979:  * Registry object that contains information about the current context.
 2980:  * @warning Is a bit buggy when variables are set to null: it thinks
 2981:  *          they don't exist! So use false instead, please.
 2982:  * @note Since the variables Context deals with may not be objects,
 2983:  *       references are very important here! Do not remove!
 2984:  */
 2985: class HTMLPurifier_Context
 2986: {
 2987: 
 2988:     /**
 2989:      * Private array that stores the references.
 2990:      * @type array
 2991:      */
 2992:     private $_storage = array();
 2993: 
 2994:     /**
 2995:      * Registers a variable into the context.
 2996:      * @param string $name String name
 2997:      * @param mixed $ref Reference to variable to be registered
 2998:      */
 2999:     public function register($name, &$ref)
 3000:     {
 3001:         if (array_key_exists($name, $this->_storage)) {
 3002:             trigger_error(
 3003:                 "Name $name produces collision, cannot re-register",
 3004:                 E_USER_ERROR
 3005:             );
 3006:             return;
 3007:         }
 3008:         $this->_storage[$name] =& $ref;
 3009:     }
 3010: 
 3011:     /**
 3012:      * Retrieves a variable reference from the context.
 3013:      * @param string $name String name
 3014:      * @param bool $ignore_error Boolean whether or not to ignore error
 3015:      * @return mixed
 3016:      */
 3017:     public function &get($name, $ignore_error = false)
 3018:     {
 3019:         if (!array_key_exists($name, $this->_storage)) {
 3020:             if (!$ignore_error) {
 3021:                 trigger_error(
 3022:                     "Attempted to retrieve non-existent variable $name",
 3023:                     E_USER_ERROR
 3024:                 );
 3025:             }
 3026:             $var = null; // so we can return by reference
 3027:             return $var;
 3028:         }
 3029:         return $this->_storage[$name];
 3030:     }
 3031: 
 3032:     /**
 3033:      * Destroys a variable in the context.
 3034:      * @param string $name String name
 3035:      */
 3036:     public function destroy($name)
 3037:     {
 3038:         if (!array_key_exists($name, $this->_storage)) {
 3039:             trigger_error(
 3040:                 "Attempted to destroy non-existent variable $name",
 3041:                 E_USER_ERROR
 3042:             );
 3043:             return;
 3044:         }
 3045:         unset($this->_storage[$name]);
 3046:     }
 3047: 
 3048:     /**
 3049:      * Checks whether or not the variable exists.
 3050:      * @param string $name String name
 3051:      * @return bool
 3052:      */
 3053:     public function exists($name)
 3054:     {
 3055:         return array_key_exists($name, $this->_storage);
 3056:     }
 3057: 
 3058:     /**
 3059:      * Loads a series of variables from an associative array
 3060:      * @param array $context_array Assoc array of variables to load
 3061:      */
 3062:     public function loadArray($context_array)
 3063:     {
 3064:         foreach ($context_array as $key => $discard) {
 3065:             $this->register($key, $context_array[$key]);
 3066:         }
 3067:     }
 3068: }
 3069: 
 3070: 
 3071: 
 3072: 
 3073: 
 3074: /**
 3075:  * Abstract class representing Definition cache managers that implements
 3076:  * useful common methods and is a factory.
 3077:  * @todo Create a separate maintenance file advanced users can use to
 3078:  *       cache their custom HTMLDefinition, which can be loaded
 3079:  *       via a configuration directive
 3080:  * @todo Implement memcached
 3081:  */
 3082: abstract class HTMLPurifier_DefinitionCache
 3083: {
 3084:     /**
 3085:      * @type string
 3086:      */
 3087:     public $type;
 3088: 
 3089:     /**
 3090:      * @param string $type Type of definition objects this instance of the
 3091:      *      cache will handle.
 3092:      */
 3093:     public function __construct($type)
 3094:     {
 3095:         $this->type = $type;
 3096:     }
 3097: 
 3098:     /**
 3099:      * Generates a unique identifier for a particular configuration
 3100:      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
 3101:      * @return string
 3102:      */
 3103:     public function generateKey($config)
 3104:     {
 3105:         return $config->version . ',' . // possibly replace with function calls
 3106:                $config->getBatchSerial($this->type) . ',' .
 3107:                $config->get($this->type . '.DefinitionRev');
 3108:     }
 3109: 
 3110:     /**
 3111:      * Tests whether or not a key is old with respect to the configuration's
 3112:      * version and revision number.
 3113:      * @param string $key Key to test
 3114:      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against
 3115:      * @return bool
 3116:      */
 3117:     public function isOld($key, $config)
 3118:     {
 3119:         if (substr_count($key, ',') < 2) {
 3120:             return true;
 3121:         }
 3122:         list($version, $hash, $revision) = explode(',', $key, 3);
 3123:         $compare = version_compare($version, $config->version);
 3124:         // version mismatch, is always old
 3125:         if ($compare != 0) {
 3126:             return true;
 3127:         }
 3128:         // versions match, ids match, check revision number
 3129:         if ($hash == $config->getBatchSerial($this->type) &&
 3130:             $revision < $config->get($this->type . '.DefinitionRev')) {
 3131:             return true;
 3132:         }
 3133:         return false;
 3134:     }
 3135: 
 3136:     /**
 3137:      * Checks if a definition's type jives with the cache's type
 3138:      * @note Throws an error on failure
 3139:      * @param HTMLPurifier_Definition $def Definition object to check
 3140:      * @return bool true if good, false if not
 3141:      */
 3142:     public function checkDefType($def)
 3143:     {
 3144:         if ($def->type !== $this->type) {
 3145:             trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
 3146:             return false;
 3147:         }
 3148:         return true;
 3149:     }
 3150: 
 3151:     /**
 3152:      * Adds a definition object to the cache
 3153:      * @param HTMLPurifier_Definition $def
 3154:      * @param HTMLPurifier_Config $config
 3155:      */
 3156:     abstract public function add($def, $config);
 3157: 
 3158:     /**
 3159:      * Unconditionally saves a definition object to the cache
 3160:      * @param HTMLPurifier_Definition $def
 3161:      * @param HTMLPurifier_Config $config
 3162:      */
 3163:     abstract public function set($def, $config);
 3164: 
 3165:     /**
 3166:      * Replace an object in the cache
 3167:      * @param HTMLPurifier_Definition $def
 3168:      * @param HTMLPurifier_Config $config
 3169:      */
 3170:     abstract public function replace($def, $config);
 3171: 
 3172:     /**
 3173:      * Retrieves a definition object from the cache
 3174:      * @param HTMLPurifier_Config $config
 3175:      */
 3176:     abstract public function get($config);
 3177: 
 3178:     /**
 3179:      * Removes a definition object to the cache
 3180:      * @param HTMLPurifier_Config $config
 3181:      */
 3182:     abstract public function remove($config);
 3183: 
 3184:     /**
 3185:      * Clears all objects from cache
 3186:      * @param HTMLPurifier_Config $config
 3187:      */
 3188:     abstract public function flush($config);
 3189: 
 3190:     /**
 3191:      * Clears all expired (older version or revision) objects from cache
 3192:      * @note Be carefuly implementing this method as flush. Flush must
 3193:      *       not interfere with other Definition types, and cleanup()
 3194:      *       should not be repeatedly called by userland code.
 3195:      * @param HTMLPurifier_Config $config
 3196:      */
 3197:     abstract public function cleanup($config);
 3198: }
 3199: 
 3200: 
 3201: 
 3202: 
 3203: 
 3204: /**
 3205:  * Responsible for creating definition caches.
 3206:  */
 3207: class HTMLPurifier_DefinitionCacheFactory
 3208: {
 3209:     /**
 3210:      * @type array
 3211:      */
 3212:     protected $caches = array('Serializer' => array());
 3213: 
 3214:     /**
 3215:      * @type array
 3216:      */
 3217:     protected $implementations = array();
 3218: 
 3219:     /**
 3220:      * @type HTMLPurifier_DefinitionCache_Decorator[]
 3221:      */
 3222:     protected $decorators = array();
 3223: 
 3224:     /**
 3225:      * Initialize default decorators
 3226:      */
 3227:     public function setup()
 3228:     {
 3229:         $this->addDecorator('Cleanup');
 3230:     }
 3231: 
 3232:     /**
 3233:      * Retrieves an instance of global definition cache factory.
 3234:      * @param HTMLPurifier_DefinitionCacheFactory $prototype
 3235:      * @return HTMLPurifier_DefinitionCacheFactory
 3236:      */
 3237:     public static function instance($prototype = null)
 3238:     {
 3239:         static $instance;
 3240:         if ($prototype !== null) {
 3241:             $instance = $prototype;
 3242:         } elseif ($instance === null || $prototype === true) {
 3243:             $instance = new HTMLPurifier_DefinitionCacheFactory();
 3244:             $instance->setup();
 3245:         }
 3246:         return $instance;
 3247:     }
 3248: 
 3249:     /**
 3250:      * Registers a new definition cache object
 3251:      * @param string $short Short name of cache object, for reference
 3252:      * @param string $long Full class name of cache object, for construction
 3253:      */
 3254:     public function register($short, $long)
 3255:     {
 3256:         $this->implementations[$short] = $long;
 3257:     }
 3258: 
 3259:     /**
 3260:      * Factory method that creates a cache object based on configuration
 3261:      * @param string $type Name of definitions handled by cache
 3262:      * @param HTMLPurifier_Config $config Config instance
 3263:      * @return mixed
 3264:      */
 3265:     public function create($type, $config)
 3266:     {
 3267:         $method = $config->get('Cache.DefinitionImpl');
 3268:         if ($method === null) {
 3269:             return new HTMLPurifier_DefinitionCache_Null($type);
 3270:         }
 3271:         if (!empty($this->caches[$method][$type])) {
 3272:             return $this->caches[$method][$type];
 3273:         }
 3274:         if (isset($this->implementations[$method]) &&
 3275:             class_exists($class = $this->implementations[$method], false)) {
 3276:             $cache = new $class($type);
 3277:         } else {
 3278:             if ($method != 'Serializer') {
 3279:                 trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
 3280:             }
 3281:             $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
 3282:         }
 3283:         foreach ($this->decorators as $decorator) {
 3284:             $new_cache = $decorator->decorate($cache);
 3285:             // prevent infinite recursion in PHP 4
 3286:             unset($cache);
 3287:             $cache = $new_cache;
 3288:         }
 3289:         $this->caches[$method][$type] = $cache;
 3290:         return $this->caches[$method][$type];
 3291:     }
 3292: 
 3293:     /**
 3294:      * Registers a decorator to add to all new cache objects
 3295:      * @param HTMLPurifier_DefinitionCache_Decorator|string $decorator An instance or the name of a decorator
 3296:      */
 3297:     public function addDecorator($decorator)
 3298:     {
 3299:         if (is_string($decorator)) {
 3300:             $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
 3301:             $decorator = new $class;
 3302:         }
 3303:         $this->decorators[$decorator->name] = $decorator;
 3304:     }
 3305: }
 3306: 
 3307: 
 3308: 
 3309: 
 3310: 
 3311: /**
 3312:  * Represents a document type, contains information on which modules
 3313:  * need to be loaded.
 3314:  * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
 3315:  *       If structure changes, please update that function.
 3316:  */
 3317: class HTMLPurifier_Doctype
 3318: {
 3319:     /**
 3320:      * Full name of doctype
 3321:      * @type string
 3322:      */
 3323:     public $name;
 3324: 
 3325:     /**
 3326:      * List of standard modules (string identifiers or literal objects)
 3327:      * that this doctype uses
 3328:      * @type array
 3329:      */
 3330:     public $modules = array();
 3331: 
 3332:     /**
 3333:      * List of modules to use for tidying up code
 3334:      * @type array
 3335:      */
 3336:     public $tidyModules = array();
 3337: 
 3338:     /**
 3339:      * Is the language derived from XML (i.e. XHTML)?
 3340:      * @type bool
 3341:      */
 3342:     public $xml = true;
 3343: 
 3344:     /**
 3345:      * List of aliases for this doctype
 3346:      * @type array
 3347:      */
 3348:     public $aliases = array();
 3349: 
 3350:     /**
 3351:      * Public DTD identifier
 3352:      * @type string
 3353:      */
 3354:     public $dtdPublic;
 3355: 
 3356:     /**
 3357:      * System DTD identifier
 3358:      * @type string
 3359:      */
 3360:     public $dtdSystem;
 3361: 
 3362:     public function __construct(
 3363:         $name = null,
 3364:         $xml = true,
 3365:         $modules = array(),
 3366:         $tidyModules = array(),
 3367:         $aliases = array(),
 3368:         $dtd_public = null,
 3369:         $dtd_system = null
 3370:     ) {
 3371:         $this->name         = $name;
 3372:         $this->xml          = $xml;
 3373:         $this->modules      = $modules;
 3374:         $this->tidyModules  = $tidyModules;
 3375:         $this->aliases      = $aliases;
 3376:         $this->dtdPublic    = $dtd_public;
 3377:         $this->dtdSystem    = $dtd_system;
 3378:     }
 3379: }
 3380: 
 3381: 
 3382: 
 3383: 
 3384: 
 3385: class HTMLPurifier_DoctypeRegistry
 3386: {
 3387: 
 3388:     /**
 3389:      * Hash of doctype names to doctype objects.
 3390:      * @type array
 3391:      */
 3392:     protected $doctypes;
 3393: 
 3394:     /**
 3395:      * Lookup table of aliases to real doctype names.
 3396:      * @type array
 3397:      */
 3398:     protected $aliases;
 3399: 
 3400:     /**
 3401:      * Registers a doctype to the registry
 3402:      * @note Accepts a fully-formed doctype object, or the
 3403:      *       parameters for constructing a doctype object
 3404:      * @param string $doctype Name of doctype or literal doctype object
 3405:      * @param bool $xml
 3406:      * @param array $modules Modules doctype will load
 3407:      * @param array $tidy_modules Modules doctype will load for certain modes
 3408:      * @param array $aliases Alias names for doctype
 3409:      * @param string $dtd_public
 3410:      * @param string $dtd_system
 3411:      * @return HTMLPurifier_Doctype Editable registered doctype
 3412:      */
 3413:     public function register(
 3414:         $doctype,
 3415:         $xml = true,
 3416:         $modules = array(),
 3417:         $tidy_modules = array(),
 3418:         $aliases = array(),
 3419:         $dtd_public = null,
 3420:         $dtd_system = null
 3421:     ) {
 3422:         if (!is_array($modules)) {
 3423:             $modules = array($modules);
 3424:         }
 3425:         if (!is_array($tidy_modules)) {
 3426:             $tidy_modules = array($tidy_modules);
 3427:         }
 3428:         if (!is_array($aliases)) {
 3429:             $aliases = array($aliases);
 3430:         }
 3431:         if (!is_object($doctype)) {
 3432:             $doctype = new HTMLPurifier_Doctype(
 3433:                 $doctype,
 3434:                 $xml,
 3435:                 $modules,
 3436:                 $tidy_modules,
 3437:                 $aliases,
 3438:                 $dtd_public,
 3439:                 $dtd_system
 3440:             );
 3441:         }
 3442:         $this->doctypes[$doctype->name] = $doctype;
 3443:         $name = $doctype->name;
 3444:         // hookup aliases
 3445:         foreach ($doctype->aliases as $alias) {
 3446:             if (isset($this->doctypes[$alias])) {
 3447:                 continue;
 3448:             }
 3449:             $this->aliases[$alias] = $name;
 3450:         }
 3451:         // remove old aliases
 3452:         if (isset($this->aliases[$name])) {
 3453:             unset($this->aliases[$name]);
 3454:         }
 3455:         return $doctype;
 3456:     }
 3457: 
 3458:     /**
 3459:      * Retrieves reference to a doctype of a certain name
 3460:      * @note This function resolves aliases
 3461:      * @note When possible, use the more fully-featured make()
 3462:      * @param string $doctype Name of doctype
 3463:      * @return HTMLPurifier_Doctype Editable doctype object
 3464:      */
 3465:     public function get($doctype)
 3466:     {
 3467:         if (isset($this->aliases[$doctype])) {
 3468:             $doctype = $this->aliases[$doctype];
 3469:         }
 3470:         if (!isset($this->doctypes[$doctype])) {
 3471:             trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
 3472:             $anon = new HTMLPurifier_Doctype($doctype);
 3473:             return $anon;
 3474:         }
 3475:         return $this->doctypes[$doctype];
 3476:     }
 3477: 
 3478:     /**
 3479:      * Creates a doctype based on a configuration object,
 3480:      * will perform initialization on the doctype
 3481:      * @note Use this function to get a copy of doctype that config
 3482:      *       can hold on to (this is necessary in order to tell
 3483:      *       Generator whether or not the current document is XML
 3484:      *       based or not).
 3485:      * @param HTMLPurifier_Config $config
 3486:      * @return HTMLPurifier_Doctype
 3487:      */
 3488:     public function make($config)
 3489:     {
 3490:         return clone $this->get($this->getDoctypeFromConfig($config));
 3491:     }
 3492: 
 3493:     /**
 3494:      * Retrieves the doctype from the configuration object
 3495:      * @param HTMLPurifier_Config $config
 3496:      * @return string
 3497:      */
 3498:     public function getDoctypeFromConfig($config)
 3499:     {
 3500:         // recommended test
 3501:         $doctype = $config->get('HTML.Doctype');
 3502:         if (!empty($doctype)) {
 3503:             return $doctype;
 3504:         }
 3505:         $doctype = $config->get('HTML.CustomDoctype');
 3506:         if (!empty($doctype)) {
 3507:             return $doctype;
 3508:         }
 3509:         // backwards-compatibility
 3510:         if ($config->get('HTML.XHTML')) {
 3511:             $doctype = 'XHTML 1.0';
 3512:         } else {
 3513:             $doctype = 'HTML 4.01';
 3514:         }
 3515:         if ($config->get('HTML.Strict')) {
 3516:             $doctype .= ' Strict';
 3517:         } else {
 3518:             $doctype .= ' Transitional';
 3519:         }
 3520:         return $doctype;
 3521:     }
 3522: }
 3523: 
 3524: 
 3525: 
 3526: 
 3527: 
 3528: /**
 3529:  * Structure that stores an HTML element definition. Used by
 3530:  * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
 3531:  * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
 3532:  *       Please update that class too.
 3533:  * @warning If you add new properties to this class, you MUST update
 3534:  *          the mergeIn() method.
 3535:  */
 3536: class HTMLPurifier_ElementDef
 3537: {
 3538:     /**
 3539:      * Does the definition work by itself, or is it created solely
 3540:      * for the purpose of merging into another definition?
 3541:      * @type bool
 3542:      */
 3543:     public $standalone = true;
 3544: 
 3545:     /**
 3546:      * Associative array of attribute name to HTMLPurifier_AttrDef.
 3547:      * @type array
 3548:      * @note Before being processed by HTMLPurifier_AttrCollections
 3549:      *       when modules are finalized during
 3550:      *       HTMLPurifier_HTMLDefinition->setup(), this array may also
 3551:      *       contain an array at index 0 that indicates which attribute
 3552:      *       collections to load into the full array. It may also
 3553:      *       contain string indentifiers in lieu of HTMLPurifier_AttrDef,
 3554:      *       see HTMLPurifier_AttrTypes on how they are expanded during
 3555:      *       HTMLPurifier_HTMLDefinition->setup() processing.
 3556:      */
 3557:     public $attr = array();
 3558: 
 3559:     // XXX: Design note: currently, it's not possible to override
 3560:     // previously defined AttrTransforms without messing around with
 3561:     // the final generated config. This is by design; a previous version
 3562:     // used an associated list of attr_transform, but it was extremely
 3563:     // easy to accidentally override other attribute transforms by
 3564:     // forgetting to specify an index (and just using 0.)  While we
 3565:     // could check this by checking the index number and complaining,
 3566:     // there is a second problem which is that it is not at all easy to
 3567:     // tell when something is getting overridden. Combine this with a
 3568:     // codebase where this isn't really being used, and it's perfect for
 3569:     // nuking.
 3570: 
 3571:     /**
 3572:      * List of tags HTMLPurifier_AttrTransform to be done before validation.
 3573:      * @type array
 3574:      */
 3575:     public $attr_transform_pre = array();
 3576: 
 3577:     /**
 3578:      * List of tags HTMLPurifier_AttrTransform to be done after validation.
 3579:      * @type array
 3580:      */
 3581:     public $attr_transform_post = array();
 3582: 
 3583:     /**
 3584:      * HTMLPurifier_ChildDef of this tag.
 3585:      * @type HTMLPurifier_ChildDef
 3586:      */
 3587:     public $child;
 3588: 
 3589:     /**
 3590:      * Abstract string representation of internal ChildDef rules.
 3591:      * @see HTMLPurifier_ContentSets for how this is parsed and then transformed
 3592:      * into an HTMLPurifier_ChildDef.
 3593:      * @warning This is a temporary variable that is not available after
 3594:      *      being processed by HTMLDefinition
 3595:      * @type string
 3596:      */
 3597:     public $content_model;
 3598: 
    /**
     * Value of $child->type, used to determine which ChildDef to use,
     * used in combination with $content_model.
     * @warning This must be lowercase
     * @warning This is a temporary variable that is not available after
     *      being processed by HTMLDefinition
     * @note mergeIn() copies this from the incoming definition only when
     *       the incoming value is non-empty, and resets $child to false
     *       when it does so.
     * @type string
     */
    public $content_model_type;

    /**
     * Does the element have a content model (#PCDATA | Inline)*? This
     * is important for chameleon ins and del processing in
     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
     * have to worry about this one.
     * @note mergeIn() can only switch this on: a false value in the
     *       merged-in definition leaves the current value untouched.
     * @type bool
     */
    public $descendants_are_inline = false;

    /**
     * List of the names of required attributes this element has.
     * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement()
     * @type array
     */
    public $required_attr = array();

    /**
     * Lookup table of tags excluded from all descendants of this tag.
     * @type array
     * @note SGML permits exclusions for all descendants, but this is
     *       not possible with DTDs or XML Schemas. W3C has elected to
     *       use complicated compositions of content_models to simulate
     *       exclusion for children, but we go the simpler, SGML-style
     *       route of flat-out exclusions, which correctly apply to
     *       all descendants and not just children. Note that the XHTML
     *       Modularization Abstract Modules are blithely unaware of such
     *       distinctions.
     * @note mergeIn() merges this table key by key; an incoming value of
     *       false removes the corresponding key instead of storing it.
     */
    public $excludes = array();

    /**
     * This tag is explicitly auto-closed by the following tags.
     * @type array
     */
    public $autoclose = array();

    /**
     * If a foreign element is found in this element, test if it is
     * allowed by this sub-element; if it is, instead of closing the
     * current element, place it inside this element.
     * @type string
     */
    public $wrap;

    /**
     * Whether or not this is a formatting element affected by the
     * "Active Formatting Elements" algorithm.
     * @note Declared without a default, so it is null until assigned;
     *       mergeIn() only overwrites it when the incoming value is
     *       not null.
     * @type bool
     */
    public $formatting;
 3659: 
 3660:     /**
 3661:      * Low-level factory constructor for creating new standalone element defs
 3662:      */
 3663:     public static function create($content_model, $content_model_type, $attr)
 3664:     {
 3665:         $def = new HTMLPurifier_ElementDef();
 3666:         $def->content_model = $content_model;
 3667:         $def->content_model_type = $content_model_type;
 3668:         $def->attr = $attr;
 3669:         return $def;
 3670:     }
 3671: 
 3672:     /**
 3673:      * Merges the values of another element definition into this one.
 3674:      * Values from the new element def take precedence if a value is
 3675:      * not mergeable.
 3676:      * @param HTMLPurifier_ElementDef $def
 3677:      */
 3678:     public function mergeIn($def)
 3679:     {
 3680:         // later keys takes precedence
 3681:         foreach ($def->attr as $k => $v) {
 3682:             if ($k === 0) {
 3683:                 // merge in the includes
 3684:                 // sorry, no way to override an include
 3685:                 foreach ($v as $v2) {
 3686:                     $this->attr[0][] = $v2;
 3687:                 }
 3688:                 continue;
 3689:             }
 3690:             if ($v === false) {
 3691:                 if (isset($this->attr[$k])) {
 3692:                     unset($this->attr[$k]);
 3693:                 }
 3694:                 continue;
 3695:             }
 3696:             $this->attr[$k] = $v;
 3697:         }
 3698:         $this->_mergeAssocArray($this->excludes, $def->excludes);
 3699:         $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
 3700:         $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);
 3701: 
 3702:         if (!empty($def->content_model)) {
 3703:             $this->content_model =
 3704:                 str_replace("#SUPER", $this->content_model, $def->content_model);
 3705:             $this->child = false;
 3706:         }
 3707:         if (!empty($def->content_model_type)) {
 3708:             $this->content_model_type = $def->content_model_type;
 3709:             $this->child = false;
 3710:         }
 3711:         if (!is_null($def->child)) {
 3712:             $this->child = $def->child;
 3713:         }
 3714:         if (!is_null($def->formatting)) {
 3715:             $this->formatting = $def->formatting;
 3716:         }
 3717:         if ($def->descendants_are_inline) {
 3718:             $this->descendants_are_inline = $def->descendants_are_inline;
 3719:         }
 3720:     }
 3721: 
 3722:     /**
 3723:      * Merges one array into another, removes values which equal false
 3724:      * @param $a1 Array by reference that is merged into
 3725:      * @param $a2 Array that merges into $a1
 3726:      */
 3727:     private function _mergeAssocArray(&$a1, $a2)
 3728:     {
 3729:         foreach ($a2 as $k => $v) {
 3730:             if ($v === false) {
 3731:                 if (isset($a1[$k])) {
 3732:                     unset($a1[$k]);
 3733:                 }
 3734:                 continue;
 3735:             }
 3736:             $a1[$k] = $v;
 3737:         }
 3738:     }
 3739: }
 3740: 
 3741: 
 3742: 
 3743: 
 3744: 
 3745: /**
 3746:  * A UTF-8 specific character encoder that handles cleaning and transforming.
 3747:  * @note All functions in this class should be static.
 3748:  */
 3749: class HTMLPurifier_Encoder
 3750: {
 3751: 
    /**
     * Constructor throws fatal error if you attempt to instantiate class.
     *
     * Declared private, so code outside the class cannot call it; should
     * it run anyway, it raises an E_USER_ERROR telling the caller to use
     * the methods statically.
     */
    private function __construct()
    {
        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
    }
 3759: 
    /**
     * Error-handler that mutes errors, alternative to shut-up operator.
     *
     * The body is intentionally empty. unsafeIconv() installs this method
     * with set_error_handler() around its iconv() call and removes it again
     * with restore_error_handler() right afterwards.
     */
    public static function muteErrorHandler()
    {
    }
 3766: 
 3767:     /**
 3768:      * iconv wrapper which mutes errors, but doesn't work around bugs.
 3769:      * @param string $in Input encoding
 3770:      * @param string $out Output encoding
 3771:      * @param string $text The text to convert
 3772:      * @return string
 3773:      */
 3774:     public static function unsafeIconv($in, $out, $text)
 3775:     {
 3776:         set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
 3777:         $r = iconv($in, $out, $text);
 3778:         restore_error_handler();
 3779:         return $r;
 3780:     }
 3781: 
 3782:     /**
 3783:      * iconv wrapper which mutes errors and works around bugs.
 3784:      * @param string $in Input encoding
 3785:      * @param string $out Output encoding
 3786:      * @param string $text The text to convert
 3787:      * @param int $max_chunk_size
 3788:      * @return string
 3789:      */
 3790:     public static function iconv($in, $out, $text, $max_chunk_size = 8000)
 3791:     {
 3792:         $code = self::testIconvTruncateBug();
 3793:         if ($code == self::ICONV_OK) {
 3794:             return self::unsafeIconv($in, $out, $text);
 3795:         } elseif ($code == self::ICONV_TRUNCATES) {
 3796:             // we can only work around this if the input character set
 3797:             // is utf-8
 3798:             if ($in == 'utf-8') {
 3799:                 if ($max_chunk_size < 4) {
 3800:                     trigger_error('max_chunk_size is too small', E_USER_WARNING);
 3801:                     return false;
 3802:                 }
 3803:                 // split into 8000 byte chunks, but be careful to handle
 3804:                 // multibyte boundaries properly
 3805:                 if (($c = strlen($text)) <= $max_chunk_size) {
 3806:                     return self::unsafeIconv($in, $out, $text);
 3807:                 }
 3808:                 $r = '';
 3809:                 $i = 0;
 3810:                 while (true) {
 3811:                     if ($i + $max_chunk_size >= $c) {
 3812:                         $r .= self::unsafeIconv($in, $out, substr($text, $i));
 3813:                         break;
 3814:                     }
 3815:                     // wibble the boundary
 3816:                     if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
 3817:                         $chunk_size = $max_chunk_size;
 3818:                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
 3819:                         $chunk_size = $max_chunk_size - 1;
 3820:                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
 3821:                         $chunk_size = $max_chunk_size - 2;
 3822:                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
 3823:                         $chunk_size = $max_chunk_size - 3;
 3824:                     } else {
 3825:                         return false; // rather confusing UTF-8...
 3826:                     }
 3827:                     $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
 3828:                     $r .= self::unsafeIconv($in, $out, $chunk);
 3829:                     $i += $chunk_size;
 3830:                 }
 3831:                 return $r;
 3832:             } else {
 3833:                 return false;
 3834:             }
 3835:         } else {
 3836:             return false;
 3837:         }
 3838:     }
 3839: 
 3840:     /**
 3841:      * Cleans a UTF-8 string for well-formedness and SGML validity
 3842:      *
 3843:      * It will parse according to UTF-8 and return a valid UTF8 string, with
 3844:      * non-SGML codepoints excluded.
 3845:      *
 3846:      * @param string $str The string to clean
 3847:      * @param bool $force_php
 3848:      * @return string
 3849:      *
 3850:      * @note Just for reference, the non-SGML code points are 0 to 31 and
 3851:      *       127 to 159, inclusive.  However, we allow code points 9, 10
 3852:      *       and 13, which are the tab, line feed and carriage return
 3853:      *       respectively. 128 and above the code points map to multibyte
 3854:      *       UTF-8 representations.
 3855:      *
 3856:      * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
 3857:      *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
 3858:      *       LGPL license.  Notes on what changed are inside, but in general,
 3859:      *       the original code transformed UTF-8 text into an array of integer
 3860:      *       Unicode codepoints. Understandably, transforming that back to
 3861:      *       a string would be somewhat expensive, so the function was modded to
 3862:      *       directly operate on the string.  However, this discourages code
 3863:      *       reuse, and the logic enumerated here would be useful for any
 3864:      *       function that needs to be able to understand UTF-8 characters.
 3865:      *       As of right now, only smart lossless character encoding converters
 3866:      *       would need that, and I'm probably not going to implement them.
 3867:      *       Once again, PHP 6 should solve all our problems.
 3868:      */
 3869:     public static function cleanUTF8($str, $force_php = false)
 3870:     {
 3871:         // UTF-8 validity is checked since PHP 4.3.5
 3872:         // This is an optimization: if the string is already valid UTF-8, no
 3873:         // need to do PHP stuff. 99% of the time, this will be the case.
 3874:         // The regexp matches the XML char production, as well as well as excluding
 3875:         // non-SGML codepoints U+007F to U+009F
 3876:         if (preg_match(
 3877:             '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
 3878:             $str
 3879:         )) {
 3880:             return $str;
 3881:         }
 3882: 
 3883:         $mState = 0; // cached expected number of octets after the current octet
 3884:                      // until the beginning of the next UTF8 character sequence
 3885:         $mUcs4  = 0; // cached Unicode character
 3886:         $mBytes = 1; // cached expected number of octets in the current sequence
 3887: 
 3888:         // original code involved an $out that was an array of Unicode
 3889:         // codepoints.  Instead of having to convert back into UTF-8, we've
 3890:         // decided to directly append valid UTF-8 characters onto a string
 3891:         // $out once they're done.  $char accumulates raw bytes, while $mUcs4
 3892:         // turns into the Unicode code point, so there's some redundancy.
 3893: 
 3894:         $out = '';
 3895:         $char = '';
 3896: 
 3897:         $len = strlen($str);
 3898:         for ($i = 0; $i < $len; $i++) {
 3899:             $in = ord($str{$i});
 3900:             $char .= $str[$i]; // append byte to char
 3901:             if (0 == $mState) {
 3902:                 // When mState is zero we expect either a US-ASCII character
 3903:                 // or a multi-octet sequence.
 3904:                 if (0 == (0x80 & ($in))) {
 3905:                     // US-ASCII, pass straight through.
 3906:                     if (($in <= 31 || $in == 127) &&
 3907:                         !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
 3908:                     ) {
 3909:                         // control characters, remove
 3910:                     } else {
 3911:                         $out .= $char;
 3912:                     }
 3913:                     // reset
 3914:                     $char = '';
 3915:                     $mBytes = 1;
 3916:                 } elseif (0xC0 == (0xE0 & ($in))) {
 3917:                     // First octet of 2 octet sequence
 3918:                     $mUcs4 = ($in);
 3919:                     $mUcs4 = ($mUcs4 & 0x1F) << 6;
 3920:                     $mState = 1;
 3921:                     $mBytes = 2;
 3922:                 } elseif (0xE0 == (0xF0 & ($in))) {
 3923:                     // First octet of 3 octet sequence
 3924:                     $mUcs4 = ($in);
 3925:                     $mUcs4 = ($mUcs4 & 0x0F) << 12;
 3926:                     $mState = 2;
 3927:                     $mBytes = 3;
 3928:                 } elseif (0xF0 == (0xF8 & ($in))) {
 3929:                     // First octet of 4 octet sequence
 3930:                     $mUcs4 = ($in);
 3931:                     $mUcs4 = ($mUcs4 & 0x07) << 18;
 3932:                     $mState = 3;
 3933:                     $mBytes = 4;
 3934:                 } elseif (0xF8 == (0xFC & ($in))) {
 3935:                     // First octet of 5 octet sequence.
 3936:                     //
 3937:                     // This is illegal because the encoded codepoint must be
 3938:                     // either:
 3939:                     // (a) not the shortest form or
 3940:                     // (b) outside the Unicode range of 0-0x10FFFF.
 3941:                     // Rather than trying to resynchronize, we will carry on
 3942:                     // until the end of the sequence and let the later error
 3943:                     // handling code catch it.
 3944:                     $mUcs4 = ($in);
 3945:                     $mUcs4 = ($mUcs4 & 0x03) << 24;
 3946:                     $mState = 4;
 3947:                     $mBytes = 5;
 3948:                 } elseif (0xFC == (0xFE & ($in))) {
 3949:                     // First octet of 6 octet sequence, see comments for 5
 3950:                     // octet sequence.
 3951:                     $mUcs4 = ($in);
 3952:                     $mUcs4 = ($mUcs4 & 1) << 30;
 3953:                     $mState = 5;
 3954:                     $mBytes = 6;
 3955:                 } else {
 3956:                     // Current octet is neither in the US-ASCII range nor a
 3957:                     // legal first octet of a multi-octet sequence.
 3958:                     $mState = 0;
 3959:                     $mUcs4  = 0;
 3960:                     $mBytes = 1;
 3961:                     $char = '';
 3962:                 }
 3963:             } else {
 3964:                 // When mState is non-zero, we expect a continuation of the
 3965:                 // multi-octet sequence
 3966:                 if (0x80 == (0xC0 & ($in))) {
 3967:                     // Legal continuation.
 3968:                     $shift = ($mState - 1) * 6;
 3969:                     $tmp = $in;
 3970:                     $tmp = ($tmp & 0x0000003F) << $shift;
 3971:                     $mUcs4 |= $tmp;
 3972: 
 3973:                     if (0 == --$mState) {
 3974:                         // End of the multi-octet sequence. mUcs4 now contains
 3975:                         // the final Unicode codepoint to be output
 3976: 
 3977:                         // Check for illegal sequences and codepoints.
 3978: 
 3979:                         // From Unicode 3.1, non-shortest form is illegal
 3980:                         if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
 3981:                             ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
 3982:                             ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
 3983:                             (4 < $mBytes) ||
 3984:                             // From Unicode 3.2, surrogate characters = illegal
 3985:                             (($mUcs4 & 0xFFFFF800) == 0xD800) ||
 3986:                             // Codepoints outside the Unicode range are illegal
 3987:                             ($mUcs4 > 0x10FFFF)
 3988:                         ) {
 3989: 
 3990:                         } elseif (0xFEFF != $mUcs4 && // omit BOM
 3991:                             // check for valid Char unicode codepoints
 3992:                             (
 3993:                                 0x9 == $mUcs4 ||
 3994:                                 0xA == $mUcs4 ||
 3995:                                 0xD == $mUcs4 ||
 3996:                                 (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
 3997:                                 // 7F-9F is not strictly prohibited by XML,
 3998:                                 // but it is non-SGML, and thus we don't allow it
 3999:                                 (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
 4000:                                 (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
 4001:                             )
 4002:                         ) {
 4003:                             $out .= $char;
 4004:                         }
 4005:                         // initialize UTF8 cache (reset)
 4006:                         $mState = 0;
 4007:                         $mUcs4  = 0;
 4008:                         $mBytes = 1;
 4009:                         $char = '';
 4010:                     }
 4011:                 } else {
 4012:                     // ((0xC0 & (*in) != 0x80) && (mState != 0))
 4013:                     // Incomplete multi-octet sequence.
 4014:                     // used to result in complete fail, but we'll reset
 4015:                     $mState = 0;
 4016:                     $mUcs4  = 0;
 4017:                     $mBytes = 1;
 4018:                     $char ='';
 4019:                 }
 4020:             }
 4021:         }
 4022:         return $out;
 4023:     }
 4024: 
 4025:     /**
 4026:      * Translates a Unicode codepoint into its corresponding UTF-8 character.
 4027:      * @note Based on Feyd's function at
 4028:      *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
 4029:      *       which is in public domain.
 4030:      * @note While we're going to do code point parsing anyway, a good
 4031:      *       optimization would be to refuse to translate code points that
 4032:      *       are non-SGML characters.  However, this could lead to duplication.
 4033:      * @note This is very similar to the unichr function in
 4034:      *       maintenance/generate-entity-file.php (although this is superior,
 4035:      *       due to its sanity checks).
 4036:      */
 4037: 
 4038:     // +----------+----------+----------+----------+
 4039:     // | 33222222 | 22221111 | 111111   |          |
 4040:     // | 10987654 | 32109876 | 54321098 | 76543210 | bit
 4041:     // +----------+----------+----------+----------+
 4042:     // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
 4043:     // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
 4044:     // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
 4045:     // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
 4046:     // +----------+----------+----------+----------+
 4047:     // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
 4048:     // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
 4049:     // +----------+----------+----------+----------+
 4050: 
 4051:     public static function unichr($code)
 4052:     {
 4053:         if ($code > 1114111 or $code < 0 or
 4054:           ($code >= 55296 and $code <= 57343) ) {
 4055:             // bits are set outside the "valid" range as defined
 4056:             // by UNICODE 4.1.0
 4057:             return '';
 4058:         }
 4059: 
 4060:         $x = $y = $z = $w = 0;
 4061:         if ($code < 128) {
 4062:             // regular ASCII character
 4063:             $x = $code;
 4064:         } else {
 4065:             // set up bits for UTF-8
 4066:             $x = ($code & 63) | 128;
 4067:             if ($code < 2048) {
 4068:                 $y = (($code & 2047) >> 6) | 192;
 4069:             } else {
 4070:                 $y = (($code & 4032) >> 6) | 128;
 4071:                 if ($code < 65536) {
 4072:                     $z = (($code >> 12) & 15) | 224;
 4073:                 } else {
 4074:                     $z = (($code >> 12) & 63) | 128;
 4075:                     $w = (($code >> 18) & 7)  | 240;
 4076:                 }
 4077:             }
 4078:         }
 4079:         // set up the actual character
 4080:         $ret = '';
 4081:         if ($w) {
 4082:             $ret .= chr($w);
 4083:         }
 4084:         if ($z) {
 4085:             $ret .= chr($z);
 4086:         }
 4087:         if ($y) {
 4088:             $ret .= chr($y);
 4089:         }
 4090:         $ret .= chr($x);
 4091: 
 4092:         return $ret;
 4093:     }
 4094: 
 4095:     /**
 4096:      * @return bool
 4097:      */
 4098:     public static function iconvAvailable()
 4099:     {
 4100:         static $iconv = null;
 4101:         if ($iconv === null) {
 4102:             $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
 4103:         }
 4104:         return $iconv;
 4105:     }
 4106: 
 4107:     /**
 4108:      * Convert a string to UTF-8 based on configuration.
 4109:      * @param string $str The string to convert
 4110:      * @param HTMLPurifier_Config $config
 4111:      * @param HTMLPurifier_Context $context
 4112:      * @return string
 4113:      */
 4114:     public static function convertToUTF8($str, $config, $context)
 4115:     {
 4116:         $encoding = $config->get('Core.Encoding');
 4117:         if ($encoding === 'utf-8') {
 4118:             return $str;
 4119:         }
 4120:         static $iconv = null;
 4121:         if ($iconv === null) {
 4122:             $iconv = self::iconvAvailable();
 4123:         }
 4124:         if ($iconv && !$config->get('Test.ForceNoIconv')) {
 4125:             // unaffected by bugs, since UTF-8 support all characters
 4126:             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
 4127:             if ($str === false) {
 4128:                 // $encoding is not a valid encoding
 4129:                 trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
 4130:                 return '';
 4131:             }
 4132:             // If the string is bjorked by Shift_JIS or a similar encoding
 4133:             // that doesn't support all of ASCII, convert the naughty
 4134:             // characters to their true byte-wise ASCII/UTF-8 equivalents.
 4135:             $str = strtr($str, self::testEncodingSupportsASCII($encoding));
 4136:             return $str;
 4137:         } elseif ($encoding === 'iso-8859-1') {
 4138:             $str = utf8_encode($str);
 4139:             return $str;
 4140:         }
 4141:         $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
 4142:         if ($bug == self::ICONV_OK) {
 4143:             trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
 4144:         } else {
 4145:             trigger_error(
 4146:                 'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
 4147:                 'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
 4148:                 E_USER_ERROR
 4149:             );
 4150:         }
 4151:     }
 4152: 
 4153:     /**
 4154:      * Converts a string from UTF-8 based on configuration.
 4155:      * @param string $str The string to convert
 4156:      * @param HTMLPurifier_Config $config
 4157:      * @param HTMLPurifier_Context $context
 4158:      * @return string
 4159:      * @note Currently, this is a lossy conversion, with unexpressable
 4160:      *       characters being omitted.
 4161:      */
 4162:     public static function convertFromUTF8($str, $config, $context)
 4163:     {
 4164:         $encoding = $config->get('Core.Encoding');
 4165:         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
 4166:             $str = self::convertToASCIIDumbLossless($str);
 4167:         }
 4168:         if ($encoding === 'utf-8') {
 4169:             return $str;
 4170:         }
 4171:         static $iconv = null;
 4172:         if ($iconv === null) {
 4173:             $iconv = self::iconvAvailable();
 4174:         }
 4175:         if ($iconv && !$config->get('Test.ForceNoIconv')) {
 4176:             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
 4177:             $ascii_fix = self::testEncodingSupportsASCII($encoding);
 4178:             if (!$escape && !empty($ascii_fix)) {
 4179:                 $clear_fix = array();
 4180:                 foreach ($ascii_fix as $utf8 => $native) {
 4181:                     $clear_fix[$utf8] = '';
 4182:                 }
 4183:                 $str = strtr($str, $clear_fix);
 4184:             }
 4185:             $str = strtr($str, array_flip($ascii_fix));
 4186:             // Normal stuff
 4187:             $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
 4188:             return $str;
 4189:         } elseif ($encoding === 'iso-8859-1') {
 4190:             $str = utf8_decode($str);
 4191:             return $str;
 4192:         }
 4193:         trigger_error('Encoding not supported', E_USER_ERROR);
 4194:         // You might be tempted to assume that the ASCII representation
 4195:         // might be OK, however, this is *not* universally true over all
 4196:         // encodings.  So we take the conservative route here, rather
 4197:         // than forcibly turn on %Core.EscapeNonASCIICharacters
 4198:     }
 4199: 
 4200:     /**
 4201:      * Lossless (character-wise) conversion of HTML to ASCII
 4202:      * @param string $str UTF-8 string to be converted to ASCII
 4203:      * @return string ASCII encoded string with non-ASCII character entity-ized
 4204:      * @warning Adapted from MediaWiki, claiming fair use: this is a common
 4205:      *       algorithm. If you disagree with this license fudgery,
 4206:      *       implement it yourself.
 4207:      * @note Uses decimal numeric entities since they are best supported.
 4208:      * @note This is a DUMB function: it has no concept of keeping
 4209:      *       character entities that the projected character encoding
 4210:      *       can allow. We could possibly implement a smart version
 4211:      *       but that would require it to also know which Unicode
 4212:      *       codepoints the charset supported (not an easy task).
 4213:      * @note Sort of with cleanUTF8() but it assumes that $str is
 4214:      *       well-formed UTF-8
 4215:      */
 4216:     public static function convertToASCIIDumbLossless($str)
 4217:     {
 4218:         $bytesleft = 0;
 4219:         $result = '';
 4220:         $working = 0;
 4221:         $len = strlen($str);
 4222:         for ($i = 0; $i < $len; $i++) {
 4223:             $bytevalue = ord($str[$i]);
 4224:             if ($bytevalue <= 0x7F) { //0xxx xxxx
 4225:                 $result .= chr($bytevalue);
 4226:                 $bytesleft = 0;
 4227:             } elseif ($bytevalue <= 0xBF) { //10xx xxxx
 4228:                 $working = $working << 6;
 4229:                 $working += ($bytevalue & 0x3F);
 4230:                 $bytesleft--;
 4231:                 if ($bytesleft <= 0) {
 4232:                     $result .= "&#" . $working . ";";
 4233:                 }
 4234:             } elseif ($bytevalue <= 0xDF) { //110x xxxx
 4235:                 $working = $bytevalue & 0x1F;
 4236:                 $bytesleft = 1;
 4237:             } elseif ($bytevalue <= 0xEF) { //1110 xxxx
 4238:                 $working = $bytevalue & 0x0F;
 4239:                 $bytesleft = 2;
 4240:             } else { //1111 0xxx
 4241:                 $working = $bytevalue & 0x07;
 4242:                 $bytesleft = 3;
 4243:             }
 4244:         }
 4245:         return $result;
 4246:     }
 4247: 
    /** No bugs detected in iconv. One of the codes produced by
     *  testIconvTruncateBug(). */
    const ICONV_OK = 0;

    /** Iconv truncates output if converting from UTF-8 to another
     *  character set with //IGNORE, and a non-encodable character is found.
     *  iconv() in this class works around this by converting in chunks. */
    const ICONV_TRUNCATES = 1;

    /** Iconv does not support //IGNORE, making it unusable for
     *  transcoding purposes. iconvAvailable() reports false in this case. */
    const ICONV_UNUSABLE = 2;
 4258: 
 4259:     /**
 4260:      * glibc iconv has a known bug where it doesn't handle the magic
 4261:      * //IGNORE stanza correctly.  In particular, rather than ignore
 4262:      * characters, it will return an EILSEQ after consuming some number
 4263:      * of characters, and expect you to restart iconv as if it were
 4264:      * an E2BIG.  Old versions of PHP did not respect the errno, and
 4265:      * returned the fragment, so as a result you would see iconv
 4266:      * mysteriously truncating output. We can work around this by
 4267:      * manually chopping our input into segments of about 8000
 4268:      * characters, as long as PHP ignores the error code.  If PHP starts
 4269:      * paying attention to the error code, iconv becomes unusable.
 4270:      *
 4271:      * @return int Error code indicating severity of bug.
 4272:      */
 4273:     public static function testIconvTruncateBug()
 4274:     {
 4275:         static $code = null;
 4276:         if ($code === null) {
 4277:             // better not use iconv, otherwise infinite loop!
 4278:             $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
 4279:             if ($r === false) {
 4280:                 $code = self::ICONV_UNUSABLE;
 4281:             } elseif (($c = strlen($r)) < 9000) {
 4282:                 $code = self::ICONV_TRUNCATES;
 4283:             } elseif ($c > 9000) {
 4284:                 trigger_error(
 4285:                     'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
 4286:                     'include your iconv version as per phpversion()',
 4287:                     E_USER_ERROR
 4288:                 );
 4289:             } else {
 4290:                 $code = self::ICONV_OK;
 4291:             }
 4292:         }
 4293:         return $code;
 4294:     }
 4295: 
 4296:     /**
 4297:      * This expensive function tests whether or not a given character
 4298:      * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
 4299:      * fail this test, and require special processing. Variable width
 4300:      * encodings shouldn't ever fail.
 4301:      *
 4302:      * @param string $encoding Encoding name to test, as per iconv format
 4303:      * @param bool $bypass Whether or not to bypass the precompiled arrays.
 4304:      * @return Array of UTF-8 characters to their corresponding ASCII,
 4305:      *      which can be used to "undo" any overzealous iconv action.
 4306:      */
 4307:     public static function testEncodingSupportsASCII($encoding, $bypass = false)
 4308:     {
 4309:         // All calls to iconv here are unsafe, proof by case analysis:
 4310:         // If ICONV_OK, no difference.
 4311:         // If ICONV_TRUNCATE, all calls involve one character inputs,
 4312:         // so bug is not triggered.
 4313:         // If ICONV_UNUSABLE, this call is irrelevant
 4314:         static $encodings = array();
 4315:         if (!$bypass) {
 4316:             if (isset($encodings[$encoding])) {
 4317:                 return $encodings[$encoding];
 4318:             }
 4319:             $lenc = strtolower($encoding);
 4320:             switch ($lenc) {
 4321:                 case 'shift_jis':
 4322:                     return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
 4323:                 case 'johab':
 4324:                     return array("\xE2\x82\xA9" => '\\');
 4325:             }
 4326:             if (strpos($lenc, 'iso-8859-') === 0) {
 4327:                 return array();
 4328:             }
 4329:         }
 4330:         $ret = array();
 4331:         if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
 4332:             return false;
 4333:         }
 4334:         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
 4335:             $c = chr($i); // UTF-8 char
 4336:             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
 4337:             if ($r === '' ||
 4338:                 // This line is needed for iconv implementations that do not
 4339:                 // omit characters that do not exist in the target character set
 4340:                 ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
 4341:             ) {
 4342:                 // Reverse engineer: what's the UTF-8 equiv of this byte
 4343:                 // sequence? This assumes that there's no variable width
 4344:                 // encoding that doesn't support ASCII.
 4345:                 $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
 4346:             }
 4347:         }
 4348:         $encodings[$encoding] = $ret;
 4349:         return $ret;
 4350:     }
 4351: }
 4352: 
 4353: 
 4354: 
 4355: 
 4356: 
 4357: /**
 4358:  * Object that provides entity lookup table from entity name to character
 4359:  */
 4360: class HTMLPurifier_EntityLookup
 4361: {
 4362:     /**
 4363:      * Assoc array of entity name to character represented.
 4364:      * @type array
 4365:      */
 4366:     public $table;
 4367: 
 4368:     /**
 4369:      * Sets up the entity lookup table from the serialized file contents.
 4370:      * @param bool $file
 4371:      * @note The serialized contents are versioned, but were generated
 4372:      *       using the maintenance script generate_entity_file.php
 4373:      * @warning This is not in constructor to help enforce the Singleton
 4374:      */
 4375:     public function setup($file = false)
 4376:     {
 4377:         if (!$file) {
 4378:             $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
 4379:         }
 4380:         $this->table = unserialize(file_get_contents($file));
 4381:     }
 4382: 
 4383:     /**
 4384:      * Retrieves sole instance of the object.
 4385:      * @param bool|HTMLPurifier_EntityLookup $prototype Optional prototype of custom lookup table to overload with.
 4386:      * @return HTMLPurifier_EntityLookup
 4387:      */
 4388:     public static function instance($prototype = false)
 4389:     {
 4390:         // no references, since PHP doesn't copy unless modified
 4391:         static $instance = null;
 4392:         if ($prototype) {
 4393:             $instance = $prototype;
 4394:         } elseif (!$instance) {
 4395:             $instance = new HTMLPurifier_EntityLookup();
 4396:             $instance->setup();
 4397:         }
 4398:         return $instance;
 4399:     }
 4400: }
 4401: 
 4402: 
 4403: 
 4404: 
 4405: 
 4406: // if want to implement error collecting here, we'll need to use some sort
 4407: // of global data (probably trigger_error) because it's impossible to pass
 4408: // $config or $context to the callback functions.
 4409: 
 4410: /**
 4411:  * Handles referencing and derefencing character entities
 4412:  */
 4413: class HTMLPurifier_EntityParser
 4414: {
 4415: 
 4416:     /**
 4417:      * Reference to entity lookup table.
 4418:      * @type HTMLPurifier_EntityLookup
 4419:      */
 4420:     protected $_entity_lookup;
 4421: 
 4422:     /**
 4423:      * Callback regex string for parsing entities.
 4424:      * @type string
 4425:      */
 4426:     protected $_substituteEntitiesRegex =
 4427:         '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
 4428:         //     1. hex             2. dec      3. string (XML style)
 4429: 
 4430:     /**
 4431:      * Decimal to parsed string conversion table for special entities.
 4432:      * @type array
 4433:      */
 4434:     protected $_special_dec2str =
 4435:             array(
 4436:                     34 => '"',
 4437:                     38 => '&',
 4438:                     39 => "'",
 4439:                     60 => '<',
 4440:                     62 => '>'
 4441:             );
 4442: 
 4443:     /**
 4444:      * Stripped entity names to decimal conversion table for special entities.
 4445:      * @type array
 4446:      */
 4447:     protected $_special_ent2dec =
 4448:             array(
 4449:                     'quot' => 34,
 4450:                     'amp'  => 38,
 4451:                     'lt'   => 60,
 4452:                     'gt'   => 62
 4453:             );
 4454: 
 4455:     /**
 4456:      * Substitutes non-special entities with their parsed equivalents. Since
 4457:      * running this whenever you have parsed character is t3h 5uck, we run
 4458:      * it before everything else.
 4459:      *
 4460:      * @param string $string String to have non-special entities parsed.
 4461:      * @return string Parsed string.
 4462:      */
 4463:     public function substituteNonSpecialEntities($string)
 4464:     {
 4465:         // it will try to detect missing semicolons, but don't rely on it
 4466:         return preg_replace_callback(
 4467:             $this->_substituteEntitiesRegex,
 4468:             array($this, 'nonSpecialEntityCallback'),
 4469:             $string
 4470:         );
 4471:     }
 4472: 
 4473:     /**
 4474:      * Callback function for substituteNonSpecialEntities() that does the work.
 4475:      *
 4476:      * @param array $matches  PCRE matches array, with 0 the entire match, and
 4477:      *                  either index 1, 2 or 3 set with a hex value, dec value,
 4478:      *                  or string (respectively).
 4479:      * @return string Replacement string.
 4480:      */
 4481: 
 4482:     protected function nonSpecialEntityCallback($matches)
 4483:     {
 4484:         // replaces all but big five
 4485:         $entity = $matches[0];
 4486:         $is_num = (@$matches[0][1] === '#');
 4487:         if ($is_num) {
 4488:             $is_hex = (@$entity[2] === 'x');
 4489:             $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
 4490:             // abort for special characters
 4491:             if (isset($this->_special_dec2str[$code])) {
 4492:                 return $entity;
 4493:             }
 4494:             return HTMLPurifier_Encoder::unichr($code);
 4495:         } else {
 4496:             if (isset($this->_special_ent2dec[$matches[3]])) {
 4497:                 return $entity;
 4498:             }
 4499:             if (!$this->_entity_lookup) {
 4500:                 $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
 4501:             }
 4502:             if (isset($this->_entity_lookup->table[$matches[3]])) {
 4503:                 return $this->_entity_lookup->table[$matches[3]];
 4504:             } else {
 4505:                 return $entity;
 4506:             }
 4507:         }
 4508:     }
 4509: 
 4510:     /**
 4511:      * Substitutes only special entities with their parsed equivalents.
 4512:      *
 4513:      * @notice We try to avoid calling this function because otherwise, it
 4514:      * would have to be called a lot (for every parsed section).
 4515:      *
 4516:      * @param string $string String to have non-special entities parsed.
 4517:      * @return string Parsed string.
 4518:      */
 4519:     public function substituteSpecialEntities($string)
 4520:     {
 4521:         return preg_replace_callback(
 4522:             $this->_substituteEntitiesRegex,
 4523:             array($this, 'specialEntityCallback'),
 4524:             $string
 4525:         );
 4526:     }
 4527: 
 4528:     /**
 4529:      * Callback function for substituteSpecialEntities() that does the work.
 4530:      *
 4531:      * This callback has same syntax as nonSpecialEntityCallback().
 4532:      *
 4533:      * @param array $matches  PCRE-style matches array, with 0 the entire match, and
 4534:      *                  either index 1, 2 or 3 set with a hex value, dec value,
 4535:      *                  or string (respectively).
 4536:      * @return string Replacement string.
 4537:      */
 4538:     protected function specialEntityCallback($matches)
 4539:     {
 4540:         $entity = $matches[0];
 4541:         $is_num = (@$matches[0][1] === '#');
 4542:         if ($is_num) {
 4543:             $is_hex = (@$entity[2] === 'x');
 4544:             $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
 4545:             return isset($this->_special_dec2str[$int]) ?
 4546:                 $this->_special_dec2str[$int] :
 4547:                 $entity;
 4548:         } else {
 4549:             return isset($this->_special_ent2dec[$matches[3]]) ?
 4550:                 $this->_special_ent2dec[$matches[3]] :
 4551:                 $entity;
 4552:         }
 4553:     }
 4554: }
 4555: 
 4556: 
 4557: 
 4558: 
 4559: 
 4560: /**
 4561:  * Error collection class that enables HTML Purifier to report HTML
 4562:  * problems back to the user
 4563:  */
 4564: class HTMLPurifier_ErrorCollector
 4565: {
 4566: 
 4567:     /**
 4568:      * Identifiers for the returned error array. These are purposely numeric
 4569:      * so list() can be used.
 4570:      */
 4571:     const LINENO   = 0;
 4572:     const SEVERITY = 1;
 4573:     const MESSAGE  = 2;
 4574:     const CHILDREN = 3;
 4575: 
 4576:     /**
 4577:      * @type array
 4578:      */
 4579:     protected $errors;
 4580: 
 4581:     /**
 4582:      * @type array
 4583:      */
 4584:     protected $_current;
 4585: 
 4586:     /**
 4587:      * @type array
 4588:      */
 4589:     protected $_stacks = array(array());
 4590: 
 4591:     /**
 4592:      * @type HTMLPurifier_Language
 4593:      */
 4594:     protected $locale;
 4595: 
 4596:     /**
 4597:      * @type HTMLPurifier_Generator
 4598:      */
 4599:     protected $generator;
 4600: 
 4601:     /**
 4602:      * @type HTMLPurifier_Context
 4603:      */
 4604:     protected $context;
 4605: 
 4606:     /**
 4607:      * @type array
 4608:      */
 4609:     protected $lines = array();
 4610: 
 4611:     /**
 4612:      * @param HTMLPurifier_Context $context
 4613:      */
 4614:     public function __construct($context)
 4615:     {
 4616:         $this->locale    =& $context->get('Locale');
 4617:         $this->context   = $context;
 4618:         $this->_current  =& $this->_stacks[0];
 4619:         $this->errors    =& $this->_stacks[0];
 4620:     }
 4621: 
 4622:     /**
 4623:      * Sends an error message to the collector for later use
 4624:      * @param int $severity Error severity, PHP error style (don't use E_USER_)
 4625:      * @param string $msg Error message text
 4626:      */
 4627:     public function send($severity, $msg)
 4628:     {
 4629:         $args = array();
 4630:         if (func_num_args() > 2) {
 4631:             $args = func_get_args();
 4632:             array_shift($args);
 4633:             unset($args[0]);
 4634:         }
 4635: 
 4636:         $token = $this->context->get('CurrentToken', true);
 4637:         $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
 4638:         $col   = $token ? $token->col  : $this->context->get('CurrentCol', true);
 4639:         $attr  = $this->context->get('CurrentAttr', true);
 4640: 
 4641:         // perform special substitutions, also add custom parameters
 4642:         $subst = array();
 4643:         if (!is_null($token)) {
 4644:             $args['CurrentToken'] = $token;
 4645:         }
 4646:         if (!is_null($attr)) {
 4647:             $subst['$CurrentAttr.Name'] = $attr;
 4648:             if (isset($token->attr[$attr])) {
 4649:                 $subst['$CurrentAttr.Value'] = $token->attr[$attr];
 4650:             }
 4651:         }
 4652: 
 4653:         if (empty($args)) {
 4654:             $msg = $this->locale->getMessage($msg);
 4655:         } else {
 4656:             $msg = $this->locale->formatMessage($msg, $args);
 4657:         }
 4658: 
 4659:         if (!empty($subst)) {
 4660:             $msg = strtr($msg, $subst);
 4661:         }
 4662: 
 4663:         // (numerically indexed)
 4664:         $error = array(
 4665:             self::LINENO   => $line,
 4666:             self::SEVERITY => $severity,
 4667:             self::MESSAGE  => $msg,
 4668:             self::CHILDREN => array()
 4669:         );
 4670:         $this->_current[] = $error;
 4671: 
 4672:         // NEW CODE BELOW ...
 4673:         // Top-level errors are either:
 4674:         //  TOKEN type, if $value is set appropriately, or
 4675:         //  "syntax" type, if $value is null
 4676:         $new_struct = new HTMLPurifier_ErrorStruct();
 4677:         $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
 4678:         if ($token) {
 4679:             $new_struct->value = clone $token;
 4680:         }
 4681:         if (is_int($line) && is_int($col)) {
 4682:             if (isset($this->lines[$line][$col])) {
 4683:                 $struct = $this->lines[$line][$col];
 4684:             } else {
 4685:                 $struct = $this->lines[$line][$col] = $new_struct;
 4686:             }
 4687:             // These ksorts may present a performance problem
 4688:             ksort($this->lines[$line], SORT_NUMERIC);
 4689:         } else {
 4690:             if (isset($this->lines[-1])) {
 4691:                 $struct = $this->lines[-1];
 4692:             } else {
 4693:                 $struct = $this->lines[-1] = $new_struct;
 4694:             }
 4695:         }
 4696:         ksort($this->lines, SORT_NUMERIC);
 4697: 
 4698:         // Now, check if we need to operate on a lower structure
 4699:         if (!empty($attr)) {
 4700:             $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
 4701:             if (!$struct->value) {
 4702:                 $struct->value = array($attr, 'PUT VALUE HERE');
 4703:             }
 4704:         }
 4705:         if (!empty($cssprop)) {
 4706:             $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
 4707:             if (!$struct->value) {
 4708:                 // if we tokenize CSS this might be a little more difficult to do
 4709:                 $struct->value = array($cssprop, 'PUT VALUE HERE');
 4710:             }
 4711:         }
 4712: 
 4713:         // Ok, structs are all setup, now time to register the error
 4714:         $struct->addError($severity, $msg);
 4715:     }
 4716: 
 4717:     /**
 4718:      * Retrieves raw error data for custom formatter to use
 4719:      */
 4720:     public function getRaw()
 4721:     {
 4722:         return $this->errors;
 4723:     }
 4724: 
 4725:     /**
 4726:      * Default HTML formatting implementation for error messages
 4727:      * @param HTMLPurifier_Config $config Configuration, vital for HTML output nature
 4728:      * @param array $errors Errors array to display; used for recursion.
 4729:      * @return string
 4730:      */
 4731:     public function getHTMLFormatted($config, $errors = null)
 4732:     {
 4733:         $ret = array();
 4734: 
 4735:         $this->generator = new HTMLPurifier_Generator($config, $this->context);
 4736:         if ($errors === null) {
 4737:             $errors = $this->errors;
 4738:         }
 4739: 
 4740:         // 'At line' message needs to be removed
 4741: 
 4742:         // generation code for new structure goes here. It needs to be recursive.
 4743:         foreach ($this->lines as $line => $col_array) {
 4744:             if ($line == -1) {
 4745:                 continue;
 4746:             }
 4747:             foreach ($col_array as $col => $struct) {
 4748:                 $this->_renderStruct($ret, $struct, $line, $col);
 4749:             }
 4750:         }
 4751:         if (isset($this->lines[-1])) {
 4752:             $this->_renderStruct($ret, $this->lines[-1]);
 4753:         }
 4754: 
 4755:         if (empty($errors)) {
 4756:             return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
 4757:         } else {
 4758:             return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
 4759:         }
 4760: 
 4761:     }
 4762: 
 4763:     private function _renderStruct(&$ret, $struct, $line = null, $col = null)
 4764:     {
 4765:         $stack = array($struct);
 4766:         $context_stack = array(array());
 4767:         while ($current = array_pop($stack)) {
 4768:             $context = array_pop($context_stack);
 4769:             foreach ($current->errors as $error) {
 4770:                 list($severity, $msg) = $error;
 4771:                 $string = '';
 4772:                 $string .= '<div>';
 4773:                 // W3C uses an icon to indicate the severity of the error.
 4774:                 $error = $this->locale->getErrorName($severity);
 4775:                 $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
 4776:                 if (!is_null($line) && !is_null($col)) {
 4777:                     $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
 4778:                 } else {
 4779:                     $string .= '<em class="location">End of Document: </em> ';
 4780:                 }
 4781:                 $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
 4782:                 $string .= '</div>';
 4783:                 // Here, have a marker for the character on the column appropriate.
 4784:                 // Be sure to clip extremely long lines.
 4785:                 //$string .= '<pre>';
 4786:                 //$string .= '';
 4787:                 //$string .= '</pre>';
 4788:                 $ret[] = $string;
 4789:             }
 4790:             foreach ($current->children as $array) {
 4791:                 $context[] = $current;
 4792:                 $stack = array_merge($stack, array_reverse($array, true));
 4793:                 for ($i = count($array); $i > 0; $i--) {
 4794:                     $context_stack[] = $context;
 4795:                 }
 4796:             }
 4797:         }
 4798:     }
 4799: }
 4800: 
 4801: 
 4802: 
 4803: 
 4804: 
 4805: /**
 4806:  * Records errors for particular segments of an HTML document such as tokens,
 4807:  * attributes or CSS properties. They can contain error structs (which apply
 4808:  * to components of what they represent), but their main purpose is to hold
 4809:  * errors applying to whatever struct is being used.
 4810:  */
 4811: class HTMLPurifier_ErrorStruct
 4812: {
 4813: 
 4814:     /**
 4815:      * Possible values for $children first-key. Note that top-level structures
 4816:      * are automatically token-level.
 4817:      */
 4818:     const TOKEN     = 0;
 4819:     const ATTR      = 1;
 4820:     const CSSPROP   = 2;
 4821: 
 4822:     /**
 4823:      * Type of this struct.
 4824:      * @type string
 4825:      */
 4826:     public $type;
 4827: 
 4828:     /**
 4829:      * Value of the struct we are recording errors for. There are various
 4830:      * values for this:
 4831:      *  - TOKEN: Instance of HTMLPurifier_Token
 4832:      *  - ATTR: array('attr-name', 'value')
 4833:      *  - CSSPROP: array('prop-name', 'value')
 4834:      * @type mixed
 4835:      */
 4836:     public $value;
 4837: 
 4838:     /**
 4839:      * Errors registered for this structure.
 4840:      * @type array
 4841:      */
 4842:     public $errors = array();
 4843: 
 4844:     /**
 4845:      * Child ErrorStructs that are from this structure. For example, a TOKEN
 4846:      * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
 4847:      * array in structure: [TYPE]['identifier']
 4848:      * @type array
 4849:      */
 4850:     public $children = array();
 4851: 
 4852:     /**
 4853:      * @param string $type
 4854:      * @param string $id
 4855:      * @return mixed
 4856:      */
 4857:     public function getChild($type, $id)
 4858:     {
 4859:         if (!isset($this->children[$type][$id])) {
 4860:             $this->children[$type][$id] = new HTMLPurifier_ErrorStruct();
 4861:             $this->children[$type][$id]->type = $type;
 4862:         }
 4863:         return $this->children[$type][$id];
 4864:     }
 4865: 
 4866:     /**
 4867:      * @param int $severity
 4868:      * @param string $message
 4869:      */
 4870:     public function addError($severity, $message)
 4871:     {
 4872:         $this->errors[] = array($severity, $message);
 4873:     }
 4874: }
 4875: 
 4876: 
 4877: 
 4878: 
 4879: 
 4880: /**
 4881:  * Global exception class for HTML Purifier; any exceptions we throw
 4882:  * are from here.
 4883:  */
 4884: class HTMLPurifier_Exception extends Exception
 4885: {
 4886: 
 4887: }
 4888: 
 4889: 
 4890: 
 4891: 
 4892: 
 4893: /**
 4894:  * Represents a pre or post processing filter on HTML Purifier's output
 4895:  *
 4896:  * Sometimes, a little ad-hoc fixing of HTML has to be done before
 4897:  * it gets sent through HTML Purifier: you can use filters to acheive
 4898:  * this effect. For instance, YouTube videos can be preserved using
 4899:  * this manner. You could have used a decorator for this task, but
 4900:  * PHP's support for them is not terribly robust, so we're going
 4901:  * to just loop through the filters.
 4902:  *
 4903:  * Filters should be exited first in, last out. If there are three filters,
 4904:  * named 1, 2 and 3, the order of execution should go 1->preFilter,
 4905:  * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
 4906:  * 1->postFilter.
 4907:  *
 4908:  * @note Methods are not declared abstract as it is perfectly legitimate
 4909:  *       for an implementation not to want anything to happen on a step
 4910:  */
 4911: 
 4912: class HTMLPurifier_Filter
 4913: {
 4914: 
 4915:     /**
 4916:      * Name of the filter for identification purposes.
 4917:      * @type string
 4918:      */
 4919:     public $name;
 4920: 
 4921:     /**
 4922:      * Pre-processor function, handles HTML before HTML Purifier
 4923:      * @param string $html
 4924:      * @param HTMLPurifier_Config $config
 4925:      * @param HTMLPurifier_Context $context
 4926:      * @return string
 4927:      */
 4928:     public function preFilter($html, $config, $context)
 4929:     {
 4930:         return $html;
 4931:     }
 4932: 
 4933:     /**
 4934:      * Post-processor function, handles HTML after HTML Purifier
 4935:      * @param string $html
 4936:      * @param HTMLPurifier_Config $config
 4937:      * @param HTMLPurifier_Context $context
 4938:      * @return string
 4939:      */
 4940:     public function postFilter($html, $config, $context)
 4941:     {
 4942:         return $html;
 4943:     }
 4944: }
 4945: 
 4946: 
 4947: 
 4948: 
 4949: 
 4950: /**
 4951:  * Generates HTML from tokens.
 4952:  * @todo Refactor interface so that configuration/context is determined
 4953:  *       upon instantiation, no need for messy generateFromTokens() calls
 4954:  * @todo Make some of the more internal functions protected, and have
 4955:  *       unit tests work around that
 4956:  */
 4957: class HTMLPurifier_Generator
 4958: {
 4959: 
 4960:     /**
 4961:      * Whether or not generator should produce XML output.
 4962:      * @type bool
 4963:      */
 4964:     private $_xhtml = true;
 4965: 
 4966:     /**
 4967:      * :HACK: Whether or not generator should comment the insides of <script> tags.
 4968:      * @type bool
 4969:      */
 4970:     private $_scriptFix = false;
 4971: 
 4972:     /**
 4973:      * Cache of HTMLDefinition during HTML output to determine whether or
 4974:      * not attributes should be minimized.
 4975:      * @type HTMLPurifier_HTMLDefinition
 4976:      */
 4977:     private $_def;
 4978: 
 4979:     /**
 4980:      * Cache of %Output.SortAttr.
 4981:      * @type bool
 4982:      */
 4983:     private $_sortAttr;
 4984: 
 4985:     /**
 4986:      * Cache of %Output.FlashCompat.
 4987:      * @type bool
 4988:      */
 4989:     private $_flashCompat;
 4990: 
 4991:     /**
 4992:      * Cache of %Output.FixInnerHTML.
 4993:      * @type bool
 4994:      */
 4995:     private $_innerHTMLFix;
 4996: 
 4997:     /**
 4998:      * Stack for keeping track of object information when outputting IE
 4999:      * compatibility code.
 5000:      * @type array
 5001:      */
 5002:     private $_flashStack = array();
 5003: 
 5004:     /**
 5005:      * Configuration for the generator
 5006:      * @type HTMLPurifier_Config
 5007:      */
 5008:     protected $config;
 5009: 
 5010:     /**
 5011:      * @param HTMLPurifier_Config $config
 5012:      * @param HTMLPurifier_Context $context
 5013:      */
 5014:     public function __construct($config, $context)
 5015:     {
 5016:         $this->config = $config;
 5017:         $this->_scriptFix = $config->get('Output.CommentScriptContents');
 5018:         $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
 5019:         $this->_sortAttr = $config->get('Output.SortAttr');
 5020:         $this->_flashCompat = $config->get('Output.FlashCompat');
 5021:         $this->_def = $config->getHTMLDefinition();
 5022:         $this->_xhtml = $this->_def->doctype->xml;
 5023:     }
 5024: 
 5025:     /**
 5026:      * Generates HTML from an array of tokens.
 5027:      * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
 5028:      * @return string Generated HTML
 5029:      */
 5030:     public function generateFromTokens($tokens)
 5031:     {
 5032:         if (!$tokens) {
 5033:             return '';
 5034:         }
 5035: 
 5036:         // Basic algorithm
 5037:         $html = '';
 5038:         for ($i = 0, $size = count($tokens); $i < $size; $i++) {
 5039:             if ($this->_scriptFix && $tokens[$i]->name === 'script'
 5040:                 && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
 5041:                 // script special case
 5042:                 // the contents of the script block must be ONE token
 5043:                 // for this to work.
 5044:                 $html .= $this->generateFromToken($tokens[$i++]);
 5045:                 $html .= $this->generateScriptFromToken($tokens[$i++]);
 5046:             }
 5047:             $html .= $this->generateFromToken($tokens[$i]);
 5048:         }
 5049: 
 5050:         // Tidy cleanup
 5051:         if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
 5052:             $tidy = new Tidy;
 5053:             $tidy->parseString(
 5054:                 $html,
 5055:                 array(
 5056:                    'indent'=> true,
 5057:                    'output-xhtml' => $this->_xhtml,
 5058:                    'show-body-only' => true,
 5059:                    'indent-spaces' => 2,
 5060:                    'wrap' => 68,
 5061:                 ),
 5062:                 'utf8'
 5063:             );
 5064:             $tidy->cleanRepair();
 5065:             $html = (string) $tidy; // explicit cast necessary
 5066:         }
 5067: 
 5068:         // Normalize newlines to system defined value
 5069:         if ($this->config->get('Core.NormalizeNewlines')) {
 5070:             $nl = $this->config->get('Output.Newline');
 5071:             if ($nl === null) {
 5072:                 $nl = PHP_EOL;
 5073:             }
 5074:             if ($nl !== "\n") {
 5075:                 $html = str_replace("\n", $nl, $html);
 5076:             }
 5077:         }
 5078:         return $html;
 5079:     }
 5080: 
 5081:     /**
 5082:      * Generates HTML from a single token.
 5083:      * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
 5084:      * @return string Generated HTML
 5085:      */
 5086:     public function generateFromToken($token)
 5087:     {
 5088:         if (!$token instanceof HTMLPurifier_Token) {
 5089:             trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
 5090:             return '';
 5091: 
 5092:         } elseif ($token instanceof HTMLPurifier_Token_Start) {
 5093:             $attr = $this->generateAttributes($token->attr, $token->name);
 5094:             if ($this->_flashCompat) {
 5095:                 if ($token->name == "object") {
 5096:                     $flash = new stdclass();
 5097:                     $flash->attr = $token->attr;
 5098:                     $flash->param = array();
 5099:                     $this->_flashStack[] = $flash;
 5100:                 }
 5101:             }
 5102:             return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
 5103: 
 5104:         } elseif ($token instanceof HTMLPurifier_Token_End) {
 5105:             $_extra = '';
 5106:             if ($this->_flashCompat) {
 5107:                 if ($token->name == "object" && !empty($this->_flashStack)) {
 5108:                     // doesn't do anything for now
 5109:                 }
 5110:             }
 5111:             return $_extra . '</' . $token->name . '>';
 5112: 
 5113:         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
 5114:             if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
 5115:                 $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
 5116:             }
 5117:             $attr = $this->generateAttributes($token->attr, $token->name);
 5118:              return '<' . $token->name . ($attr ? ' ' : '') . $attr .
 5119:                 ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
 5120:                 . '>';
 5121: 
 5122:         } elseif ($token instanceof HTMLPurifier_Token_Text) {
 5123:             return $this->escape($token->data, ENT_NOQUOTES);
 5124: 
 5125:         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
 5126:             return '<!--' . $token->data . '-->';
 5127:         } else {
 5128:             return '';
 5129: 
 5130:         }
 5131:     }
 5132: 
 5133:     /**
 5134:      * Special case processor for the contents of script tags
 5135:      * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
 5136:      * @return string
 5137:      * @warning This runs into problems if there's already a literal
 5138:      *          --> somewhere inside the script contents.
 5139:      */
 5140:     public function generateScriptFromToken($token)
 5141:     {
 5142:         if (!$token instanceof HTMLPurifier_Token_Text) {
 5143:             return $this->generateFromToken($token);
 5144:         }
 5145:         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
 5146:         $data = preg_replace('#//\s*$#', '', $token->data);
 5147:         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
 5148:     }
 5149: 
 5150:     /**
 5151:      * Generates attribute declarations from attribute array.
 5152:      * @note This does not include the leading or trailing space.
 5153:      * @param array $assoc_array_of_attributes Attribute array
 5154:      * @param string $element Name of element attributes are for, used to check
 5155:      *        attribute minimization.
 5156:      * @return string Generated HTML fragment for insertion.
 5157:      */
 5158:     public function generateAttributes($assoc_array_of_attributes, $element = '')
 5159:     {
 5160:         $html = '';
 5161:         if ($this->_sortAttr) {
 5162:             ksort($assoc_array_of_attributes); // order attributes by name
 5163:         }
 5164:         foreach ($assoc_array_of_attributes as $key => $value) {
 5165:             if (!$this->_xhtml) { // the two checks below apply to non-XHTML output only
 5166:                 // Remove namespaced attributes
 5167:                 if (strpos($key, ':') !== false) {
 5168:                     continue;
 5169:                 }
 5170:                 // Check if we should minimize the attribute: val="val" -> val
 5171:                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
 5172:                     $html .= $key . ' '; // name only, the value is dropped
 5173:                     continue;
 5174:                 }
 5175:             }
 5176:             // Workaround for Internet Explorer innerHTML bug.
 5177:             // Essentially, Internet Explorer, when calculating
 5178:             // innerHTML, omits quotes if there are no instances of
 5179:             // angled brackets, quotes or spaces.  However, when parsing
 5180:             // HTML (for example, when you assign to innerHTML), it
 5181:             // treats backticks as quotes.  Thus,
 5182:             //      <img alt="``" />
 5183:             // becomes
 5184:             //      <img alt=`` />
 5185:             // becomes
 5186:             //      <img alt='' />
 5187:             // Fortunately, all we need to do is trigger an appropriate
 5188:             // quoting style, which we do by adding an extra space.
 5189:             // This also is consistent with the W3C spec, which states
 5190:             // that user agents may ignore leading or trailing
 5191:             // whitespace (in fact, most don't, at least for attributes
 5192:             // like alt, but an extra space at the end is barely
 5193:             // noticeable).  Still, we have a configuration knob for
 5194:             // this, since this transformation is not necessary if you
 5195:             // don't process user input with innerHTML or you don't plan
 5196:             // on supporting Internet Explorer.
 5197:             if ($this->_innerHTMLFix) {
 5198:                 if (strpos($value, '`') !== false) {
 5199:                     // check if correct quoting style would not already be
 5200:                     // triggered
 5201:                     if (strcspn($value, '"\' <>') === strlen($value)) { // none of those characters occur in the value
 5202:                         // protect!
 5203:                         $value .= ' ';
 5204:                     }
 5205:                 }
 5206:             }
 5207:             $html .= $key.'="'.$this->escape($value).'" '; // every entry is followed by a separator space
 5208:         }
 5209:         return rtrim($html); // strip the separator left after the last entry
 5210:     }
 5211: 
 5212:     /**
 5213:      * Escapes raw text data.
 5214:      * @todo This really ought to be protected, but until we have a facility
 5215:      *       for properly generating HTML here w/o using tokens, it stays
 5216:      *       public.
 5217:      * @param string $string String data to escape for HTML.
 5218:      * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
 5219:      *               permissible for non-attribute output.
 5220:      * @return string escaped data.
 5221:      */
 5222:     public function escape($string, $quote = null)
 5223:     {
 5224:         // Workaround for APC bug on Mac Leopard reported by sidepodcast
 5225:         // http://htmlpurifier.org/phorum/read.php?3,4823,4846
 5226:         if ($quote === null) { // the default is resolved here rather than in the signature
 5227:             $quote = ENT_COMPAT;
 5228:         }
 5229:         return htmlspecialchars($string, $quote, 'UTF-8'); // charset is always UTF-8
 5230:     }
 5231: }
 5232: 
 5233: 
 5234: 
 5235: 
 5236: 
 5237: /**
 5238:  * Definition of the purified HTML that describes allowed children,
 5239:  * attributes, and many other things.
 5240:  *
 5241:  * Conventions:
 5242:  *
 5243:  * All member variables that are prefixed with info
 5244:  * (including the main $info array) are used by HTML Purifier internals
 5245:  * and should not be directly edited when customizing the HTMLDefinition.
 5246:  * They can usually be set via configuration directives or custom
 5247:  * modules.
 5248:  *
 5249:  * On the other hand, member variables without the info prefix are used
 5250:  * internally by the HTMLDefinition and MUST NOT be used by other HTML
 5251:  * Purifier internals. Many of them, however, are public, and may be
 5252:  * edited by userspace code to tweak the behavior of HTMLDefinition.
 5253:  *
 5254:  * @note This class is inspected by Printer_HTMLDefinition; please
 5255:  *       update that class if things here change.
 5256:  *
 5257:  * @warning Directives that change this object's structure must be in
 5258:  *          the HTML or Attr namespace!
 5259:  */
 5260: class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
 5261: {
 5262: 
 5263:     // FULLY-PUBLIC VARIABLES ---------------------------------------------
 5264: 
 5265:     /**
 5266:      * Associative array of element names to HTMLPurifier_ElementDef.
 5267:      * @type HTMLPurifier_ElementDef[]
 5268:      */
 5269:     public $info = array();
 5270: 
 5271:     /**
 5272:      * Associative array of global attribute name to attribute definition.
 5273:      * @type array
 5274:      */
 5275:     public $info_global_attr = array();
 5276: 
 5277:     /**
 5278:      * String name of parent element HTML will be going into.
 5279:      * @type string
 5280:      */
 5281:     public $info_parent = 'div';
 5282: 
 5283:     /**
 5284:      * Definition for parent element, allows parent element to be a
 5285:      * tag that's not allowed inside the HTML fragment.
 5286:      * @type HTMLPurifier_ElementDef
 5287:      */
 5288:     public $info_parent_def;
 5289: 
 5290:     /**
 5291:      * String name of element used to wrap inline elements in block context.
 5292:      * @type string
 5293:      * @note This is rarely used except for BLOCKQUOTEs in strict mode
 5294:      */
 5295:     public $info_block_wrapper = 'p';
 5296: 
 5297:     /**
 5298:      * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
 5299:      * @type array
 5300:      */
 5301:     public $info_tag_transform = array();
 5302: 
 5303:     /**
 5304:      * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
 5305:      * @type HTMLPurifier_AttrTransform[]
 5306:      */
 5307:     public $info_attr_transform_pre = array();
 5308: 
 5309:     /**
 5310:      * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
 5311:      * @type HTMLPurifier_AttrTransform[]
 5312:      */
 5313:     public $info_attr_transform_post = array();
 5314: 
 5315:     /**
 5316:      * Nested lookup array of content set name (Block, Inline) to
 5317:      * element name to whether or not it belongs in that content set.
 5318:      * @type array
 5319:      */
 5320:     public $info_content_sets = array();
 5321: 
 5322:     /**
 5323:      * Indexed list of HTMLPurifier_Injector to be used.
 5324:      * @type HTMLPurifier_Injector[]
 5325:      */
 5326:     public $info_injector = array();
 5327: 
 5328:     /**
 5329:      * Doctype object
 5330:      * @type HTMLPurifier_Doctype
 5331:      */
 5332:     public $doctype;
 5333: 
 5334: 
 5335: 
 5336:     // RAW CUSTOMIZATION STUFF --------------------------------------------
 5337: 
 5338:     /**
 5339:      * Adds a custom attribute to a pre-existing element
 5340:      * @note This is strictly convenience, and does not have a corresponding
 5341:      *       method in HTMLPurifier_HTMLModule
 5342:      * @param string $element_name Element name to add attribute to
 5343:      * @param string $attr_name Name of attribute
 5344:      * @param mixed $def Attribute definition, can be string or object, see
 5345:      *             HTMLPurifier_AttrTypes for details
 5346:      */
 5347:     public function addAttribute($element_name, $attr_name, $def)
 5348:     {
 5349:         $module = $this->getAnonymousModule(); // the attribute is recorded on the anonymous module
 5350:         if (!isset($module->info[$element_name])) { // element not yet known to the anonymous module
 5351:             $element = $module->addBlankElement($element_name);
 5352:         } else {
 5353:             $element = $module->info[$element_name]; // reuse the existing entry
 5354:         }
 5355:         $element->attr[$attr_name] = $def; // replaces any definition already stored under this name
 5356:     }
 5357: 
 5358:     /**
 5359:      * Adds a custom element to your HTML definition
 5360:      * @see HTMLPurifier_HTMLModule::addElement() for detailed
 5361:      *       parameter and return value descriptions.
 5362:      */
 5363:     public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
 5364:     {
 5365:         $module = $this->getAnonymousModule(); // the element is registered on the anonymous module
 5366:         // assume that if the user is calling this, the element
 5367:         // is safe. This may not be a good idea
 5368:         $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
 5369:         return $element; // whatever the module's addElement() returned
 5370:     }
 5371: 
 5372:     /**
 5373:      * Adds a blank element to your HTML definition, for overriding
 5374:      * existing behavior
 5375:      * @param string $element_name
 5376:      * @return HTMLPurifier_ElementDef
 5377:      * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
 5378:      *       parameter and return value descriptions.
 5379:      */
 5380:     public function addBlankElement($element_name)
 5381:     {
 5382:         $module  = $this->getAnonymousModule(); // the blank element is created on the anonymous module
 5383:         $element = $module->addBlankElement($element_name);
 5384:         return $element; // whatever the module's addBlankElement() returned
 5385:     }
 5386: 
 5387:     /**
 5388:      * Retrieves a reference to the anonymous module, so you can
 5389:      * bust out advanced features without having to make your own
 5390:      * module.
 5391:      * @return HTMLPurifier_HTMLModule
 5392:      */
 5393:     public function getAnonymousModule()
 5394:     {
 5395:         if (!$this->_anonModule) { // created lazily on first request
 5396:             $this->_anonModule = new HTMLPurifier_HTMLModule();
 5397:             $this->_anonModule->name = 'Anonymous';
 5398:         }
 5399:         return $this->_anonModule; // the same instance on every later call
 5400:     }
 5401: 
 5402:     private $_anonModule = null;
 5403: 
 5404:     // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
 5405: 
 5406:     /**
 5407:      * @type string
 5408:      */
 5409:     public $type = 'HTML';
 5410: 
 5411:     /**
 5412:      * @type HTMLPurifier_HTMLModuleManager
 5413:      */
 5414:     public $manager;
 5415: 
 5416:     /**
 5417:      * Performs low-cost, preliminary initialization.
 5418:      */
 5419:     public function __construct()
 5420:     {
 5421:         $this->manager = new HTMLPurifier_HTMLModuleManager(); // used during doSetup(), which unsets it afterwards
 5422:     }
 5423: 
 5424:     /**
 5425:      * @param HTMLPurifier_Config $config
 5426:      */
 5427:     protected function doSetup($config)
 5428:     {
 5429:         $this->processModules($config); // copy element and transform data out of the module manager
 5430:         $this->setupConfigStuff($config); // apply the config-driven restrictions
 5431:         unset($this->manager); // both steps above read the manager, so it is dropped only now
 5432: 
 5433:         // cleanup some of the element definitions
 5434:         foreach ($this->info as $k => $v) {
 5435:             unset($this->info[$k]->content_model);
 5436:             unset($this->info[$k]->content_model_type);
 5437:         }
 5438:     }
 5439: 
 5440:     /**
 5441:      * Extract out the information from the manager
 5442:      * @param HTMLPurifier_Config $config
 5443:      */
 5444:     protected function processModules($config)
 5445:     {
 5446:         if ($this->_anonModule) {
 5447:             // for user specific changes
 5448:             // this is late-loaded so we don't have to deal with PHP4
 5449:             // reference wonky-ness
 5450:             $this->manager->addModule($this->_anonModule);
 5451:             unset($this->_anonModule);
 5452:         }
 5453: 
 5454:         $this->manager->setup($config);
 5455:         $this->doctype = $this->manager->doctype;
 5456: 
 5457:         foreach ($this->manager->modules as $module) { // merge transforms and injectors module by module
 5458:             foreach ($module->info_tag_transform as $k => $v) {
 5459:                 if ($v === false) { // false removes whatever is currently registered under this key
 5460:                     unset($this->info_tag_transform[$k]);
 5461:                 } else {
 5462:                     $this->info_tag_transform[$k] = $v; // any other value adds or overrides the entry
 5463:                 }
 5464:             }
 5465:             foreach ($module->info_attr_transform_pre as $k => $v) { // same false-removes convention
 5466:                 if ($v === false) {
 5467:                     unset($this->info_attr_transform_pre[$k]);
 5468:                 } else {
 5469:                     $this->info_attr_transform_pre[$k] = $v;
 5470:                 }
 5471:             }
 5472:             foreach ($module->info_attr_transform_post as $k => $v) { // same false-removes convention
 5473:                 if ($v === false) {
 5474:                     unset($this->info_attr_transform_post[$k]);
 5475:                 } else {
 5476:                     $this->info_attr_transform_post[$k] = $v;
 5477:                 }
 5478:             }
 5479:             foreach ($module->info_injector as $k => $v) { // same false-removes convention
 5480:                 if ($v === false) {
 5481:                     unset($this->info_injector[$k]);
 5482:                 } else {
 5483:                     $this->info_injector[$k] = $v;
 5484:                 }
 5485:             }
 5486:         }
 5487:         $this->info = $this->manager->getElements(); // element definitions come from the manager as a whole
 5488:         $this->info_content_sets = $this->manager->contentSets->lookup;
 5489:     }
 5490: 
 5491:     /**
 5492:      * Sets up stuff based on config. We need a better way of doing this.
 5493:      * @param HTMLPurifier_Config $config
 5494:      */
 5495:     protected function setupConfigStuff($config)
 5496:     {
 5497:         $block_wrapper = $config->get('HTML.BlockWrapper');
 5498:         if (isset($this->info_content_sets['Block'][$block_wrapper])) { // wrapper must be in the Block content set
 5499:             $this->info_block_wrapper = $block_wrapper;
 5500:         } else {
 5501:             trigger_error(
 5502:                 'Cannot use non-block element as block wrapper',
 5503:                 E_USER_ERROR
 5504:             );
 5505:         }
 5506: 
 5507:         $parent = $config->get('HTML.Parent');
 5508:         $def = $this->manager->getElement($parent, true);
 5509:         if ($def) {
 5510:             $this->info_parent = $parent;
 5511:             $this->info_parent_def = $def;
 5512:         } else {
 5513:             trigger_error(
 5514:                 'Cannot use unrecognized element as parent',
 5515:                 E_USER_ERROR
 5516:             );
 5517:             $this->info_parent_def = $this->manager->getElement($this->info_parent, true); // fall back to the current info_parent
 5518:         }
 5519: 
 5520:         // support template text
 5521:         $support = "(for information on implementing this, see the support forums) ";
 5522: 
 5523:         // setup allowed elements -----------------------------------------
 5524: 
 5525:         $allowed_elements = $config->get('HTML.AllowedElements');
 5526:         $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
 5527: 
 5528:         if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { // HTML.Allowed is consulted only when neither list is an array
 5529:             $allowed = $config->get('HTML.Allowed');
 5530:             if (is_string($allowed)) {
 5531:                 list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
 5532:             }
 5533:         }
 5534: 
 5535:         if (is_array($allowed_elements)) {
 5536:             foreach ($this->info as $name => $d) {
 5537:                 if (!isset($allowed_elements[$name])) {
 5538:                     unset($this->info[$name]);
 5539:                 }
 5540:                 unset($allowed_elements[$name]); // what remains afterwards was requested but is not a known element
 5541:             }
 5542:             // emit errors
 5543:             foreach ($allowed_elements as $element => $d) {
 5544:                 $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
 5545:                 trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
 5546:             }
 5547:         }
 5548: 
 5549:         // setup allowed attributes ---------------------------------------
 5550: 
 5551:         $allowed_attributes_mutable = $allowed_attributes; // by copy! matched keys are removed below, leftovers are reported
 5552:         if (is_array($allowed_attributes)) {
 5553:             // This actually doesn't do anything, since we went away from
 5554:             // global attributes. It's possible that userland code uses
 5555:             // it, but HTMLModuleManager doesn't!
 5556:             foreach ($this->info_global_attr as $attr => $x) {
 5557:                 $keys = array($attr, "*@$attr", "*.$attr");
 5558:                 $delete = true;
 5559:                 foreach ($keys as $key) {
 5560:                     if ($delete && isset($allowed_attributes[$key])) {
 5561:                         $delete = false;
 5562:                     }
 5563:                     if (isset($allowed_attributes_mutable[$key])) {
 5564:                         unset($allowed_attributes_mutable[$key]);
 5565:                     }
 5566:                 }
 5567:                 if ($delete) {
 5568:                     unset($this->info_global_attr[$attr]);
 5569:                 }
 5570:             }
 5571: 
 5572:             foreach ($this->info as $tag => $info) {
 5573:                 foreach ($info->attr as $attr => $x) {
 5574:                     $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); // every spelling that allows this attribute
 5575:                     $delete = true;
 5576:                     foreach ($keys as $key) {
 5577:                         if ($delete && isset($allowed_attributes[$key])) {
 5578:                             $delete = false;
 5579:                         }
 5580:                         if (isset($allowed_attributes_mutable[$key])) {
 5581:                             unset($allowed_attributes_mutable[$key]);
 5582:                         }
 5583:                     }
 5584:                     if ($delete) {
 5585:                         if ($this->info[$tag]->attr[$attr]->required) {
 5586:                             trigger_error(
 5587:                                 "Required attribute '$attr' in element '$tag' " .
 5588:                                 "was not allowed, which means '$tag' will not be allowed either",
 5589:                                 E_USER_WARNING
 5590:                             );
 5591:                         }
 5592:                         unset($this->info[$tag]->attr[$attr]);
 5593:                     }
 5594:                 }
 5595:             }
 5596:             // emit errors
 5597:             foreach ($allowed_attributes_mutable as $elattr => $d) {
 5598:                 $bits = preg_split('/[.@]/', $elattr, 2); // element and attribute, split at the first dot or at-sign
 5599:                 $c = count($bits);
 5600:                 switch ($c) {
 5601:                     case 2:
 5602:                         if ($bits[0] !== '*') {
 5603:                             $element = htmlspecialchars($bits[0]);
 5604:                             $attribute = htmlspecialchars($bits[1]);
 5605:                             if (!isset($this->info[$element])) {
 5606:                                 trigger_error(
 5607:                                     "Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support",
 5608:                                     E_USER_WARNING // explicit level, matching the sibling branch below
 5609:                                 );
 5610:                             } else {
 5611:                                 trigger_error(
 5612:                                     "Attribute '$attribute' in element '$element' not supported $support",
 5613:                                     E_USER_WARNING
 5614:                                 );
 5615:                             }
 5616:                             break;
 5617:                         }
 5618:                         // otherwise fall through
 5619:                     case 1:
 5620:                         $attribute = htmlspecialchars($bits[0]);
 5621:                         trigger_error(
 5622:                             "Global attribute '$attribute' is not ".
 5623:                             "supported in any elements $support",
 5624:                             E_USER_WARNING
 5625:                         );
 5626:                         break;
 5627:                 }
 5628:             }
 5629:         }
 5630: 
 5631:         // setup forbidden elements ---------------------------------------
 5632: 
 5633:         $forbidden_elements   = $config->get('HTML.ForbiddenElements');
 5634:         $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
 5635: 
 5636:         foreach ($this->info as $tag => $info) {
 5637:             if (isset($forbidden_elements[$tag])) {
 5638:                 unset($this->info[$tag]);
 5639:                 continue;
 5640:             }
 5641:             foreach ($info->attr as $attr => $x) {
 5642:                 if (isset($forbidden_attributes["$tag@$attr"]) ||
 5643:                     isset($forbidden_attributes["*@$attr"]) ||
 5644:                     isset($forbidden_attributes[$attr])
 5645:                 ) {
 5646:                     unset($this->info[$tag]->attr[$attr]);
 5647:                     continue;
 5648:                 } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
 5649:                     // $tag.$attr are not user supplied, so no worries!
 5650:                     trigger_error(
 5651:                         "Error with $tag.$attr: tag.attr syntax not supported for " .
 5652:                         "HTML.ForbiddenAttributes; use tag@attr instead",
 5653:                         E_USER_WARNING
 5654:                     );
 5655:                 }
 5656:             }
 5657:         }
 5658:         foreach ($forbidden_attributes as $key => $v) { // second pass only reports the unsupported *.attr spelling
 5659:             if (strlen($key) < 2) {
 5660:                 continue;
 5661:             }
 5662:             if ($key[0] != '*') {
 5663:                 continue;
 5664:             }
 5665:             if ($key[1] == '.') {
 5666:                 trigger_error(
 5667:                     "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
 5668:                     E_USER_WARNING
 5669:                 );
 5670:             }
 5671:         }
 5672: 
 5673:         // setup injectors -----------------------------------------------------
 5674:         foreach ($this->info_injector as $i => $injector) {
 5675:             if ($injector->checkNeeded($config) !== false) {
 5676:                 // remove injector that does not have its required
 5677:                 // elements/attributes present, and is thus not needed.
 5678:                 unset($this->info_injector[$i]);
 5679:             }
 5680:         }
 5681:     }
 5682: 
 5683:     /**
 5684:      * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 5685:      * separate lists for processing. Format is element[attr1|attr2],element2...
 5686:      * @warning Although it's largely drawn from TinyMCE's implementation,
 5687:      *      it is different, and you'll probably have to modify your lists
 5688:      * @param string $list String list to parse
 5689:      * @return array
 5690:      * @todo Give this its own class, probably static interface
 5691:      */
 5692:     public function parseTinyMCEAllowedList($list)
 5693:     {
 5694:         $list = str_replace(array(' ', "\t"), '', $list); // strip spaces and tabs
 5695: 
 5696:         $elements = array();
 5697:         $attributes = array();
 5698: 
 5699:         $chunks = preg_split('/(,|[\n\r]+)/', $list); // entries are separated by commas or line breaks
 5700:         foreach ($chunks as $chunk) {
 5701:             if (empty($chunk)) {
 5702:                 continue;
 5703:             }
 5704:             // split the chunk into an element name and its bracketed attribute list
 5705:             if (!strpos($chunk, '[')) { // also true when the bracket is the first character (strpos() returns 0)
 5706:                 $element = $chunk;
 5707:                 $attr = false;
 5708:             } else {
 5709:                 list($element, $attr) = explode('[', $chunk);
 5710:             }
 5711:             if ($element !== '*') { // the wildcard only carries attributes, it is not recorded as an element
 5712:                 $elements[$element] = true;
 5713:             }
 5714:             if (!$attr) {
 5715:                 continue;
 5716:             }
 5717:             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
 5718:             $attr = explode('|', $attr);
 5719:             foreach ($attr as $key) {
 5720:                 $attributes["$element.$key"] = true; // keyed in element.attr form
 5721:             }
 5722:         }
 5723:         return array($elements, $attributes); // two lookup arrays: element names, then element.attr keys
 5724:     }
 5725: }
 5726: 
 5727: 
 5728: 
 5729: 
 5730: 
 5731: /**
 5732:  * Represents an XHTML 1.1 module, with information on elements, tags
 5733:  * and attributes.
 5734:  * @note Even though this is technically XHTML 1.1, it is also used for
 5735:  *       regular HTML parsing. We are using modulization as a convenient
 5736:  *       way to represent the internals of HTMLDefinition, and our
 5737:  *       implementation is by no means conforming and does not directly
 5738:  *       use the normative DTDs or XML schemas.
 5739:  * @note The public variables in a module should almost directly
 5740:  *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 5741:  *       However, the prefix info carries no special meaning in these
 5742:  *       objects (include it anyway if that's the correspondence though).
 5743:  * @todo Consider making some member functions protected
 5744:  */
 5745: 
 5746: class HTMLPurifier_HTMLModule
 5747: {
 5748: 
 5749:     // -- Overloadable ----------------------------------------------------
 5750: 
 5751:     /**
 5752:      * Short unique string identifier of the module.
 5753:      * @type string
 5754:      */
 5755:     public $name;
 5756: 
 5757:     /**
 5758:      * Informally, a list of elements this module changes.
 5759:      * Not used in any significant way.
 5760:      * @type array
 5761:      */
 5762:     public $elements = array();
 5763: 
 5764:     /**
 5765:      * Associative array of element names to element definitions.
 5766:      * Some definitions may be incomplete, to be merged in later
 5767:      * with the full definition.
 5768:      * @type array
 5769:      */
 5770:     public $info = array();
 5771: 
 5772:     /**
 5773:      * Associative array of content set names to content set additions.
 5774:      * This is commonly used to, say, add an A element to the Inline
 5775:      * content set. This corresponds to an internal variable $content_sets
 5776:      * and NOT info_content_sets member variable of HTMLDefinition.
 5777:      * @type array
 5778:      */
 5779:     public $content_sets = array();
 5780: 
 5781:     /**
 5782:      * Associative array of attribute collection names to attribute
 5783:      * collection additions. More rarely used for adding attributes to
 5784:      * the global collections. Example is the StyleAttribute module adding
 5785:      * the style attribute to the Core. Corresponds to HTMLDefinition's
 5786:      * attr_collections->info, since the object's data is only info,
 5787:      * with extra behavior associated with it.
 5788:      * @type array
 5789:      */
 5790:     public $attr_collections = array();
 5791: 
 5792:     /**
 5793:      * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
 5794:      * @type array
 5795:      */
 5796:     public $info_tag_transform = array();
 5797: 
 5798:     /**
 5799:      * List of HTMLPurifier_AttrTransform to be performed before validation.
 5800:      * @type array
 5801:      */
 5802:     public $info_attr_transform_pre = array();
 5803: 
 5804:     /**
 5805:      * List of HTMLPurifier_AttrTransform to be performed after validation.
 5806:      * @type array
 5807:      */
 5808:     public $info_attr_transform_post = array();
 5809: 
 5810:     /**
 5811:      * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
 5812:      * An injector will only be invoked if all of its prerequisites are met;
 5813:      * if an injector fails setup, there will be no error; it will simply be
 5814:      * silently disabled.
 5815:      * @type array
 5816:      */
 5817:     public $info_injector = array();
 5818: 
 5819:     /**
 5820:      * Boolean flag that indicates whether or not getChildDef is implemented.
 5821:      * For optimization reasons: may save a call to a function. Be sure
 5822:      * to set it if you do implement getChildDef(), otherwise it will have
 5823:      * no effect!
 5824:      * @type bool
 5825:      */
 5826:     public $defines_child_def = false;
 5827: 
 5828:     /**
 5829:      * Boolean flag whether or not this module is safe. If it is not safe, all
 5830:      * of its members are unsafe. Modules are safe by default (this might be
 5831:      * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
 5832:      * which is based off of safe HTML, to explicitly say, "This is safe," even
 5833:      * though there are modules which are "unsafe")
 5834:      *
 5835:      * @type bool
 5836:      * @note Previously, safety could be applied at an element level granularity.
 5837:      *       We've removed this ability, so in order to add "unsafe" elements
 5838:      *       or attributes, a dedicated module with this property set to false
 5839:      *       must be used.
 5840:      */
 5841:     public $safe = true;
 5842: 
 5843:     /**
 5844:      * Retrieves a proper HTMLPurifier_ChildDef subclass based on
 5845:      * content_model and content_model_type member variables of
 5846:      * the HTMLPurifier_ElementDef class. There is a similar function
 5847:      * in HTMLPurifier_HTMLDefinition.
 5848:      * @param HTMLPurifier_ElementDef $def
 5849:      * @return HTMLPurifier_ChildDef|bool ChildDef subclass; this base implementation returns false
 5850:      */
 5851:     public function getChildDef($def)
 5852:     {
 5853:         return false; // the base class builds no child definition and ignores $def
 5854:     }
 5855: 
 5856:     // -- Convenience -----------------------------------------------------
 5857: 
 5858:     /**
 5859:      * Convenience function that sets up a new element
 5860:      * @param string $element Name of element to add
 5861:      * @param string|bool $type What content set should element be registered to?
 5862:      *              Set as false to skip this step.
 5863:      * @param string $contents Allowed children in form of:
 5864:      *              "$content_model_type: $content_model"
 5865:      * @param array $attr_includes What attribute collections to register to
 5866:      *              element?
 5867:      * @param array $attr What unique attributes does the element define?
 5868:      * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
 5869:      * @return HTMLPurifier_ElementDef Created element definition object, so you
 5870:      *         can set advanced parameters
 5871:      */
 5872:     public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
 5873:     {
 5874:         $this->elements[] = $element; // record the name in this module's element list
 5875:         // parse content_model
 5876:         list($content_model_type, $content_model) = $this->parseContents($contents);
 5877:         // merge in attribute inclusions
 5878:         $this->mergeInAttrIncludes($attr, $attr_includes);
 5879:         // add element to content sets
 5880:         if ($type) { // a falsy $type skips content-set registration
 5881:             $this->addElementToContentSet($element, $type);
 5882:         }
 5883:         // create element
 5884:         $this->info[$element] = HTMLPurifier_ElementDef::create( // replaces any definition already stored for this name
 5885:             $content_model,
 5886:             $content_model_type,
 5887:             $attr
 5888:         );
 5889:         // literal object $contents means direct child manipulation
 5890:         if (!is_string($contents)) {
 5891:             $this->info[$element]->child = $contents;
 5892:         }
 5893:         return $this->info[$element];
 5894:     }
 5895: 
 5896:     /**
 5897:      * Convenience function that creates a totally blank, non-standalone
 5898:      * element.
 5899:      * @param string $element Name of element to create
 5900:      * @return HTMLPurifier_ElementDef Created element
 5901:      */
 5902:     public function addBlankElement($element)
 5903:     {
 5904:         if (!isset($this->info[$element])) {
 5905:             $this->elements[] = $element;
 5906:             $this->info[$element] = new HTMLPurifier_ElementDef();
 5907:             $this->info[$element]->standalone = false;
 5908:         } else {
 5909:             trigger_error("Definition for $element already exists in module, cannot redefine");
 5910:         }
 5911:         return $this->info[$element];
 5912:     }
 5913: 
 5914:     /**
 5915:      * Convenience function that registers an element to a content set
 5916:      * @param string $element Element to register
 5917:      * @param string $type Name content set (warning: case sensitive, usually upper-case
 5918:      *        first letter)
 5919:      */
 5920:     public function addElementToContentSet($element, $type)
 5921:     {
 5922:         if (!isset($this->content_sets[$type])) {
 5923:             $this->content_sets[$type] = '';
 5924:         } else {
 5925:             $this->content_sets[$type] .= ' | ';
 5926:         }
 5927:         $this->content_sets[$type] .= $element;
 5928:     }
 5929: 
 5930:     /**
 5931:      * Convenience function that transforms single-string contents
 5932:      * into separate content model and content model type
 5933:      * @param string $contents Allowed children in form of:
 5934:      *                  "$content_model_type: $content_model"
 5935:      * @return array
 5936:      * @note If contents is an object, an array of two nulls will be
 5937:      *       returned, and the callee needs to take the original $contents
 5938:      *       and use it directly.
 5939:      */
 5940:     public function parseContents($contents)
 5941:     {
 5942:         if (!is_string($contents)) {
 5943:             return array(null, null);
 5944:         } // defer
 5945:         switch ($contents) {
 5946:             // check for shorthand content model forms
 5947:             case 'Empty':
 5948:                 return array('empty', '');
 5949:             case 'Inline':
 5950:                 return array('optional', 'Inline | #PCDATA');
 5951:             case 'Flow':
 5952:                 return array('optional', 'Flow | #PCDATA');
 5953:         }
 5954:         list($content_model_type, $content_model) = explode(':', $contents);
 5955:         $content_model_type = strtolower(trim($content_model_type));
 5956:         $content_model = trim($content_model);
 5957:         return array($content_model_type, $content_model);
 5958:     }
 5959: 
 5960:     /**
 5961:      * Convenience function that merges a list of attribute includes into
 5962:      * an attribute array.
 5963:      * @param array $attr Reference to attr array to modify
 5964:      * @param array $attr_includes Array of includes / string include to merge in
 5965:      */
 5966:     public function mergeInAttrIncludes(&$attr, $attr_includes)
 5967:     {
 5968:         if (!is_array($attr_includes)) {
 5969:             if (empty($attr_includes)) {
 5970:                 $attr_includes = array();
 5971:             } else {
 5972:                 $attr_includes = array($attr_includes);
 5973:             }
 5974:         }
 5975:         $attr[0] = $attr_includes;
 5976:     }
 5977: 
 5978:     /**
 5979:      * Convenience function that generates a lookup table with boolean
 5980:      * true as value.
 5981:      * @param string $list List of values to turn into a lookup
 5982:      * @note You can also pass an arbitrary number of arguments in
 5983:      *       place of the regular argument
 5984:      * @return array array equivalent of list
 5985:      */
 5986:     public function makeLookup($list)
 5987:     {
 5988:         if (is_string($list)) {
 5989:             $list = func_get_args();
 5990:         }
 5991:         $ret = array();
 5992:         foreach ($list as $value) {
 5993:             if (is_null($value)) {
 5994:                 continue;
 5995:             }
 5996:             $ret[$value] = true;
 5997:         }
 5998:         return $ret;
 5999:     }
 6000: 
    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available.
     *
     * The implementation in this class is a no-op: the body is empty and
     * nothing is returned.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
 6010: }
 6011: 
 6012: 
 6013: 
 6014: 
 6015: 
 6016: class HTMLPurifier_HTMLModuleManager
 6017: {
 6018: 
 6019:     /**
 6020:      * @type HTMLPurifier_DoctypeRegistry
 6021:      */
 6022:     public $doctypes;
 6023: 
 6024:     /**
 6025:      * Instance of current doctype.
 6026:      * @type string
 6027:      */
 6028:     public $doctype;
 6029: 
 6030:     /**
 6031:      * @type HTMLPurifier_AttrTypes
 6032:      */
 6033:     public $attrTypes;
 6034: 
 6035:     /**
 6036:      * Active instances of modules for the specified doctype are
 6037:      * indexed, by name, in this array.
 6038:      * @type HTMLPurifier_HTMLModule[]
 6039:      */
 6040:     public $modules = array();
 6041: 
 6042:     /**
 6043:      * Array of recognized HTMLPurifier_HTMLModule instances,
 6044:      * indexed by module's class name. This array is usually lazy loaded, but a
 6045:      * user can overload a module by pre-emptively registering it.
 6046:      * @type HTMLPurifier_HTMLModule[]
 6047:      */
 6048:     public $registeredModules = array();
 6049: 
 6050:     /**
 6051:      * List of extra modules that were added by the user
 6052:      * using addModule(). These get unconditionally merged into the current doctype, whatever
 6053:      * it may be.
 6054:      * @type HTMLPurifier_HTMLModule[]
 6055:      */
 6056:     public $userModules = array();
 6057: 
 6058:     /**
 6059:      * Associative array of element name to list of modules that have
 6060:      * definitions for the element; this array is dynamically filled.
 6061:      * @type array
 6062:      */
 6063:     public $elementLookup = array();
 6064: 
 6065:     /**
 6066:      * List of prefixes we should use for registering small names.
 6067:      * @type array
 6068:      */
 6069:     public $prefixes = array('HTMLPurifier_HTMLModule_');
 6070: 
 6071:     /**
 6072:      * @type HTMLPurifier_ContentSets
 6073:      */
 6074:     public $contentSets;
 6075: 
 6076:     /**
 6077:      * @type HTMLPurifier_AttrCollections
 6078:      */
 6079:     public $attrCollections;
 6080: 
 6081:     /**
 6082:      * If set to true, unsafe elements and attributes will be allowed.
 6083:      * @type bool
 6084:      */
 6085:     public $trusted = false;
 6086: 
 6087:     public function __construct()
 6088:     {
 6089:         // editable internal objects
 6090:         $this->attrTypes = new HTMLPurifier_AttrTypes();
 6091:         $this->doctypes  = new HTMLPurifier_DoctypeRegistry();
 6092: 
 6093:         // setup basic modules
 6094:         $common = array(
 6095:             'CommonAttributes', 'Text', 'Hypertext', 'List',
 6096:             'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
 6097:             'StyleAttribute',
 6098:             // Unsafe:
 6099:             'Scripting', 'Object', 'Forms',
 6100:             // Sorta legacy, but present in strict:
 6101:             'Name',
 6102:         );
 6103:         $transitional = array('Legacy', 'Target', 'Iframe');
 6104:         $xml = array('XMLCommonAttributes');
 6105:         $non_xml = array('NonXMLCommonAttributes');
 6106: 
 6107:         // setup basic doctypes
 6108:         $this->doctypes->register(
 6109:             'HTML 4.01 Transitional',
 6110:             false,
 6111:             array_merge($common, $transitional, $non_xml),
 6112:             array('Tidy_Transitional', 'Tidy_Proprietary'),
 6113:             array(),
 6114:             '-//W3C//DTD HTML 4.01 Transitional//EN',
 6115:             'http://www.w3.org/TR/html4/loose.dtd'
 6116:         );
 6117: 
 6118:         $this->doctypes->register(
 6119:             'HTML 4.01 Strict',
 6120:             false,
 6121:             array_merge($common, $non_xml),
 6122:             array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
 6123:             array(),
 6124:             '-//W3C//DTD HTML 4.01//EN',
 6125:             'http://www.w3.org/TR/html4/strict.dtd'
 6126:         );
 6127: 
 6128:         $this->doctypes->register(
 6129:             'XHTML 1.0 Transitional',
 6130:             true,
 6131:             array_merge($common, $transitional, $xml, $non_xml),
 6132:             array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
 6133:             array(),
 6134:             '-//W3C//DTD XHTML 1.0 Transitional//EN',
 6135:             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
 6136:         );
 6137: 
 6138:         $this->doctypes->register(
 6139:             'XHTML 1.0 Strict',
 6140:             true,
 6141:             array_merge($common, $xml, $non_xml),
 6142:             array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
 6143:             array(),
 6144:             '-//W3C//DTD XHTML 1.0 Strict//EN',
 6145:             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
 6146:         );
 6147: 
 6148:         $this->doctypes->register(
 6149:             'XHTML 1.1',
 6150:             true,
 6151:             // Iframe is a real XHTML 1.1 module, despite being
 6152:             // "transitional"!
 6153:             array_merge($common, $xml, array('Ruby', 'Iframe')),
 6154:             array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
 6155:             array(),
 6156:             '-//W3C//DTD XHTML 1.1//EN',
 6157:             'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
 6158:         );
 6159: 
 6160:     }
 6161: 
 6162:     /**
 6163:      * Registers a module to the recognized module list, useful for
 6164:      * overloading pre-existing modules.
 6165:      * @param $module Mixed: string module name, with or without
 6166:      *                HTMLPurifier_HTMLModule prefix, or instance of
 6167:      *                subclass of HTMLPurifier_HTMLModule.
 6168:      * @param $overload Boolean whether or not to overload previous modules.
 6169:      *                  If this is not set, and you do overload a module,
 6170:      *                  HTML Purifier will complain with a warning.
 6171:      * @note This function will not call autoload, you must instantiate
 6172:      *       (and thus invoke) autoload outside the method.
 6173:      * @note If a string is passed as a module name, different variants
 6174:      *       will be tested in this order:
 6175:      *          - Check for HTMLPurifier_HTMLModule_$name
 6176:      *          - Check all prefixes with $name in order they were added
 6177:      *          - Check for literal object name
 6178:      *          - Throw fatal error
 6179:      *       If your object name collides with an internal class, specify
 6180:      *       your module manually. All modules must have been included
 6181:      *       externally: registerModule will not perform inclusions for you!
 6182:      */
 6183:     public function registerModule($module, $overload = false)
 6184:     {
 6185:         if (is_string($module)) {
 6186:             // attempt to load the module
 6187:             $original_module = $module;
 6188:             $ok = false;
 6189:             foreach ($this->prefixes as $prefix) {
 6190:                 $module = $prefix . $original_module;
 6191:                 if (class_exists($module)) {
 6192:                     $ok = true;
 6193:                     break;
 6194:                 }
 6195:             }
 6196:             if (!$ok) {
 6197:                 $module = $original_module;
 6198:                 if (!class_exists($module)) {
 6199:                     trigger_error(
 6200:                         $original_module . ' module does not exist',
 6201:                         E_USER_ERROR
 6202:                     );
 6203:                     return;
 6204:                 }
 6205:             }
 6206:             $module = new $module();
 6207:         }
 6208:         if (empty($module->name)) {
 6209:             trigger_error('Module instance of ' . get_class($module) . ' must have name');
 6210:             return;
 6211:         }
 6212:         if (!$overload && isset($this->registeredModules[$module->name])) {
 6213:             trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
 6214:         }
 6215:         $this->registeredModules[$module->name] = $module;
 6216:     }
 6217: 
 6218:     /**
 6219:      * Adds a module to the current doctype by first registering it,
 6220:      * and then tacking it on to the active doctype
 6221:      */
 6222:     public function addModule($module)
 6223:     {
 6224:         $this->registerModule($module);
 6225:         if (is_object($module)) {
 6226:             $module = $module->name;
 6227:         }
 6228:         $this->userModules[] = $module;
 6229:     }
 6230: 
 6231:     /**
 6232:      * Adds a class prefix that registerModule() will use to resolve a
 6233:      * string name to a concrete class
 6234:      */
 6235:     public function addPrefix($prefix)
 6236:     {
 6237:         $this->prefixes[] = $prefix;
 6238:     }
 6239: 
 6240:     /**
 6241:      * Performs processing on modules, after being called you may
 6242:      * use getElement() and getElements()
 6243:      * @param HTMLPurifier_Config $config
 6244:      */
 6245:     public function setup($config)
 6246:     {
 6247:         $this->trusted = $config->get('HTML.Trusted');
 6248: 
 6249:         // generate
 6250:         $this->doctype = $this->doctypes->make($config);
 6251:         $modules = $this->doctype->modules;
 6252: 
 6253:         // take out the default modules that aren't allowed
 6254:         $lookup = $config->get('HTML.AllowedModules');
 6255:         $special_cases = $config->get('HTML.CoreModules');
 6256: 
 6257:         if (is_array($lookup)) {
 6258:             foreach ($modules as $k => $m) {
 6259:                 if (isset($special_cases[$m])) {
 6260:                     continue;
 6261:                 }
 6262:                 if (!isset($lookup[$m])) {
 6263:                     unset($modules[$k]);
 6264:                 }
 6265:             }
 6266:         }
 6267: 
 6268:         // custom modules
 6269:         if ($config->get('HTML.Proprietary')) {
 6270:             $modules[] = 'Proprietary';
 6271:         }
 6272:         if ($config->get('HTML.SafeObject')) {
 6273:             $modules[] = 'SafeObject';
 6274:         }
 6275:         if ($config->get('HTML.SafeEmbed')) {
 6276:             $modules[] = 'SafeEmbed';
 6277:         }
 6278:         if ($config->get('HTML.SafeScripting') !== array()) {
 6279:             $modules[] = 'SafeScripting';
 6280:         }
 6281:         if ($config->get('HTML.Nofollow')) {
 6282:             $modules[] = 'Nofollow';
 6283:         }
 6284:         if ($config->get('HTML.TargetBlank')) {
 6285:             $modules[] = 'TargetBlank';
 6286:         }
 6287: 
 6288:         // merge in custom modules
 6289:         $modules = array_merge($modules, $this->userModules);
 6290: 
 6291:         foreach ($modules as $module) {
 6292:             $this->processModule($module);
 6293:             $this->modules[$module]->setup($config);
 6294:         }
 6295: 
 6296:         foreach ($this->doctype->tidyModules as $module) {
 6297:             $this->processModule($module);
 6298:             $this->modules[$module]->setup($config);
 6299:         }
 6300: 
 6301:         // prepare any injectors
 6302:         foreach ($this->modules as $module) {
 6303:             $n = array();
 6304:             foreach ($module->info_injector as $injector) {
 6305:                 if (!is_object($injector)) {
 6306:                     $class = "HTMLPurifier_Injector_$injector";
 6307:                     $injector = new $class;
 6308:                 }
 6309:                 $n[$injector->name] = $injector;
 6310:             }
 6311:             $module->info_injector = $n;
 6312:         }
 6313: 
 6314:         // setup lookup table based on all valid modules
 6315:         foreach ($this->modules as $module) {
 6316:             foreach ($module->info as $name => $def) {
 6317:                 if (!isset($this->elementLookup[$name])) {
 6318:                     $this->elementLookup[$name] = array();
 6319:                 }
 6320:                 $this->elementLookup[$name][] = $module->name;
 6321:             }
 6322:         }
 6323: 
 6324:         // note the different choice
 6325:         $this->contentSets = new HTMLPurifier_ContentSets(
 6326:             // content set assembly deals with all possible modules,
 6327:             // not just ones deemed to be "safe"
 6328:             $this->modules
 6329:         );
 6330:         $this->attrCollections = new HTMLPurifier_AttrCollections(
 6331:             $this->attrTypes,
 6332:             // there is no way to directly disable a global attribute,
 6333:             // but using AllowedAttributes or simply not including
 6334:             // the module in your custom doctype should be sufficient
 6335:             $this->modules
 6336:         );
 6337:     }
 6338: 
 6339:     /**
 6340:      * Takes a module and adds it to the active module collection,
 6341:      * registering it if necessary.
 6342:      */
 6343:     public function processModule($module)
 6344:     {
 6345:         if (!isset($this->registeredModules[$module]) || is_object($module)) {
 6346:             $this->registerModule($module);
 6347:         }
 6348:         $this->modules[$module] = $this->registeredModules[$module];
 6349:     }
 6350: 
 6351:     /**
 6352:      * Retrieves merged element definitions.
 6353:      * @return Array of HTMLPurifier_ElementDef
 6354:      */
 6355:     public function getElements()
 6356:     {
 6357:         $elements = array();
 6358:         foreach ($this->modules as $module) {
 6359:             if (!$this->trusted && !$module->safe) {
 6360:                 continue;
 6361:             }
 6362:             foreach ($module->info as $name => $v) {
 6363:                 if (isset($elements[$name])) {
 6364:                     continue;
 6365:                 }
 6366:                 $elements[$name] = $this->getElement($name);
 6367:             }
 6368:         }
 6369: 
 6370:         // remove dud elements, this happens when an element that
 6371:         // appeared to be safe actually wasn't
 6372:         foreach ($elements as $n => $v) {
 6373:             if ($v === false) {
 6374:                 unset($elements[$n]);
 6375:             }
 6376:         }
 6377: 
 6378:         return $elements;
 6379: 
 6380:     }
 6381: 
 6382:     /**
 6383:      * Retrieves a single merged element definition
 6384:      * @param string $name Name of element
 6385:      * @param bool $trusted Boolean trusted overriding parameter: set to true
 6386:      *                 if you want the full version of an element
 6387:      * @return HTMLPurifier_ElementDef Merged HTMLPurifier_ElementDef
 6388:      * @note You may notice that modules are getting iterated over twice (once
 6389:      *       in getElements() and once here). This
 6390:      *       is because
 6391:      */
 6392:     public function getElement($name, $trusted = null)
 6393:     {
 6394:         if (!isset($this->elementLookup[$name])) {
 6395:             return false;
 6396:         }
 6397: 
 6398:         // setup global state variables
 6399:         $def = false;
 6400:         if ($trusted === null) {
 6401:             $trusted = $this->trusted;
 6402:         }
 6403: 
 6404:         // iterate through each module that has registered itself to this
 6405:         // element
 6406:         foreach ($this->elementLookup[$name] as $module_name) {
 6407:             $module = $this->modules[$module_name];
 6408: 
 6409:             // refuse to create/merge from a module that is deemed unsafe--
 6410:             // pretend the module doesn't exist--when trusted mode is not on.
 6411:             if (!$trusted && !$module->safe) {
 6412:                 continue;
 6413:             }
 6414: 
 6415:             // clone is used because, ideally speaking, the original
 6416:             // definition should not be modified. Usually, this will
 6417:             // make no difference, but for consistency's sake
 6418:             $new_def = clone $module->info[$name];
 6419: 
 6420:             if (!$def && $new_def->standalone) {
 6421:                 $def = $new_def;
 6422:             } elseif ($def) {
 6423:                 // This will occur even if $new_def is standalone. In practice,
 6424:                 // this will usually result in a full replacement.
 6425:                 $def->mergeIn($new_def);
 6426:             } else {
 6427:                 // :TODO:
 6428:                 // non-standalone definitions that don't have a standalone
 6429:                 // to merge into could be deferred to the end
 6430:                 // HOWEVER, it is perfectly valid for a non-standalone
 6431:                 // definition to lack a standalone definition, even
 6432:                 // after all processing: this allows us to safely
 6433:                 // specify extra attributes for elements that may not be
 6434:                 // enabled all in one place.  In particular, this might
 6435:                 // be the case for trusted elements.  WARNING: care must
 6436:                 // be taken that the /extra/ definitions are all safe.
 6437:                 continue;
 6438:             }
 6439: 
 6440:             // attribute value expansions
 6441:             $this->attrCollections->performInclusions($def->attr);
 6442:             $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
 6443: 
 6444:             // descendants_are_inline, for ChildDef_Chameleon
 6445:             if (is_string($def->content_model) &&
 6446:                 strpos($def->content_model, 'Inline') !== false) {
 6447:                 if ($name != 'del' && $name != 'ins') {
 6448:                     // this is for you, ins/del
 6449:                     $def->descendants_are_inline = true;
 6450:                 }
 6451:             }
 6452: 
 6453:             $this->contentSets->generateChildDef($def, $module);
 6454:         }
 6455: 
 6456:         // This can occur if there is a blank definition, but no base to
 6457:         // mix it in with
 6458:         if (!$def) {
 6459:             return false;
 6460:         }
 6461: 
 6462:         // add information on required attributes
 6463:         foreach ($def->attr as $attr_name => $attr_def) {
 6464:             if ($attr_def->required) {
 6465:                 $def->required_attr[] = $attr_name;
 6466:             }
 6467:         }
 6468:         return $def;
 6469:     }
 6470: }
 6471: 
 6472: 
 6473: 
 6474: 
 6475: 
 6476: /**
 6477:  * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
 6478:  * @note In Slashdot-speak, dupe means duplicate.
 6479:  * @note The default constructor does not accept $config or $context objects:
 6480:  *       use must use the static build() factory method to perform initialization.
 6481:  */
 6482: class HTMLPurifier_IDAccumulator
 6483: {
 6484: 
 6485:     /**
 6486:      * Lookup table of IDs we've accumulated.
 6487:      * @public
 6488:      */
 6489:     public $ids = array();
 6490: 
 6491:     /**
 6492:      * Builds an IDAccumulator, also initializing the default blacklist
 6493:      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
 6494:      * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
 6495:      * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
 6496:      */
 6497:     public static function build($config, $context)
 6498:     {
 6499:         $id_accumulator = new HTMLPurifier_IDAccumulator();
 6500:         $id_accumulator->load($config->get('Attr.IDBlacklist'));
 6501:         return $id_accumulator;
 6502:     }
 6503: 
 6504:     /**
 6505:      * Add an ID to the lookup table.
 6506:      * @param string $id ID to be added.
 6507:      * @return bool status, true if success, false if there's a dupe
 6508:      */
 6509:     public function add($id)
 6510:     {
 6511:         if (isset($this->ids[$id])) {
 6512:             return false;
 6513:         }
 6514:         return $this->ids[$id] = true;
 6515:     }
 6516: 
 6517:     /**
 6518:      * Load a list of IDs into the lookup table
 6519:      * @param $array_of_ids Array of IDs to load
 6520:      * @note This function doesn't care about duplicates
 6521:      */
 6522:     public function load($array_of_ids)
 6523:     {
 6524:         foreach ($array_of_ids as $id) {
 6525:             $this->ids[$id] = true;
 6526:         }
 6527:     }
 6528: }
 6529: 
 6530: 
 6531: 
 6532: 
 6533: 
 6534: /**
 6535:  * Injects tokens into the document while parsing for well-formedness.
 6536:  * This enables "formatter-like" functionality such as auto-paragraphing,
 6537:  * smiley-ification and linkification to take place.
 6538:  *
 6539:  * A note on how handlers create changes; this is done by assigning a new
 6540:  * value to the $token reference. These values can take a variety of forms and
 6541:  * are best described HTMLPurifier_Strategy_MakeWellFormed->processToken()
 6542:  * documentation.
 6543:  *
 6544:  * @todo Allow injectors to request a re-run on their output. This
 6545:  *       would help if an operation is recursive.
 6546:  */
 6547: abstract class HTMLPurifier_Injector
 6548: {
 6549: 
 6550:     /**
 6551:      * Advisory name of injector, this is for friendly error messages.
 6552:      * @type string
 6553:      */
 6554:     public $name;
 6555: 
 6556:     /**
 6557:      * @type HTMLPurifier_HTMLDefinition
 6558:      */
 6559:     protected $htmlDefinition;
 6560: 
 6561:     /**
 6562:      * Reference to CurrentNesting variable in Context. This is an array
 6563:      * list of tokens that we are currently "inside"
 6564:      * @type array
 6565:      */
 6566:     protected $currentNesting;
 6567: 
 6568:     /**
 6569:      * Reference to current token.
 6570:      * @type HTMLPurifier_Token
 6571:      */
 6572:     protected $currentToken;
 6573: 
 6574:     /**
 6575:      * Reference to InputZipper variable in Context.
 6576:      * @type HTMLPurifier_Zipper
 6577:      */
 6578:     protected $inputZipper;
 6579: 
 6580:     /**
 6581:      * Array of elements and attributes this injector creates and therefore
 6582:      * need to be allowed by the definition. Takes form of
 6583:      * array('element' => array('attr', 'attr2'), 'element2')
 6584:      * @type array
 6585:      */
 6586:     public $needed = array();
 6587: 
 6588:     /**
 6589:      * Number of elements to rewind backwards (relative).
 6590:      * @type bool|int
 6591:      */
 6592:     protected $rewindOffset = false;
 6593: 
    /**
     * Rewind to a spot to re-perform processing. This is useful if you
     * deleted a node, and now need to see if this change affected any
     * earlier nodes. Rewinding does not affect other injectors, and can
     * result in infinite loops if not used carefully.
     *
     * This method only records the requested offset in $this->rewindOffset;
     * the value is handed out (and reset) by getRewindOffset().
     * @param bool|int $offset Number of elements to rewind, or false for none
     * @warning HTML Purifier will prevent you from fast-forwarding with this
     *          function.
     */
    public function rewindOffset($offset)
    {
        $this->rewindOffset = $offset;
    }
 6607: 
 6608:     /**
 6609:      * Retrieves rewind offset, and then unsets it.
 6610:      * @return bool|int
 6611:      */
 6612:     public function getRewindOffset()
 6613:     {
 6614:         $r = $this->rewindOffset;
 6615:         $this->rewindOffset = false;
 6616:         return $r;
 6617:     }
 6618: 
    /**
     * Prepares the injector by giving it the config and context objects:
     * this allows references to important variables to be made within
     * the injector. This function also checks if the HTML environment
     * will work with the Injector (see checkNeeded()).
     *
     * When checkNeeded() reports a missing element or attribute, that
     * result is returned immediately and the context references
     * (currentNesting, currentToken, inputZipper) are NOT bound.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();
        // Even though this might fail, some unit tests ignore this and
        // still test checkNeeded, so be careful. Maybe get rid of that
        // dependency.
        $result = $this->checkNeeded($config);
        if ($result !== false) {
            // environment lacks something this injector needs: bail out early
            return $result;
        }
        // Bind by reference (=&) to the context's variables rather than
        // copying their current values.
        $this->currentNesting =& $context->get('CurrentNesting');
        $this->currentToken   =& $context->get('CurrentToken');
        $this->inputZipper    =& $context->get('InputZipper');
        return false;
    }
 6643: 
 6644:     /**
 6645:      * This function checks if the HTML environment
 6646:      * will work with the Injector: if p tags are not allowed, the
 6647:      * Auto-Paragraphing injector should not be enabled.
 6648:      * @param HTMLPurifier_Config $config
 6649:      * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
 6650:      */
 6651:     public function checkNeeded($config)
 6652:     {
 6653:         $def = $config->getHTMLDefinition();
 6654:         foreach ($this->needed as $element => $attributes) {
 6655:             if (is_int($element)) {
 6656:                 $element = $attributes;
 6657:             }
 6658:             if (!isset($def->info[$element])) {
 6659:                 return $element;
 6660:             }
 6661:             if (!is_array($attributes)) {
 6662:                 continue;
 6663:             }
 6664:             foreach ($attributes as $name) {
 6665:                 if (!isset($def->info[$element]->attr[$name])) {
 6666:                     return "$element.$name";
 6667:                 }
 6668:             }
 6669:         }
 6670:         return false;
 6671:     }
 6672: 
 6673:     /**
 6674:      * Tests if the context node allows a certain element
 6675:      * @param string $name Name of element to test for
 6676:      * @return bool True if element is allowed, false if it is not
 6677:      */
 6678:     public function allowsElement($name)
 6679:     {
 6680:         if (!empty($this->currentNesting)) {
 6681:             $parent_token = array_pop($this->currentNesting);
 6682:             $this->currentNesting[] = $parent_token;
 6683:             $parent = $this->htmlDefinition->info[$parent_token->name];
 6684:         } else {
 6685:             $parent = $this->htmlDefinition->info_parent_def;
 6686:         }
 6687:         if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
 6688:             return false;
 6689:         }
 6690:         // check for exclusion
 6691:         for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
 6692:             $node = $this->currentNesting[$i];
 6693:             $def  = $this->htmlDefinition->info[$node->name];
 6694:             if (isset($def->excludes[$name])) {
 6695:                 return false;
 6696:             }
 6697:         }
 6698:         return true;
 6699:     }
 6700: 
 6701:     /**
 6702:      * Iterator function, which starts with the next token and continues until
 6703:      * you reach the end of the input tokens.
 6704:      * @warning Please prevent previous references from interfering with this
 6705:      *          functions by setting $i = null beforehand!
 6706:      * @param int $i Current integer index variable for inputTokens
 6707:      * @param HTMLPurifier_Token $current Current token variable.
 6708:      *          Do NOT use $token, as that variable is also a reference
 6709:      * @return bool
 6710:      */
 6711:     protected function forward(&$i, &$current)
 6712:     {
 6713:         if ($i === null) {
 6714:             $i = count($this->inputZipper->back) - 1;
 6715:         } else {
 6716:             $i--;
 6717:         }
 6718:         if ($i < 0) {
 6719:             return false;
 6720:         }
 6721:         $current = $this->inputZipper->back[$i];
 6722:         return true;
 6723:     }
 6724: 
 6725:     /**
 6726:      * Similar to _forward, but accepts a third parameter $nesting (which
 6727:      * should be initialized at 0) and stops when we hit the end tag
 6728:      * for the node $this->inputIndex starts in.
 6729:      * @param int $i Current integer index variable for inputTokens
 6730:      * @param HTMLPurifier_Token $current Current token variable.
 6731:      *          Do NOT use $token, as that variable is also a reference
 6732:      * @param int $nesting
 6733:      * @return bool
 6734:      */
 6735:     protected function forwardUntilEndToken(&$i, &$current, &$nesting)
 6736:     {
 6737:         $result = $this->forward($i, $current);
 6738:         if (!$result) {
 6739:             return false;
 6740:         }
 6741:         if ($nesting === null) {
 6742:             $nesting = 0;
 6743:         }
 6744:         if ($current instanceof HTMLPurifier_Token_Start) {
 6745:             $nesting++;
 6746:         } elseif ($current instanceof HTMLPurifier_Token_End) {
 6747:             if ($nesting <= 0) {
 6748:                 return false;
 6749:             }
 6750:             $nesting--;
 6751:         }
 6752:         return true;
 6753:     }
 6754: 
 6755:     /**
 6756:      * Iterator function, starts with the previous token and continues until
 6757:      * you reach the beginning of input tokens.
 6758:      * @warning Please prevent previous references from interfering with this
 6759:      *          functions by setting $i = null beforehand!
 6760:      * @param int $i Current integer index variable for inputTokens
 6761:      * @param HTMLPurifier_Token $current Current token variable.
 6762:      *          Do NOT use $token, as that variable is also a reference
 6763:      * @return bool
 6764:      */
 6765:     protected function backward(&$i, &$current)
 6766:     {
 6767:         if ($i === null) {
 6768:             $i = count($this->inputZipper->front) - 1;
 6769:         } else {
 6770:             $i--;
 6771:         }
 6772:         if ($i < 0) {
 6773:             return false;
 6774:         }
 6775:         $current = $this->inputZipper->front[$i];
 6776:         return true;
 6777:     }
 6778: 
 6779:     /**
 6780:      * Handler that is called when a text token is processed
 6781:      */
 6782:     public function handleText(&$token)
 6783:     {
 6784:     }
 6785: 
 6786:     /**
 6787:      * Handler that is called when a start or empty token is processed
 6788:      */
 6789:     public function handleElement(&$token)
 6790:     {
 6791:     }
 6792: 
 6793:     /**
 6794:      * Handler that is called when an end token is processed
 6795:      */
 6796:     public function handleEnd(&$token)
 6797:     {
 6798:         $this->notifyEnd($token);
 6799:     }
 6800: 
 6801:     /**
 6802:      * Notifier that is called when an end token is processed
 6803:      * @param HTMLPurifier_Token $token Current token variable.
 6804:      * @note This differs from handlers in that the token is read-only
 6805:      * @deprecated
 6806:      */
 6807:     public function notifyEnd($token)
 6808:     {
 6809:     }
 6810: }
 6811: 
 6812: 
 6813: 
 6814: 
 6815: 
 6816: /**
 6817:  * Represents a language and defines localizable string formatting and
 6818:  * other functions, as well as the localized messages for HTML Purifier.
 6819:  */
 6820: class HTMLPurifier_Language
 6821: {
 6822: 
 6823:     /**
 6824:      * ISO 639 language code of language. Prefers shortest possible version.
 6825:      * @type string
 6826:      */
 6827:     public $code = 'en';
 6828: 
 6829:     /**
 6830:      * Fallback language code.
 6831:      * @type bool|string
 6832:      */
 6833:     public $fallback = false;
 6834: 
 6835:     /**
 6836:      * Array of localizable messages.
 6837:      * @type array
 6838:      */
 6839:     public $messages = array();
 6840: 
 6841:     /**
 6842:      * Array of localizable error codes.
 6843:      * @type array
 6844:      */
 6845:     public $errorNames = array();
 6846: 
 6847:     /**
 6848:      * True if no message file was found for this language, so English
 6849:      * is being used instead. Check this if you'd like to notify the
 6850:      * user that they've used a non-supported language.
 6851:      * @type bool
 6852:      */
 6853:     public $error = false;
 6854: 
 6855:     /**
 6856:      * Has the language object been loaded yet?
 6857:      * @type bool
 6858:      * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
 6859:      */
 6860:     public $_loaded = false;
 6861: 
 6862:     /**
 6863:      * @type HTMLPurifier_Config
 6864:      */
 6865:     protected $config;
 6866: 
 6867:     /**
 6868:      * @type HTMLPurifier_Context
 6869:      */
 6870:     protected $context;
 6871: 
 6872:     /**
 6873:      * @param HTMLPurifier_Config $config
 6874:      * @param HTMLPurifier_Context $context
 6875:      */
 6876:     public function __construct($config, $context)
 6877:     {
 6878:         $this->config  = $config;
 6879:         $this->context = $context;
 6880:     }
 6881: 
 6882:     /**
 6883:      * Loads language object with necessary info from factory cache
 6884:      * @note This is a lazy loader
 6885:      */
 6886:     public function load()
 6887:     {
 6888:         if ($this->_loaded) {
 6889:             return;
 6890:         }
 6891:         $factory = HTMLPurifier_LanguageFactory::instance();
 6892:         $factory->loadLanguage($this->code);
 6893:         foreach ($factory->keys as $key) {
 6894:             $this->$key = $factory->cache[$this->code][$key];
 6895:         }
 6896:         $this->_loaded = true;
 6897:     }
 6898: 
 6899:     /**
 6900:      * Retrieves a localised message.
 6901:      * @param string $key string identifier of message
 6902:      * @return string localised message
 6903:      */
 6904:     public function getMessage($key)
 6905:     {
 6906:         if (!$this->_loaded) {
 6907:             $this->load();
 6908:         }
 6909:         if (!isset($this->messages[$key])) {
 6910:             return "[$key]";
 6911:         }
 6912:         return $this->messages[$key];
 6913:     }
 6914: 
 6915:     /**
 6916:      * Retrieves a localised error name.
 6917:      * @param int $int error number, corresponding to PHP's error reporting
 6918:      * @return string localised message
 6919:      */
 6920:     public function getErrorName($int)
 6921:     {
 6922:         if (!$this->_loaded) {
 6923:             $this->load();
 6924:         }
 6925:         if (!isset($this->errorNames[$int])) {
 6926:             return "[Error: $int]";
 6927:         }
 6928:         return $this->errorNames[$int];
 6929:     }
 6930: 
 6931:     /**
 6932:      * Converts an array list into a string readable representation
 6933:      * @param array $array
 6934:      * @return string
 6935:      */
 6936:     public function listify($array)
 6937:     {
 6938:         $sep      = $this->getMessage('Item separator');
 6939:         $sep_last = $this->getMessage('Item separator last');
 6940:         $ret = '';
 6941:         for ($i = 0, $c = count($array); $i < $c; $i++) {
 6942:             if ($i == 0) {
 6943:             } elseif ($i + 1 < $c) {
 6944:                 $ret .= $sep;
 6945:             } else {
 6946:                 $ret .= $sep_last;
 6947:             }
 6948:             $ret .= $array[$i];
 6949:         }
 6950:         return $ret;
 6951:     }
 6952: 
 6953:     /**
 6954:      * Formats a localised message with passed parameters
 6955:      * @param string $key string identifier of message
 6956:      * @param array $args Parameters to substitute in
 6957:      * @return string localised message
 6958:      * @todo Implement conditionals? Right now, some messages make
 6959:      *     reference to line numbers, but those aren't always available
 6960:      */
 6961:     public function formatMessage($key, $args = array())
 6962:     {
 6963:         if (!$this->_loaded) {
 6964:             $this->load();
 6965:         }
 6966:         if (!isset($this->messages[$key])) {
 6967:             return "[$key]";
 6968:         }
 6969:         $raw = $this->messages[$key];
 6970:         $subst = array();
 6971:         $generator = false;
 6972:         foreach ($args as $i => $value) {
 6973:             if (is_object($value)) {
 6974:                 if ($value instanceof HTMLPurifier_Token) {
 6975:                     // factor this out some time
 6976:                     if (!$generator) {
 6977:                         $generator = $this->context->get('Generator');
 6978:                     }
 6979:                     if (isset($value->name)) {
 6980:                         $subst['$'.$i.'.Name'] = $value->name;
 6981:                     }
 6982:                     if (isset($value->data)) {
 6983:                         $subst['$'.$i.'.Data'] = $value->data;
 6984:                     }
 6985:                     $subst['$'.$i.'.Compact'] =
 6986:                     $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
 6987:                     // a more complex algorithm for compact representation
 6988:                     // could be introduced for all types of tokens. This
 6989:                     // may need to be factored out into a dedicated class
 6990:                     if (!empty($value->attr)) {
 6991:                         $stripped_token = clone $value;
 6992:                         $stripped_token->attr = array();
 6993:                         $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
 6994:                     }
 6995:                     $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
 6996:                 }
 6997:                 continue;
 6998:             } elseif (is_array($value)) {
 6999:                 $keys = array_keys($value);
 7000:                 if (array_keys($keys) === $keys) {
 7001:                     // list
 7002:                     $subst['$'.$i] = $this->listify($value);
 7003:                 } else {
 7004:                     // associative array
 7005:                     // no $i implementation yet, sorry
 7006:                     $subst['$'.$i.'.Keys'] = $this->listify($keys);
 7007:                     $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
 7008:                 }
 7009:                 continue;
 7010:             }
 7011:             $subst['$' . $i] = $value;
 7012:         }
 7013:         return strtr($raw, $subst);
 7014:     }
 7015: }
 7016: 
 7017: 
 7018: 
 7019: 
 7020: 
 7021: /**
 7022:  * Class responsible for generating HTMLPurifier_Language objects, managing
 7023:  * caching and fallbacks.
 7024:  * @note Thanks to MediaWiki for the general logic, although this version
 7025:  *       has been entirely rewritten
 7026:  * @todo Serialized cache for languages
 7027:  */
 7028: class HTMLPurifier_LanguageFactory
 7029: {
 7030: 
 7031:     /**
 7032:      * Cache of language code information used to load HTMLPurifier_Language objects.
 7033:      * Structure is: $factory->cache[$language_code][$key] = $value
 7034:      * @type array
 7035:      */
 7036:     public $cache;
 7037: 
 7038:     /**
 7039:      * Valid keys in the HTMLPurifier_Language object. Designates which
 7040:      * variables to slurp out of a message file.
 7041:      * @type array
 7042:      */
 7043:     public $keys = array('fallback', 'messages', 'errorNames');
 7044: 
 7045:     /**
 7046:      * Instance to validate language codes.
 7047:      * @type HTMLPurifier_AttrDef_Lang
 7048:      *
 7049:      */
 7050:     protected $validator;
 7051: 
 7052:     /**
 7053:      * Cached copy of dirname(__FILE__), directory of current file without
 7054:      * trailing slash.
 7055:      * @type string
 7056:      */
 7057:     protected $dir;
 7058: 
 7059:     /**
 7060:      * Keys whose contents are a hash map and can be merged.
 7061:      * @type array
 7062:      */
 7063:     protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);
 7064: 
 7065:     /**
 7066:      * Keys whose contents are a list and can be merged.
 7067:      * @value array lookup
 7068:      */
 7069:     protected $mergeable_keys_list = array();
 7070: 
 7071:     /**
 7072:      * Retrieve sole instance of the factory.
 7073:      * @param HTMLPurifier_LanguageFactory $prototype Optional prototype to overload sole instance with,
 7074:      *                   or bool true to reset to default factory.
 7075:      * @return HTMLPurifier_LanguageFactory
 7076:      */
 7077:     public static function instance($prototype = null)
 7078:     {
 7079:         static $instance = null;
 7080:         if ($prototype !== null) {
 7081:             $instance = $prototype;
 7082:         } elseif ($instance === null || $prototype == true) {
 7083:             $instance = new HTMLPurifier_LanguageFactory();
 7084:             $instance->setup();
 7085:         }
 7086:         return $instance;
 7087:     }
 7088: 
 7089:     /**
 7090:      * Sets up the singleton, much like a constructor
 7091:      * @note Prevents people from getting this outside of the singleton
 7092:      */
 7093:     public function setup()
 7094:     {
 7095:         $this->validator = new HTMLPurifier_AttrDef_Lang();
 7096:         $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
 7097:     }
 7098: 
 7099:     /**
 7100:      * Creates a language object, handles class fallbacks
 7101:      * @param HTMLPurifier_Config $config
 7102:      * @param HTMLPurifier_Context $context
 7103:      * @param bool|string $code Code to override configuration with. Private parameter.
 7104:      * @return HTMLPurifier_Language
 7105:      */
 7106:     public function create($config, $context, $code = false)
 7107:     {
 7108:         // validate language code
 7109:         if ($code === false) {
 7110:             $code = $this->validator->validate(
 7111:                 $config->get('Core.Language'),
 7112:                 $config,
 7113:                 $context
 7114:             );
 7115:         } else {
 7116:             $code = $this->validator->validate($code, $config, $context);
 7117:         }
 7118:         if ($code === false) {
 7119:             $code = 'en'; // malformed code becomes English
 7120:         }
 7121: 
 7122:         $pcode = str_replace('-', '_', $code); // make valid PHP classname
 7123:         static $depth = 0; // recursion protection
 7124: 
 7125:         if ($code == 'en') {
 7126:             $lang = new HTMLPurifier_Language($config, $context);
 7127:         } else {
 7128:             $class = 'HTMLPurifier_Language_' . $pcode;
 7129:             $file  = $this->dir . '/Language/classes/' . $code . '.php';
 7130:             if (file_exists($file) || class_exists($class, false)) {
 7131:                 $lang = new $class($config, $context);
 7132:             } else {
 7133:                 // Go fallback
 7134:                 $raw_fallback = $this->getFallbackFor($code);
 7135:                 $fallback = $raw_fallback ? $raw_fallback : 'en';
 7136:                 $depth++;
 7137:                 $lang = $this->create($config, $context, $fallback);
 7138:                 if (!$raw_fallback) {
 7139:                     $lang->error = true;
 7140:                 }
 7141:                 $depth--;
 7142:             }
 7143:         }
 7144:         $lang->code = $code;
 7145:         return $lang;
 7146:     }
 7147: 
 7148:     /**
 7149:      * Returns the fallback language for language
 7150:      * @note Loads the original language into cache
 7151:      * @param string $code language code
 7152:      * @return string|bool
 7153:      */
 7154:     public function getFallbackFor($code)
 7155:     {
 7156:         $this->loadLanguage($code);
 7157:         return $this->cache[$code]['fallback'];
 7158:     }
 7159: 
 7160:     /**
 7161:      * Loads language into the cache, handles message file and fallbacks
 7162:      * @param string $code language code
 7163:      */
 7164:     public function loadLanguage($code)
 7165:     {
 7166:         static $languages_seen = array(); // recursion guard
 7167: 
 7168:         // abort if we've already loaded it
 7169:         if (isset($this->cache[$code])) {
 7170:             return;
 7171:         }
 7172: 
 7173:         // generate filename
 7174:         $filename = $this->dir . '/Language/messages/' . $code . '.php';
 7175: 
 7176:         // default fallback : may be overwritten by the ensuing include
 7177:         $fallback = ($code != 'en') ? 'en' : false;
 7178: 
 7179:         // load primary localisation
 7180:         if (!file_exists($filename)) {
 7181:             // skip the include: will rely solely on fallback
 7182:             $filename = $this->dir . '/Language/messages/en.php';
 7183:             $cache = array();
 7184:         } else {
 7185:             include $filename;
 7186:             $cache = compact($this->keys);
 7187:         }
 7188: 
 7189:         // load fallback localisation
 7190:         if (!empty($fallback)) {
 7191: 
 7192:             // infinite recursion guard
 7193:             if (isset($languages_seen[$code])) {
 7194:                 trigger_error(
 7195:                     'Circular fallback reference in language ' .
 7196:                     $code,
 7197:                     E_USER_ERROR
 7198:                 );
 7199:                 $fallback = 'en';
 7200:             }
 7201:             $language_seen[$code] = true;
 7202: 
 7203:             // load the fallback recursively
 7204:             $this->loadLanguage($fallback);
 7205:             $fallback_cache = $this->cache[$fallback];
 7206: 
 7207:             // merge fallback with current language
 7208:             foreach ($this->keys as $key) {
 7209:                 if (isset($cache[$key]) && isset($fallback_cache[$key])) {
 7210:                     if (isset($this->mergeable_keys_map[$key])) {
 7211:                         $cache[$key] = $cache[$key] + $fallback_cache[$key];
 7212:                     } elseif (isset($this->mergeable_keys_list[$key])) {
 7213:                         $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]);
 7214:                     }
 7215:                 } else {
 7216:                     $cache[$key] = $fallback_cache[$key];
 7217:                 }
 7218:             }
 7219:         }
 7220: 
 7221:         // save to cache for later retrieval
 7222:         $this->cache[$code] = $cache;
 7223:         return;
 7224:     }
 7225: }
 7226: 
 7227: 
 7228: 
 7229: 
 7230: 
 7231: /**
 7232:  * Represents a measurable length, with a string numeric magnitude
 7233:  * and a unit. This object is immutable.
 7234:  */
 7235: class HTMLPurifier_Length
 7236: {
 7237: 
 7238:     /**
 7239:      * String numeric magnitude.
 7240:      * @type string
 7241:      */
 7242:     protected $n;
 7243: 
 7244:     /**
 7245:      * String unit. False is permitted if $n = 0.
 7246:      * @type string|bool
 7247:      */
 7248:     protected $unit;
 7249: 
 7250:     /**
 7251:      * Whether or not this length is valid. Null if not calculated yet.
 7252:      * @type bool
 7253:      */
 7254:     protected $isValid;
 7255: 
 7256:     /**
 7257:      * Array Lookup array of units recognized by CSS 2.1
 7258:      * @type array
 7259:      */
 7260:     protected static $allowedUnits = array(
 7261:         'em' => true, 'ex' => true, 'px' => true, 'in' => true,
 7262:         'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
 7263:     );
 7264: 
 7265:     /**
 7266:      * @param string $n Magnitude
 7267:      * @param bool|string $u Unit
 7268:      */
 7269:     public function __construct($n = '0', $u = false)
 7270:     {
 7271:         $this->n = (string) $n;
 7272:         $this->unit = $u !== false ? (string) $u : false;
 7273:     }
 7274: 
 7275:     /**
 7276:      * @param string $s Unit string, like '2em' or '3.4in'
 7277:      * @return HTMLPurifier_Length
 7278:      * @warning Does not perform validation.
 7279:      */
 7280:     public static function make($s)
 7281:     {
 7282:         if ($s instanceof HTMLPurifier_Length) {
 7283:             return $s;
 7284:         }
 7285:         $n_length = strspn($s, '1234567890.+-');
 7286:         $n = substr($s, 0, $n_length);
 7287:         $unit = substr($s, $n_length);
 7288:         if ($unit === '') {
 7289:             $unit = false;
 7290:         }
 7291:         return new HTMLPurifier_Length($n, $unit);
 7292:     }
 7293: 
 7294:     /**
 7295:      * Validates the number and unit.
 7296:      * @return bool
 7297:      */
 7298:     protected function validate()
 7299:     {
 7300:         // Special case:
 7301:         if ($this->n === '+0' || $this->n === '-0') {
 7302:             $this->n = '0';
 7303:         }
 7304:         if ($this->n === '0' && $this->unit === false) {
 7305:             return true;
 7306:         }
 7307:         if (!ctype_lower($this->unit)) {
 7308:             $this->unit = strtolower($this->unit);
 7309:         }
 7310:         if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) {
 7311:             return false;
 7312:         }
 7313:         // Hack:
 7314:         $def = new HTMLPurifier_AttrDef_CSS_Number();
 7315:         $result = $def->validate($this->n, false, false);
 7316:         if ($result === false) {
 7317:             return false;
 7318:         }
 7319:         $this->n = $result;
 7320:         return true;
 7321:     }
 7322: 
 7323:     /**
 7324:      * Returns string representation of number.
 7325:      * @return string
 7326:      */
 7327:     public function toString()
 7328:     {
 7329:         if (!$this->isValid()) {
 7330:             return false;
 7331:         }
 7332:         return $this->n . $this->unit;
 7333:     }
 7334: 
 7335:     /**
 7336:      * Retrieves string numeric magnitude.
 7337:      * @return string
 7338:      */
 7339:     public function getN()
 7340:     {
 7341:         return $this->n;
 7342:     }
 7343: 
 7344:     /**
 7345:      * Retrieves string unit.
 7346:      * @return string
 7347:      */
 7348:     public function getUnit()
 7349:     {
 7350:         return $this->unit;
 7351:     }
 7352: 
 7353:     /**
 7354:      * Returns true if this length unit is valid.
 7355:      * @return bool
 7356:      */
 7357:     public function isValid()
 7358:     {
 7359:         if ($this->isValid === null) {
 7360:             $this->isValid = $this->validate();
 7361:         }
 7362:         return $this->isValid;
 7363:     }
 7364: 
 7365:     /**
 7366:      * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal.
 7367:      * @param HTMLPurifier_Length $l
 7368:      * @return int
 7369:      * @warning If both values are too large or small, this calculation will
 7370:      *          not work properly
 7371:      */
 7372:     public function compareTo($l)
 7373:     {
 7374:         if ($l === false) {
 7375:             return false;
 7376:         }
 7377:         if ($l->unit !== $this->unit) {
 7378:             $converter = new HTMLPurifier_UnitConverter();
 7379:             $l = $converter->convert($l, $this->unit);
 7380:             if ($l === false) {
 7381:                 return false;
 7382:             }
 7383:         }
 7384:         return $this->n - $l->n;
 7385:     }
 7386: }
 7387: 
 7388: 
 7389: 
 7390: 
 7391: 
 7392: /**
 7393:  * Forgivingly lexes HTML (SGML-style) markup into tokens.
 7394:  *
 7395:  * A lexer parses a string of SGML-style markup and converts them into
 7396:  * corresponding tokens.  It doesn't check for well-formedness, although its
 7397:  * internal mechanism may make this automatic (such as the case of
 7398:  * HTMLPurifier_Lexer_DOMLex).  There are several implementations to choose
 7399:  * from.
 7400:  *
 7401:  * A lexer is HTML-oriented: it might work with XML, but it's not
 7402:  * recommended, as we adhere to a subset of the specification for optimization
 7403:  * reasons. This might change in the future. Also, most tokenizers are not
 7404:  * expected to handle DTDs or PIs.
 7405:  *
 7406:  * This class should not be directly instantiated, but you may use create() to
 7407:  * retrieve a default copy of the lexer.  Being a supertype, this class
 7408:  * does not actually define any implementation, but offers commonly used
 7409:  * convenience functions for subclasses.
 7410:  *
 7411:  * @note The unit tests will instantiate this class for testing purposes, as
 7412:  *       many of the utility functions require a class to be instantiated.
 7413:  *       This means that, even though this class is not runnable, it will
 7414:  *       not be declared abstract.
 7415:  *
 7416:  * @par
 7417:  *
 7418:  * @note
 7419:  * We use tokens rather than create a DOM representation because DOM would:
 7420:  *
 7421:  * @par
 7422:  *  -# Require more processing and memory to create,
 7423:  *  -# Is not streamable, and
 7424:  *  -# Has the entire document structure (html and body not needed).
 7425:  *
 7426:  * @par
 7427:  * However, DOM is helpful in that it makes it easy to move around nodes
 7428:  * without a lot of lookaheads to see when a tag is closed. This is a
 7429:  * limitation of the token system and some workarounds would be nice.
 7430:  */
 7431: class HTMLPurifier_Lexer
 7432: {
 7433: 
 7434:     /**
 7435:      * Whether or not this lexer implements line-number/column-number tracking.
 7436:      * If it does, set to true.
 7437:      */
 7438:     public $tracksLineNumbers = false;
 7439: 
 7440:     // -- STATIC ----------------------------------------------------------
 7441: 
 7442:     /**
 7443:      * Retrieves or sets the default Lexer as a Prototype Factory.
 7444:      *
 7445:      * By default HTMLPurifier_Lexer_DOMLex will be returned. There are
 7446:      * a few exceptions involving special features that only DirectLex
 7447:      * implements.
 7448:      *
 7449:      * @note The behavior of this class has changed, rather than accepting
 7450:      *       a prototype object, it now accepts a configuration object.
 7451:      *       To specify your own prototype, set %Core.LexerImpl to it.
 7452:      *       This change in behavior de-singletonizes the lexer object.
 7453:      *
 7454:      * @param HTMLPurifier_Config $config
 7455:      * @return HTMLPurifier_Lexer
 7456:      * @throws HTMLPurifier_Exception
 7457:      */
 7458:     public static function create($config)
 7459:     {
 7460:         if (!($config instanceof HTMLPurifier_Config)) {
 7461:             $lexer = $config;
 7462:             trigger_error(
 7463:                 "Passing a prototype to
 7464:                 HTMLPurifier_Lexer::create() is deprecated, please instead
 7465:                 use %Core.LexerImpl",
 7466:                 E_USER_WARNING
 7467:             );
 7468:         } else {
 7469:             $lexer = $config->get('Core.LexerImpl');
 7470:         }
 7471: 
 7472:         $needs_tracking =
 7473:             $config->get('Core.MaintainLineNumbers') ||
 7474:             $config->get('Core.CollectErrors');
 7475: 
 7476:         $inst = null;
 7477:         if (is_object($lexer)) {
 7478:             $inst = $lexer;
 7479:         } else {
 7480:             if (is_null($lexer)) {
 7481:                 do {
 7482:                     // auto-detection algorithm
 7483:                     if ($needs_tracking) {
 7484:                         $lexer = 'DirectLex';
 7485:                         break;
 7486:                     }
 7487: 
 7488:                     if (class_exists('DOMDocument') &&
 7489:                         method_exists('DOMDocument', 'loadHTML') &&
 7490:                         !extension_loaded('domxml')
 7491:                     ) {
 7492:                         // check for DOM support, because while it's part of the
 7493:                         // core, it can be disabled compile time. Also, the PECL
 7494:                         // domxml extension overrides the default DOM, and is evil
 7495:                         // and nasty and we shan't bother to support it
 7496:                         $lexer = 'DOMLex';
 7497:                     } else {
 7498:                         $lexer = 'DirectLex';
 7499:                     }
 7500:                 } while (0);
 7501:             } // do..while so we can break
 7502: 
 7503:             // instantiate recognized string names
 7504:             switch ($lexer) {
 7505:                 case 'DOMLex':
 7506:                     $inst = new HTMLPurifier_Lexer_DOMLex();
 7507:                     break;
 7508:                 case 'DirectLex':
 7509:                     $inst = new HTMLPurifier_Lexer_DirectLex();
 7510:                     break;
 7511:                 case 'PH5P':
 7512:                     $inst = new HTMLPurifier_Lexer_PH5P();
 7513:                     break;
 7514:                 default:
 7515:                     throw new HTMLPurifier_Exception(
 7516:                         "Cannot instantiate unrecognized Lexer type " .
 7517:                         htmlspecialchars($lexer)
 7518:                     );
 7519:             }
 7520:         }
 7521: 
 7522:         if (!$inst) {
 7523:             throw new HTMLPurifier_Exception('No lexer was instantiated');
 7524:         }
 7525: 
 7526:         // once PHP DOM implements native line numbers, or we
 7527:         // hack out something using XSLT, remove this stipulation
 7528:         if ($needs_tracking && !$inst->tracksLineNumbers) {
 7529:             throw new HTMLPurifier_Exception(
 7530:                 'Cannot use lexer that does not support line numbers with ' .
 7531:                 'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'
 7532:             );
 7533:         }
 7534: 
 7535:         return $inst;
 7536: 
 7537:     }
 7538: 
 7539:     // -- CONVENIENCE MEMBERS ---------------------------------------------
 7540: 
 7541:     public function __construct()
 7542:     {
 7543:         $this->_entity_parser = new HTMLPurifier_EntityParser();
 7544:     }
 7545: 
    /**
     * Conversion table from the most common special entities to their raw
     * characters. parseData() hands this table to strtr() as a fast path
     * before falling back to the entity parser.
     *
     * Covers the double quote, ampersand, both angle brackets, and three
     * numeric spellings of the apostrophe.
     * @type array
     */
    protected $_special_entity2str =
        array(
            '&quot;' => '"',
            '&amp;' => '&',
            '&lt;' => '<',
            '&gt;' => '>',
            '&#39;' => "'",
            '&#039;' => "'",
            '&#x27;' => "'"
        );
 7560: 
 7561:     /**
 7562:      * Parses special entities into the proper characters.
 7563:      *
 7564:      * This string will translate escaped versions of the special characters
 7565:      * into the correct ones.
 7566:      *
 7567:      * @warning
 7568:      * You should be able to treat the output of this function as
 7569:      * completely parsed, but that's only because all other entities should
 7570:      * have been handled previously in substituteNonSpecialEntities()
 7571:      *
 7572:      * @param string $string String character data to be parsed.
 7573:      * @return string Parsed character data.
 7574:      */
 7575:     public function parseData($string)
 7576:     {
 7577:         // following functions require at least one character
 7578:         if ($string === '') {
 7579:             return '';
 7580:         }
 7581: 
 7582:         // subtracts amps that cannot possibly be escaped
 7583:         $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
 7584:             ($string[strlen($string) - 1] === '&' ? 1 : 0);
 7585: 
 7586:         if (!$num_amp) {
 7587:             return $string;
 7588:         } // abort if no entities
 7589:         $num_esc_amp = substr_count($string, '&amp;');
 7590:         $string = strtr($string, $this->_special_entity2str);
 7591: 
 7592:         // code duplication for sake of optimization, see above
 7593:         $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
 7594:             ($string[strlen($string) - 1] === '&' ? 1 : 0);
 7595: 
 7596:         if ($num_amp_2 <= $num_esc_amp) {
 7597:             return $string;
 7598:         }
 7599: 
 7600:         // hmm... now we have some uncommon entities. Use the callback.
 7601:         $string = $this->_entity_parser->substituteSpecialEntities($string);
 7602:         return $string;
 7603:     }
 7604: 
    /**
     * Lexes an HTML string into tokens.
     *
     * This base implementation does no lexing: it only raises an
     * E_USER_ERROR stating that an abstract class was called.
     * NOTE(review): presumably every concrete lexer overrides this method;
     * confirm against the subclasses.
     *
     * @param string $string HTML.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] array representation of HTML.
     */
    public function tokenizeHTML($string, $config, $context)
    {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }
 7616: 
 7617:     /**
 7618:      * Translates CDATA sections into regular sections (through escaping).
 7619:      * @param string $string HTML string to process.
 7620:      * @return string HTML with CDATA sections escaped.
 7621:      */
 7622:     protected static function escapeCDATA($string)
 7623:     {
 7624:         return preg_replace_callback(
 7625:             '/<!\[CDATA\[(.+?)\]\]>/s',
 7626:             array('HTMLPurifier_Lexer', 'CDATACallback'),
 7627:             $string
 7628:         );
 7629:     }
 7630: 
 7631:     /**
 7632:      * Special CDATA case that is especially convoluted for <script>
 7633:      * @param string $string HTML string to process.
 7634:      * @return string HTML with CDATA sections escaped.
 7635:      */
 7636:     protected static function escapeCommentedCDATA($string)
 7637:     {
 7638:         return preg_replace_callback(
 7639:             '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
 7640:             array('HTMLPurifier_Lexer', 'CDATACallback'),
 7641:             $string
 7642:         );
 7643:     }
 7644: 
 7645:     /**
 7646:      * Special Internet Explorer conditional comments should be removed.
 7647:      * @param string $string HTML string to process.
 7648:      * @return string HTML with conditional comments removed.
 7649:      */
 7650:     protected static function removeIEConditional($string)
 7651:     {
 7652:         return preg_replace(
 7653:             '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
 7654:             '',
 7655:             $string
 7656:         );
 7657:     }
 7658: 
    /**
     * Callback for escapeCDATA() and escapeCommentedCDATA() that does the
     * actual escaping.
     *
     * @warning This is only meant to be invoked through
     *          preg_replace_callback() from within this class; it is
     *          declared protected, so it cannot be called from outside.
     * @param array $matches PCRE matches array, with index 0 the entire match
     *                  and 1 the inside of the CDATA section.
     * @return string Escaped internals of the CDATA section.
     */
    protected static function CDATACallback($matches)
    {
        // not exactly sure why the character set is needed, but whatever
        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
    }
 7673: 
 7674:     /**
 7675:      * Takes a piece of HTML and normalizes it by converting entities, fixing
 7676:      * encoding, extracting bits, and other good stuff.
 7677:      * @param string $html HTML.
 7678:      * @param HTMLPurifier_Config $config
 7679:      * @param HTMLPurifier_Context $context
 7680:      * @return string
 7681:      * @todo Consider making protected
 7682:      */
 7683:     public function normalize($html, $config, $context)
 7684:     {
 7685:         // normalize newlines to \n
 7686:         if ($config->get('Core.NormalizeNewlines')) {
 7687:             $html = str_replace("\r\n", "\n", $html);
 7688:             $html = str_replace("\r", "\n", $html);
 7689:         }
 7690: 
 7691:         if ($config->get('HTML.Trusted')) {
 7692:             // escape convoluted CDATA
 7693:             $html = $this->escapeCommentedCDATA($html);
 7694:         }
 7695: 
 7696:         // escape CDATA
 7697:         $html = $this->escapeCDATA($html);
 7698: 
 7699:         $html = $this->removeIEConditional($html);
 7700: 
 7701:         // extract body from document if applicable
 7702:         if ($config->get('Core.ConvertDocumentToFragment')) {
 7703:             $e = false;
 7704:             if ($config->get('Core.CollectErrors')) {
 7705:                 $e =& $context->get('ErrorCollector');
 7706:             }
 7707:             $new_html = $this->extractBody($html);
 7708:             if ($e && $new_html != $html) {
 7709:                 $e->send(E_WARNING, 'Lexer: Extracted body');
 7710:             }
 7711:             $html = $new_html;
 7712:         }
 7713: 
 7714:         // expand entities that aren't the big five
 7715:         $html = $this->_entity_parser->substituteNonSpecialEntities($html);
 7716: 
 7717:         // clean into wellformed UTF-8 string for an SGML context: this has
 7718:         // to be done after entity expansion because the entities sometimes
 7719:         // represent non-SGML characters (horror, horror!)
 7720:         $html = HTMLPurifier_Encoder::cleanUTF8($html);
 7721: 
 7722:         // if processing instructions are to removed, remove them now
 7723:         if ($config->get('Core.RemoveProcessingInstructions')) {
 7724:             $html = preg_replace('#<\?.+?\?>#s', '', $html);
 7725:         }
 7726: 
 7727:         return $html;
 7728:     }
 7729: 
 7730:     /**
 7731:      * Takes a string of HTML (fragment or document) and returns the content
 7732:      * @todo Consider making protected
 7733:      */
 7734:     public function extractBody($html)
 7735:     {
 7736:         $matches = array();
 7737:         $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
 7738:         if ($result) {
 7739:             return $matches[1];
 7740:         } else {
 7741:             return $html;
 7742:         }
 7743:     }
 7744: }
 7745: 
 7746: 
 7747: 
 7748: 
 7749: 
 7750: /**
 7751:  * Abstract base node class that all others inherit from.
 7752:  *
 7753:  * Why do we not use the DOM extension?  (1) It is not always available,
 7754:  * (2) it has funny constraints on the data it can represent,
 7755:  * whereas we want a maximally flexible representation, and (3) its
 7756:  * interface is a bit cumbersome.
 7757:  */
 7758: abstract class HTMLPurifier_Node
 7759: {
 7760:     /**
 7761:      * Line number of the start token in the source document
 7762:      * @type int
 7763:      */
 7764:     public $line;
 7765: 
 7766:     /**
 7767:      * Column number of the start token in the source document. Null if unknown.
 7768:      * @type int
 7769:      */
 7770:     public $col;
 7771: 
 7772:     /**
 7773:      * Lookup array of processing that this token is exempt from.
 7774:      * Currently, valid values are "ValidateAttributes".
 7775:      * @type array
 7776:      */
 7777:     public $armor = array();
 7778: 
 7779:     /**
 7780:      * When true, this node should be ignored as non-existent.
 7781:      *
 7782:      * Who is responsible for ignoring dead nodes?  FixNesting is
 7783:      * responsible for removing them before passing on to child
 7784:      * validators.
 7785:      */
 7786:     public $dead = false;
 7787: 
 7788:     /**
 7789:      * Returns a pair of start and end tokens, where the end token
 7790:      * is null if it is not necessary. Does not include children.
 7791:      * @type array
 7792:      */
 7793:     abstract public function toTokenPair();
 7794: }
 7795: 
 7796: 
 7797: 
 7798: 
 7799: 
 7800: /**
 7801:  * Class that handles operations involving percent-encoding in URIs.
 7802:  *
 7803:  * @warning
 7804:  *      Be careful when reusing instances of PercentEncoder. The object
 7805:  *      you use for normalize() SHOULD NOT be used for encode(), or
 7806:  *      vice-versa.
 7807:  */
 7808: class HTMLPurifier_PercentEncoder
 7809: {
 7810: 
 7811:     /**
 7812:      * Reserved characters to preserve when using encode().
 7813:      * @type array
 7814:      */
 7815:     protected $preserve = array();
 7816: 
 7817:     /**
 7818:      * String of characters that should be preserved while using encode().
 7819:      * @param bool $preserve
 7820:      */
 7821:     public function __construct($preserve = false)
 7822:     {
 7823:         // unreserved letters, ought to const-ify
 7824:         for ($i = 48; $i <= 57; $i++) { // digits
 7825:             $this->preserve[$i] = true;
 7826:         }
 7827:         for ($i = 65; $i <= 90; $i++) { // upper-case
 7828:             $this->preserve[$i] = true;
 7829:         }
 7830:         for ($i = 97; $i <= 122; $i++) { // lower-case
 7831:             $this->preserve[$i] = true;
 7832:         }
 7833:         $this->preserve[45] = true; // Dash         -
 7834:         $this->preserve[46] = true; // Period       .
 7835:         $this->preserve[95] = true; // Underscore   _
 7836:         $this->preserve[126]= true; // Tilde        ~
 7837: 
 7838:         // extra letters not to escape
 7839:         if ($preserve !== false) {
 7840:             for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
 7841:                 $this->preserve[ord($preserve[$i])] = true;
 7842:             }
 7843:         }
 7844:     }
 7845: 
 7846:     /**
 7847:      * Our replacement for urlencode, it encodes all non-reserved characters,
 7848:      * as well as any extra characters that were instructed to be preserved.
 7849:      * @note
 7850:      *      Assumes that the string has already been normalized, making any
 7851:      *      and all percent escape sequences valid. Percents will not be
 7852:      *      re-escaped, regardless of their status in $preserve
 7853:      * @param string $string String to be encoded
 7854:      * @return string Encoded string.
 7855:      */
 7856:     public function encode($string)
 7857:     {
 7858:         $ret = '';
 7859:         for ($i = 0, $c = strlen($string); $i < $c; $i++) {
 7860:             if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) {
 7861:                 $ret .= '%' . sprintf('%02X', $int);
 7862:             } else {
 7863:                 $ret .= $string[$i];
 7864:             }
 7865:         }
 7866:         return $ret;
 7867:     }
 7868: 
 7869:     /**
 7870:      * Fix up percent-encoding by decoding unreserved characters and normalizing.
 7871:      * @warning This function is affected by $preserve, even though the
 7872:      *          usual desired behavior is for this not to preserve those
 7873:      *          characters. Be careful when reusing instances of PercentEncoder!
 7874:      * @param string $string String to normalize
 7875:      * @return string
 7876:      */
 7877:     public function normalize($string)
 7878:     {
 7879:         if ($string == '') {
 7880:             return '';
 7881:         }
 7882:         $parts = explode('%', $string);
 7883:         $ret = array_shift($parts);
 7884:         foreach ($parts as $part) {
 7885:             $length = strlen($part);
 7886:             if ($length < 2) {
 7887:                 $ret .= '%25' . $part;
 7888:                 continue;
 7889:             }
 7890:             $encoding = substr($part, 0, 2);
 7891:             $text     = substr($part, 2);
 7892:             if (!ctype_xdigit($encoding)) {
 7893:                 $ret .= '%25' . $part;
 7894:                 continue;
 7895:             }
 7896:             $int = hexdec($encoding);
 7897:             if (isset($this->preserve[$int])) {
 7898:                 $ret .= chr($int) . $text;
 7899:                 continue;
 7900:             }
 7901:             $encoding = strtoupper($encoding);
 7902:             $ret .= '%' . $encoding . $text;
 7903:         }
 7904:         return $ret;
 7905:     }
 7906: }
 7907: 
 7908: 
 7909: 
 7910: 
 7911: 
 7912: /**
 7913:  * Generic property list implementation
 7914:  */
 7915: class HTMLPurifier_PropertyList
 7916: {
 7917:     /**
 7918:      * Internal data-structure for properties.
 7919:      * @type array
 7920:      */
 7921:     protected $data = array();
 7922: 
 7923:     /**
 7924:      * Parent plist.
 7925:      * @type HTMLPurifier_PropertyList
 7926:      */
 7927:     protected $parent;
 7928: 
 7929:     /**
 7930:      * Cache.
 7931:      * @type array
 7932:      */
 7933:     protected $cache;
 7934: 
 7935:     /**
 7936:      * @param HTMLPurifier_PropertyList $parent Parent plist
 7937:      */
 7938:     public function __construct($parent = null)
 7939:     {
 7940:         $this->parent = $parent;
 7941:     }
 7942: 
 7943:     /**
 7944:      * Recursively retrieves the value for a key
 7945:      * @param string $name
 7946:      * @throws HTMLPurifier_Exception
 7947:      */
 7948:     public function get($name)
 7949:     {
 7950:         if ($this->has($name)) {
 7951:             return $this->data[$name];
 7952:         }
 7953:         // possible performance bottleneck, convert to iterative if necessary
 7954:         if ($this->parent) {
 7955:             return $this->parent->get($name);
 7956:         }
 7957:         throw new HTMLPurifier_Exception("Key '$name' not found");
 7958:     }
 7959: 
 7960:     /**
 7961:      * Sets the value of a key, for this plist
 7962:      * @param string $name
 7963:      * @param mixed $value
 7964:      */
 7965:     public function set($name, $value)
 7966:     {
 7967:         $this->data[$name] = $value;
 7968:     }
 7969: 
 7970:     /**
 7971:      * Returns true if a given key exists
 7972:      * @param string $name
 7973:      * @return bool
 7974:      */
 7975:     public function has($name)
 7976:     {
 7977:         return array_key_exists($name, $this->data);
 7978:     }
 7979: 
 7980:     /**
 7981:      * Resets a value to the value of it's parent, usually the default. If
 7982:      * no value is specified, the entire plist is reset.
 7983:      * @param string $name
 7984:      */
 7985:     public function reset($name = null)
 7986:     {
 7987:         if ($name == null) {
 7988:             $this->data = array();
 7989:         } else {
 7990:             unset($this->data[$name]);
 7991:         }
 7992:     }
 7993: 
 7994:     /**
 7995:      * Squashes this property list and all of its property lists into a single
 7996:      * array, and returns the array. This value is cached by default.
 7997:      * @param bool $force If true, ignores the cache and regenerates the array.
 7998:      * @return array
 7999:      */
 8000:     public function squash($force = false)
 8001:     {
 8002:         if ($this->cache !== null && !$force) {
 8003:             return $this->cache;
 8004:         }
 8005:         if ($this->parent) {
 8006:             return $this->cache = array_merge($this->parent->squash($force), $this->data);
 8007:         } else {
 8008:             return $this->cache = $this->data;
 8009:         }
 8010:     }
 8011: 
 8012:     /**
 8013:      * Returns the parent plist.
 8014:      * @return HTMLPurifier_PropertyList
 8015:      */
 8016:     public function getParent()
 8017:     {
 8018:         return $this->parent;
 8019:     }
 8020: 
 8021:     /**
 8022:      * Sets the parent plist.
 8023:      * @param HTMLPurifier_PropertyList $plist Parent plist
 8024:      */
 8025:     public function setParent($plist)
 8026:     {
 8027:         $this->parent = $plist;
 8028:     }
 8029: }
 8030: 
 8031: 
 8032: 
 8033: 
 8034: 
 8035: /**
 8036:  * Property list iterator. Do not instantiate this class directly.
 8037:  */
 8038: class HTMLPurifier_PropertyListIterator extends FilterIterator
 8039: {
 8040: 
 8041:     /**
 8042:      * @type int
 8043:      */
 8044:     protected $l;
 8045:     /**
 8046:      * @type string
 8047:      */
 8048:     protected $filter;
 8049: 
 8050:     /**
 8051:      * @param Iterator $iterator Array of data to iterate over
 8052:      * @param string $filter Optional prefix to only allow values of
 8053:      */
 8054:     public function __construct(Iterator $iterator, $filter = null)
 8055:     {
 8056:         parent::__construct($iterator);
 8057:         $this->l = strlen($filter);
 8058:         $this->filter = $filter;
 8059:     }
 8060: 
 8061:     /**
 8062:      * @return bool
 8063:      */
 8064:     public function accept()
 8065:     {
 8066:         $key = $this->getInnerIterator()->key();
 8067:         if (strncmp($key, $this->filter, $this->l) !== 0) {
 8068:             return false;
 8069:         }
 8070:         return true;
 8071:     }
 8072: }
 8073: 
 8074: 
 8075: 
 8076: 
 8077: 
 8078: /**
 8079:  * A simple array-backed queue, based off of the classic Okasaki
 8080:  * persistent amortized queue.  The basic idea is to maintain two
 8081:  * stacks: an input stack and an output stack.  When the output
 8082:  * stack runs out, reverse the input stack and use it as the output
 8083:  * stack.
 8084:  *
 8085:  * We don't use the SPL implementation because it's only supported
 8086:  * on PHP 5.3 and later.
 8087:  *
 8088:  * Exercise: Prove that push/pop on this queue take amortized O(1) time.
 8089:  *
 8090:  * Exercise: Extend this queue to be a deque, while preserving amortized
 8091:  * O(1) time.  Some care must be taken on rebalancing to avoid quadratic
 8092:  * behaviour caused by repeatedly shuffling data from the input stack
 8093:  * to the output stack and back.
 8094:  */
 8095: class HTMLPurifier_Queue {
 8096:     private $input;
 8097:     private $output;
 8098: 
 8099:     public function __construct($input = array()) {
 8100:         $this->input = $input;
 8101:         $this->output = array();
 8102:     }
 8103: 
 8104:     /**
 8105:      * Shifts an element off the front of the queue.
 8106:      */
 8107:     public function shift() {
 8108:         if (empty($this->output)) {
 8109:             $this->output = array_reverse($this->input);
 8110:             $this->input = array();
 8111:         }
 8112:         if (empty($this->output)) {
 8113:             return NULL;
 8114:         }
 8115:         return array_pop($this->output);
 8116:     }
 8117: 
 8118:     /**
 8119:      * Pushes an element onto the front of the queue.
 8120:      */
 8121:     public function push($x) {
 8122:         array_push($this->input, $x);
 8123:     }
 8124: 
 8125:     /**
 8126:      * Checks if it's empty.
 8127:      */
 8128:     public function isEmpty() {
 8129:         return empty($this->input) && empty($this->output);
 8130:     }
 8131: }
 8132: 
 8133: 
 8134: 
 8135: /**
 8136:  * Supertype for classes that define a strategy for modifying/purifying tokens.
 8137:  *
 8138:  * While HTMLPurifier's core purpose is fixing HTML into something proper,
 8139:  * strategies provide plug points for extra configuration or even extra
 8140:  * features, such as custom tags, custom parsing of text, etc.
 8141:  */
 8142: 
 8143: 
 8144: abstract class HTMLPurifier_Strategy
 8145: {
 8146: 
 8147:     /**
 8148:      * Executes the strategy on the tokens.
 8149:      *
 8150:      * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token objects to be operated on.
 8151:      * @param HTMLPurifier_Config $config
 8152:      * @param HTMLPurifier_Context $context
 8153:      * @return HTMLPurifier_Token[] Processed array of token objects.
 8154:      */
 8155:     abstract public function execute($tokens, $config, $context);
 8156: }
 8157: 
 8158: 
 8159: 
 8160: 
 8161: 
 8162: /**
 8163:  * This is in almost every respect equivalent to an array except
 8164:  * that it keeps track of which keys were accessed.
 8165:  *
 8166:  * @warning For the sake of backwards compatibility with early versions
 8167:  *     of PHP 5, you must not use the $hash[$key] syntax; if you do
 8168:  *     our version of offsetGet is never called.
 8169:  */
 8170: class HTMLPurifier_StringHash extends ArrayObject
 8171: {
 8172:     /**
 8173:      * @type array
 8174:      */
 8175:     protected $accessed = array();
 8176: 
 8177:     /**
 8178:      * Retrieves a value, and logs the access.
 8179:      * @param mixed $index
 8180:      * @return mixed
 8181:      */
 8182:     public function offsetGet($index)
 8183:     {
 8184:         $this->accessed[$index] = true;
 8185:         return parent::offsetGet($index);
 8186:     }
 8187: 
 8188:     /**
 8189:      * Returns a lookup array of all array indexes that have been accessed.
 8190:      * @return array in form array($index => true).
 8191:      */
 8192:     public function getAccessed()
 8193:     {
 8194:         return $this->accessed;
 8195:     }
 8196: 
 8197:     /**
 8198:      * Resets the access array.
 8199:      */
 8200:     public function resetAccessed()
 8201:     {
 8202:         $this->accessed = array();
 8203:     }
 8204: }
 8205: 
 8206: 
 8207: 
 8208: 
 8209: 
 8210: /**
 8211:  * Parses string hash files. File format is as such:
 8212:  *
 8213:  *      DefaultKeyValue
 8214:  *      KEY: Value
 8215:  *      KEY2: Value2
 8216:  *      --MULTILINE-KEY--
 8217:  *      Multiline
 8218:  *      value.
 8219:  *
 8220:  * Which would output something similar to:
 8221:  *
 8222:  *      array(
 8223:  *          'ID' => 'DefaultKeyValue',
 8224:  *          'KEY' => 'Value',
 8225:  *          'KEY2' => 'Value2',
 8226:  *          'MULTILINE-KEY' => "Multiline\nvalue.\n",
 8227:  *      )
 8228:  *
 8229:  * We use this as an easy to use file-format for configuration schema
 8230:  * files, but the class itself is usage agnostic.
 8231:  *
 8232:  * You can use ---- to forcibly terminate parsing of a single string-hash;
 8233:  * this marker is used in multi string-hashes to delimit boundaries.
 8234:  */
 8235: class HTMLPurifier_StringHashParser
 8236: {
 8237: 
 8238:     /**
 8239:      * @type string
 8240:      */
 8241:     public $default = 'ID';
 8242: 
 8243:     /**
 8244:      * Parses a file that contains a single string-hash.
 8245:      * @param string $file
 8246:      * @return array
 8247:      */
 8248:     public function parseFile($file)
 8249:     {
 8250:         if (!file_exists($file)) {
 8251:             return false;
 8252:         }
 8253:         $fh = fopen($file, 'r');
 8254:         if (!$fh) {
 8255:             return false;
 8256:         }
 8257:         $ret = $this->parseHandle($fh);
 8258:         fclose($fh);
 8259:         return $ret;
 8260:     }
 8261: 
 8262:     /**
 8263:      * Parses a file that contains multiple string-hashes delimited by '----'
 8264:      * @param string $file
 8265:      * @return array
 8266:      */
 8267:     public function parseMultiFile($file)
 8268:     {
 8269:         if (!file_exists($file)) {
 8270:             return false;
 8271:         }
 8272:         $ret = array();
 8273:         $fh = fopen($file, 'r');
 8274:         if (!$fh) {
 8275:             return false;
 8276:         }
 8277:         while (!feof($fh)) {
 8278:             $ret[] = $this->parseHandle($fh);
 8279:         }
 8280:         fclose($fh);
 8281:         return $ret;
 8282:     }
 8283: 
 8284:     /**
 8285:      * Internal parser that acepts a file handle.
 8286:      * @note While it's possible to simulate in-memory parsing by using
 8287:      *       custom stream wrappers, if such a use-case arises we should
 8288:      *       factor out the file handle into its own class.
 8289:      * @param resource $fh File handle with pointer at start of valid string-hash
 8290:      *            block.
 8291:      * @return array
 8292:      */
 8293:     protected function parseHandle($fh)
 8294:     {
 8295:         $state   = false;
 8296:         $single  = false;
 8297:         $ret     = array();
 8298:         do {
 8299:             $line = fgets($fh);
 8300:             if ($line === false) {
 8301:                 break;
 8302:             }
 8303:             $line = rtrim($line, "\n\r");
 8304:             if (!$state && $line === '') {
 8305:                 continue;
 8306:             }
 8307:             if ($line === '----') {
 8308:                 break;
 8309:             }
 8310:             if (strncmp('--#', $line, 3) === 0) {
 8311:                 // Comment
 8312:                 continue;
 8313:             } elseif (strncmp('--', $line, 2) === 0) {
 8314:                 // Multiline declaration
 8315:                 $state = trim($line, '- ');
 8316:                 if (!isset($ret[$state])) {
 8317:                     $ret[$state] = '';
 8318:                 }
 8319:                 continue;
 8320:             } elseif (!$state) {
 8321:                 $single = true;
 8322:                 if (strpos($line, ':') !== false) {
 8323:                     // Single-line declaration
 8324:                     list($state, $line) = explode(':', $line, 2);
 8325:                     $line = trim($line);
 8326:                 } else {
 8327:                     // Use default declaration
 8328:                     $state  = $this->default;
 8329:                 }
 8330:             }
 8331:             if ($single) {
 8332:                 $ret[$state] = $line;
 8333:                 $single = false;
 8334:                 $state  = false;
 8335:             } else {
 8336:                 $ret[$state] .= "$line\n";
 8337:             }
 8338:         } while (!feof($fh));
 8339:         return $ret;
 8340:     }
 8341: }
 8342: 
 8343: 
 8344: 
 8345: 
 8346: 
 8347: /**
 8348:  * Defines a mutation of an obsolete tag into a valid tag.
 8349:  */
 8350: abstract class HTMLPurifier_TagTransform
 8351: {
 8352: 
 8353:     /**
 8354:      * Tag name to transform the tag to.
 8355:      * @type string
 8356:      */
 8357:     public $transform_to;
 8358: 
 8359:     /**
 8360:      * Transforms the obsolete tag into the valid tag.
 8361:      * @param HTMLPurifier_Token_Tag $tag Tag to be transformed.
 8362:      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object
 8363:      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
 8364:      */
 8365:     abstract public function transform($tag, $config, $context);
 8366: 
 8367:     /**
 8368:      * Prepends CSS properties to the style attribute, creating the
 8369:      * attribute if it doesn't exist.
 8370:      * @warning Copied over from AttrTransform, be sure to keep in sync
 8371:      * @param array $attr Attribute array to process (passed by reference)
 8372:      * @param string $css CSS to prepend
 8373:      */
 8374:     protected function prependCSS(&$attr, $css)
 8375:     {
 8376:         $attr['style'] = isset($attr['style']) ? $attr['style'] : '';
 8377:         $attr['style'] = $css . $attr['style'];
 8378:     }
 8379: }
 8380: 
 8381: 
 8382: 
 8383: 
 8384: 
 8385: /**
 8386:  * Abstract base token class that all others inherit from.
 8387:  */
 8388: abstract class HTMLPurifier_Token
 8389: {
 8390:     /**
 8391:      * Line number node was on in source document. Null if unknown.
 8392:      * @type int
 8393:      */
 8394:     public $line;
 8395: 
 8396:     /**
 8397:      * Column of line node was on in source document. Null if unknown.
 8398:      * @type int
 8399:      */
 8400:     public $col;
 8401: 
 8402:     /**
 8403:      * Lookup array of processing that this token is exempt from.
 8404:      * Currently, valid values are "ValidateAttributes" and
 8405:      * "MakeWellFormed_TagClosedError"
 8406:      * @type array
 8407:      */
 8408:     public $armor = array();
 8409: 
 8410:     /**
 8411:      * Used during MakeWellFormed.
 8412:      * @type
 8413:      */
 8414:     public $skip;
 8415: 
 8416:     /**
 8417:      * @type
 8418:      */
 8419:     public $rewind;
 8420: 
 8421:     /**
 8422:      * @type
 8423:      */
 8424:     public $carryover;
 8425: 
 8426:     /**
 8427:      * @param string $n
 8428:      * @return null|string
 8429:      */
 8430:     public function __get($n)
 8431:     {
 8432:         if ($n === 'type') {
 8433:             trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
 8434:             switch (get_class($this)) {
 8435:                 case 'HTMLPurifier_Token_Start':
 8436:                     return 'start';
 8437:                 case 'HTMLPurifier_Token_Empty':
 8438:                     return 'empty';
 8439:                 case 'HTMLPurifier_Token_End':
 8440:                     return 'end';
 8441:                 case 'HTMLPurifier_Token_Text':
 8442:                     return 'text';
 8443:                 case 'HTMLPurifier_Token_Comment':
 8444:                     return 'comment';
 8445:                 default:
 8446:                     return null;
 8447:             }
 8448:         }
 8449:     }
 8450: 
 8451:     /**
 8452:      * Sets the position of the token in the source document.
 8453:      * @param int $l
 8454:      * @param int $c
 8455:      */
 8456:     public function position($l = null, $c = null)
 8457:     {
 8458:         $this->line = $l;
 8459:         $this->col = $c;
 8460:     }
 8461: 
 8462:     /**
 8463:      * Convenience function for DirectLex settings line/col position.
 8464:      * @param int $l
 8465:      * @param int $c
 8466:      */
 8467:     public function rawPosition($l, $c)
 8468:     {
 8469:         if ($c === -1) {
 8470:             $l++;
 8471:         }
 8472:         $this->line = $l;
 8473:         $this->col = $c;
 8474:     }
 8475: 
 8476:     /**
 8477:      * Converts a token into its corresponding node.
 8478:      */
 8479:     abstract public function toNode();
 8480: }
 8481: 
 8482: 
 8483: 
 8484: 
 8485: 
 8486: /**
 8487:  * Factory for token generation.
 8488:  *
 8489:  * @note Doing some benchmarking indicates that the new operator is much
 8490:  *       slower than the clone operator (even discounting the cost of the
 8491:  *       constructor).  This class is for that optimization.
 8492:  *       Other then that, there's not much point as we don't
 8493:  *       maintain parallel HTMLPurifier_Token hierarchies (the main reason why
 8494:  *       you'd want to use an abstract factory).
 8495:  * @todo Port DirectLex to use this
 8496:  */
 8497: class HTMLPurifier_TokenFactory
 8498: {
 8499:     // p stands for prototype
 8500: 
 8501:     /**
 8502:      * @type HTMLPurifier_Token_Start
 8503:      */
 8504:     private $p_start;
 8505: 
 8506:     /**
 8507:      * @type HTMLPurifier_Token_End
 8508:      */
 8509:     private $p_end;
 8510: 
 8511:     /**
 8512:      * @type HTMLPurifier_Token_Empty
 8513:      */
 8514:     private $p_empty;
 8515: 
 8516:     /**
 8517:      * @type HTMLPurifier_Token_Text
 8518:      */
 8519:     private $p_text;
 8520: 
 8521:     /**
 8522:      * @type HTMLPurifier_Token_Comment
 8523:      */
 8524:     private $p_comment;
 8525: 
 8526:     /**
 8527:      * Generates blank prototypes for cloning.
 8528:      */
 8529:     public function __construct()
 8530:     {
 8531:         $this->p_start = new HTMLPurifier_Token_Start('', array());
 8532:         $this->p_end = new HTMLPurifier_Token_End('');
 8533:         $this->p_empty = new HTMLPurifier_Token_Empty('', array());
 8534:         $this->p_text = new HTMLPurifier_Token_Text('');
 8535:         $this->p_comment = new HTMLPurifier_Token_Comment('');
 8536:     }
 8537: 
 8538:     /**
 8539:      * Creates a HTMLPurifier_Token_Start.
 8540:      * @param string $name Tag name
 8541:      * @param array $attr Associative array of attributes
 8542:      * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
 8543:      */
 8544:     public function createStart($name, $attr = array())
 8545:     {
 8546:         $p = clone $this->p_start;
 8547:         $p->__construct($name, $attr);
 8548:         return $p;
 8549:     }
 8550: 
 8551:     /**
 8552:      * Creates a HTMLPurifier_Token_End.
 8553:      * @param string $name Tag name
 8554:      * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
 8555:      */
 8556:     public function createEnd($name)
 8557:     {
 8558:         $p = clone $this->p_end;
 8559:         $p->__construct($name);
 8560:         return $p;
 8561:     }
 8562: 
 8563:     /**
 8564:      * Creates a HTMLPurifier_Token_Empty.
 8565:      * @param string $name Tag name
 8566:      * @param array $attr Associative array of attributes
 8567:      * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
 8568:      */
 8569:     public function createEmpty($name, $attr = array())
 8570:     {
 8571:         $p = clone $this->p_empty;
 8572:         $p->__construct($name, $attr);
 8573:         return $p;
 8574:     }
 8575: 
 8576:     /**
 8577:      * Creates a HTMLPurifier_Token_Text.
 8578:      * @param string $data Data of text token
 8579:      * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
 8580:      */
 8581:     public function createText($data)
 8582:     {
 8583:         $p = clone $this->p_text;
 8584:         $p->__construct($data);
 8585:         return $p;
 8586:     }
 8587: 
 8588:     /**
 8589:      * Creates a HTMLPurifier_Token_Comment.
 8590:      * @param string $data Data of comment token
 8591:      * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
 8592:      */
 8593:     public function createComment($data)
 8594:     {
 8595:         $p = clone $this->p_comment;
 8596:         $p->__construct($data);
 8597:         return $p;
 8598:     }
 8599: }
 8600: 
 8601: 
 8602: 
 8603: 
 8604: 
 8605: /**
 8606:  * HTML Purifier's internal representation of a URI.
 8607:  * @note
 8608:  *      Internal data-structures are completely escaped. If the data needs
 8609:  *      to be used in a non-URI context (which is very unlikely), be sure
 8610:  *      to decode it first. The URI may not necessarily be well-formed until
 8611:  *      validate() is called.
 8612:  */
 8613: class HTMLPurifier_URI
 8614: {
 8615:     /**
 8616:      * @type string
 8617:      */
 8618:     public $scheme;
 8619: 
 8620:     /**
 8621:      * @type string
 8622:      */
 8623:     public $userinfo;
 8624: 
 8625:     /**
 8626:      * @type string
 8627:      */
 8628:     public $host;
 8629: 
 8630:     /**
 8631:      * @type int
 8632:      */
 8633:     public $port;
 8634: 
 8635:     /**
 8636:      * @type string
 8637:      */
 8638:     public $path;
 8639: 
 8640:     /**
 8641:      * @type string
 8642:      */
 8643:     public $query;
 8644: 
 8645:     /**
 8646:      * @type string
 8647:      */
 8648:     public $fragment;
 8649: 
 8650:     /**
 8651:      * @param string $scheme
 8652:      * @param string $userinfo
 8653:      * @param string $host
 8654:      * @param int $port
 8655:      * @param string $path
 8656:      * @param string $query
 8657:      * @param string $fragment
 8658:      * @note Automatically normalizes scheme and port
 8659:      */
 8660:     public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
 8661:     {
 8662:         $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
 8663:         $this->userinfo = $userinfo;
 8664:         $this->host = $host;
 8665:         $this->port = is_null($port) ? $port : (int)$port;
 8666:         $this->path = $path;
 8667:         $this->query = $query;
 8668:         $this->fragment = $fragment;
 8669:     }
 8670: 
 8671:     /**
 8672:      * Retrieves a scheme object corresponding to the URI's scheme/default
 8673:      * @param HTMLPurifier_Config $config
 8674:      * @param HTMLPurifier_Context $context
 8675:      * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI
 8676:      */
 8677:     public function getSchemeObj($config, $context)
 8678:     {
 8679:         $registry = HTMLPurifier_URISchemeRegistry::instance();
 8680:         if ($this->scheme !== null) {
 8681:             $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
 8682:             if (!$scheme_obj) {
 8683:                 return false;
 8684:             } // invalid scheme, clean it out
 8685:         } else {
 8686:             // no scheme: retrieve the default one
 8687:             $def = $config->getDefinition('URI');
 8688:             $scheme_obj = $def->getDefaultScheme($config, $context);
 8689:             if (!$scheme_obj) {
 8690:                 // something funky happened to the default scheme object
 8691:                 trigger_error(
 8692:                     'Default scheme object "' . $def->defaultScheme . '" was not readable',
 8693:                     E_USER_WARNING
 8694:                 );
 8695:                 return false;
 8696:             }
 8697:         }
 8698:         return $scheme_obj;
 8699:     }
 8700: 
 8701:     /**
 8702:      * Generic validation method applicable for all schemes. May modify
 8703:      * this URI in order to get it into a compliant form.
 8704:      * @param HTMLPurifier_Config $config
 8705:      * @param HTMLPurifier_Context $context
 8706:      * @return bool True if validation/filtering succeeds, false if failure
 8707:      */
 8708:     public function validate($config, $context)
 8709:     {
 8710:         // ABNF definitions from RFC 3986
 8711:         $chars_sub_delims = '!$&\'()*+,;=';
 8712:         $chars_gen_delims = ':/?#[]@';
 8713:         $chars_pchar = $chars_sub_delims . ':@';
 8714: 
 8715:         // validate host
 8716:         if (!is_null($this->host)) {
 8717:             $host_def = new HTMLPurifier_AttrDef_URI_Host();
 8718:             $this->host = $host_def->validate($this->host, $config, $context);
 8719:             if ($this->host === false) {
 8720:                 $this->host = null;
 8721:             }
 8722:         }
 8723: 
 8724:         // validate scheme
 8725:         // NOTE: It's not appropriate to check whether or not this
 8726:         // scheme is in our registry, since a URIFilter may convert a
 8727:         // URI that we don't allow into one we do.  So instead, we just
 8728:         // check if the scheme can be dropped because there is no host
 8729:         // and it is our default scheme.
 8730:         if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
 8731:             // support for relative paths is pretty abysmal when the
 8732:             // scheme is present, so axe it when possible
 8733:             $def = $config->getDefinition('URI');
 8734:             if ($def->defaultScheme === $this->scheme) {
 8735:                 $this->scheme = null;
 8736:             }
 8737:         }
 8738: 
 8739:         // validate username
 8740:         if (!is_null($this->userinfo)) {
 8741:             $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
 8742:             $this->userinfo = $encoder->encode($this->userinfo);
 8743:         }
 8744: 
 8745:         // validate port
 8746:         if (!is_null($this->port)) {
 8747:             if ($this->port < 1 || $this->port > 65535) {
 8748:                 $this->port = null;
 8749:             }
 8750:         }
 8751: 
 8752:         // validate path
 8753:         $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
 8754:         if (!is_null($this->host)) { // this catches $this->host === ''
 8755:             // path-abempty (hier and relative)
 8756:             // http://www.example.com/my/path
 8757:             // //www.example.com/my/path (looks odd, but works, and
 8758:             //                            recognized by most browsers)
 8759:             // (this set is valid or invalid on a scheme by scheme
 8760:             // basis, so we'll deal with it later)
 8761:             // file:///my/path
 8762:             // ///my/path
 8763:             $this->path = $segments_encoder->encode($this->path);
 8764:         } elseif ($this->path !== '') {
 8765:             if ($this->path[0] === '/') {
 8766:                 // path-absolute (hier and relative)
 8767:                 // http:/my/path
 8768:                 // /my/path
 8769:                 if (strlen($this->path) >= 2 && $this->path[1] === '/') {
 8770:                     // This could happen if both the host gets stripped
 8771:                     // out
 8772:                     // http://my/path
 8773:                     // //my/path
 8774:                     $this->path = '';
 8775:                 } else {
 8776:                     $this->path = $segments_encoder->encode($this->path);
 8777:                 }
 8778:             } elseif (!is_null($this->scheme)) {
 8779:                 // path-rootless (hier)
 8780:                 // http:my/path
 8781:                 // Short circuit evaluation means we don't need to check nz
 8782:                 $this->path = $segments_encoder->encode($this->path);
 8783:             } else {
 8784:                 // path-noscheme (relative)
 8785:                 // my/path
 8786:                 // (once again, not checking nz)
 8787:                 $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
 8788:                 $c = strpos($this->path, '/');
 8789:                 if ($c !== false) {
 8790:                     $this->path =
 8791:                         $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
 8792:                         $segments_encoder->encode(substr($this->path, $c));
 8793:                 } else {
 8794:                     $this->path = $segment_nc_encoder->encode($this->path);
 8795:                 }
 8796:             }
 8797:         } else {
 8798:             // path-empty (hier and relative)
 8799:             $this->path = ''; // just to be safe
 8800:         }
 8801: 
 8802:         // qf = query and fragment
 8803:         $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
 8804: 
 8805:         if (!is_null($this->query)) {
 8806:             $this->query = $qf_encoder->encode($this->query);
 8807:         }
 8808: 
 8809:         if (!is_null($this->fragment)) {
 8810:             $this->fragment = $qf_encoder->encode($this->fragment);
 8811:         }
 8812:         return true;
 8813:     }
 8814: 
 8815:     /**
 8816:      * Convert URI back to string
 8817:      * @return string URI appropriate for output
 8818:      */
 8819:     public function toString()
 8820:     {
 8821:         // reconstruct authority
 8822:         $authority = null;
 8823:         // there is a rendering difference between a null authority
 8824:         // (http:foo-bar) and an empty string authority
 8825:         // (http:///foo-bar).
 8826:         if (!is_null($this->host)) {
 8827:             $authority = '';
 8828:             if (!is_null($this->userinfo)) {
 8829:                 $authority .= $this->userinfo . '@';
 8830:             }
 8831:             $authority .= $this->host;
 8832:             if (!is_null($this->port)) {
 8833:                 $authority .= ':' . $this->port;
 8834:             }
 8835:         }
 8836: 
 8837:         // Reconstruct the result
 8838:         // One might wonder about parsing quirks from browsers after
 8839:         // this reconstruction.  Unfortunately, parsing behavior depends
 8840:         // on what *scheme* was employed (file:///foo is handled *very*
 8841:         // differently than http:///foo), so unfortunately we have to
 8842:         // defer to the schemes to do the right thing.
 8843:         $result = '';
 8844:         if (!is_null($this->scheme)) {
 8845:             $result .= $this->scheme . ':';
 8846:         }
 8847:         if (!is_null($authority)) {
 8848:             $result .= '//' . $authority;
 8849:         }
 8850:         $result .= $this->path;
 8851:         if (!is_null($this->query)) {
 8852:             $result .= '?' . $this->query;
 8853:         }
 8854:         if (!is_null($this->fragment)) {
 8855:             $result .= '#' . $this->fragment;
 8856:         }
 8857: 
 8858:         return $result;
 8859:     }
 8860: 
 8861:     /**
 8862:      * Returns true if this URL might be considered a 'local' URL given
 8863:      * the current context.  This is true when the host is null, or
 8864:      * when it matches the host supplied to the configuration.
 8865:      *
 8866:      * Note that this does not do any scheme checking, so it is mostly
 8867:      * only appropriate for metadata that doesn't care about protocol
 8868:      * security.  isBenign is probably what you actually want.
 8869:      * @param HTMLPurifier_Config $config
 8870:      * @param HTMLPurifier_Context $context
 8871:      * @return bool
 8872:      */
 8873:     public function isLocal($config, $context)
 8874:     {
 8875:         if ($this->host === null) {
 8876:             return true;
 8877:         }
 8878:         $uri_def = $config->getDefinition('URI');
 8879:         if ($uri_def->host === $this->host) {
 8880:             return true;
 8881:         }
 8882:         return false;
 8883:     }
 8884: 
 8885:     /**
 8886:      * Returns true if this URL should be considered a 'benign' URL,
 8887:      * that is:
 8888:      *
 8889:      *      - It is a local URL (isLocal), and
 8890:      *      - It has a equal or better level of security
 8891:      * @param HTMLPurifier_Config $config
 8892:      * @param HTMLPurifier_Context $context
 8893:      * @return bool
 8894:      */
 8895:     public function isBenign($config, $context)
 8896:     {
 8897:         if (!$this->isLocal($config, $context)) {
 8898:             return false;
 8899:         }
 8900: 
 8901:         $scheme_obj = $this->getSchemeObj($config, $context);
 8902:         if (!$scheme_obj) {
 8903:             return false;
 8904:         } // conservative approach
 8905: 
 8906:         $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
 8907:         if ($current_scheme_obj->secure) {
 8908:             if (!$scheme_obj->secure) {
 8909:                 return false;
 8910:             }
 8911:         }
 8912:         return true;
 8913:     }
 8914: }
 8915: 
 8916: 
 8917: 
 8918: 
 8919: 
 8920: class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
 8921: {
 8922: 
 8923:     public $type = 'URI';
 8924:     protected $filters = array();
 8925:     protected $postFilters = array();
 8926:     protected $registeredFilters = array();
 8927: 
 8928:     /**
 8929:      * HTMLPurifier_URI object of the base specified at %URI.Base
 8930:      */
 8931:     public $base;
 8932: 
 8933:     /**
 8934:      * String host to consider "home" base, derived off of $base
 8935:      */
 8936:     public $host;
 8937: 
 8938:     /**
 8939:      * Name of default scheme based on %URI.DefaultScheme and %URI.Base
 8940:      */
 8941:     public $defaultScheme;
 8942: 
 8943:     public function __construct()
 8944:     {
 8945:         $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
 8946:         $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
 8947:         $this->registerFilter(new HTMLPurifier_URIFilter_DisableResources());
 8948:         $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
 8949:         $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
 8950:         $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
 8951:         $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
 8952:     }
 8953: 
 8954:     public function registerFilter($filter)
 8955:     {
 8956:         $this->registeredFilters[$filter->name] = $filter;
 8957:     }
 8958: 
 8959:     public function addFilter($filter, $config)
 8960:     {
 8961:         $r = $filter->prepare($config);
 8962:         if ($r === false) return; // null is ok, for backwards compat
 8963:         if ($filter->post) {
 8964:             $this->postFilters[$filter->name] = $filter;
 8965:         } else {
 8966:             $this->filters[$filter->name] = $filter;
 8967:         }
 8968:     }
 8969: 
 8970:     protected function doSetup($config)
 8971:     {
 8972:         $this->setupMemberVariables($config);
 8973:         $this->setupFilters($config);
 8974:     }
 8975: 
 8976:     protected function setupFilters($config)
 8977:     {
 8978:         foreach ($this->registeredFilters as $name => $filter) {
 8979:             if ($filter->always_load) {
 8980:                 $this->addFilter($filter, $config);
 8981:             } else {
 8982:                 $conf = $config->get('URI.' . $name);
 8983:                 if ($conf !== false && $conf !== null) {
 8984:                     $this->addFilter($filter, $config);
 8985:                 }
 8986:             }
 8987:         }
 8988:         unset($this->registeredFilters);
 8989:     }
 8990: 
 8991:     protected function setupMemberVariables($config)
 8992:     {
 8993:         $this->host = $config->get('URI.Host');
 8994:         $base_uri = $config->get('URI.Base');
 8995:         if (!is_null($base_uri)) {
 8996:             $parser = new HTMLPurifier_URIParser();
 8997:             $this->base = $parser->parse($base_uri);
 8998:             $this->defaultScheme = $this->base->scheme;
 8999:             if (is_null($this->host)) $this->host = $this->base->host;
 9000:         }
 9001:         if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
 9002:     }
 9003: 
 9004:     public function getDefaultScheme($config, $context)
 9005:     {
 9006:         return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
 9007:     }
 9008: 
 9009:     public function filter(&$uri, $config, $context)
 9010:     {
 9011:         foreach ($this->filters as $name => $f) {
 9012:             $result = $f->filter($uri, $config, $context);
 9013:             if (!$result) return false;
 9014:         }
 9015:         return true;
 9016:     }
 9017: 
 9018:     public function postFilter(&$uri, $config, $context)
 9019:     {
 9020:         foreach ($this->postFilters as $name => $f) {
 9021:             $result = $f->filter($uri, $config, $context);
 9022:             if (!$result) return false;
 9023:         }
 9024:         return true;
 9025:     }
 9026: 
 9027: }
 9028: 
 9029: 
 9030: 
 9031: 
 9032: 
 9033: /**
 9034:  * Chainable filters for custom URI processing.
 9035:  *
 9036:  * These filters can perform custom actions on a URI filter object,
 9037:  * including transformation or blacklisting.  A filter named Foo
 9038:  * must have a corresponding configuration directive %URI.Foo,
 9039:  * unless always_load is specified to be true.
 9040:  *
 9041:  * The following contexts may be available while URIFilters are being
 9042:  * processed:
 9043:  *
 9044:  *      - EmbeddedURI: true if URI is an embedded resource that will
 9045:  *        be loaded automatically on page load
 9046:  *      - CurrentToken: a reference to the token that is currently
 9047:  *        being processed
 9048:  *      - CurrentAttr: the name of the attribute that is currently being
 9049:  *        processed
 9050:  *      - CurrentCSSProperty: the name of the CSS property that is
 9051:  *        currently being processed (if applicable)
 9052:  *
 9053:  * @warning This filter is called before scheme object validation occurs.
 9054:  *          Make sure, if you require a specific scheme object, you
 9055:  *          check that it exists. This allows filters to convert
 9056:  *          proprietary URI schemes into regular ones.
 9057:  */
 9058: abstract class HTMLPurifier_URIFilter
 9059: {
 9060: 
 9061:     /**
 9062:      * Unique identifier of filter.
 9063:      * @type string
 9064:      */
 9065:     public $name;
 9066: 
 9067:     /**
 9068:      * True if this filter should be run after scheme validation.
 9069:      * @type bool
 9070:      */
 9071:     public $post = false;
 9072: 
 9073:     /**
 9074:      * True if this filter should always be loaded.
 9075:      * This permits a filter to be named Foo without the corresponding
 9076:      * %URI.Foo directive existing.
 9077:      * @type bool
 9078:      */
 9079:     public $always_load = false;
 9080: 
 9081:     /**
 9082:      * Performs initialization for the filter.  If the filter returns
 9083:      * false, this means that it shouldn't be considered active.
 9084:      * @param HTMLPurifier_Config $config
 9085:      * @return bool
 9086:      */
 9087:     public function prepare($config)
 9088:     {
 9089:         return true;
 9090:     }
 9091: 
 9092:     /**
 9093:      * Filter a URI object
 9094:      * @param HTMLPurifier_URI $uri Reference to URI object variable
 9095:      * @param HTMLPurifier_Config $config
 9096:      * @param HTMLPurifier_Context $context
 9097:      * @return bool Whether or not to continue processing: false indicates
 9098:      *         URL is no good, true indicates continue processing. Note that
 9099:      *         all changes are committed directly on the URI object
 9100:      */
 9101:     abstract public function filter(&$uri, $config, $context);
 9102: }
 9103: 
 9104: 
 9105: 
 9106: 
 9107: 
 9108: /**
 9109:  * Parses a URI into the components and fragment identifier as specified
 9110:  * by RFC 3986.
 9111:  */
 9112: class HTMLPurifier_URIParser
 9113: {
 9114: 
 9115:     /**
 9116:      * Instance of HTMLPurifier_PercentEncoder to do normalization with.
 9117:      */
 9118:     protected $percentEncoder;
 9119: 
 9120:     public function __construct()
 9121:     {
 9122:         $this->percentEncoder = new HTMLPurifier_PercentEncoder();
 9123:     }
 9124: 
 9125:     /**
 9126:      * Parses a URI.
 9127:      * @param $uri string URI to parse
 9128:      * @return HTMLPurifier_URI representation of URI. This representation has
 9129:      *         not been validated yet and may not conform to RFC.
 9130:      */
 9131:     public function parse($uri)
 9132:     {
 9133:         $uri = $this->percentEncoder->normalize($uri);
 9134: 
 9135:         // Regexp is as per Appendix B.
 9136:         // Note that ["<>] are an addition to the RFC's recommended
 9137:         // characters, because they represent external delimeters.
 9138:         $r_URI = '!'.
 9139:             '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
 9140:             '(//([^/?#"<>]*))?'. // 4. Authority
 9141:             '([^?#"<>]*)'.       // 5. Path
 9142:             '(\?([^#"<>]*))?'.   // 7. Query
 9143:             '(#([^"<>]*))?'.     // 8. Fragment
 9144:             '!';
 9145: 
 9146:         $matches = array();
 9147:         $result = preg_match($r_URI, $uri, $matches);
 9148: 
 9149:         if (!$result) return false; // *really* invalid URI
 9150: 
 9151:         // seperate out parts
 9152:         $scheme     = !empty($matches[1]) ? $matches[2] : null;
 9153:         $authority  = !empty($matches[3]) ? $matches[4] : null;
 9154:         $path       = $matches[5]; // always present, can be empty
 9155:         $query      = !empty($matches[6]) ? $matches[7] : null;
 9156:         $fragment   = !empty($matches[8]) ? $matches[9] : null;
 9157: 
 9158:         // further parse authority
 9159:         if ($authority !== null) {
 9160:             $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
 9161:             $matches = array();
 9162:             preg_match($r_authority, $authority, $matches);
 9163:             $userinfo   = !empty($matches[1]) ? $matches[2] : null;
 9164:             $host       = !empty($matches[3]) ? $matches[3] : '';
 9165:             $port       = !empty($matches[4]) ? (int) $matches[5] : null;
 9166:         } else {
 9167:             $port = $host = $userinfo = null;
 9168:         }
 9169: 
 9170:         return new HTMLPurifier_URI(
 9171:             $scheme, $userinfo, $host, $port, $path, $query, $fragment);
 9172:     }
 9173: 
 9174: }
 9175: 
 9176: 
 9177: 
 9178: 
 9179: 
 9180: /**
 9181:  * Validator for the components of a URI for a specific scheme
 9182:  */
 9183: abstract class HTMLPurifier_URIScheme
 9184: {
 9185: 
 9186:     /**
 9187:      * Scheme's default port (integer). If an explicit port number is
 9188:      * specified that coincides with the default port, it will be
 9189:      * elided.
 9190:      * @type int
 9191:      */
 9192:     public $default_port = null;
 9193: 
 9194:     /**
 9195:      * Whether or not URIs of this scheme are locatable by a browser
 9196:      * http and ftp are accessible, while mailto and news are not.
 9197:      * @type bool
 9198:      */
 9199:     public $browsable = false;
 9200: 
 9201:     /**
 9202:      * Whether or not data transmitted over this scheme is encrypted.
 9203:      * https is secure, http is not.
 9204:      * @type bool
 9205:      */
 9206:     public $secure = false;
 9207: 
 9208:     /**
 9209:      * Whether or not the URI always uses <hier_part>, resolves edge cases
 9210:      * with making relative URIs absolute
 9211:      * @type bool
 9212:      */
 9213:     public $hierarchical = false;
 9214: 
 9215:     /**
 9216:      * Whether or not the URI may omit a hostname when the scheme is
 9217:      * explicitly specified, ala file:///path/to/file. As of writing,
 9218:      * 'file' is the only scheme that browsers support his properly.
 9219:      * @type bool
 9220:      */
 9221:     public $may_omit_host = false;
 9222: 
 9223:     /**
 9224:      * Validates the components of a URI for a specific scheme.
 9225:      * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
 9226:      * @param HTMLPurifier_Config $config
 9227:      * @param HTMLPurifier_Context $context
 9228:      * @return bool success or failure
 9229:      */
 9230:     abstract public function doValidate(&$uri, $config, $context);
 9231: 
 9232:     /**
 9233:      * Public interface for validating components of a URI.  Performs a
 9234:      * bunch of default actions. Don't overload this method.
 9235:      * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
 9236:      * @param HTMLPurifier_Config $config
 9237:      * @param HTMLPurifier_Context $context
 9238:      * @return bool success or failure
 9239:      */
 9240:     public function validate(&$uri, $config, $context)
 9241:     {
 9242:         if ($this->default_port == $uri->port) {
 9243:             $uri->port = null;
 9244:         }
 9245:         // kludge: browsers do funny things when the scheme but not the
 9246:         // authority is set
 9247:         if (!$this->may_omit_host &&
 9248:             // if the scheme is present, a missing host is always in error
 9249:             (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) ||
 9250:             // if the scheme is not present, a *blank* host is in error,
 9251:             // since this translates into '///path' which most browsers
 9252:             // interpret as being 'http://path'.
 9253:             (is_null($uri->scheme) && $uri->host === '')
 9254:         ) {
 9255:             do {
 9256:                 if (is_null($uri->scheme)) {
 9257:                     if (substr($uri->path, 0, 2) != '//') {
 9258:                         $uri->host = null;
 9259:                         break;
 9260:                     }
 9261:                     // URI is '////path', so we cannot nullify the
 9262:                     // host to preserve semantics.  Try expanding the
 9263:                     // hostname instead (fall through)
 9264:                 }
 9265:                 // first see if we can manually insert a hostname
 9266:                 $host = $config->get('URI.Host');
 9267:                 if (!is_null($host)) {
 9268:                     $uri->host = $host;
 9269:                 } else {
 9270:                     // we can't do anything sensible, reject the URL.
 9271:                     return false;
 9272:                 }
 9273:             } while (false);
 9274:         }
 9275:         return $this->doValidate($uri, $config, $context);
 9276:     }
 9277: }
 9278: 
 9279: 
 9280: 
 9281: 
 9282: 
 9283: /**
 9284:  * Registry for retrieving specific URI scheme validator objects.
 9285:  */
 9286: class HTMLPurifier_URISchemeRegistry
 9287: {
 9288: 
 9289:     /**
 9290:      * Retrieve sole instance of the registry.
 9291:      * @param HTMLPurifier_URISchemeRegistry $prototype Optional prototype to overload sole instance with,
 9292:      *                   or bool true to reset to default registry.
 9293:      * @return HTMLPurifier_URISchemeRegistry
 9294:      * @note Pass a registry object $prototype with a compatible interface and
 9295:      *       the function will copy it and return it all further times.
 9296:      */
 9297:     public static function instance($prototype = null)
 9298:     {
 9299:         static $instance = null;
 9300:         if ($prototype !== null) {
 9301:             $instance = $prototype;
 9302:         } elseif ($instance === null || $prototype == true) {
 9303:             $instance = new HTMLPurifier_URISchemeRegistry();
 9304:         }
 9305:         return $instance;
 9306:     }
 9307: 
 9308:     /**
 9309:      * Cache of retrieved schemes.
 9310:      * @type HTMLPurifier_URIScheme[]
 9311:      */
 9312:     protected $schemes = array();
 9313: 
 9314:     /**
 9315:      * Retrieves a scheme validator object
 9316:      * @param string $scheme String scheme name like http or mailto
 9317:      * @param HTMLPurifier_Config $config
 9318:      * @param HTMLPurifier_Context $context
 9319:      * @return HTMLPurifier_URIScheme
 9320:      */
 9321:     public function getScheme($scheme, $config, $context)
 9322:     {
 9323:         if (!$config) {
 9324:             $config = HTMLPurifier_Config::createDefault();
 9325:         }
 9326: 
 9327:         // important, otherwise attacker could include arbitrary file
 9328:         $allowed_schemes = $config->get('URI.AllowedSchemes');
 9329:         if (!$config->get('URI.OverrideAllowedSchemes') &&
 9330:             !isset($allowed_schemes[$scheme])
 9331:         ) {
 9332:             return;
 9333:         }
 9334: 
 9335:         if (isset($this->schemes[$scheme])) {
 9336:             return $this->schemes[$scheme];
 9337:         }
 9338:         if (!isset($allowed_schemes[$scheme])) {
 9339:             return;
 9340:         }
 9341: 
 9342:         $class = 'HTMLPurifier_URIScheme_' . $scheme;
 9343:         if (!class_exists($class)) {
 9344:             return;
 9345:         }
 9346:         $this->schemes[$scheme] = new $class();
 9347:         return $this->schemes[$scheme];
 9348:     }
 9349: 
 9350:     /**
 9351:      * Registers a custom scheme to the cache, bypassing reflection.
 9352:      * @param string $scheme Scheme name
 9353:      * @param HTMLPurifier_URIScheme $scheme_obj
 9354:      */
 9355:     public function register($scheme, $scheme_obj)
 9356:     {
 9357:         $this->schemes[$scheme] = $scheme_obj;
 9358:     }
 9359: }
 9360: 
 9361: 
 9362: 
 9363: 
 9364: 
/**
 * Class for converting between different unit-lengths as specified by
 * CSS.
 *
 * Arithmetic is done with BCMath when it is available (and not explicitly
 * disabled through the constructor); otherwise a float-based fallback
 * formatted with sprintf() is used.
 */
class HTMLPurifier_UnitConverter
{

    // Identifiers of the measuring systems; they double as the top-level
    // keys of self::$units. DIGITAL has no entry in that table.
    const ENGLISH = 1;
    const METRIC = 2;
    const DIGITAL = 3;

    /**
     * Units information array. Units are grouped into measuring systems
     * (English, Metric), and are assigned an integer representing
     * the conversion factor between that unit and the smallest unit in
     * the system. Numeric indexes are actually magical constants that
     * encode conversion data from one system to the next, with a O(n^2)
     * constraint on memory (this is generally not a problem, since
     * the number of measuring systems is small.)
     *
     * Each numeric entry is a triple array(from unit, factor, to unit):
     * convert() first converts to "from unit", multiplies by "factor" and
     * then continues in the target system starting from "to unit".
     */
    protected static $units = array(
        self::ENGLISH => array(
            'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
            'pt' => 4,
            'pc' => 48,
            'in' => 288,
            self::METRIC => array('pt', '0.352777778', 'mm'),
        ),
        self::METRIC => array(
            'mm' => 1,
            'cm' => 10,
            self::ENGLISH => array('mm', '2.83464567', 'pt'),
        ),
    );

    /**
     * Minimum bcmath precision for output.
     * Used by convert() as the lower bound for the number of significant
     * figures kept in the result.
     * @type int
     */
    protected $outputPrecision;

    /**
     * Bcmath precision for internal calculations.
     * @type int
     */
    protected $internalPrecision;

    /**
     * Whether or not BCMath is available.
     * @type bool
     */
    private $bcmath;

    /**
     * @param int $output_precision Minimum number of significant figures in results
     * @param int $internal_precision Scale (decimal places) used for intermediate math
     * @param bool $force_no_bcmath When true, the float fallback is used even
     *      if the bcmath extension is loaded
     */
    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false)
    {
        $this->outputPrecision = $output_precision;
        $this->internalPrecision = $internal_precision;
        // bcmul() is used as the probe for the whole bcmath extension.
        $this->bcmath = !$force_no_bcmath && function_exists('bcmul');
    }

    /**
     * Converts a length object of one unit into another unit.
     * @param HTMLPurifier_Length $length
     *      Instance of HTMLPurifier_Length to convert. You must validate()
     *      it before passing it here!
     * @param string $to_unit
     *      Unit to convert to.
     * @return HTMLPurifier_Length|bool New length in $to_unit, or false when
     *      the length is invalid or either unit is not in the units table.
     * @note
     *      About precision: This conversion function pays very special
     *      attention to the incoming precision of values and attempts
     *      to maintain a number of significant figure. Results are
     *      fairly accurate up to nine digits. Some caveats:
     *          - If a number is zero-padded as a result of this significant
     *            figure tracking, the zeroes will be eliminated.
     *          - If a number contains less than four sigfigs ($outputPrecision)
     *            and this causes some decimals to be excluded, those
     *            decimals will be added on.
     */
    public function convert($length, $to_unit)
    {
        if (!$length->isValid()) {
            return false;
        }

        $n = $length->getN();
        $unit = $length->getUnit();

        // Zero, or a unit-less number, always becomes a unit-less zero.
        if ($n === '0' || $unit === false) {
            return new HTMLPurifier_Length('0', false);
        }

        // Find which measuring system the source and destination units
        // belong to; false means "unit unknown".
        $state = $dest_state = false;
        foreach (self::$units as $k => $x) {
            if (isset($x[$unit])) {
                $state = $k;
            }
            if (isset($x[$to_unit])) {
                $dest_state = $k;
            }
        }
        if (!$state || !$dest_state) {
            return false;
        }

        // Some calculations about the initial precision of the number;
        // this will be useful when we need to do final rounding.
        $sigfigs = $this->getSigFigs($n);
        if ($sigfigs < $this->outputPrecision) {
            $sigfigs = $this->outputPrecision;
        }

        // BCMath's internal precision deals only with decimals. Use
        // our default if the initial number has no decimals, or increase
        // it by how ever many decimals, thus, the number of guard digits
        // will always be greater than or equal to internalPrecision.
        $log = (int)floor(log(abs($n), 10));
        $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision

        // At most two passes: one inside the source system and, if the
        // destination lives in another system, one more after the shift.
        for ($i = 0; $i < 2; $i++) {

            // Determine what unit IN THIS SYSTEM we need to convert to
            if ($dest_state === $state) {
                // Simple conversion
                $dest_unit = $to_unit;
            } else {
                // Convert to the smallest unit, pending a system shift
                $dest_unit = self::$units[$state][$dest_state][0];
            }

            // Do the conversion if necessary
            if ($dest_unit !== $unit) {
                $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp);
                $n = $this->mul($n, $factor, $cp);
                $unit = $dest_unit;
            }

            // Output was zero, so bail out early. Shouldn't ever happen.
            if ($n === '') {
                $n = '0';
                $unit = $to_unit;
                break;
            }

            // It was a simple conversion, so bail out
            if ($dest_state === $state) {
                break;
            }

            if ($i !== 0) {
                // Conversion failed! Apparently, the system we forwarded
                // to didn't have this unit. This should never happen!
                return false;
            }

            // Pre-condition: $i == 0

            // Perform conversion to next system of units
            $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp);
            $unit = self::$units[$state][$dest_state][2];
            $state = $dest_state;

            // One more loop around to convert the unit in the new system.

        }

        // Post-condition: $unit == $to_unit
        if ($unit !== $to_unit) {
            return false;
        }

        // Useful for debugging:
        //echo "<pre>n";
        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n";

        // Round to the tracked number of significant figures, then strip
        // trailing zeroes after the decimal point and a dangling point.
        $n = $this->round($n, $sigfigs);
        if (strpos($n, '.') !== false) {
            $n = rtrim($n, '0');
        }
        $n = rtrim($n, '.');

        return new HTMLPurifier_Length($n, $unit);
    }

    /**
     * Returns the number of significant figures in a string number.
     * @param string $n Decimal number
     * @return int number of sigfigs
     */
    public function getSigFigs($n)
    {
        // Leading zeroes and signs never count.
        $n = ltrim($n, '0+-');
        $dp = strpos($n, '.'); // decimal position
        if ($dp === false) {
            // Integer: trailing zeroes are not counted as significant.
            $sigfigs = strlen(rtrim($n, '0'));
        } else {
            $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
            // When digits precede the point, the point itself is still in
            // the string and must not be counted.
            if ($dp !== 0) {
                $sigfigs--;
            }
        }
        return $sigfigs;
    }

    /**
     * Adds two numbers, using arbitrary precision when available.
     * Not called by any other method of this class.
     * @param string $s1
     * @param string $s2
     * @param int $scale Number of digits after the decimal point
     * @return string
     */
    private function add($s1, $s2, $scale)
    {
        if ($this->bcmath) {
            return bcadd($s1, $s2, $scale);
        } else {
            return $this->scale((float)$s1 + (float)$s2, $scale);
        }
    }

    /**
     * Multiples two numbers, using arbitrary precision when available.
     * @param string $s1
     * @param string $s2
     * @param int $scale Number of digits after the decimal point
     * @return string
     */
    private function mul($s1, $s2, $scale)
    {
        if ($this->bcmath) {
            return bcmul($s1, $s2, $scale);
        } else {
            return $this->scale((float)$s1 * (float)$s2, $scale);
        }
    }

    /**
     * Divides two numbers, using arbitrary precision when available.
     * @param string $s1 Dividend
     * @param string $s2 Divisor
     * @param int $scale Number of digits after the decimal point
     * @return string
     */
    private function div($s1, $s2, $scale)
    {
        if ($this->bcmath) {
            return bcdiv($s1, $s2, $scale);
        } else {
            return $this->scale((float)$s1 / (float)$s2, $scale);
        }
    }

    /**
     * Rounds a number according to the number of sigfigs it should have,
     * using arbitrary precision when available.
     * @param string|float $n Number to round; convert() passes a numeric string
     * @param int $sigfigs
     * @return string
     */
    private function round($n, $sigfigs)
    {
        $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
        $rp = $sigfigs - $new_log - 1; // Number of decimal places needed
        $neg = $n < 0 ? '-' : ''; // Negative sign
        if ($this->bcmath) {
            if ($rp >= 0) {
                // Round half away from zero: add 5 in the first discarded
                // decimal place, then truncate to $rp decimals.
                $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
                $n = bcdiv($n, '1', $rp);
            } else {
                // This algorithm partially depends on the standardized
                // form of numbers that comes out of bcmath.
                $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
                // Keep the leading $sigfigs digits (plus sign) and zero-fill
                // the remaining integer places.
                $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
            }
            return $n;
        } else {
            return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
        }
    }

    /**
     * Scales a float to $scale digits right of decimal point, like BCMath.
     * @param float $r
     * @param int $scale Digits after the decimal point; negative values
     *      take the string-based branch below
     * @return string
     */
    private function scale($r, $scale)
    {
        if ($scale < 0) {
            // The f sprintf type doesn't support negative numbers, so we
            // need to cludge things manually. First get the string.
            $r = sprintf('%.0f', (float)$r);
            // Due to floating point precision loss, $r will more than likely
            // look something like 4652999999999.9234. We grab one more digit
            // than we need to precise from $r and then use that to round
            // appropriately.
            $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1);
            // Now we return it, truncating the zero that was rounded off.
            return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
        }
        return sprintf('%.' . $scale . 'f', (float)$r);
    }
}
 9668: 
 9669: 
 9670: 
 9671: 
 9672: 
 9673: /**
 9674:  * Parses string representations into their corresponding native PHP
 9675:  * variable type. The base implementation does a simple type-check.
 9676:  */
 9677: class HTMLPurifier_VarParser
 9678: {
 9679: 
 9680:     const STRING = 1;
 9681:     const ISTRING = 2;
 9682:     const TEXT = 3;
 9683:     const ITEXT = 4;
 9684:     const INT = 5;
 9685:     const FLOAT = 6;
 9686:     const BOOL = 7;
 9687:     const LOOKUP = 8;
 9688:     const ALIST = 9;
 9689:     const HASH = 10;
 9690:     const MIXED = 11;
 9691: 
 9692:     /**
 9693:      * Lookup table of allowed types. Mainly for backwards compatibility, but
 9694:      * also convenient for transforming string type names to the integer constants.
 9695:      */
 9696:     public static $types = array(
 9697:         'string' => self::STRING,
 9698:         'istring' => self::ISTRING,
 9699:         'text' => self::TEXT,
 9700:         'itext' => self::ITEXT,
 9701:         'int' => self::INT,
 9702:         'float' => self::FLOAT,
 9703:         'bool' => self::BOOL,
 9704:         'lookup' => self::LOOKUP,
 9705:         'list' => self::ALIST,
 9706:         'hash' => self::HASH,
 9707:         'mixed' => self::MIXED
 9708:     );
 9709: 
 9710:     /**
 9711:      * Lookup table of types that are string, and can have aliases or
 9712:      * allowed value lists.
 9713:      */
 9714:     public static $stringTypes = array(
 9715:         self::STRING => true,
 9716:         self::ISTRING => true,
 9717:         self::TEXT => true,
 9718:         self::ITEXT => true,
 9719:     );
 9720: 
 9721:     /**
 9722:      * Validate a variable according to type.
 9723:      * It may return NULL as a valid type if $allow_null is true.
 9724:      *
 9725:      * @param mixed $var Variable to validate
 9726:      * @param int $type Type of variable, see HTMLPurifier_VarParser->types
 9727:      * @param bool $allow_null Whether or not to permit null as a value
 9728:      * @return string Validated and type-coerced variable
 9729:      * @throws HTMLPurifier_VarParserException
 9730:      */
 9731:     final public function parse($var, $type, $allow_null = false)
 9732:     {
 9733:         if (is_string($type)) {
 9734:             if (!isset(HTMLPurifier_VarParser::$types[$type])) {
 9735:                 throw new HTMLPurifier_VarParserException("Invalid type '$type'");
 9736:             } else {
 9737:                 $type = HTMLPurifier_VarParser::$types[$type];
 9738:             }
 9739:         }
 9740:         $var = $this->parseImplementation($var, $type, $allow_null);
 9741:         if ($allow_null && $var === null) {
 9742:             return null;
 9743:         }
 9744:         // These are basic checks, to make sure nothing horribly wrong
 9745:         // happened in our implementations.
 9746:         switch ($type) {
 9747:             case (self::STRING):
 9748:             case (self::ISTRING):
 9749:             case (self::TEXT):
 9750:             case (self::ITEXT):
 9751:                 if (!is_string($var)) {
 9752:                     break;
 9753:                 }
 9754:                 if ($type == self::ISTRING || $type == self::ITEXT) {
 9755:                     $var = strtolower($var);
 9756:                 }
 9757:                 return $var;
 9758:             case (self::INT):
 9759:                 if (!is_int($var)) {
 9760:                     break;
 9761:                 }
 9762:                 return $var;
 9763:             case (self::FLOAT):
 9764:                 if (!is_float($var)) {
 9765:                     break;
 9766:                 }
 9767:                 return $var;
 9768:             case (self::BOOL):
 9769:                 if (!is_bool($var)) {
 9770:                     break;
 9771:                 }
 9772:                 return $var;
 9773:             case (self::LOOKUP):
 9774:             case (self::ALIST):
 9775:             case (self::HASH):
 9776:                 if (!is_array($var)) {
 9777:                     break;
 9778:                 }
 9779:                 if ($type === self::LOOKUP) {
 9780:                     foreach ($var as $k) {
 9781:                         if ($k !== true) {
 9782:                             $this->error('Lookup table contains value other than true');
 9783:                         }
 9784:                     }
 9785:                 } elseif ($type === self::ALIST) {
 9786:                     $keys = array_keys($var);
 9787:                     if (array_keys($keys) !== $keys) {
 9788:                         $this->error('Indices for list are not uniform');
 9789:                     }
 9790:                 }
 9791:                 return $var;
 9792:             case (self::MIXED):
 9793:                 return $var;
 9794:             default:
 9795:                 $this->errorInconsistent(get_class($this), $type);
 9796:         }
 9797:         $this->errorGeneric($var, $type);
 9798:     }
 9799: 
 9800:     /**
 9801:      * Actually implements the parsing. Base implementation does not
 9802:      * do anything to $var. Subclasses should overload this!
 9803:      * @param mixed $var
 9804:      * @param int $type
 9805:      * @param bool $allow_null
 9806:      * @return string
 9807:      */
 9808:     protected function parseImplementation($var, $type, $allow_null)
 9809:     {
 9810:         return $var;
 9811:     }
 9812: 
 9813:     /**
 9814:      * Throws an exception.
 9815:      * @throws HTMLPurifier_VarParserException
 9816:      */
 9817:     protected function error($msg)
 9818:     {
 9819:         throw new HTMLPurifier_VarParserException($msg);
 9820:     }
 9821: 
 9822:     /**
 9823:      * Throws an inconsistency exception.
 9824:      * @note This should not ever be called. It would be called if we
 9825:      *       extend the allowed values of HTMLPurifier_VarParser without
 9826:      *       updating subclasses.
 9827:      * @param string $class
 9828:      * @param int $type
 9829:      * @throws HTMLPurifier_Exception
 9830:      */
 9831:     protected function errorInconsistent($class, $type)
 9832:     {
 9833:         throw new HTMLPurifier_Exception(
 9834:             "Inconsistency in $class: " . HTMLPurifier_VarParser::getTypeName($type) .
 9835:             " not implemented"
 9836:         );
 9837:     }
 9838: 
 9839:     /**
 9840:      * Generic error for if a type didn't work.
 9841:      * @param mixed $var
 9842:      * @param int $type
 9843:      */
 9844:     protected function errorGeneric($var, $type)
 9845:     {
 9846:         $vtype = gettype($var);
 9847:         $this->error("Expected type " . HTMLPurifier_VarParser::getTypeName($type) . ", got $vtype");
 9848:     }
 9849: 
 9850:     /**
 9851:      * @param int $type
 9852:      * @return string
 9853:      */
 9854:     public static function getTypeName($type)
 9855:     {
 9856:         static $lookup;
 9857:         if (!$lookup) {
 9858:             // Lazy load the alternative lookup table
 9859:             $lookup = array_flip(HTMLPurifier_VarParser::$types);
 9860:         }
 9861:         if (!isset($lookup[$type])) {
 9862:             return 'unknown';
 9863:         }
 9864:         return $lookup[$type];
 9865:     }
 9866: }
 9867: 
 9868: 
 9869: 
 9870: 
 9871: 
/**
 * Exception type for HTMLPurifier_VarParser.
 *
 * Adds no members of its own; it only provides a distinct subclass of
 * HTMLPurifier_Exception so that variable-parsing failures can be caught
 * separately.
 */
class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
{

}
 9879: 
 9880: 
 9881: 
 9882: 
 9883: 
 9884: /**
 9885:  * A zipper is a purely-functional data structure which contains
 9886:  * a focus that can be efficiently manipulated.  It is known as
 9887:  * a "one-hole context".  This mutable variant implements a zipper
 9888:  * for a list as a pair of two arrays, laid out as follows:
 9889:  *
 9890:  *      Base list: 1 2 3 4 [ ] 6 7 8 9
 9891:  *      Front list: 1 2 3 4
 9892:  *      Back list: 9 8 7 6
 9893:  *
 9894:  * User is expected to keep track of the "current element" and properly
 9895:  * fill it back in as necessary.  (ToDo: Maybe it's more user friendly
 9896:  * to implicitly track the current element?)
 9897:  *
 9898:  * Nota bene: the current class gets confused if you try to store NULLs
 9899:  * in the list.
 9900:  */
 9901: 
 9902: class HTMLPurifier_Zipper
 9903: {
 9904:     public $front, $back;
 9905: 
 9906:     public function __construct($front, $back) {
 9907:         $this->front = $front;
 9908:         $this->back = $back;
 9909:     }
 9910: 
 9911:     /**
 9912:      * Creates a zipper from an array, with a hole in the
 9913:      * 0-index position.
 9914:      * @param Array to zipper-ify.
 9915:      * @return Tuple of zipper and element of first position.
 9916:      */
 9917:     static public function fromArray($array) {
 9918:         $z = new self(array(), array_reverse($array));
 9919:         $t = $z->delete(); // delete the "dummy hole"
 9920:         return array($z, $t);
 9921:     }
 9922: 
 9923:     /**
 9924:      * Convert zipper back into a normal array, optionally filling in
 9925:      * the hole with a value. (Usually you should supply a $t, unless you
 9926:      * are at the end of the array.)
 9927:      */
 9928:     public function toArray($t = NULL) {
 9929:         $a = $this->front;
 9930:         if ($t !== NULL) $a[] = $t;
 9931:         for ($i = count($this->back)-1; $i >= 0; $i--) {
 9932:             $a[] = $this->back[$i];
 9933:         }
 9934:         return $a;
 9935:     }
 9936: 
 9937:     /**
 9938:      * Move hole to the next element.
 9939:      * @param $t Element to fill hole with
 9940:      * @return Original contents of new hole.
 9941:      */
 9942:     public function next($t) {
 9943:         if ($t !== NULL) array_push($this->front, $t);
 9944:         return empty($this->back) ? NULL : array_pop($this->back);
 9945:     }
 9946: 
 9947:     /**
 9948:      * Iterated hole advancement.
 9949:      * @param $t Element to fill hole with
 9950:      * @param $i How many forward to advance hole
 9951:      * @return Original contents of new hole, i away
 9952:      */
 9953:     public function advance($t, $n) {
 9954:         for ($i = 0; $i < $n; $i++) {
 9955:             $t = $this->next($t);
 9956:         }
 9957:         return $t;
 9958:     }
 9959: 
 9960:     /**
 9961:      * Move hole to the previous element
 9962:      * @param $t Element to fill hole with
 9963:      * @return Original contents of new hole.
 9964:      */
 9965:     public function prev($t) {
 9966:         if ($t !== NULL) array_push($this->back, $t);
 9967:         return empty($this->front) ? NULL : array_pop($this->front);
 9968:     }
 9969: 
 9970:     /**
 9971:      * Delete contents of current hole, shifting hole to
 9972:      * next element.
 9973:      * @return Original contents of new hole.
 9974:      */
 9975:     public function delete() {
 9976:         return empty($this->back) ? NULL : array_pop($this->back);
 9977:     }
 9978: 
 9979:     /**
 9980:      * Returns true if we are at the end of the list.
 9981:      * @return bool
 9982:      */
 9983:     public function done() {
 9984:         return empty($this->back);
 9985:     }
 9986: 
 9987:     /**
 9988:      * Insert element before hole.
 9989:      * @param Element to insert
 9990:      */
 9991:     public function insertBefore($t) {
 9992:         if ($t !== NULL) array_push($this->front, $t);
 9993:     }
 9994: 
 9995:     /**
 9996:      * Insert element after hole.
 9997:      * @param Element to insert
 9998:      */
 9999:     public function insertAfter($t) {
10000:         if ($t !== NULL) array_push($this->back, $t);
10001:     }
10002: 
10003:     /**
10004:      * Splice in multiple elements at hole.  Functional specification
10005:      * in terms of array_splice:
10006:      *
10007:      *      $arr1 = $arr;
10008:      *      $old1 = array_splice($arr1, $i, $delete, $replacement);
10009:      *
10010:      *      list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
10011:      *      $t = $z->advance($t, $i);
10012:      *      list($old2, $t) = $z->splice($t, $delete, $replacement);
10013:      *      $arr2 = $z->toArray($t);
10014:      *
10015:      *      assert($old1 === $old2);
10016:      *      assert($arr1 === $arr2);
10017:      *
10018:      * NB: the absolute index location after this operation is
10019:      * *unchanged!*
10020:      *
10021:      * @param Current contents of hole.
10022:      */
10023:     public function splice($t, $delete, $replacement) {
10024:         // delete
10025:         $old = array();
10026:         $r = $t;
10027:         for ($i = $delete; $i > 0; $i--) {
10028:             $old[] = $r;
10029:             $r = $this->delete();
10030:         }
10031:         // insert
10032:         for ($i = count($replacement)-1; $i >= 0; $i--) {
10033:             $this->insertAfter($r);
10034:             $r = $replacement[$i];
10035:         }
10036:         return array($old, $r);
10037:     }
10038: }
10039: 
10040: 
10041: 
/**
 * Validator for the HTML "style" attribute, i.e. inline CSS.
 * @note Only part of the CSS specification is implemented, so reusing
 *       this component to validate real stylesheet declarations may be
 *       difficult.
 * @note A rigorous validator would tokenize the styles and parse the
 *       tokens. That is deliberately not done here: it could seriously
 *       harm performance, although it would make these components far
 *       more viable as a CSS filtering solution.
 */
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{

    /**
     * Filters a style attribute down to the declarations whose property
     * is known to the CSS definition and whose value validates.
     * @param string $css
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Rebuilt "prop:value;" string, or false when no
     *         declaration survives
     */
    public function validate($css, $config, $context)
    {
        $css = $this->parseCDATA($css);
        $definition = $config->getCSSDefinition();

        /**
         * Name of the current CSS property being validated. It is handed
         * to the context so that it is visible while each value is being
         * validated, and removed again once the loop is finished.
         */
        $property = false;
        $context->register('CurrentCSSProperty', $property);

        // Splitting on semicolons breaks the spec, but is fast and works
        // because semicolons rarely appear in escaped form. They MIGHT
        // appear in URIs; see HTMLPurifier_AttrDef_CSSURI for details.
        $accepted = array();
        foreach (explode(';', $css) as $declaration) {
            // Skip empty chunks and chunks with no colon, or with a colon
            // in the very first position (strpos() yields false or 0).
            if (!$declaration || !strpos($declaration, ':')) {
                continue;
            }

            $pieces = explode(':', $declaration, 2);
            $property = trim($pieces[0]);
            $value = trim($pieces[1]);

            // Try the property as written first; fall back to its
            // lower-case form only if it was not lower-case already.
            if (!isset($definition->info[$property])) {
                if (ctype_lower($property)) {
                    continue;
                }
                $property = strtolower($property);
                if (!isset($definition->info[$property])) {
                    continue;
                }
            }

            if (strtolower($value) === 'inherit') {
                // inherit works for everything (but only on the base property)
                $result = 'inherit';
            } else {
                // inefficient call, since the validator will do this again
                $validator = $definition->info[$property];
                $result = $validator->validate($value, $config, $context);
            }

            // Keying by property means a later declaration of the same
            // property replaces an earlier one.
            if ($result !== false) {
                $accepted[$property] = $result;
            }
        }

        $context->destroy('CurrentCSSProperty');

        // The output is assembled only now, after the whole input has
        // been processed; slightly inefficient, but it is what removes
        // duplicate properties.
        $output = '';
        foreach ($accepted as $name => $validated) {
            $output .= $name . ':' . $validated . ';';
        }

        if (!$output) {
            return false;
        }
        return $output;

    }

}
10144: 
10145: 
10146: 
10147: 
10148: 
/**
 * Stand-in AttrDef that forwards validation to another AttrDef, BUT
 * whose make() hands out clones of that wrapped AttrDef.
 */
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped definition that is validated against and cloned.
     * @type HTMLPurifier_AttrDef
     */
    protected $clone;

    /**
     * @param HTMLPurifier_AttrDef $clone Definition to wrap
     */
    public function __construct($clone)
    {
        $this->clone = $clone;
    }

    /**
     * Delegates validation to the wrapped definition.
     * @param string $v
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($v, $config, $context)
    {
        $wrapped = $this->clone;
        return $wrapped->validate($v, $config, $context);
    }

    /**
     * Produces a fresh copy of the wrapped definition; the argument is
     * not used.
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $copy = clone $this->clone;
        return $copy;
    }
}
10189: 
10190: 
10191: 
10192: 
10193: 
10194: // Enum = Enumerated
/**
 * Accepts a value only if it appears in a fixed list of keywords.
 * @warning Case-insensitive matching relies on PHP's built-in
 *          strtolower and ctype_lower functions, which may
 *          cause problems with international comparisons
 */
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{

    /**
     * Allowed values, stored as array keys so membership is an isset() test.
     * @type array
     * @todo Make protected
     */
    public $valid_values = array();

    /**
     * Whether matching is case sensitive.
     * @note In general this is always case insensitive.
     */
    protected $case_sensitive = false; // values according to W3C spec

    /**
     * @param array $valid_values List of valid values
     * @param bool $case_sensitive Whether or not case sensitive
     */
    public function __construct($valid_values = array(), $case_sensitive = false)
    {
        // flip so that the values become keys of the lookup table
        $this->valid_values = array_flip($valid_values);
        $this->case_sensitive = $case_sensitive;
    }

    /**
     * Trims the input, lowercases it unless matching is case sensitive,
     * and returns it if it is one of the allowed values.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised value, or false if it is not allowed
     */
    public function validate($string, $config, $context)
    {
        $candidate = trim($string);
        if (!$this->case_sensitive && !ctype_lower($candidate)) {
            // we may want to do full case-insensitive libraries
            $candidate = strtolower($candidate);
        }
        if (isset($this->valid_values[$candidate])) {
            return $candidate;
        }
        return false;
    }

    /**
     * @param string $string In form of comma-delimited list of case-insensitive
     *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
     *      case sensitive
     * @return HTMLPurifier_AttrDef_Enum
     */
    public function make($string)
    {
        // an "s:" prefix followed by at least one more character
        // switches on case-sensitive matching
        $sensitive = strlen($string) > 2 && substr($string, 0, 2) === 's:';
        if ($sensitive) {
            $string = substr($string, 2);
        }
        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
    }
}
10263: 
10264: 
10265: 
10266: 
10267: 
/**
 * Validates an integer, optionally restricted by sign.
 * @note While this class was modeled off the CSS definition, no currently
 *       allowed CSS uses this type.  The properties that do are: widows,
 *       orphans, z-index, counter-increment, counter-reset.  Some of the
 *       HTML attributes, however, find use for a non-negative version of this.
 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{

    /**
     * Whether negative values are accepted.
     * @type bool
     */
    protected $negative = true;

    /**
     * Whether zero is accepted.
     * @type bool
     */
    protected $zero = true;

    /**
     * Whether positive values are accepted.
     * @type bool
     */
    protected $positive = true;

    /**
     * @param $negative Bool indicating whether or not negative values are allowed
     * @param $zero Bool indicating whether or not zero is allowed
     * @param $positive Bool indicating whether or not positive values are allowed
     */
    public function __construct($negative = true, $zero = true, $positive = true)
    {
        $this->negative = $negative;
        $this->zero = $zero;
        $this->positive = $positive;
    }

    /**
     * Checks that the input is an optionally signed run of digits and that
     * its sign is one of the permitted ones.
     * @param string $integer
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The integer as a string (a leading plus is
     *      dropped, "-0" becomes "0"), or false if invalid
     */
    public function validate($integer, $config, $context)
    {
        $integer = $this->parseCDATA($integer);
        if ($integer === '') {
            return false;
        }

        // A plain typecast is deliberately avoided: certain fringe inputs
        // must not come out as an integer.

        // Separate the sign from the digits. A sign that is not permitted
        // stays in $digits and therefore fails the digit test below.
        $lead = $integer[0];
        if ($lead === '-' && $this->negative) {
            $digits = substr($integer, 1);
            if ($digits === '0') {
                // negative zero is reported as plain zero
                $integer = '0';
            }
        } elseif ($lead === '+' && $this->positive) {
            // the plus sign is redundant, drop it from the result too
            $integer = substr($integer, 1);
            $digits = $integer;
        } else {
            $digits = $integer;
        }

        if (!ctype_digit($digits)) {
            return false;
        }

        // reject values whose sign class has been disabled
        $outOfScope = (!$this->zero && $integer == 0)
            || (!$this->positive && $integer > 0)
            || (!$this->negative && $integer < 0);

        return $outOfScope ? false : $integer;
    }
}
10355: 
10356: 
10357: 
10358: 
10359: 
/**
 * Validates the HTML attribute lang, effectively a language code.
 * @note Built according to RFC 3066, which obsoleted RFC 1766
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * Validates a hyphen-separated language tag and lowercases it.
     * An invalid primary subtag rejects the whole value; an invalid later
     * subtag truncates the result to the valid subtags before it.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        $subtags = explode('-', $string);
        $num_subtags = count($subtags);

        // primary subtag: a lone "x" or "i", or two to three letters
        $primary = $subtags[0];
        $length = strlen($primary);
        if ($length == 1) {
            if ($primary != 'x' && $primary != 'i') {
                return false;
            }
        } elseif ($length == 2 || $length == 3) {
            if (!ctype_alpha($primary)) {
                return false;
            }
            if (!ctype_lower($primary)) {
                $primary = strtolower($primary);
            }
        } else {
            // empty, or longer than three characters
            return false;
        }

        $new_string = $primary;

        // remaining subtags: 1-8 alphanumeric characters each; the second
        // subtag may only be a single character if that character is "x"
        for ($i = 1; $i < $num_subtags; $i++) {
            $subtag = $subtags[$i];
            $length = strlen($subtag);
            $badSingle = ($i == 1 && $length == 1 && $subtag != 'x');
            if ($length == 0 || $badSingle || $length > 8 || !ctype_alnum($subtag)) {
                // stop at the first bad subtag, keep what was valid so far
                return $new_string;
            }
            if (!ctype_lower($subtag)) {
                $subtag = strtolower($subtag);
            }
            $new_string .= '-' . $subtag;
        }

        return $new_string;
    }
}
10442: 
10443: 
10444: 
10445: 
10446: 
/**
 * Decorator that picks one of two definitions depending on the name of
 * the token currently being processed.
 */
class HTMLPurifier_AttrDef_Switch
{

    /**
     * Tag name that selects $withTag.
     * @type string
     */
    protected $tag;

    /**
     * Definition used when the current token's name equals $tag.
     * @type HTMLPurifier_AttrDef
     */
    protected $withTag;

    /**
     * Definition used for any other token, or when there is no token.
     * @type HTMLPurifier_AttrDef
     */
    protected $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->tag = $tag;
        $this->withTag = $with_tag;
        $this->withoutTag = $without_tag;
    }

    /**
     * Reads 'CurrentToken' from the context and delegates to the
     * matching definition.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $token = $context->get('CurrentToken', true);
        $matches = $token && $token->name === $this->tag;
        $delegate = $matches ? $this->withTag : $this->withoutTag;

        return $delegate->validate($string, $config, $context);
    }
}
10496: 
10497: 
10498: 
10499: 
10500: 
/**
 * Validates arbitrary text according to the HTML spec.
 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{

    /**
     * Returns the input after passing it through parseCDATA(); no further
     * checks are applied.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $cleaned = $this->parseCDATA($string);
        return $cleaned;
    }
}
10518: 
10519: 
10520: 
10521: 
10522: 
/**
 * Validates a URI as defined by RFC 3986.
 * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /**
     * Parser that turns the raw string into a URI object.
     * @type HTMLPurifier_URIParser
     */
    protected $parser;

    /**
     * Whether the URI is for an embedded resource.
     * @type bool
     */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
     */
    public function __construct($embeds_resource = false)
    {
        $this->parser = new HTMLPurifier_URIParser();
        $this->embedsResource = (bool)$embeds_resource;
    }

    /**
     * @param string $string The literal "embedded" yields an
     *      embedded-resource validator; anything else a plain one
     * @return HTMLPurifier_AttrDef_URI
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * Parses the URI and runs it through generic validation, the URI
     * definition's filters, scheme-specific validation and the post
     * filters, in that order. The first failing stage rejects the URI.
     * @param string $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The URI re-serialised as a string, or false
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        $uri = $this->parser->parse($this->parseCDATA($uri));
        if ($uri === false) {
            return false;
        }

        // expose the embedded flag to the validators for the duration
        // of the checks below
        $context->register('EmbeddedURI', $this->embedsResource);

        $ok = false;
        // generic validation
        if ($uri->validate($config, $context)) {
            // chained filtering
            $uri_def = $config->getDefinition('URI');
            if ($uri_def->filter($uri, $config, $context)) {
                // scheme-specific validation; an embedded resource
                // additionally requires a browsable scheme
                $scheme_obj = $uri->getSchemeObj($config, $context);
                $usable = $scheme_obj
                    && !($this->embedsResource && !$scheme_obj->browsable);
                if ($usable && $scheme_obj->validate($uri, $config, $context)) {
                    // post chained filtering is the last hurdle
                    if ($uri_def->postFilter($uri, $config, $context)) {
                        $ok = true;
                    }
                }
            }
        }

        $context->destroy('EmbeddedURI');

        // back to string on success
        return $ok ? $uri->toString() : false;
    }
}
10630: 
10631: 
10632: 
10633: 
10634: 
/**
 * Validates a number as defined by the CSS spec.
 */
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{

    /**
     * When true, any value with a leading minus sign is rejected.
     * @type bool
     */
    protected $non_negative = false;

    /**
     * @param bool $non_negative indicates whether negatives are forbidden
     */
    public function __construct($non_negative = false)
    {
        $this->non_negative = $non_negative;
    }

    /**
     * Validates an optionally signed integer or decimal and returns it in
     * a compact form: no plus sign, no leading zeros in the integer part
     * and no trailing zeros in the fraction.
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string|bool
     * @warning Some contexts do not pass $config, $context. These
     *          variables should not be used without checking HTMLPurifier_Length
     */
    public function validate($number, $config, $context)
    {
        $number = $this->parseCDATA($number);

        if ($number === '') {
            return false;
        }
        if ($number === '0') {
            return '0';
        }

        // peel off the sign; only a minus is carried into the result
        $sign = '';
        $lead = $number[0];
        if ($lead === '-') {
            if ($this->non_negative) {
                return false;
            }
            $sign = '-';
        }
        if ($lead === '-' || $lead === '+') {
            $number = substr($number, 1);
        }

        // plain integer
        if (ctype_digit($number)) {
            $stripped = ltrim($number, '0');
            return $stripped ? $sign . $stripped : '0';
        }

        // Period is the only non-numeric character allowed
        if (strpos($number, '.') === false) {
            return false;
        }

        list($whole, $fraction) = explode('.', $number, 2);

        // a bare "." is not a number
        if ($whole === '' && $fraction === '') {
            return false;
        }
        if ($whole !== '' && !ctype_digit($whole)) {
            return false;
        }

        $whole = ltrim($whole, '0');
        $fraction = rtrim($fraction, '0');

        if ($fraction === '') {
            // nothing significant after the period: emit an integer
            return $whole ? $sign . $whole : '0';
        }
        if (!ctype_digit($fraction)) {
            return false;
        }

        return $sign . $whole . '.' . $fraction;
    }
}
10715: 
10716: 
10717: 
10718: 
10719: 
/**
 * Validates a CSS number and clamps it to the range 0 to 1.
 */
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{

    public function __construct()
    {
        // opacity is non-negative, but negatives are let through here
        // so that validate() can clamp them instead of rejecting them
        parent::__construct(false);
    }

    /**
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string|bool The number, '0' if it was below 0, '1' if it was
     *      above 1, or false if it is not a valid number
     */
    public function validate($number, $config, $context)
    {
        $value = parent::validate($number, $config, $context);
        if ($value === false) {
            return false;
        }

        $float = (float)$value;
        if ($float > 1.0) {
            return '1';
        }
        if ($float < 0.0) {
            return '0';
        }
        return $value;
    }
}
10750: 
10751: 
10752: 
10753: 
10754: 
/**
 * Validates shorthand CSS property background.
 * @warning Does not support url tokens that have internal spaces.
 */
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the individual background-* properties, copied from
     * the CSS definition.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $properties = array(
            'background-color',
            'background-image',
            'background-repeat',
            'background-attachment',
            'background-position'
        );
        foreach ($properties as $property) {
            $this->info[$property] = $def->info[$property];
        }
    }

    /**
     * Splits the value on spaces and offers each piece to the color,
     * image, repeat and attachment validators in turn; pieces none of
     * them accept are collected and validated together as the position.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted components joined by spaces in the
     *      order color, image, repeat, attachment, position; or false
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // munge rgb() decl if necessary
        $string = $this->mungeRgb($string);

        // single-slot components, in the order they are tried and emitted
        $single = array('color', 'image', 'repeat', 'attachment');

        $caught = array(
            'color' => false,
            'image' => false,
            'repeat' => false,
            'attachment' => false,
            'position' => false
        );

        $catches = 0; // number of pieces assigned to some component

        // assumes URI doesn't have spaces in it
        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            $claimed = false;
            foreach ($single as $key) {
                if ($caught[$key] !== false) {
                    continue; // slot already filled by an earlier piece
                }
                $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                if ($r !== false) {
                    $caught[$key] = $r;
                    $claimed = true;
                    break;
                }
            }

            if (!$claimed) {
                // leftovers accumulate, unvalidated for now, as the position
                if ($caught['position'] === false) {
                    $caught['position'] = '';
                }
                $caught['position'] .= $bit . ' ';
            }
            $catches++;
        }

        if (!$catches) {
            return false;
        }

        // the collected position pieces are validated as one value
        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->
                validate($caught['position'], $config, $context);
        }

        $ret = array();
        foreach ($caught as $value) {
            if ($value !== false) {
                $ret[] = $value;
            }
        }

        return empty($ret) ? false : implode(' ', $ret);
    }
}
10862: 
10863: 
10864: 
10865: 
10866: 
10867: /* W3C says:
10868:     [ // adjective and number must be in correct order, even if
10869:       // you could switch them without introducing ambiguity.
10870:       // some browsers support that syntax
10871:         [
10872:             <percentage> | <length> | left | center | right
10873:         ]
10874:         [
10875:             <percentage> | <length> | top | center | bottom
10876:         ]?
10877:     ] |
10878:     [ // this signifies that the vertical and horizontal adjectives
10879:       // can be arbitrarily ordered, however, there can only be two,
10880:       // one of each, or none at all
10881:         [
10882:             left | center | right
10883:         ] ||
10884:         [
10885:             top | center | bottom
10886:         ]
10887:     ]
10888:     top, left = 0%
10889:     center, (none) = 50%
10890:     bottom, right = 100%
10891: */
10892: 
10893: /* QuirksMode says:
10894:     keyword + length/percentage must be ordered correctly, as per W3C
10895: 
10896:     Internet Explorer and Opera, however, support arbitrary ordering. We
10897:     should fix it up.
10898: 
10899:     Minor issue though, not strictly necessary.
10900: */
10901: 
10902: // control freaks may appreciate the ability to convert these to
10903: // percentages or something, but it's not necessary
10904: 
/**
 * Validates the value of background-position.
 */
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{

    /**
     * Validator used to recognise length measures.
     * @type HTMLPurifier_AttrDef_CSS_Length
     */
    protected $length;

    /**
     * Validator used to recognise percentage measures.
     * @type HTMLPurifier_AttrDef_CSS_Percentage
     */
    protected $percentage;

    public function __construct()
    {
        $this->length = new HTMLPurifier_AttrDef_CSS_Length();
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }

    /**
     * Collects position keywords and length/percentage measures from the
     * space-separated value, then emits at most two of them: first the
     * horizontal component, then the vertical one.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        $keywords = array(
            'h' => false,  // left, right
            'v' => false,  // top, bottom
            'ch' => false, // center (first word)
            'cv' => false  // center (second word)
        );
        $measures = array();

        // which axis each keyword belongs to; 'c' is resolved per position
        $axisOf = array(
            'top' => 'v',
            'bottom' => 'v',
            'left' => 'h',
            'right' => 'h',
            'center' => 'c'
        );

        $hits = 0; // number of recognised keywords and measures so far

        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            // test for keyword
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($axisOf[$lbit])) {
                $slot = $axisOf[$lbit];
                if ($slot == 'c') {
                    // "center" is horizontal only if nothing preceded it
                    $slot = ($hits == 0) ? 'ch' : 'cv';
                }
                $keywords[$slot] = $lbit;
                $hits++;
            }

            // test for length, then for percentage
            foreach (array($this->length, $this->percentage) as $validator) {
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $measures[] = $r;
                    $hits++;
                }
            }
        }

        if (!$hits) {
            return false; // no valid values were caught
        }

        $ret = array();

        // horizontal component: keyword, else center, else first measure
        if ($keywords['h'] || $keywords['ch']) {
            if ($keywords['h']) {
                $ret[] = $keywords['h'];
            } else {
                $ret[] = $keywords['ch'];
                $keywords['cv'] = false; // prevent re-use: center = center center
            }
        } elseif (!empty($measures)) {
            $ret[] = array_shift($measures);
        }

        // vertical component: keyword, else center, else next measure
        $vertical = $keywords['v'] ? $keywords['v'] : $keywords['cv'];
        if ($vertical) {
            $ret[] = $vertical;
        } elseif (!empty($measures)) {
            $ret[] = array_shift($measures);
        }

        return empty($ret) ? false : implode(' ', $ret);
    }
}
11020: 
11021: 
11022: 
11023: 
11024: 
/**
 * Validates the border property as defined by CSS.
 */
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{

    /**
     * Local copy of properties this property is shorthand for.
     * @type HTMLPurifier_AttrDef[]
     */
    protected $info = array();

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }

    /**
     * Splits the value on spaces and offers each piece to the width, style
     * and color validators in turn. Each validator may claim at most one
     * piece; pieces nobody accepts are dropped.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted pieces joined by spaces, in input
     *      order, or false if no piece was accepted
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $string = $this->mungeRgb($string);
        $bits = explode(' ', $string);
        $done = array(); // segments we've finished
        $ret = ''; // return value
        foreach ($bits as $bit) {
            if ($bit === '') {
                // artefact of consecutive spaces, not a real component
                continue;
            }
            foreach ($this->info as $propname => $validator) {
                if (isset($done[$propname])) {
                    continue;
                }
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }
        $ret = rtrim($ret);
        // Report failure explicitly instead of returning an empty string,
        // so callers that test for false reject the declaration rather
        // than receiving an empty value.
        return $ret === '' ? false : $ret;
    }
}
11077: 
11078: 
11079: 
11080: 
11081: 
/**
 * Validates Color as defined by CSS.
 */
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{

    /**
     * Accepts a configured color keyword, an rgb() literal, or a three-
     * or six-digit hexadecimal color with or without the leading '#'.
     * @param string $color
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The keyword's mapped value, a normalised
     *      "rgb(a,b,c)" string with clamped components, a '#'-prefixed
     *      hex color, or false if the value is not a color
     */
    public function validate($color, $config, $context)
    {
        // The keyword table is read from the config once and kept in a
        // function-static, so later calls reuse the first call's table.
        static $colors = null;
        if ($colors === null) {
            $colors = $config->get('Core.ColorKeywords');
        }

        $color = trim($color);
        if ($color === '') {
            return false;
        }

        $lower = strtolower($color);
        if (isset($colors[$lower])) {
            return $colors[$lower];
        }

        // The value must START with "rgb(". Merely containing it is not
        // enough: the triad is sliced from a fixed offset of 4, so a value
        // like "xrgb(1,2,3)" would be misparsed and accepted.
        if (strpos($color, 'rgb(') === 0) {
            // rgb literal handling
            $length = strlen($color);
            // the first closing parenthesis must be the final character
            if (strpos($color, ')') !== $length - 1) {
                return false;
            }
            $triad = substr($color, 4, $length - 4 - 1);
            $parts = explode(',', $triad);
            if (count($parts) !== 3) {
                return false;
            }
            $type = false; // to ensure that they're all the same type
            $new_parts = array();
            foreach ($parts as $part) {
                $part = trim($part);
                if ($part === '') {
                    return false;
                }
                $length = strlen($part);
                if ($part[$length - 1] === '%') {
                    // handle percents, clamped to 0-100
                    if (!$type) {
                        $type = 'percentage';
                    } elseif ($type !== 'percentage') {
                        return false;
                    }
                    $num = (float)substr($part, 0, $length - 1);
                    if ($num < 0) {
                        $num = 0;
                    }
                    if ($num > 100) {
                        $num = 100;
                    }
                    $new_parts[] = "$num%";
                } else {
                    // handle integers, clamped to 0-255
                    if (!$type) {
                        $type = 'integer';
                    } elseif ($type !== 'integer') {
                        return false;
                    }
                    $num = (int)$part;
                    if ($num < 0) {
                        $num = 0;
                    }
                    if ($num > 255) {
                        $num = 255;
                    }
                    $new_parts[] = (string)$num;
                }
            }
            $new_triad = implode(',', $new_parts);
            $color = "rgb($new_triad)";
        } else {
            // hexadecimal handling; a missing '#' is added to the result
            if ($color[0] === '#') {
                $hex = substr($color, 1);
            } else {
                $hex = $color;
                $color = '#' . $color;
            }
            $length = strlen($hex);
            if ($length !== 3 && $length !== 6) {
                return false;
            }
            if (!ctype_xdigit($hex)) {
                return false;
            }
        }
        return $color;
    }
}
11183: 
11184: 
11185: 
11186: 
11187: 
/**
 * Runs a value past several validators and accepts the first success.
 *
 * A composite holds an ordered list of HTMLPurifier_AttrDef objects. Each
 * one is given the raw string in turn; the first result that is not false
 * is returned unchanged. This suits CSS values that are either one of a
 * fixed set of keywords or some more flexible data type.
 */
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{

    /**
     * Candidate validators, consulted in array order.
     * @type HTMLPurifier_AttrDef[]
     * @todo Make protected
     */
    public $defs;

    /**
     * @param HTMLPurifier_AttrDef[] $defs Validators to try, in priority order
     */
    public function __construct($defs)
    {
        $this->defs = $defs;
    }

    /**
     * @param string $string Value to validate
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Output of the first accepting validator, or false
     *                     when every validator rejects the value
     */
    public function validate($string, $config, $context)
    {
        foreach ($this->defs as $candidate) {
            $outcome = $candidate->validate($string, $config, $context);
            if ($outcome === false) {
                // rejected; let the next validator have a go
                continue;
            }
            return $outcome;
        }
        return false;
    }
}
11232: 
11233: 
11234: 
11235: 
11236: 
/**
 * Decorator that switches a CSS property off for one specific element
 * and otherwise defers to the wrapped definition.
 */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition that performs the actual validation.
     * @type HTMLPurifier_AttrDef
     */
    public $def;
    /**
     * Name of the element for which the property is refused.
     * @type string
     */
    public $element;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param string $element Element to deny
     */
    public function __construct($def, $element)
    {
        $this->def = $def;
        $this->element = $element;
    }

    /**
     * Rejects the value when the context's CurrentToken is set and its name
     * equals $this->element; otherwise delegates to the wrapped definition.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $current = $context->get('CurrentToken', true);
        $denied = $current && $current->name == $this->element;
        return $denied
            ? false
            : $this->def->validate($string, $config, $context);
    }
}
11277: 
11278: 
11279: 
11280: 
11281: 
/**
 * Microsoft's proprietary filter: CSS property
 * @note Currently supports the alpha filter. In the future, this will
 *       probably need an extensible framework
 */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{
    /**
     * Validator applied to the value of the opacity parameter.
     * @type HTMLPurifier_AttrDef_Integer
     */
    protected $intValidator;

    public function __construct()
    {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * Validates a single alpha filter declaration.
     *
     * Accepts 'none' verbatim. Otherwise the function name must be one of
     * alpha, Alpha or progid:DXImageTransform.Microsoft.Alpha. Of its
     * comma-separated key=value parameters only the first valid 'opacity'
     * is kept, with its integer value clamped to the range 0..100.
     * Parameters without an '=' separator are ignored.
     *
     * @param string $value
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Rebuilt "function(params)" string, 'none', or
     *                     false when the function name is not recognised
     */
    public function validate($value, $config, $context)
    {
        $value = $this->parseCDATA($value);
        if ($value === 'none') {
            return $value;
        }
        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
        ) {
            return false;
        }

        // Everything between '(' and the first ')' is the parameter list.
        // Without a '(' (or with nothing after it) there are no parameters;
        // handle that explicitly instead of relying on out-of-range offsets,
        // whose behaviour differs between PHP versions.
        $cursor = $function_length + 1;
        if ($cursor >= strlen($value)) {
            $parameters = '';
        } else {
            $parameters_length = strcspn($value, ')', $cursor);
            $parameters = substr($value, $cursor, $parameters_length);
        }

        $ret_params = array();
        $lookup = array(); // keys that have already been emitted
        foreach (explode(',', $parameters) as $param) {
            $pair = explode('=', $param);
            if (count($pair) < 2) {
                // no '=' present: not a key=value parameter, skip it
                continue;
            }
            $key = trim($pair[0]);
            if (isset($lookup[$key])) {
                continue;
            }
            if ($key !== 'opacity') {
                continue;
            }
            // kept in its own variable so the $value argument is not clobbered
            $opacity = $this->intValidator->validate(trim($pair[1]), $config, $context);
            if ($opacity === false) {
                continue;
            }
            $int = (int)$opacity;
            if ($int > 100) {
                $opacity = '100';
            }
            if ($int < 0) {
                $opacity = '0';
            }
            $ret_params[] = "$key=$opacity";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        $ret_function = "$function($ret_parameters)";
        return $ret_function;
    }
}
11355: 
11356: 
11357: 
11358: 
11359: 
/**
 * Validates the shorthand CSS property font.
 */
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{

    /**
     * Local copies of the validators for the longhand font properties,
     * keyed by property name.
     * @type HTMLPurifier_AttrDef[]
     * @note If we moved specific CSS property definitions to their own
     *       classes instead of having them be assembled at run time by
     *       CSSDefinition, this wouldn't be necessary.  We'd instantiate
     *       our own copies.
     */
    protected $info = array();

    /**
     * Copies the longhand validators out of the CSS definition.
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $css = $config->getCSSDefinition();
        $properties = array(
            'font-style',
            'font-variant',
            'font-weight',
            'font-size',
            'line-height',
            'font-family'
        );
        foreach ($properties as $property) {
            $this->info[$property] = $css->info[$property];
        }
    }

    /**
     * Parses the shorthand in three stages:
     *   0. optional font-style / font-variant / font-weight keywords,
     *   1. mandatory font-size, optionally followed by "/line-height",
     *   2. mandatory font-family, which consumes the rest of the string.
     * A system font keyword on its own is returned lowercased.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised shorthand, or false if it is invalid
     */
    public function validate($string, $config, $context)
    {
        static $system_fonts = array(
            'caption' => true,
            'icon' => true,
            'menu' => true,
            'message-box' => true,
            'small-caption' => true,
            'status-bar' => true
        );

        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // a lone system font keyword is a complete value
        $lowered = strtolower($string);
        if (isset($system_fonts[$lowered])) {
            return $lowered;
        }

        $bits = explode(' ', $string); // space separated tokens
        $size = count($bits);
        $optional = array('font-style', 'font-variant', 'font-weight');
        $caught = array(); // optional properties matched so far
        $stage = 0; // what we are currently looking for
        $final = ''; // output accumulator

        for ($i = 0; $i < $size; $i++) {
            if ($bits[$i] === '') {
                continue;
            }

            if ($stage === 0) {
                // try each not-yet-seen optional property on this token
                foreach ($optional as $name) {
                    if (isset($caught[$name])) {
                        continue;
                    }
                    $r = $this->info[$name]->validate($bits[$i], $config, $context);
                    if ($r !== false) {
                        $final .= $r . ' ';
                        $caught[$name] = true;
                        break;
                    }
                }
                // all three caught, move on to font-size
                if (count($caught) >= 3) {
                    $stage = 1;
                }
                if ($r !== false) {
                    continue;
                }
                // token is not an optional keyword: it must be the font-size
            }

            if ($stage !== 2) {
                // font-size, perhaps with an attached or detached line-height
                $slash_seen = false;
                if (strpos($bits[$i], '/') !== false) {
                    list($size_part, $height_part) = explode('/', $bits[$i]);
                    if ($height_part === '') {
                        // there's a space after the slash
                        $height_part = false;
                        $slash_seen = true;
                    }
                } else {
                    $size_part = $bits[$i];
                    $height_part = false;
                }

                $r = $this->info['font-size']->validate($size_part, $config, $context);
                if ($r === false) {
                    return false;
                }
                $final .= $r;

                if ($height_part === false) {
                    // scan ahead for a detached slash and/or line-height
                    for ($j = $i + 1; $j < $size; $j++) {
                        if ($bits[$j] === '') {
                            continue;
                        }
                        if ($bits[$j] === '/') {
                            if ($slash_seen) {
                                return false;
                            }
                            $slash_seen = true;
                            continue;
                        }
                        $height_part = $bits[$j];
                        break;
                    }
                } else {
                    // line-height was attached to the font-size token
                    $slash_seen = true;
                    $j = $i;
                }

                if ($slash_seen) {
                    // skip past whatever the look-ahead consumed
                    $i = $j;
                    $r = $this->info['line-height']->validate($height_part, $config, $context);
                    if ($r !== false) {
                        $final .= '/' . $r;
                    }
                }
                $final .= ' ';
                $stage = 2;
                continue;
            }

            // stage 2: everything that is left is the font-family list
            $family = implode(' ', array_slice($bits, $i, $size - $i));
            $r = $this->info['font-family']->validate($family, $config, $context);
            if ($r === false) {
                return false;
            }
            $final .= $r . ' ';
            // processing completed successfully
            return rtrim($final);
        }
        return false;
    }
}
11532: 
11533: 
11534: 
11535: 
11536: 
/**
 * Validates a font family list according to CSS spec
 */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{

    /**
     * Set of bytes permitted in a font name that is not purely alphanumeric,
     * in the form expected by strspn().
     * @type string
     */
    protected $mask = null;

    /**
     * Builds the mask of permitted bytes: underscore, dash, space, ASCII
     * letters and digits, and every byte in the range 0x80-0xFF.
     */
    public function __construct()
    {
        // range() is used instead of `for ($c = 'a'; $c <= 'z'; $c++)`:
        // PHP's string increment turns 'z' into 'aa', and 'aa' <= 'z' is
        // still true, so such a loop keeps going until 'yz' and stuffs the
        // mask with thousands of redundant bytes.
        $this->mask = '_- '
            . implode('', range('a', 'z'))
            . implode('', range('A', 'Z'))
            . implode('', range(0, 9));
        // special bytes used by UTF-8
        // We don't bother excluding invalid bytes in this range,
        // because our restriction to well-formed UTF-8 will
        // prevent these from ever occurring.
        $this->mask .= implode('', array_map('chr', range(0x80, 0xFF)));

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
          for (p = s1;;) {
            spanp = s2;
            do {
              if (*spanp == c || p == s1_end) {
                return p - s1;
              }
            } while (spanp++ < (s2_end - 1));
            c = *++p;
          }
         */
        // possible optimization: invert the mask.
    }

    /**
     * Filters a comma-separated font-family list, dropping every entry that
     * is not acceptable and normalising the quoting of the rest.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Cleaned list joined with ', ', or false when no
     *                     entry survives
     */
    public function validate($string, $config, $context)
    {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach ($fonts as $font) {
            $font = trim($font);
            if ($font === '') {
                continue;
            }
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                if ($length <= 2) {
                    continue;
                }
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) {
                    continue;
                }
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "MS 明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote *cannot* be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping don't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // Edgy: alphanumeric, spaces, dashes, underscores and Unicode.  Use of
            // str(c)spn assumes that the string was already well formed
            // Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        $final = rtrim($final, ', ');
        if ($final === '') {
            return false;
        }
        return $final;
    }

}
11752: 
11753: 
11754: 
11755: 
11756: 
/**
 * Validates a value against the {ident} production of the CSS grammar.
 */
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Trimmed identifier, or false if it is not one
     */
    public function validate($string, $config, $context)
    {
        $ident = trim($string);

        // '' and '0' (the only strings that convert to false) can never be
        // identifiers, so reject them before touching the regex engine
        if ($ident === '' || $ident === '0') {
            return false;
        }

        // optional leading dash, then a letter or underscore, then any
        // number of letters, digits, underscores and dashes
        $matched = preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident);
        return $matched ? $ident : false;
    }
}
11785: 
11786: 
11787: 
11788: 
11789: 
/**
 * Decorator which enables !important to be used in CSS values.
 */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition that validates the value proper.
     * @type HTMLPurifier_AttrDef
     */
    public $def;
    /**
     * Whether a trailing !important is re-attached to valid values.
     * @type bool
     */
    public $allow;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param bool $allow Whether or not to allow !important
     */
    public function __construct($def, $allow = false)
    {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * Intercepts and removes !important if necessary.
     *
     * A trailing "!important" (whitespace between "!" and "important" is
     * tolerated) is stripped before the wrapped definition sees the value.
     * It is appended again only when the value validates and $allow is set.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Validated value, or false if the wrapped
     *                     definition rejects it
     */
    public function validate($string, $config, $context)
    {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because the value is only rewritten when the
            // preceding '!' is actually present
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }
        $result = $this->def->validate($string, $config, $context);
        if ($result === false) {
            // Never decorate a rejected value: appending to false would
            // produce the truthy string ' !important' and report an
            // invalid value as valid.
            return false;
        }
        if ($this->allow && $is_important) {
            $result .= ' !important';
        }
        return $result;
    }
}
11842: 
11843: 
11844: 
11845: 
11846: 
/**
 * Validates a CSS length, optionally constrained to a range.
 */
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{

    /**
     * Lower bound, or null when unbounded.
     * @type HTMLPurifier_Length|null
     */
    protected $min;

    /**
     * Upper bound, or null when unbounded.
     * @type HTMLPurifier_Length|null
     */
    protected $max;

    /**
     * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable.
     * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable.
     */
    public function __construct($min = null, $max = null)
    {
        $this->min = null;
        if ($min !== null) {
            $this->min = HTMLPurifier_Length::make($min);
        }
        $this->max = null;
        if ($max !== null) {
            $this->max = HTMLPurifier_Length::make($max);
        }
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string String form of the length, or false when it is
     *                     malformed, out of range, or cannot be compared
     *                     with a bound
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Shortcuts: a bare zero is always fine; the empty string and any
        // other single character cannot be a length
        if ($string === '0') {
            return '0';
        }
        if (strlen($string) <= 1) {
            return false;
        }

        $length = HTMLPurifier_Length::make($string);
        if (!$length->isValid()) {
            return false;
        }

        if ($this->min) {
            $cmp = $length->compareTo($this->min);
            // false signals that the comparison itself failed
            if ($cmp === false || $cmp < 0) {
                return false;
            }
        }
        if ($this->max) {
            $cmp = $length->compareTo($this->max);
            if ($cmp === false || $cmp > 0) {
                return false;
            }
        }
        return $length->toString();
    }
}
11920: 
11921: 
11922: 
11923: 
11924: 
/**
 * Validates shorthand CSS property list-style.
 *
 * The value is split on single spaces and every piece is offered, in turn,
 * to the list-style-type, list-style-position and list-style-image
 * validators; the first still-unused validator that accepts a piece keeps it.
 *
 * @warning Does not support url tokens that have internal spaces.
 */
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{

    /**
     * Local copy of validators, keyed by full property name
     * (list-style-type, list-style-position, list-style-image).
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * Copies the three component validators out of the CSS definition.
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type'] = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised "type image position" string (only the
     *         components that were recognised), or false if none were.
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', strtolower($string)); // bits to process

        // one slot per component; false means "not seen yet"
        $caught = array();
        $caught['type'] = false;
        $caught['position'] = false;
        $caught['image'] = false;

        $i = 0; // number of catches
        $none = false; // whether a 'none' keyword has already been consumed

        foreach ($bits as $bit) {
            if ($i >= 3) {
                // Every slot is filled, so no remaining bit can be caught.
                // Stop scanning and emit what we have.  (This used to be a
                // bare "return;", which handed null back to the caller
                // instead of false or a string.)
                break;
            }
            if ($bit === '') {
                continue;
            }
            foreach ($caught as $key => $status) {
                if ($status !== false) {
                    continue;
                }
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) {
                    continue;
                }
                if ($r === 'none') {
                    // only the first 'none' is honoured ...
                    if ($none) {
                        continue;
                    } else {
                        $none = true;
                    }
                    // ... and it is never stored in the image slot
                    if ($key == 'image') {
                        continue;
                    }
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) {
            return false;
        }

        $ret = array();

        // output order is fixed: type, image, position
        if ($caught['type']) {
            $ret[] = $caught['type'];
        }

        if ($caught['image']) {
            $ret[] = $caught['image'];
        }

        if ($caught['position']) {
            $ret[] = $caught['position'];
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
12033: 
12034: 
12035: 
12036: 
12037: 
/**
 * Framework class for strings that involve multiple values.
 *
 * Properties such as border-width and margin take several space separated
 * values of the same kind.  This class wraps the validator for one such
 * value and applies it to each piece, keeping at most $max accepted pieces.
 *
 * @note Even though the CSS specification isn't clear about it, inherit
 *       can only be used alone: it will never manifest as part of a multi
 *       shorthand declaration.  Thus, this class does not allow inherit.
 */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
    /**
     * Validator every individual piece is handed to.
     * @type HTMLPurifier_AttrDef
     * @todo Make protected
     */
    public $single;

    /**
     * Upper bound on the number of accepted pieces.
     * @todo Make protected
     */
    public $max;

    /**
     * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply
     * @param int $max Max number of values allowed (usually four)
     */
    public function __construct($single, $max = 4)
    {
        $this->single = $single;
        $this->max = $max;
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted pieces joined by single spaces, or false
     *         when nothing was accepted.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        $accepted = array();
        // parseCDATA replaced \r, \t and \n, so a plain space is the only separator
        foreach (explode(' ', $string) as $piece) {
            if (count($accepted) >= $this->max) {
                break;
            }
            if (ctype_space($piece)) {
                continue;
            }
            $validated = $this->single->validate($piece, $config, $context);
            if ($validated !== false) {
                $accepted[] = $validated;
            }
        }

        if (!$accepted) {
            return false;
        }
        return rtrim(implode(' ', $accepted));
    }
}
12105: 
12106: 
12107: 
12108: 
12109: 
/**
 * Validates a Percentage as defined by the CSS spec.
 */
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{

    /**
     * Validator used for the numeric part in front of the percent sign.
     * @type HTMLPurifier_AttrDef_CSS_Number
     */
    protected $number_def;

    /**
     * @param bool $non_negative Whether to forbid negative values
     */
    public function __construct($non_negative = false)
    {
        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Validated number followed by '%', or false.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $size = strlen($string);

        // need at least one character of number plus the trailing '%'
        if ($string === '' || $size === 1) {
            return false;
        }
        if ($string[$size - 1] !== '%') {
            return false;
        }

        $digits = substr($string, 0, $size - 1);
        $validated = $this->number_def->validate($digits, $config, $context);

        return ($validated === false) ? false : "$validated%";
    }
}
12160: 
12161: 
12162: 
12163: 
12164: 
/**
 * Validates the value for the CSS property text-decoration
 * @note This class could be generalized into a version that acts sort of
 *       like Enum except you can compound the allowed values.
 */
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{

    /**
     * Accepts the single keyword 'none', or any space separated combination
     * of line-through, overline and underline; unknown words are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        static $recognised = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true,
        );

        $string = strtolower($this->parseCDATA($string));
        if ($string === 'none') {
            return $string;
        }

        $kept = array();
        foreach (explode(' ', $string) as $word) {
            if (isset($recognised[$word])) {
                $kept[] = $word;
            }
        }

        if (!$kept) {
            return false;
        }
        return implode(' ', $kept);
    }
}
12207: 
12208: 
12209: 
12210: 
12211: 
/**
 * Validates a URI in CSS syntax, which uses url('http://example.com')
 * @note While theoretically speaking a URI in a CSS document could
 *       be non-embedded, as of CSS2 there is no such usage so we're
 *       generalizing it. This may need to be changed in the future.
 * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
 *          the separator, you cannot put a literal semicolon
 *          in the URI. Try percent encoding it, in that case.
 */
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{

    public function __construct()
    {
        parent::__construct(true); // always embedded
    }

    /**
     * Strips the url( ... ) wrapper and optional quotes, validates the
     * enclosed URI with the parent validator and re-wraps the result.
     *
     * @param string $uri_string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The URI re-serialised as url("..."), or false.
     */
    public function validate($uri_string, $config, $context)
    {
        // parse the URI out of the string and then pass it onto
        // the parent object

        $uri_string = $this->parseCDATA($uri_string);
        if (strpos($uri_string, 'url(') !== 0) {
            return false;
        }
        // cast: substr() yields false rather than '' on older PHP versions
        $uri_string = (string)substr($uri_string, 4);
        if ($uri_string === '') {
            // bare "url(": there is no closing parenthesis to look at, and
            // indexing the empty string would raise an offset warning
            return false;
        }
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] != ')') {
            return false;
        }
        $uri = trim(substr($uri_string, 0, $new_length));

        if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
            $quote = $uri[0];
            $new_length = strlen($uri) - 1;
            // $new_length < 1 means the string is a single quote character;
            // the opening quote must not be mistaken for the closing one
            if ($new_length < 1 || $uri[$new_length] !== $quote) {
                return false;
            }
            $uri = (string)substr($uri, 1, $new_length - 1);
        }

        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) {
            return false;
        }

        // extra sanity check; should have been done by URI
        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

        // suspicious characters are ()'; we're going to percent encode
        // them for safety.
        $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);

        // there's an extra bug where ampersands lose their escaping on
        // an innerHTML cycle, so a very unlucky query parameter could
        // then change the meaning of the URL.  Unfortunately, there's
        // not much we can do about that...
        return "url(\"$result\")";
    }
}
12282: 
12283: 
12284: 
12285: 
12286: 
/**
 * Validates a boolean attribute
 */
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /**
     * Value handed back by validate() for any non-empty input; false
     * unless a name was supplied through the constructor or make().
     * @type bool|string
     */
    protected $name;

    /**
     * @type bool
     */
    public $minimized = true;

    /**
     * @param bool|string $name
     */
    public function __construct($name = false)
    {
        $this->name = $name;
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The configured name, or false when $string is
     *         empty in the PHP sense ('' and '0' included).
     */
    public function validate($string, $config, $context)
    {
        return empty($string) ? false : $this->name;
    }

    /**
     * Factory: builds a validator bound to the given attribute name.
     * @param string $string Name of attribute
     * @return HTMLPurifier_AttrDef_HTML_Bool
     */
    public function make($string)
    {
        return new self($string);
    }
}
12334: 
12335: 
12336: 
12337: 
12338: 
/**
 * Validates contents based on NMTOKENS attribute type.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Trims the input, splits it into tokens, filters them and joins the
     * survivors with single spaces.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);

        // '' and '0' both convert to false and are rejected up front
        if (!$string) {
            return false;
        }

        $tokens = $this->filter(
            $this->split($string, $config, $context),
            $config,
            $context
        );

        return empty($tokens) ? false : implode(' ', $tokens);
    }

    /**
     * Splits a space separated list of tokens into its constituent parts.
     * Only whitespace-delimited tokens matching the pattern below are
     * returned; anything else is silently left out.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function split($string, $config, $context)
    {
        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation

        // U+00A1 and up codepoints and escaping are deliberately not
        // supported by this pattern.
        $boundary_before = '/(?:(?<=\s)|\A)'; // preceded by space or string start
        $token = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        $boundary_after = '(?:(?=\s)|\z)/'; // followed by space or string end

        $found = array();
        preg_match_all($boundary_before . $token . $boundary_after, $string, $found);
        return $found[1];
    }

    /**
     * Template method for removing certain tokens based on arbitrary criteria.
     * This base implementation keeps every token.
     *
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }
}
12405: 
12406: 
12407: 
12408: 
12409: 
/**
 * Implements special behavior for class attribute (normally NMTOKENS)
 */
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Uses the strict NMTOKENS splitter for the XHTML 1.1 / XHTML 2.0
     * doctypes and a plain whitespace split for every other doctype.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function split($string, $config, $context)
    {
        // really, this twiddle should be lazy loaded
        $doctype = $config->getDefinition('HTML')->doctype->name;
        $strict = ($doctype == "XHTML 1.1" || $doctype == "XHTML 2.0");
        if (!$strict) {
            return preg_split('/\s+/', $string);
        }
        return parent::split($string, $config, $context);
    }

    /**
     * Drops tokens that are not in Attr.AllowedClasses (when that directive
     * is non-null), tokens listed in Attr.ForbiddenClasses, and duplicates.
     *
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        $allowed = $config->get('Attr.AllowedClasses');
        $forbidden = $config->get('Attr.ForbiddenClasses');

        $ret = array();
        foreach ($tokens as $token) {
            if ($allowed !== null && !isset($allowed[$token])) {
                continue;
            }
            if (isset($forbidden[$token])) {
                continue;
            }
            // We need this O(n) check because of PHP's array
            // implementation that casts -0 to 0.
            if (in_array($token, $ret, true)) {
                continue;
            }
            $ret[] = $token;
        }
        return $ret;
    }
}
12456: 
12457: 
12458: 
/**
 * Validates a color according to the HTML spec.
 */
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * Accepts a keyword from Core.ColorKeywords, or a 3- or 6-digit hex
     * value with or without a leading '#'.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The keyword's mapped value, or '#' followed by
     *         six hex digits, or false.
     */
    public function validate($string, $config, $context)
    {
        // keyword table is fetched once and kept in a static for later calls
        static $keywords = null;
        if ($keywords === null) {
            $keywords = $config->get('Core.ColorKeywords');
        }

        $string = trim($string);
        if (empty($string)) {
            return false;
        }

        $lower = strtolower($string);
        if (isset($keywords[$lower])) {
            return $keywords[$lower];
        }

        $hex = ($string[0] === '#') ? substr($string, 1) : $string;

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) {
            return false;
        }
        if (!ctype_xdigit($hex)) {
            return false;
        }

        if ($length === 3) {
            // expand shorthand: each digit is doubled
            $expanded = '';
            for ($k = 0; $k < 3; $k++) {
                $expanded .= $hex[$k] . $hex[$k];
            }
            $hex = $expanded;
        }
        return "#$hex";
    }
}
12506: 
12507: 
12508: 
12509: 
12510: 
/**
 * Special-case enum attribute definition that lazy loads allowed frame targets
 */
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{

    /**
     * Allowed values; stays false until the first validate() call fills it
     * from configuration.
     * @type array
     */
    public $valid_values = false; // uninitialized value

    /**
     * @type bool
     */
    protected $case_sensitive = false;

    /**
     * Intentionally empty: the parent constructor is not invoked.
     */
    public function __construct()
    {
    }

    /**
     * Loads Attr.AllowedFrameTargets on first use, then defers to the
     * parent validator.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $needs_loading = (false === $this->valid_values);
        if ($needs_loading) {
            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }
}
12545: 
12546: 
12547: 
12548: 
12549: 
/**
 * Validates the HTML attribute ID.
 * @warning Even though this is the id processor, it
 *          will ignore the directive Attr:IDBlacklist, since it will only
 *          go according to the ID accumulator. Since the accumulator is
 *          automatically generated, it will have already absorbed the
 *          blacklist. If you're hacking around, make sure you use load()!
 */

class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * True when the ID is being validated as part of a CSS selector; in
     * that mode Attr.EnableID and the ID accumulator are not consulted.
     * @type bool
     */
    protected $selector;

    /**
     * @param bool $selector
     */
    public function __construct($selector = false)
    {
        $this->selector = $selector;
    }

    /**
     * @param string $id
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The trimmed (and possibly prefixed) ID, or false.
     */
    public function validate($id, $config, $context)
    {
        if (!($this->selector || $config->get('Attr.EnableID'))) {
            return false;
        }

        $id = trim($id);
        if ($id === '') {
            return false;
        }

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix === '') {
            // a local prefix without a global one is a configuration mistake
            if ($config->get('Attr.IDPrefixLocal') !== '') {
                trigger_error(
                    '%Attr.IDPrefixLocal cannot be used unless ' .
                    '%Attr.IDPrefix is set',
                    E_USER_WARNING
                );
            }
        } else {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // prevent re-appending the prefix
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        }

        if (!$this->selector) {
            // reject IDs the accumulator has already seen
            $id_accumulator =& $context->get('IDAccumulator');
            if (isset($id_accumulator->ids[$id])) {
                return false;
            }
        }

        // character checks are done without regular expressions:
        // all-letters is fine outright; otherwise the first character must
        // be a letter and nothing may remain once permitted characters are
        // trimmed off both ends
        $well_formed = ctype_alpha($id);
        if (!$well_formed) {
            if (!ctype_alpha(@$id[0])) {
                return false;
            }
            $leftover = trim(
                $id,
                'A..Za..z0..9:-._'
            );
            $well_formed = ($leftover === '');
        }

        $blacklist = $config->get('Attr.IDBlacklistRegexp');
        if ($blacklist && preg_match($blacklist, $id)) {
            return false;
        }

        if (!$well_formed) {
            return false;
        }

        // remember the ID so later duplicates are rejected
        if (!$this->selector) {
            $id_accumulator->add($id);
        }
        return $id;
    }
}
12651: 
12652: 
12653: 
12654: 
12655: 
/**
 * Validates an integer representation of pixels according to the HTML spec.
 */
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{

    /**
     * Largest value validate() will return; null means no upper bound.
     * @type int
     */
    protected $max;

    /**
     * @param int $max
     */
    public function __construct($max = null)
    {
        $this->max = $max;
    }

    /**
     * Strips an optional trailing "px", converts the rest to an integer and
     * clamps it to the range 0..$max.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '0') {
            return $string;
        }
        if ($string === '') {
            return false;
        }

        $length = strlen($string);
        $has_unit = (substr($string, $length - 2) == 'px');
        if ($has_unit) {
            $string = substr($string, 0, $length - 2);
        }
        if (!is_numeric($string)) {
            return false;
        }

        $pixels = intval($string);
        if ($pixels < 0) {
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        $ceiling = $this->max;
        $capped = ($ceiling !== null && $pixels > $ceiling) ? $ceiling : $pixels;
        return (string)$capped;
    }

    /**
     * Factory: builds an instance of the same class as $this, using the
     * given string as the maximum ('' means unbounded).
     *
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $max = ($string === '') ? null : (int)$string;
        $class = get_class($this);
        return new $class($max);
    }
}
12728: 
12729: 
12730: 
12731: 
12732: 
/**
 * Validates the HTML type length (not to be confused with CSS's length).
 *
 * This accepts integer pixels or percentages as lengths for certain
 * HTML attributes.
 */

class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{

    /**
     * Tries the pixel validator first; if that rejects the value, accepts
     * "<number>%" and clamps the number to 0..100.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_pixels = parent::validate($string, $config, $context);
        if ($as_pixels !== false) {
            return $as_pixels;
        }

        $length = strlen($string);
        if ($string[$length - 1] !== '%') {
            return false;
        }

        $digits = substr($string, 0, $length - 1);
        if (!is_numeric($digits)) {
            return false;
        }

        // clamp into the 0..100 range
        $percent = max(0, min(100, (int)$digits));
        return ((string)$percent) . '%';
    }
}
12785: 
12786: 
12787: 
12788: 
12789: 
/**
 * Validates a rel/rev link attribute against a directive of allowed values
 * @note We cannot use Enum because link types allow multiple
 *       values.
 * @note Assumes link types are ASCII text
 */
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{

    /**
     * Name of the Attr.* directive holding the allowed values
     * ('AllowedRel' or 'AllowedRev').
     * @type string
     */
    protected $name;

    /**
     * @param string $name Either 'rel' or 'rev'; anything else triggers an
     *        E_USER_ERROR and leaves the directive name unset.
     */
    public function __construct($name)
    {
        $configLookup = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($configLookup[$name])) {
            $this->name = $configLookup[$name];
            return;
        }
        trigger_error(
            'Unrecognized attribute name for link ' .
            'relationship.',
            E_USER_ERROR
        );
    }

    /**
     * Keeps the lower-cased, de-duplicated link types that appear in the
     * configured allow-list.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $allowed = $config->get('Attr.' . $this->name);
        if (empty($allowed)) {
            return false;
        }

        // array keys double as a duplicate filter
        $seen = array();
        foreach (explode(' ', $this->parseCDATA($string)) as $piece) {
            $piece = strtolower(trim($piece));
            if (isset($allowed[$piece])) {
                $seen[$piece] = true;
            }
        }

        if (empty($seen)) {
            return false;
        }
        return implode(' ', array_keys($seen));
    }
}
12858: 
12859: 
12860: 
12861: 
12862: 
/**
 * Validates a MultiLength as defined by the HTML spec.
 *
 * A multilength is either a integer (pixel count), a percentage, or
 * a relative number.
 */
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{

    /**
     * Tries the Length validator first; if that rejects the value, accepts
     * the relative form "<number>*".
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string '*' for a bare star or a weight of 1, '0' for a
     *         weight of 0, "<n>*" for larger weights, otherwise false.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_length = parent::validate($string, $config, $context);
        if ($as_length !== false) {
            return $as_length;
        }

        $length = strlen($string);
        if ($string[$length - 1] !== '*') {
            return false;
        }

        $digits = substr($string, 0, $length - 1);
        if ($digits == '') {
            return '*';
        }
        if (!is_numeric($digits)) {
            return false;
        }

        $weight = (int)$digits;
        if ($weight < 0) {
            return false;
        }
        if ($weight === 0) {
            return '0';
        }
        // a weight of one is written as a bare star
        return ($weight === 1 ? '' : (string)$weight) . '*';
    }
}
12919: 
12920: 
12921: 
12922: 
12923: 
/**
 * Abstract base for e-mail address validators.
 */
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{

    /**
     * Unpacks a mailbox into its display-name and address.
     * Placeholder: nothing is unpacked yet and null is returned.
     *
     * @param string $string
     * @return mixed
     */
    public function unpack($string)
    {
        // needs to be implemented
        return null;
    }

}
12938: 
12939: // sub-implementations
12940: 
12941: 
12942: 
12943: 
12944: 
/**
 * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
 *
 * Accepted forms, tried in this order: the empty string, a bracketed IPv6
 * literal, a dotted-quad IPv4 address, an ASCII domain name and, when
 * Core.EnableIDNA is set, a domain name whose non-ASCII labels can be
 * converted to Punycode.
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * IPv6 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    public function __construct()
    {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * @param string $string Host component to check
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The accepted host (possibly Punycode-encoded),
     *                     or false when the host is rejected
     */
    public function validate($string, $config, $context)
    {
        $length = strlen($string);
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }
        if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
            //IPv6
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            if ($valid === false) {
                return false;
            }
            return '[' . $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) {
            return $ipv4;
        }

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode.

        // There is not a good sense in which underscores should be
        // allowed, since it's technically not! (And if you go as
        // far to allow everything as specified by the DNS spec...
        // well, that's literally everything, modulo some space limits
        // for the components and the overall name (which, by the way,
        // we are NOT checking!).  So we (arbitrarily) decide this:
        // let's allow underscores wherever we would have allowed
        // hyphens, if they are enabled.  This is a pretty good match
        // for browser behavior, for example, a large number of browsers
        // cannot handle foo_.example.com, but foo_bar.example.com is
        // fairly well supported.
        $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = "[a-z0-9-$underscore]"; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel = "$an($and*$an)?";
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel = "$a($and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        //
        // The D modifier makes "$" match only at the very end of the
        // subject; without it a host followed by a single "\n" would be
        // accepted and returned with the newline still attached.
        $hostname = "/^($domainlabel\.)*$toplabel\.?$/iD";
        if (preg_match($hostname, $string)) {
            return $string;
        }

        // With Core.EnableIDNA turned on, labels containing bytes above
        // 0x7a are converted to Punycode through Net_IDNA2 and the
        // resulting name is checked against the hostname grammar again.
        if ($config->get('Core.EnableIDNA')) {
            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
            // we need to encode each period separately
            $parts = explode('.', $string);
            try {
                $new_parts = array();
                foreach ($parts as $part) {
                    $encodable = false;
                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                        if (ord($part[$i]) > 0x7a) {
                            $encodable = true;
                            break;
                        }
                    }
                    if (!$encodable) {
                        $new_parts[] = $part;
                    } else {
                        $new_parts[] = $idna->encode($part);
                    }
                }
                $string = implode('.', $new_parts);
                if (preg_match($hostname, $string)) {
                    return $string;
                }
            } catch (Exception $e) {
                // XXX error reporting
                // An encoding failure simply means the host is rejected below.
            }
        }
        return false;
    }
}
13069: 
13070: 
13071: 
13072: 
13073: 
/**
 * Validates an IPv4 address
 * @author Feyd @ forums.devnetwork.net (public domain)
 */
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 regex, protected so that IPv6 can reuse it.
     * Stays unset until _loadRegex() has been called.
     * @type string
     */
    protected $ip4;

    /**
     * Accepts exactly four dot-separated decimal octets in the range 0-255.
     *
     * @param string $aIP Candidate address
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The unchanged address when valid, false otherwise
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }

        // D (dollar-end-only): "$" must not match in front of a trailing
        // newline, otherwise "1.2.3.4\n" would be reported as valid.
        if (preg_match('#^' . $this->ip4 . '$#sD', $aIP)) {
            return $aIP;
        }
        return false;
    }

    /**
     * Lazy load function to prevent regex from being stuffed in
     * cache.
     */
    protected function _loadRegex()
    {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }
}
13115: 
13116: 
13117: 
13118: 
13119: 
/**
 * Validates an IPv6 address.
 * @author Feyd @ forums.devnetwork.net (public domain)
 * @note This function requires brackets to have been removed from address
 *       in URI.
 */
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{

    /**
     * Checks the textual form of an IPv6 address: an optional "/0".."/128"
     * prefix length, an optional trailing dotted-quad IPv4 part, at most
     * one "::" compression and otherwise eight groups of 1-4 hex digits.
     *
     * @param string $aIP Candidate address without surrounding brackets
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The unchanged input when valid, false otherwise
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }

        $original = $aIP;

        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128

        //      prefix check
        if (strpos($aIP, '/') !== false) {
            // D: anchor at the true end of the string, not before a final "\n"
            if (preg_match('#' . $pre . '$#sD', $aIP, $find)) {
                $aIP = substr($aIP, 0, 0 - strlen($find[0]));
                unset($find);
            } else {
                return false;
            }
        }

        //      IPv4-compatibility check
        if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#sD', $aIP, $find)) {
            // Replace the dotted quad by two hex groups so that the group
            // count below works out; only the well-formedness of the result
            // matters because $original is what gets returned.
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $ip = explode('.', $find[0]);
            $ip = array_map('dechex', $ip);
            $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
            unset($find, $ip);
        }

        //      compression check
        $aIP = explode('::', $aIP);
        $c = count($aIP);
        if ($c > 2) {
            return false;
        } elseif ($c == 2) {
            list($first, $second) = $aIP;
            // An empty side of "::" contributes no groups at all. Mapping it
            // to an empty list (rather than to one empty group) lets the
            // per-group check below reject genuinely empty groups such as
            // the one in "1:::2".
            $first = ($first === '') ? array() : explode(':', $first);
            $second = ($second === '') ? array() : explode(':', $second);

            // "::" stands for at least one group of zeros, so at most seven
            // groups may be written out explicitly next to it.
            if (count($first) + count($second) > 7) {
                return false;
            }

            while (count($first) < 8) {
                array_push($first, '0');
            }

            array_splice($first, 8 - count($second), 8, $second);
            $aIP = $first;
            unset($first, $second);
        } else {
            $aIP = explode(':', $aIP[0]);
        }
        $c = count($aIP);

        if ($c != 8) {
            return false;
        }

        //      All the pieces should be 16-bit hex strings. Are they?
        foreach ($aIP as $piece) {
            // 1-4 hex digits; an empty group (e.g. the tail of
            // "1:2:3:4:5:6:7:") is not a valid 16-bit value.
            if (!preg_match('#^[0-9a-fA-F]{1,4}$#D', $piece)) {
                return false;
            }
        }
        return $original;
    }
}
13205: 
13206: 
13207: 
13208: 
13209: 
/**
 * Minimal e-mail address validator built around the regular expression
 * published at http://www.regular-expressions.info/email.html
 */
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{

    /**
     * Trims the input and accepts it when it looks like local@domain.tld.
     *
     * Named mailboxes such as "Bob <bob@example.com>" are not supported;
     * handling them would require additional percent encoding.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Trimmed address on success, false otherwise
     */
    public function validate($string, $config, $context)
    {
        if ($string == '') {
            return false;
        }

        $candidate = trim($string);
        if (preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $candidate)) {
            return $candidate;
        }
        return false;
    }
}
13235: 
13236: 
13237: 
13238: 
13239: 
/**
 * Pre-transform that turns the proprietary background attribute into a
 * CSS background-image declaration.
 */
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform
{
    /**
     * Moves a present "background" attribute into the style attribute.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['background'])) {
            // the value is passed on as-is; no validation happens here
            $url = $this->confiscateAttr($attr, 'background');
            $this->prependCSS($attr, 'background-image:url(' . $url . ');');
        }
        return $attr;
    }
}
13264: 
13265: 
13266: 
13267: 
13268: 
// this MUST be placed in post, as it assumes that any value in dir is valid

/**
 * Post-transform that ensures that bdo tags have the dir attribute set.
 */
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{

    /**
     * Fills in Attr.DefaultTextDir when no dir attribute is present.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['dir'])) {
            $attr['dir'] = $config->get('Attr.DefaultTextDir');
        }
        return $attr;
    }
}
13292: 
13293: 
13294: 
13295: 
13296: 
/**
 * Pre-transform that turns the deprecated bgcolor attribute into a CSS
 * background-color declaration.
 */
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform
{
    /**
     * Moves a present "bgcolor" attribute into the style attribute.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['bgcolor'])) {
            // the value is passed on as-is; no validation happens here
            $color = $this->confiscateAttr($attr, 'bgcolor');
            $this->prependCSS($attr, 'background-color:' . $color . ';');
        }
        return $attr;
    }
}
13321: 
13322: 
13323: 
13324: 
13325: 
/**
 * Pre-transform that converts a boolean attribute to fixed CSS.
 */
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform
{
    /**
     * Name of the boolean attribute that triggers the transform.
     * @type string
     */
    protected $attr;

    /**
     * CSS declarations added to style; must end with a semicolon.
     * @type string
     */
    protected $css;

    /**
     * @param string $attr attribute name to convert from
     * @param string $css CSS declarations to add to style (needs semicolon)
     */
    public function __construct($attr, $css)
    {
        $this->css = $css;
        $this->attr = $attr;
    }

    /**
     * Drops the trigger attribute, if set, and prepends the configured CSS.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $trigger = $this->attr;
        if (isset($attr[$trigger])) {
            unset($attr[$trigger]);
            $this->prependCSS($attr, $this->css);
        }
        return $attr;
    }
}
13369: 
13370: 
13371: 
13372: 
13373: 
/**
 * Pre-transform that turns the deprecated border attribute into a CSS
 * border declaration.
 */
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform
{
    /**
     * Moves a present "border" attribute into the style attribute as
     * "border:<value>px solid;".
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['border'])) {
            // the value is passed on as-is; no validation happens here
            $width = $this->confiscateAttr($attr, 'border');
            $this->prependCSS($attr, 'border:' . $width . 'px solid;');
        }
        return $attr;
    }
}
13396: 
13397: 
13398: 
13399: 
13400: 
/**
 * Generic pre-transform that maps an attribute with a fixed set of
 * (enumerated) values to CSS.
 */
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform
{
    /**
     * Name of attribute to transform from.
     * @type string
     */
    protected $attr;

    /**
     * Lookup array of attribute values to CSS.
     * @type array
     */
    protected $enumToCSS = array();

    /**
     * Case sensitivity of the matching.
     * @type bool
     * @warning Currently can only be guaranteed to work with ASCII
     *          values.
     */
    protected $caseSensitive = false;

    /**
     * @param string $attr Attribute name to transform from
     * @param array $enum_to_css Lookup array of attribute values to CSS
     * @param bool $case_sensitive Case sensitivity indicator, default false
     */
    public function __construct($attr, $enum_to_css, $case_sensitive = false)
    {
        $this->caseSensitive = (bool)$case_sensitive;
        $this->enumToCSS = $enum_to_css;
        $this->attr = $attr;
    }

    /**
     * Removes the source attribute and, when its trimmed value is a known
     * key of the lookup table, prepends the mapped CSS. Unknown values are
     * dropped without adding any CSS.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $source = $this->attr;
        if (!isset($attr[$source])) {
            return $attr;
        }

        $key = trim($attr[$source]);
        unset($attr[$source]);

        // lower-case the lookup key unless matching is case sensitive
        $key = $this->caseSensitive ? $key : strtolower($key);

        if (isset($this->enumToCSS[$key])) {
            $this->prependCSS($attr, $this->enumToCSS[$key]);
        }
        return $attr;
    }
}
13465: 
13466: 
13467: 
13468: 
13469: 
// must be called POST validation

/**
 * Transform that supplies default values for the src and alt attributes
 * in img tags, as well as prevents the img tag from being removed
 * because of a missing alt tag. This needs to be registered as both
 * a pre and post attribute transform.
 */
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{

    /**
     * Fills in missing src/alt values from configuration.
     *
     * A missing src leaves the attributes untouched when
     * Core.RemoveInvalidImg is on; otherwise Attr.DefaultInvalidImage is
     * used. A missing alt becomes Attr.DefaultInvalidImageAlt when src had
     * to be substituted, else Attr.DefaultImageAlt, or (when that is null)
     * the first 40 bytes of the src basename.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $hadSrc = isset($attr['src']);
        if (!$hadSrc) {
            if ($config->get('Core.RemoveInvalidImg')) {
                return $attr;
            }
            $attr['src'] = $config->get('Attr.DefaultInvalidImage');
        }

        if (isset($attr['alt'])) {
            return $attr;
        }

        if (!$hadSrc) {
            $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
            return $attr;
        }

        $defaultAlt = $config->get('Attr.DefaultImageAlt');
        // no configured default: derive the alt from the file name, truncated
        $attr['alt'] = ($defaultAlt === null)
            ? substr(basename($attr['src']), 0, 40)
            : $defaultAlt;
        return $attr;
    }
}
13514: 
13515: 
13516: 
13517: 
13518: 
/**
 * Pre-transform that turns the deprecated hspace and vspace attributes
 * into CSS margins.
 */
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform
{
    /**
     * Attribute handled by this instance.
     * @type string
     */
    protected $attr;

    /**
     * Margin sides affected by each supported attribute.
     * @type array
     */
    protected $css = array(
        'hspace' => array('left', 'right'),
        'vspace' => array('top', 'bottom')
    );

    /**
     * @param string $attr Attribute name; an error is triggered when it is
     *                     not a key of $css
     */
    public function __construct($attr)
    {
        if (!isset($this->css[$attr])) {
            trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
        }
        $this->attr = $attr;
    }

    /**
     * Replaces the attribute by one "margin-<side>:<value>px;" declaration
     * per affected side. An unsupported attribute is removed without adding
     * any CSS.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        // the value is passed on as-is; no validation happens here
        $width = $this->confiscateAttr($attr, $name);

        if (!isset($this->css[$name])) {
            return $attr;
        }

        $declarations = '';
        foreach ($this->css[$name] as $side) {
            $declarations .= 'margin-' . $side . ':' . $width . 'px;';
        }
        $this->prependCSS($attr, $declarations);
        return $attr;
    }
}
13576: 
13577: 
13578: 
13579: 
13580: 
/**
 * Cross-attribute validation and filtering for input elements.
 * This is meant to be a post-transform.
 */
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform
{
    /**
     * Validator applied to "size" on non-text inputs.
     * @type HTMLPurifier_AttrDef_HTML_Pixels
     */
    protected $pixels;

    public function __construct()
    {
        $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
    }

    /**
     * Removes or normalises attributes that do not fit the input's type
     * (a missing type counts as "text").
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $t = isset($attr['type']) ? strtolower($attr['type']) : 'text';

        $isToggle = ($t === 'radio' || $t === 'checkbox');
        $isTextual = ($t === 'text' || $t === 'password');

        // "checked" only makes sense on radio buttons and checkboxes
        if (isset($attr['checked']) && !$isToggle) {
            unset($attr['checked']);
        }
        // "maxlength" only makes sense on text and password fields
        if (isset($attr['maxlength']) && !$isTextual) {
            unset($attr['maxlength']);
        }
        // on every other type "size" has to validate as a pixel value
        if (isset($attr['size']) && !$isTextual) {
            $size = $this->pixels->validate($attr['size'], $config, $context);
            if ($size === false) {
                unset($attr['size']);
            } else {
                $attr['size'] = $size;
            }
        }
        // "src" belongs to image inputs only
        if (isset($attr['src']) && $t !== 'image') {
            unset($attr['src']);
        }
        // radio buttons and checkboxes always carry a value
        if ($isToggle && !isset($attr['value'])) {
            $attr['value'] = '';
        }
        return $attr;
    }
}
13633: 
13634: 
13635: 
13636: 
13637: 
/**
 * Post-transform that copies lang's value to xml:lang (and vice-versa)
 * @note Theoretically speaking, this could be a pre-transform, but putting
 *       post is more efficient.
 */
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{

    /**
     * Keeps lang and xml:lang in sync; xml:lang wins when both are set.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // false marks "attribute not present"
        $plain = isset($attr['lang']) ? $attr['lang'] : false;
        $xml = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;

        if ($xml !== false) {
            $attr['lang'] = $xml;
        } elseif ($plain !== false) {
            $attr['xml:lang'] = $plain;
        }
        return $attr;
    }
}
13665: 
13666: 
13667: 
13668: 
13669: 
/**
 * Transforms a width/height style length attribute into a CSS declaration.
 */
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{

    /**
     * Attribute name to read from.
     * @type string
     */
    protected $name;

    /**
     * CSS property name to write to.
     * @type string
     */
    protected $cssName;

    /**
     * @param string $name Attribute name
     * @param string $css_name CSS property name; falls back to $name when
     *                         empty
     */
    public function __construct($name, $css_name = null)
    {
        $this->name = $name;
        if ($css_name) {
            $this->cssName = $css_name;
        } else {
            $this->cssName = $name;
        }
    }

    /**
     * Moves the attribute into style; an all-digit value gets a "px" unit.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr[$this->name])) {
            $value = $this->confiscateAttr($attr, $this->name);
            $unit = ctype_digit($value) ? 'px' : '';
            $this->prependCSS($attr, $this->cssName . ':' . $value . $unit . ';');
        }
        return $attr;
    }
}
13711: 
13712: 
13713: 
13714: 
13715: 
/**
 * Pre-transform that changes deprecated name attribute to ID if necessary
 */
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{

    /**
     * Removes "name" and reuses its value as "id" unless an id is already
     * present. Does nothing when HTML.Attr.Name.UseCDATA is enabled.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // relaxed definition of name in use, or nothing to convert
        if ($config->get('HTML.Attr.Name.UseCDATA') || !isset($attr['name'])) {
            return $attr;
        }

        $name = $this->confiscateAttr($attr, 'name');
        if (!isset($attr['id'])) {
            $attr['id'] = $name;
        }
        return $attr;
    }
}
13745: 
13746: 
13747: 
13748: 
13749: 
/**
 * Post-transform that performs validation to the name attribute; if
 * it is present with an equivalent id attribute, it is passed through;
 * otherwise validation is performed.
 */
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{

    /**
     * Validator applied to name values that have no identical id.
     *
     * Declared explicitly because assigning to an undeclared property is
     * deprecated as of PHP 8.2; kept public, which is the visibility the
     * previously implicit property had.
     * @type HTMLPurifier_AttrDef_HTML_ID
     */
    public $idDef;

    public function __construct()
    {
        $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
    }

    /**
     * Leaves name alone when it equals id; otherwise validates it with the
     * ID validator and removes it when that validation fails.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['name'])) {
            return $attr;
        }
        $name = $attr['name'];
        if (isset($attr['id']) && $attr['id'] === $name) {
            return $attr;
        }
        $result = $this->idDef->validate($name, $config, $context);
        if ($result === false) {
            unset($attr['name']);
        } else {
            $attr['name'] = $result;
        }
        return $attr;
    }
}
13787: 
13788: 
13789: 
13790: 
13791: 
// must be called POST validation

/**
 * Adds rel="nofollow" to all outbound links.  This transform is
 * only attached if Attr.Nofollow is TRUE.
 */
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
    /**
     * Parser used to inspect href values.
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    public function __construct()
    {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Ensures "nofollow" is part of rel when href has a browsable scheme
     * and is not local.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        // only browsable, non-local links are tagged
        if (!$scheme->browsable || $url->isLocal($config, $context)) {
            return $attr;
        }

        if (!isset($attr['rel'])) {
            $attr['rel'] = 'nofollow';
            return $attr;
        }

        $tokens = explode(' ', $attr['rel']);
        if (!in_array('nofollow', $tokens)) {
            $tokens[] = 'nofollow';
        }
        $attr['rel'] = implode(' ', $tokens);
        return $attr;
    }
}
13840: 
13841: 
13842: 
13843: 
13844: 
/**
 * Forces fixed attribute values onto embed elements.
 */
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeEmbed";

    /**
     * Overwrites allowscriptaccess, allownetworking and type with fixed
     * values, whatever the input supplied.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $forced = array(
            'allowscriptaccess' => 'never',
            'allownetworking' => 'internal',
            'type' => 'application/x-shockwave-flash',
        );
        foreach ($forced as $key => $value) {
            $attr[$key] = $value;
        }
        return $attr;
    }
}
13866: 
13867: 
13868: 
13869: 
13870: 
/**
 * Writes default type for all objects. Currently only supports flash.
 */
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeObject";

    /**
     * Supplies the Flash MIME type when no type attribute is set.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'application/x-shockwave-flash';
        return $attr;
    }
}
13895: 
13896: 
13897: 
13898: 
13899: 
/**
 * Validates name/value pairs in param tags to be used in safe objects. This
 * will only allow name values it recognizes, and pre-fill certain attributes
 * with required values.
 *
 * @note
 *      This class only supports Flash. In the future, Quicktime support
 *      may be added.
 *
 * @warning
 *      This class expects an injector to add the necessary parameters tags.
 */
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeParam";

    /**
     * Validator for the movie/src value.
     * @type HTMLPurifier_AttrDef_URI
     */
    private $uri;

    /**
     * Validator for the wmode value.
     *
     * Declared explicitly because assigning to an undeclared property is
     * deprecated as of PHP 8.2; kept public, which is the visibility the
     * previously implicit property had.
     * @type HTMLPurifier_AttrDef_Enum
     */
    public $wmode;

    public function __construct()
    {
        $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
        $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
    }

    /**
     * Rewrites the value of recognized params and blanks out name and value
     * of every other param.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // If we add support for other objects, we'll need to alter the
        // transforms.
        switch ($attr['name']) {
            // application/x-shockwave-flash
            // Keep this synchronized with Injector/SafeObject.php
            case 'allowScriptAccess':
                $attr['value'] = 'never';
                break;
            case 'allowNetworking':
                $attr['value'] = 'internal';
                break;
            case 'allowFullScreen':
                // honoured only when HTML.FlashAllowFullScreen is enabled
                if ($config->get('HTML.FlashAllowFullScreen')) {
                    $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
                } else {
                    $attr['value'] = 'false';
                }
                break;
            case 'wmode':
                $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
                break;
            case 'movie':
            case 'src':
                // both spellings are normalised to "movie"
                $attr['name'] = "movie";
                $attr['value'] = $this->uri->validate($attr['value'], $config, $context);
                break;
            case 'flashvars':
                // we're going to allow arbitrary inputs to the SWF, on
                // the reasoning that it could only hack the SWF, not us.
                break;
            // add other cases to support other param name/value pairs
            default:
                $attr['name'] = $attr['value'] = null;
        }
        return $attr;
    }
}
13975: 
13976: 
13977: 
13978: 
13979: 
/**
 * Supplies the required type attribute for <script> when it is missing.
 */
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
    /**
     * Defaults 'type' to JavaScript when the attribute set has none.
     *
     * @param array $attr Attribute name => value map.
     * @param HTMLPurifier_Config $config Not consulted by this transform.
     * @param HTMLPurifier_Context $context Not consulted by this transform.
     * @return array The attribute map; 'type' is always set on return.
     */
    public function transform($attr, $config, $context)
    {
        // Nothing to do when a (non-null) type is already present.
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'text/javascript';
        return $attr;
    }
}
13999: 
14000: 
14001: 
14002: 
14003: 
14004: // must be called POST validation
14005: 
/**
 * Adds target="_blank" to all outbound links.  This transform is
 * only attached if Attr.TargetBlank is TRUE.  This works regardless
 * of how Attr.AllowedFrameTargets is configured.
 */
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
    /**
     * Parser used to break the href into a URI object.
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    public function __construct()
    {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Sets target="_blank" when the href uses a browsable scheme and the URI
     * does not report itself as benign.
     *
     * @param array $attr Attribute map; only 'href' is read.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        // Guard against a falsy scheme result before dereferencing it, so an
        // unresolvable scheme leaves the attributes untouched instead of
        // raising a property-access warning on a non-object.
        if ($scheme && $scheme->browsable && !$url->isBenign($config, $context)) {
            $attr['target'] = '_blank';
        }
        return $attr;
    }
}
14045: 
14046: 
14047: 
14048: 
14049: 
/**
 * Supplies default cols/rows for <textarea> when they are absent.
 */
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{
    /**
     * Fills in 'cols' and 'rows' where the attribute set lacks them.
     *
     * @param array $attr Attribute name => value map.
     * @param HTMLPurifier_Config $config Not consulted by this transform.
     * @param HTMLPurifier_Context $context Not consulted by this transform.
     * @return array The attribute map with both dimensions set.
     */
    public function transform($attr, $config, $context)
    {
        // Calculated from Firefox
        $fallbacks = array('cols' => '22', 'rows' => '3');
        foreach ($fallbacks as $dimension => $fallback) {
            if (!isset($attr[$dimension])) {
                $attr[$dimension] = $fallback;
            }
        }
        return $attr;
    }
}
14073: 
14074: 
14075: 
14076: 
14077: 
/**
 * Child definition that delegates to one of two definitions depending on
 * context.
 *
 * del and ins accept different children depending on whether they sit in a
 * block or an inline context. Chameleon models that by holding one
 * definition per context and picking between them at validation time.
 * Although written generally, it is intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Definition applied in inline context. Usually the stricter one.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $inline;

    /**
     * Definition applied in block context.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $block;

    /**
     * @type string
     */
    public $type = 'chameleon';

    /**
     * @param array $inline List of elements to allow when inline.
     * @param array $block List of elements to allow when block.
     */
    public function __construct($inline, $block)
    {
        $inline_def = new HTMLPurifier_ChildDef_Optional($inline);
        $block_def = new HTMLPurifier_ChildDef_Optional($block);

        $this->inline = $inline_def;
        $this->block = $block_def;
        // the advertised element set is the block definition's
        $this->elements = $block_def->elements;
    }

    /**
     * Validates with the block definition only when the context's 'IsInline'
     * value is exactly false; every other value selects the inline one.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function validateChildren($children, $config, $context)
    {
        $delegate = ($context->get('IsInline') === false) ? $this->block : $this->inline;
        return $delegate->validateChildren($children, $config, $context);
    }
}
14141: 
14142: 
14143: 
14144: 
14145: 
/**
 * Custom validation class, accepts DTD child definitions
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'custom';

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD.
     * @type string
     */
    public $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex.
     * @type string
     */
    private $_pcre_regex;

    /**
     * @param string $dtd_regex Allowed child pattern from the DTD
     */
    public function __construct($dtd_regex)
    {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     * and records every element name found in the pattern in $elements.
     */
    protected function _compileRegex()
    {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Wrap the pattern in parentheses unless it already starts with one.
        // strncmp() is used instead of a string offset so that an empty
        // pattern is handled without an "uninitialized offset" notice, and
        // the curly-brace offset syntax (removed in PHP 8) is avoided.
        if (strncmp($raw, '(', 1) !== 0) {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
        // DOING! Seriously: if there's problems, please report them.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-paranthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Matches the comma-joined names of the non-whitespace children against
     * the compiled pattern.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the child sequence matches the pattern.
     */
    public function validateChildren($children, $config, $context)
    {
        $list_of_children = '';
        foreach ($children as $node) {
            // whitespace nodes do not take part in the match
            if (!empty($node->is_whitespace)) {
                continue;
            }
            $list_of_children .= $node->name . ',';
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?' . $this->_pcre_regex . '$/',
                $list_of_children
            );
        return (bool)$okay;
    }
}
14244: 
14245: 
14246: 
14247: 
14248: 
/**
 * Child definition for elements that may not contain anything.
 * @warning validateChildren() in this class is actually never called, because
 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
 *          before child definitions are parsed in earnest by
 *          HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'empty';

    /**
     * @type bool
     */
    public $allow_empty = true;

    public function __construct()
    {
        // intentionally takes no arguments and sets nothing up
    }

    /**
     * Always answers with an empty child list, whatever was passed in.
     *
     * @param HTMLPurifier_Node[] $children Ignored.
     * @param HTMLPurifier_Config $config Ignored.
     * @param HTMLPurifier_Context $context Ignored.
     * @return array Always an empty array.
     */
    public function validateChildren($children, $config, $context)
    {
        $no_children = array();
        return $no_children;
    }
}
14283: 
14284: 
14285: 
14286: 
14287: 
/**
 * Definition for list containers ul and ol.
 *
 * What does this do?  The big thing is to handle ol/ul at the top
 * level of list nodes, which should be handled specially by /folding/
 * them into the previous list node.  We generally shouldn't ever
 * see other disallowed elements, because the autoclose behavior
 * in MakeWellFormed handles it.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'list';
    /**
     * @type array
     */
    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Flag for subclasses; reset to false at the start of every
     * validateChildren() call. Declared explicitly (public, matching the
     * visibility it had when created implicitly) so that the assignment does
     * not create a dynamic property.
     * @type bool
     */
    public $whitespace = false;

    /**
     * Keeps li children and whitespace as they are, and tucks every other
     * child into the preceding li (creating an li when there is none yet).
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The new child list, or false when there are no
     *                    children or only whitespace.
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // the new set of children
        $result = array();

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // most recent li seen (or synthesized); false until there is one
        $current_li = false;

        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            if ($node->name === 'li') {
                // good
                $current_li = $node;
                $result[] = $node;
            } else {
                // we want to tuck this into the previous li
                // Invariant: we expect the node to be ol/ul
                // ToDo: Make this more robust in the case of not ol/ul
                // by distinguishing between existing li and li created
                // to handle non-list elements; non-list elements should
                // not be appended to an existing li; only li created
                // for non-list. This distinction is not currently made.
                if ($current_li === false) {
                    $current_li = new HTMLPurifier_Node_Element('li');
                    $result[] = $current_li;
                }
                $current_li->children[] = $node;
                $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
            }
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        return $result;
    }
}
14370: 
14371: 
14372: 
14373: 
14374: 
/**
 * Child definition that accepts a fixed set of elements and refuses a parent
 * that ends up with no children (or nothing but whitespace).
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     * @type array
     */
    public $elements = array();

    /**
     * Whether or not the last passed node was all whitespace.
     * @type bool
     */
    protected $whitespace = false;

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'required';

    /**
     * @param array|string $elements List of allowed element names (lowercase).
     *        A string is split on '|' after spaces are stripped; a plain
     *        list is converted into a name => true lookup.
     */
    public function __construct($elements)
    {
        if (is_string($elements)) {
            $elements = explode('|', str_replace(' ', '', $elements));
        }

        // A sequentially indexed array is a list of names rather than a
        // ready-made lookup, so turn it into one.
        $keys = array_keys($elements);
        if ($keys == array_keys($keys)) {
            $lookup = array();
            foreach (array_flip($elements) as $name => $position) {
                // remove blank
                if (!empty($name)) {
                    $lookup[$name] = true;
                }
            }
            $elements = $lookup;
        }

        $this->elements = $elements;
    }

    /**
     * Filters the children down to what this definition allows. Disallowed
     * elements are replaced by their own children, which are then checked in
     * turn; disallowed text is dropped.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The surviving children, or false when nothing (or
     *                    only whitespace) is left.
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // no children at all: the parent node is to be deleted
        if (empty($children)) {
            return false;
        }

        // whether parsed character data is allowed; decides if stray text
        // nodes are kept
        $text_allowed = isset($this->elements['#PCDATA']);

        $kept = array();
        $only_whitespace = true;

        // work stack, reversed so array_pop() yields document order
        $pending = array_reverse($children);
        while (!empty($pending)) {
            $node = array_pop($pending);

            if (!empty($node->is_whitespace)) {
                $kept[] = $node;
                continue;
            }
            $only_whitespace = false;

            if (isset($this->elements[$node->name])) {
                $kept[] = $node;
            } elseif ($text_allowed && $node instanceof HTMLPurifier_Node_Text) {
                // special case text
                $kept[] = $node;
            } elseif ($node instanceof HTMLPurifier_Node_Element) {
                // spill the child contents in, last child first so the
                // first child is popped next
                // ToDo: Make configurable
                $i = count($node->children);
                while ($i-- > 0) {
                    $pending[] = $node->children[$i];
                }
            }
            // anything else is silently dropped
        }

        if (empty($kept)) {
            return false;
        }
        if ($only_whitespace) {
            $this->whitespace = true;
            return false;
        }
        return $kept;
    }
}
14489: 
14490: 
14491: 
14492: 
14493: 
/**
 * Definition that allows a set of elements, and allows no children.
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
 *       really, one shouldn't inherit from the other.  The only difference
 *       is that a false result from the parent is replaced by another
 *       value, so this class never returns false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    /**
     * @type string
     */
    public $type = 'optional';

    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * Runs the parent validation and converts its false result: true for no
     * children, the original children when they were all whitespace, and an
     * empty array otherwise.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        $validated = parent::validateChildren($children, $config, $context);
        if ($validated !== false) {
            return $validated;
        }

        // we assume that $children is not modified
        if (empty($children)) {
            return true;
        }
        return $this->whitespace ? $children : array();
    }
}
14535: 
14536: 
14537: 
14538: 
14539: 
/**
 * Takes the contents of blockquote when in strict and reformats for validation.
 */
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
    /**
     * The element lookup this definition was constructed with; captured by
     * init().
     * @type array
     */
    protected $real_elements;

    /**
     * The 'Flow' content set plus #PCDATA; built by init().
     * @type array
     */
    protected $fake_elements;

    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'strictblockquote';

    /**
     * Whether init() has already run.
     * @type bool
     */
    protected $init = false;

    /**
     * @param HTMLPurifier_Config $config
     * @return array
     * @note We don't want MakeWellFormed to auto-close inline elements since
     *       they might be allowed.
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->fake_elements;
    }

    /**
     * Validates against the wider "fake" element set, then wraps runs of
     * children that are not in the real element set in the configured
     * block wrapper element.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // trick the parent class into thinking it allows more
        $this->elements = $this->fake_elements;
        $result = parent::validateChildren($children, $config, $context);
        $this->elements = $this->real_elements;

        if ($result === false) {
            return array();
        }
        if ($result === true) {
            $result = $children;
        }

        $def = $config->getHTMLDefinition();
        // read once; used for every wrapper created below
        $block_wrap_name = $def->info_block_wrapper;
        // wrapper currently being filled, or false when not inside one
        $block_wrap = false;
        $ret = array();

        foreach ($result as $node) {
            if ($block_wrap === false) {
                // non-whitespace text, or an element outside the real set,
                // opens a new wrapper
                if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
                    ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) {
                        $block_wrap = new HTMLPurifier_Node_Element($block_wrap_name);
                        $ret[] = $block_wrap;
                }
            } else {
                // an element from the real set closes the open wrapper
                if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) {
                    $block_wrap = false;

                }
            }
            if ($block_wrap) {
                $block_wrap->children[] = $node;
            } else {
                $ret[] = $node;
            }
        }
        return $ret;
    }

    /**
     * Captures the real element set and builds the fake one; runs its body
     * only on the first call.
     *
     * @param HTMLPurifier_Config $config
     */
    private function init($config)
    {
        if (!$this->init) {
            $def = $config->getHTMLDefinition();
            // allow all inline elements
            $this->real_elements = $this->elements;
            $this->fake_elements = $def->info_content_sets['Flow'];
            $this->fake_elements['#PCDATA'] = true;
            $this->init = true;
        }
    }
}
14646: 
14647: 
14648: 
14649: 
14650: 
/**
 * Definition for tables.  The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, just have tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children into caption / cols / thead / tfoot / content
     * buckets and reassembles them in that order, wrapping stray tr runs in
     * a tbody when a tbody, thead or tfoot was seen.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The reordered children, or false when there are no
     *                    children or no tr/tbody content.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // reference to whichever bucket trailing whitespace/comments
        // currently belong to; re-pointed after each recognized element
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
            case 'tbody':
                $tbody_mode = true;
                // fall through
            case 'tr':
                $content[] = $node;
                $ws_accum =& $content;
                break;
            case 'caption':
                // there can only be one caption!
                if ($caption !== false)  break;
                $caption = $node;
                $ws_accum =& $after_caption_ws;
                break;
            case 'thead':
                $tbody_mode = true;
                // XXX This breaks rendering properties with
                // Firefox, which never floats a <thead> to
                // the top. Ever. (Our scheme will float the
                // first <thead> to the top.)  So maybe
                // <thead>s that are not first should be
                // turned into <tbody>? Very tricky, indeed.
                if ($thead === false) {
                    $thead = $node;
                    $ws_accum =& $after_thead_ws;
                } else {
                    // Oops, there's a second one! What
                    // should we do?  Current behavior is to
                    // transmutate the first and last entries into
                    // tbody tags, and then put into content.
                    // Maybe a better idea is to *attach
                    // it* to the existing thead or tfoot?
                    // We don't do this, because Firefox
                    // doesn't float an extra tfoot to the
                    // bottom like it does for the first one.
                    $node->name = 'tbody';
                    $content[] = $node;
                    $ws_accum =& $content;
                }
                break;
            case 'tfoot':
                // see above for some caveats
                $tbody_mode = true;
                if ($tfoot === false) {
                    $tfoot = $node;
                    $ws_accum =& $after_tfoot_ws;
                } else {
                    $node->name = 'tbody';
                    $content[] = $node;
                    $ws_accum =& $content;
                }
                break;
            case 'colgroup':
            case 'col':
                $cols[] = $node;
                $ws_accum =& $cols;
                break;
            case '#PCDATA':
                // How is whitespace handled? We treat is as sticky to
                // the *end* of the previous element. So all of the
                // nonsense we have worked on is to keep things
                // together.
                if (!empty($node->is_whitespace)) {
                    $ws_accum[] = $node;
                }
                break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (never false), so test for emptiness
        if (!empty($cols)) {
            $ret = array_merge($ret, $cols);
        }
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            // tbody currently collecting stray <tr>s, or null when none is open
            $current_tr_tbody = null;

            // NOTE(review): comment nodes can have been accumulated into
            // $content above; unless their name matches one of the cases
            // below they are not copied into $ret here - confirm intended.
            foreach($content as $node) {
                switch ($node->name) {
                case 'tbody':
                    // a real tbody closes any synthesized one
                    $current_tr_tbody = null;
                    $ret[] = $node;
                    break;
                case 'tr':
                    if ($current_tr_tbody === null) {
                        $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                        $ret[] = $current_tr_tbody;
                    }
                    $current_tr_tbody->children[] = $node;
                    break;
                case '#PCDATA':
                    assert($node->is_whitespace);
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;

    }
}
14871: 
14872: 
14873: 
14874: 
14875: 
/**
 * Base definition cache decorator. Holds another cache object in $cache
 * and forwards every cache operation to it unchanged; the subclasses in
 * this file override individual operations to add behaviour around the
 * forwarded call.
 */
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
{

    /**
     * Cache object we are decorating
     * @type HTMLPurifier_DefinitionCache
     */
    public $cache;

    /**
     * The name of the decorator
     * @type string
     */
    public $name;

    /**
     * Takes no arguments; the cache type is filled in later by decorate(),
     * which copies it from the cache being wrapped.
     */
    public function __construct()
    {
    }

    /**
     * Lazy decorator function: produces a fresh decorator (via copy())
     * bound to the given cache and carrying that cache's type.
     * @param HTMLPurifier_DefinitionCache $cache Reference to cache object to decorate
     * @return HTMLPurifier_DefinitionCache_Decorator
     */
    public function decorate(&$cache)
    {
        $wrapper = $this->copy();
        // reference is necessary for mocks in PHP 4
        $wrapper->cache =& $cache;
        $wrapper->type = $cache->type;
        return $wrapper;
    }

    /**
     * Cross-compatible clone substitute; returns a new, unbound instance.
     * @return HTMLPurifier_DefinitionCache_Decorator
     */
    public function copy()
    {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator();
        return $fresh;
    }

    /**
     * Forwards add() to the decorated cache.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function add($def, $config)
    {
        $result = $this->cache->add($def, $config);
        return $result;
    }

    /**
     * Forwards set() to the decorated cache.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function set($def, $config)
    {
        $result = $this->cache->set($def, $config);
        return $result;
    }

    /**
     * Forwards replace() to the decorated cache.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function replace($def, $config)
    {
        $result = $this->cache->replace($def, $config);
        return $result;
    }

    /**
     * Forwards get() to the decorated cache.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function get($config)
    {
        $result = $this->cache->get($config);
        return $result;
    }

    /**
     * Forwards remove() to the decorated cache.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function remove($config)
    {
        $result = $this->cache->remove($config);
        return $result;
    }

    /**
     * Forwards flush() to the decorated cache.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function flush($config)
    {
        $result = $this->cache->flush($config);
        return $result;
    }

    /**
     * Forwards cleanup() to the decorated cache.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the decorated cache returns
     */
    public function cleanup($config)
    {
        $result = $this->cache->cleanup($config);
        return $result;
    }
}
14984: 
14985: 
14986: 
14987: 
14988: 
/**
 * Null cache object to use when no caching is on: stores nothing and
 * answers false to every operation.
 */
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
{

    /**
     * No-op; nothing is ever stored.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function add($def, $config)
    {
        $stored = false;
        return $stored;
    }

    /**
     * No-op; nothing is ever stored.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function set($def, $config)
    {
        $stored = false;
        return $stored;
    }

    /**
     * No-op; there is never an entry to replace.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function replace($def, $config)
    {
        $stored = false;
        return $stored;
    }

    /**
     * No-op; there is never an entry to remove.
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function remove($config)
    {
        $removed = false;
        return $removed;
    }

    /**
     * Always a cache miss.
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function get($config)
    {
        $hit = false;
        return $hit;
    }

    /**
     * No-op; there is nothing to flush.
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function flush($config)
    {
        $flushed = false;
        return $flushed;
    }

    /**
     * No-op; there is nothing to clean up.
     * @param HTMLPurifier_Config $config
     * @return bool Always false
     */
    public function cleanup($config)
    {
        $cleaned = false;
        return $cleaned;
    }
}
15061: 
15062: 
15063: 
15064: 
15065: 
/**
 * Definition cache that persists each definition as a serialized ".ser"
 * file on disk. Files live in <base>/<type>/, where <base> comes from
 * %Cache.SerializerPath (or a default below HTMLPURIFIER_PREFIX) and
 * <type> is this cache's definition type.
 */
class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCache
{

    /**
     * Stores the definition only if no cache file exists for it yet.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return int|bool Bytes written on success; false if the definition
     *         type is wrong, the file already exists, the directory
     *         could not be prepared, or the write failed.
     */
    public function add($def, $config)
    {
        if (!$this->checkDefType($def)) {
            return false;
        }
        $file = $this->generateFilePath($config);
        if (file_exists($file)) {
            return false;
        }
        if (!$this->_prepareDir($config)) {
            return false;
        }
        return $this->_write($file, serialize($def), $config);
    }

    /**
     * Stores the definition, overwriting any existing cache file.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return int|bool Bytes written on success; false if the definition
     *         type is wrong, the directory could not be prepared, or the
     *         write failed.
     */
    public function set($def, $config)
    {
        if (!$this->checkDefType($def)) {
            return false;
        }
        $file = $this->generateFilePath($config);
        if (!$this->_prepareDir($config)) {
            return false;
        }
        return $this->_write($file, serialize($def), $config);
    }

    /**
     * Stores the definition only if a cache file for it already exists.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return int|bool Bytes written on success; false if the definition
     *         type is wrong, no file exists yet, the directory could not
     *         be prepared, or the write failed.
     */
    public function replace($def, $config)
    {
        if (!$this->checkDefType($def)) {
            return false;
        }
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) {
            return false;
        }
        if (!$this->_prepareDir($config)) {
            return false;
        }
        return $this->_write($file, serialize($def), $config);
    }

    /**
     * Reads the cached definition for this configuration.
     * @param HTMLPurifier_Config $config
     * @return mixed The unserialized contents of the cache file, or false
     *         if no cache file exists.
     */
    public function get($config)
    {
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) {
            return false;
        }
        // NOTE(review): unserialize() instantiates whatever objects the
        // file describes, so the cache directory must not be writable by
        // untrusted parties.
        return unserialize(file_get_contents($file));
    }

    /**
     * Deletes the cache file for this configuration.
     * @param HTMLPurifier_Config $config
     * @return bool True if the file was deleted; false if it did not
     *         exist or could not be removed.
     */
    public function remove($config)
    {
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) {
            return false;
        }
        return unlink($file);
    }

    /**
     * Deletes every non-hidden file in this cache's directory.
     * @param HTMLPurifier_Config $config
     * @return bool True once the directory has been walked; false if the
     *         directory could not be prepared or opened.
     */
    public function flush($config)
    {
        if (!$this->_prepareDir($config)) {
            return false;
        }
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // Without this check readdir() would be handed an invalid handle.
        if ($dh === false) {
            return false;
        }
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) {
                continue;
            }
            // skips ".", ".." and hidden files
            if ($filename[0] === '.') {
                continue;
            }
            unlink($dir . '/' . $filename);
        }
        closedir($dh);
        return true;
    }

    /**
     * Deletes the cache files whose key isOld() reports as outdated.
     * @param HTMLPurifier_Config $config
     * @return bool True once the directory has been walked; false if the
     *         directory could not be prepared or opened.
     */
    public function cleanup($config)
    {
        if (!$this->_prepareDir($config)) {
            return false;
        }
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // Without this check readdir() would be handed an invalid handle.
        if ($dh === false) {
            return false;
        }
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) {
                continue;
            }
            // skips ".", ".." and hidden files
            if ($filename[0] === '.') {
                continue;
            }
            // drop the 4-character ".ser" suffix to recover the key
            $key = substr($filename, 0, strlen($filename) - 4);
            if ($this->isOld($key, $config)) {
                unlink($dir . '/' . $filename);
            }
        }
        closedir($dh);
        return true;
    }

    /**
     * Generates the file path to the serial file corresponding to
     * the configuration and definition name
     * @param HTMLPurifier_Config $config
     * @return string
     * @todo Make protected
     */
    public function generateFilePath($config)
    {
        $key = $this->generateKey($config);
        return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
    }

    /**
     * Generates the path to the directory contain this cache's serial files
     * @param HTMLPurifier_Config $config
     * @return string
     * @note No trailing slash
     * @todo Make protected
     */
    public function generateDirectoryPath($config)
    {
        $base = $this->generateBaseDirectoryPath($config);
        return $base . '/' . $this->type;
    }

    /**
     * Generates path to base directory that contains all definition type
     * serials
     * @param HTMLPurifier_Config $config
     * @return mixed|string %Cache.SerializerPath if set, otherwise the
     *         default directory below HTMLPURIFIER_PREFIX.
     * @todo Make protected
     */
    public function generateBaseDirectoryPath($config)
    {
        $base = $config->get('Cache.SerializerPath');
        $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
        return $base;
    }

    /**
     * Convenience wrapper function for file_put_contents
     * @param string $file File name to write to
     * @param string $data Data to write into file
     * @param HTMLPurifier_Config $config
     * @return int|bool Number of bytes written if success, or false if failure.
     */
    private function _write($file, $data, $config)
    {
        $result = file_put_contents($file, $data);
        if ($result !== false) {
            // set permissions of the new file (no execute)
            $chmod = $config->get('Cache.SerializerPermissions');
            if (!$chmod) {
                $chmod = 0644; // invalid config or simpletest
            }
            // mask out the execute bits; the same setting is also used
            // for directories, where they are needed
            $chmod = $chmod & 0666;
            chmod($file, $chmod);
        }
        return $result;
    }

    /**
     * Prepares the directory that this type stores the serials in,
     * creating it below the base directory when it is missing.
     * @param HTMLPurifier_Config $config
     * @return bool True if the directory exists and passed the
     *         writability test (or was just created)
     */
    private function _prepareDir($config)
    {
        $directory = $this->generateDirectoryPath($config);
        $chmod = $config->get('Cache.SerializerPermissions');
        if (!$chmod) {
            $chmod = 0755; // invalid config or simpletest
        }
        if (!is_dir($directory)) {
            $base = $this->generateBaseDirectoryPath($config);
            if (!is_dir($base)) {
                trigger_error(
                    'Base directory ' . $base . ' does not exist,
                    please create or change using %Cache.SerializerPath',
                    E_USER_WARNING
                );
                return false;
            } elseif (!$this->_testPermissions($base, $chmod)) {
                return false;
            }
            // clear the umask so mkdir() applies $chmod exactly
            $old = umask(0000);
            $created = mkdir($directory, $chmod);
            umask($old);
            // Another request may have created the directory in the
            // meantime; only fail if it is still missing.
            if (!$created && !is_dir($directory)) {
                return false;
            }
        } elseif (!$this->_testPermissions($directory, $chmod)) {
            return false;
        }
        return true;
    }

    /**
     * Tests permissions on a directory and throws out friendly
     * error messages and attempts to chmod it itself if possible
     * @param string $dir Directory path
     * @param int $chmod Permissions
     * @return bool True if directory is writable
     */
    private function _testPermissions($dir, $chmod)
    {
        // early abort, if it is writable, everything is hunky-dory
        if (is_writable($dir)) {
            return true;
        }
        if (!is_dir($dir)) {
            // generally, you'll want to handle this beforehand
            // so a more specific error message can be given
            trigger_error(
                'Directory ' . $dir . ' does not exist',
                E_USER_WARNING
            );
            return false;
        }
        if (function_exists('posix_getuid')) {
            // POSIX system, we can give more specific advice
            if (fileowner($dir) === posix_getuid()) {
                // we can chmod it ourselves
                $chmod = $chmod | 0700;
                if (chmod($dir, $chmod)) {
                    return true;
                }
                // chmod failed: fall through to the warning below
            } elseif (filegroup($dir) === posix_getgid()) {
                // not the owner, so we can only advise group write access
                $chmod = $chmod | 0070;
            } else {
                // PHP's probably running as nobody, so we'll
                // need to give global permissions
                $chmod = $chmod | 0777;
            }
            trigger_error(
                'Directory ' . $dir . ' not writable, ' .
                'please chmod to ' . decoct($chmod),
                E_USER_WARNING
            );
        } else {
            // generic error message
            trigger_error(
                'Directory ' . $dir . ' not writable, ' .
                'please alter file permissions',
                E_USER_WARNING
            );
        }
        return false;
    }
}
15347: 
15348: 
15349: 
15350: 
15351: 
/**
 * Definition cache decorator class that cleans up the cache
 * whenever there is a cache miss, i.e. whenever the wrapped operation
 * returns a falsy result.
 */
class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends HTMLPurifier_DefinitionCache_Decorator
{
    /**
     * @type string
     */
    public $name = 'Cleanup';

    /**
     * Returns a new, unbound decorator of this class.
     * @return HTMLPurifier_DefinitionCache_Decorator_Cleanup
     */
    public function copy()
    {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
        return $fresh;
    }

    /**
     * Forwards add(); runs cleanup() when the result is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded add()
     */
    public function add($def, $config)
    {
        $outcome = parent::add($def, $config);
        if ($outcome) {
            return $outcome;
        }
        parent::cleanup($config);
        return $outcome;
    }

    /**
     * Forwards set(); runs cleanup() when the result is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded set()
     */
    public function set($def, $config)
    {
        $outcome = parent::set($def, $config);
        if ($outcome) {
            return $outcome;
        }
        parent::cleanup($config);
        return $outcome;
    }

    /**
     * Forwards replace(); runs cleanup() when the result is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded replace()
     */
    public function replace($def, $config)
    {
        $outcome = parent::replace($def, $config);
        if ($outcome) {
            return $outcome;
        }
        parent::cleanup($config);
        return $outcome;
    }

    /**
     * Forwards get(); runs cleanup() when the result is falsy.
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded get()
     */
    public function get($config)
    {
        $found = parent::get($config);
        if ($found) {
            return $found;
        }
        parent::cleanup($config);
        return $found;
    }
}
15426: 
15427: 
15428: 
15429: 
15430: 
/**
 * Definition cache decorator class that saves all cache retrievals
 * to PHP's memory; good for unit tests or circumstances where
 * there are lots of configuration objects floating around.
 */
class HTMLPurifier_DefinitionCache_Decorator_Memory extends HTMLPurifier_DefinitionCache_Decorator
{
    /**
     * In-memory copies of definitions, indexed by generateKey($config).
     * @type array
     */
    protected $definitions;

    /**
     * @type string
     */
    public $name = 'Memory';

    /**
     * Returns a new, unbound decorator of this class.
     * @return HTMLPurifier_DefinitionCache_Decorator_Memory
     */
    public function copy()
    {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator_Memory();
        return $fresh;
    }

    /**
     * Forwards add(); on a truthy result also keeps $def in memory.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded add()
     */
    public function add($def, $config)
    {
        return $this->rememberIfStored(parent::add($def, $config), $def, $config);
    }

    /**
     * Forwards set(); on a truthy result also keeps $def in memory.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded set()
     */
    public function set($def, $config)
    {
        return $this->rememberIfStored(parent::set($def, $config), $def, $config);
    }

    /**
     * Forwards replace(); on a truthy result also keeps $def in memory.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Result of the forwarded replace()
     */
    public function replace($def, $config)
    {
        return $this->rememberIfStored(parent::replace($def, $config), $def, $config);
    }

    /**
     * Returns the in-memory definition when one is set for this
     * configuration; otherwise asks the decorated cache and remembers
     * whatever it answered.
     * @param HTMLPurifier_Config $config
     * @return mixed
     */
    public function get($config)
    {
        $slot = $this->generateKey($config);
        if (!isset($this->definitions[$slot])) {
            $this->definitions[$slot] = parent::get($config);
        }
        return $this->definitions[$slot];
    }

    /**
     * Records $def in memory when the forwarded write reported success,
     * then hands the status straight back.
     * @param mixed $status Result of the forwarded write operation
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed The unchanged $status
     */
    private function rememberIfStored($status, $def, $config)
    {
        if ($status) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $status;
    }
}
15512: 
15513: 
15514: 
15515: 
15516: 
/**
 * XHTML 1.1 Bi-directional Text Module, defines elements that
 * declare directionality of content. Text Extension Module.
 */
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Bdo';

    /**
     * 'dir' starts out as false here and is given its real definition
     * at the end of setup().
     * @type array
     */
    public $attr_collections = array(
        'I18N' => array('dir' => false)
    );

    /**
     * Registers the bdo element and defines the I18N 'dir' attribute.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $direction = 'Enum#ltr,rtl';

        // The Abstract Module specification has the attribute
        // inclusions wrong for bdo: bdo allows Lang
        $collections = array('Core', 'Lang');
        $attributes = array(
            'dir' => $direction, // required
        );

        $element = $this->addElement('bdo', 'Inline', 'Inline', $collections, $attributes);
        $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir();

        $this->attr_collections['I18N']['dir'] = $direction;
    }
}
15557: 
15558: 
15559: 
15560: 
15561: 
/**
 * Module that contributes no elements, only the attribute collections
 * Core, Lang, I18N and Common.
 */
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * @type string
     */
    public $name = 'CommonAttributes';

    /**
     * Attribute collections keyed by name. The unkeyed first entry of a
     * collection (index 0) is a list of other collection names.
     * NOTE(review): presumably those are merged into the collection when
     * it is resolved; confirm against the attribute collection handling.
     * @type array
     */
    public $attr_collections = array(
        'Core' => array(
            array('Style'),
            // 'xml:space' => false,
            'class' => 'Class',
            'id' => 'ID',
            'title' => 'CDATA',
        ),
        'Lang' => array(),
        'I18N' => array(
            array('Lang'), // proprietary, for xml:lang/lang
        ),
        'Common' => array(
            array('Core', 'I18N')
        )
    );
}
15589: 
15590: 
15591: 
15592: 
15593: 
/**
 * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
 * Module.
 *
 * HTML 4.01 specifies that ins/del must not contain block elements when
 * used in an inline context; the "chameleon" content model is a
 * complicated workaround to achieve this effect.
 */
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Edit';

    /**
     * @type bool
     */
    public $defines_child_def = true;

    /**
     * Registers del and ins with an identical chameleon content model.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Inline context ! Block context (exclamation mark is
        // separator, see getChildDef for parsing)
        $model = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
        $attributes = array(
            'cite' => 'URI',
            // 'datetime' => 'Datetime', // not implemented
        );
        foreach (array('del', 'ins') as $tag) {
            $this->addElement($tag, 'Inline', $model, 'Common', $attributes);
        }
    }

    /**
     * Builds the child definition for a chameleon content model by
     * splitting the model on '!' into its inline and block parts.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef_Chameleon|bool False when the element
     *         does not use the chameleon content model type
     */
    public function getChildDef($def)
    {
        if ($def->content_model_type == 'chameleon') {
            $parts = explode('!', $def->content_model);
            $inline = $parts[0];
            $block = $parts[1];
            return new HTMLPurifier_ChildDef_Chameleon($inline, $block);
        }
        return false;
    }
}
15645: 
15646: 
15647: 
15648: 
15649: 
/**
 * XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
 */
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
{
    /**
     * @type string
     */
    public $name = 'Forms';

    /**
     * This module is flagged as not safe.
     * @type bool
     */
    public $safe = false;

    /**
     * @type array
     */
    public $content_sets = array(
        'Block' => 'Form',
        'Inline' => 'Formctrl',
    );

    /**
     * Registers form, input, select, option, textarea, button, fieldset,
     * label, legend and optgroup, in that order.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // --- form -------------------------------------------------------
        $formAttr = array(
            'accept' => 'ContentTypes',
            'accept-charset' => 'Charsets',
            'action*' => 'URI',
            'method' => 'Enum#get,post',
            // really ContentType, but these two are the only ones used today
            'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
        );
        $formElement = $this->addElement(
            'form',
            'Form',
            'Required: Heading | List | Block | fieldset',
            'Common',
            $formAttr
        );
        $formElement->excludes = array('form' => true);

        // --- input ------------------------------------------------------
        $inputAttr = array(
            'accept' => 'ContentTypes',
            'accesskey' => 'Character',
            'alt' => 'Text',
            'checked' => 'Bool#checked',
            'disabled' => 'Bool#disabled',
            'maxlength' => 'Number',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'size' => 'Number',
            'src' => 'URI#embedded',
            'tabindex' => 'Number',
            'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
            'value' => 'CDATA',
        );
        $inputElement = $this->addElement('input', 'Formctrl', 'Empty', 'Common', $inputAttr);
        $inputElement->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();

        // --- select -----------------------------------------------------
        $selectAttr = array(
            'disabled' => 'Bool#disabled',
            'multiple' => 'Bool#multiple',
            'name' => 'CDATA',
            'size' => 'Number',
            'tabindex' => 'Number',
        );
        $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', $selectAttr);

        // --- option -----------------------------------------------------
        $optionAttr = array(
            'disabled' => 'Bool#disabled',
            'label' => 'Text',
            'selected' => 'Bool#selected',
            'value' => 'CDATA',
        );
        $this->addElement('option', false, 'Optional: #PCDATA', 'Common', $optionAttr);
        // It's illegal for there to be more than one selected, but not
        // be multiple. Also, no selected means undefined behavior. This might
        // be difficult to implement; perhaps an injector, or a context variable.

        // --- textarea ---------------------------------------------------
        $textareaAttr = array(
            'accesskey' => 'Character',
            'cols*' => 'Number',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'rows*' => 'Number',
            'tabindex' => 'Number',
        );
        $textareaElement = $this->addElement(
            'textarea',
            'Formctrl',
            'Optional: #PCDATA',
            'Common',
            $textareaAttr
        );
        $textareaElement->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();

        // --- button -----------------------------------------------------
        $buttonAttr = array(
            'accesskey' => 'Character',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'tabindex' => 'Number',
            'type' => 'Enum#button,submit,reset',
            'value' => 'CDATA',
        );
        $buttonElement = $this->addElement(
            'button',
            'Formctrl',
            'Optional: #PCDATA | Heading | List | Block | Inline',
            'Common',
            $buttonAttr
        );

        // For exclusions, ideally we'd specify content sets, not literal elements
        $buttonElement->excludes = $this->makeLookup(
            'form',
            'fieldset', // Form
            'input',
            'select',
            'textarea',
            'label',
            'button', // Formctrl
            'a', // as per HTML 4.01 spec, this is omitted by modularization
            'isindex',
            'iframe' // legacy items
        );

        // Extra exclusion: img usemap="" is not permitted within this element.
        // We'll omit this for now, since we don't have any good way of
        // indicating it yet.

        // --- fieldset ---------------------------------------------------
        // This is HIGHLY user-unfriendly; we need a custom child-def for this
        $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');

        // --- label ------------------------------------------------------
        $labelAttr = array(
            'accesskey' => 'Character',
            // 'for' => 'IDREF', // IDREF not implemented, cannot allow
        );
        $labelElement = $this->addElement(
            'label',
            'Formctrl',
            'Optional: #PCDATA | Inline',
            'Common',
            $labelAttr
        );
        $labelElement->excludes = array('label' => true);

        // --- legend -----------------------------------------------------
        $legendAttr = array(
            'accesskey' => 'Character',
        );
        $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', $legendAttr);

        // --- optgroup ---------------------------------------------------
        $optgroupAttr = array(
            'disabled' => 'Bool#disabled',
            'label*' => 'Text',
        );
        $this->addElement('optgroup', false, 'Required: option', 'Common', $optgroupAttr);
        // Don't forget an injector for <isindex>. This one's a little complex
        // because it maps to multiple elements.
    }
}
15836: 
15837: 
15838: 
15839: 
15840: 
/**
 * XHTML 1.1 Hypertext Module. Registers the <a> element used for
 * hypertext links. Core Module.
 */
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Hypertext';

    /**
     * Registers the anchor element as inline content that itself holds
     * inline content, using the 'Common' attribute collection.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Not registered here: accesskey (Character), charset (Charset),
        // hreflang (LanguageCode), tabindex (Number), type (ContentType).
        $attributes = array(
            'href' => 'URI',
            'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
            'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
        );

        $anchor = $this->addElement('a', 'Inline', 'Inline', 'Common', $attributes);
        // 'a' is listed among the elements excluded from inside an anchor
        $anchor->excludes = array('a' => true);
        $anchor->formatting = true;
    }
}
15877: 
15878: 
15879: 
15880: 
15881: 
/**
 * XHTML 1.1 Iframe Module. Registers the <iframe> element for inline frames.
 *
 * @note The module starts out flagged as unsafe and is only flagged safe
 *       when %HTML.SafeIframe is enabled. It should not be considered safe
 *       unless an iframe whitelisting mechanism is configured; currently
 *       the only such mechanism is %URL.SafeIframeRegexp.
 */
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Iframe';

    /**
     * Safety flag; switched to true by setup() when %HTML.SafeIframe is set.
     * @type bool
     */
    public $safe = false;

    /**
     * Updates the safety flag from configuration and registers <iframe>.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        if ($config->get('HTML.SafeIframe')) {
            $this->safe = true;
        }

        $attributes = array(
            'src' => 'URI#embedded',
            'width' => 'Length',
            'height' => 'Length',
            'name' => 'ID',
            'scrolling' => 'Enum#yes,no,auto',
            'frameborder' => 'Enum#0,1',
            'longdesc' => 'URI',
            'marginheight' => 'Pixels',
            'marginwidth' => 'Pixels',
        );
        $this->addElement('iframe', 'Inline', 'Flow', 'Common', $attributes);
    }
}
15929: 
15930: 
15931: 
15932: 
15933: 
/**
 * XHTML 1.1 Image Module. Registers the <img> element for basic image
 * embedding.
 * @note Removal of empty images is handled by specialized code in
 *       HTMLPurifier_Strategy_RemoveForeignElements
 */
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Image';

    /**
     * Registers <img>, with its width/height typing driven by
     * %HTML.MaxImgLength and %HTML.Trusted.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $limit = $config->get('HTML.MaxImgLength');
        // The spec says Length, but percentages can be abused, so the
        // default is Pixels capped by the configured limit.
        $dimension = 'Pixels#' . $limit;

        $image = $this->addElement(
            'img',
            'Inline',
            'Empty',
            'Common',
            array(
                'alt*' => 'Text',
                'height' => $dimension,
                'width' => $dimension,
                'longdesc' => 'URI',
                'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
            )
        );

        // No limit configured, or trusted input: fall back to plain Length.
        if ($limit === null || $config->get('HTML.Trusted')) {
            $image->attr['width'] = 'Length';
            $image->attr['height'] = 'Length';
        }

        // One transform instance is deliberately shared by the pre and
        // post queues instead of creating two.
        $required = new HTMLPurifier_AttrTransform_ImgRequired();
        $image->attr_transform_post[] = $required;
        $image->attr_transform_pre[] = $required;
    }
}
15979: 
15980: 
15981: 
15982: 
15983: 
/**
 * XHTML 1.1 Legacy module. Defines elements and attributes that were
 * previously deprecated.
 *
 * @note Not every legacy element is implemented yet, and this module may
 *       implement somewhat more than XHTML 1.1 mandates.
 *
 * The module can be combined with TransformToStrict so that as many
 * deprecated elements as possible are transformed, while questionably
 * deprecated elements without good alternatives are retained.
 * See docs/ref-strictness.txt for more details.
 */

class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Legacy';

    /**
     * Registers the legacy elements, then extends elements owned by other
     * modules with their legacy attributes and looser content models.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // --- brand new legacy elements -----------------------------------

        // face and size are typed very broadly and should be tightened
        $font_attrs = array(
            'color' => 'Color',
            'face' => 'Text',
            'size' => 'Text',
        );
        $compact_attr = array('compact' => 'Bool#compact');

        $this->addElement('basefont', 'Inline', 'Empty', null, $font_attrs + array('id' => 'ID'));
        $this->addElement('center', 'Block', 'Flow', 'Common');
        $this->addElement('dir', 'Block', 'Required: li', 'Common', $compact_attr);
        $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), $font_attrs);
        $this->addElement('menu', 'Block', 'Required: li', 'Common', $compact_attr);

        // inline elements that additionally carry the formatting flag
        foreach (array('s', 'strike', 'u') as $tag) {
            $inline = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $inline->formatting = true;
        }

        // --- modifications to elements defined by other modules ----------

        $address = $this->addBlankElement('address');
        $address->content_model = 'Inline | #PCDATA | p';
        $address->content_model_type = 'optional';
        $address->child = false;

        $blockquote = $this->addBlankElement('blockquote');
        $blockquote->content_model = 'Flow | #PCDATA';
        $blockquote->content_model_type = 'optional';
        $blockquote->child = false;

        // Extra attributes per element; the iteration order below is the
        // order in which the blank elements get registered.
        $align = 'Enum#left,right,center,justify';
        $cell_attrs = array(
            'bgcolor' => 'Color',
            'height' => 'Length',
            'nowrap' => 'Bool#nowrap',
            'width' => 'Length',
        );
        $extra_attrs = array(
            'br' => array('clear' => 'Enum#left,all,right,none'),
            'caption' => array('align' => 'Enum#top,bottom,left,right'),
            'div' => array('align' => $align),
            'dl' => array('compact' => 'Bool#compact'),
            'h1' => array('align' => $align),
            'h2' => array('align' => $align),
            'h3' => array('align' => $align),
            'h4' => array('align' => $align),
            'h5' => array('align' => $align),
            'h6' => array('align' => $align),
            'hr' => array(
                'align' => $align,
                'noshade' => 'Bool#noshade',
                'size' => 'Pixels',
                'width' => 'Length',
            ),
            'img' => array(
                'align' => 'IAlign',
                'border' => 'Pixels',
                'hspace' => 'Pixels',
                'vspace' => 'Pixels',
            ),
            // figure out this integer business
            'li' => array(
                'value' => new HTMLPurifier_AttrDef_Integer(),
                'type' => 'Enum#s:1,i,I,a,A,disc,square,circle',
            ),
            'ol' => array(
                'compact' => 'Bool#compact',
                'start' => new HTMLPurifier_AttrDef_Integer(),
                'type' => 'Enum#s:1,i,I,a,A',
            ),
            'p' => array('align' => $align),
            'pre' => array('width' => 'Number'),
            // script omitted
            'table' => array(
                'align' => 'Enum#left,center,right',
                'bgcolor' => 'Color',
            ),
            'tr' => array('bgcolor' => 'Color'),
            'th' => $cell_attrs,
            'td' => $cell_attrs,
            'ul' => array(
                'compact' => 'Bool#compact',
                'type' => 'Enum#square,disc,circle',
            ),
        );
        foreach ($extra_attrs as $tag => $attrs) {
            $def = $this->addBlankElement($tag);
            foreach ($attrs as $attr_name => $attr_type) {
                $def->attr[$attr_name] = $attr_type;
            }
        }

        // "safe" modifications to "unsafe" elements
        // WARNING: support for an unsafe legacy attribute belongs in a new
        // TrustedLegacy module with the trusted bit set appropriately,
        // not here.

        $form = $this->addBlankElement('form');
        $form->content_model = 'Flow | #PCDATA';
        $form->content_model_type = 'optional';
        $form->attr['target'] = 'FrameTarget';

        $input = $this->addBlankElement('input');
        $input->attr['align'] = 'IAlign';

        $legend = $this->addBlankElement('legend');
        $legend->attr['align'] = 'LAlign';
    }
}
16166: 
16167: 
16168: 
16169: 
16170: 
/**
 * XHTML 1.1 List Module. Registers the list-oriented elements
 * (ol, ul, dl, li, dd, dt). Core Module.
 */
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'List';

    // The abstract schema describes the List content set as a fully formed
    // "one or more" expression, but it always appears inside an optional
    // declaration, so that subtlety is not modelled here. A user defining
    // "List" and expecting several lists to be allowed could be surprised,
    // though that expectation is not very intuitive, and the actual XML
    // Schema may disagree. In any case, such nested expressions are not
    // supported without resorting to the very inefficient Custom ChildDef.

    /**
     * Adds the List content set to Flow.
     * @type array
     */
    public $content_sets = array('Flow' => 'List');

    /**
     * Registers the ordered/unordered/definition list elements and
     * their item elements.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // XXX The wrap attribute is consumed by MakeWellFormed. That is
        // unsatisfactory: it was introduced *specifically* for lists, yet
        // much of the handling is now done properly by the List ChildDef.
        // All that is really needed here is enough information for
        // autoclosing to work, with the tricky parts left to the ChildDef.
        foreach (array('ol', 'ul') as $tag) {
            $list = $this->addElement($tag, 'List', new HTMLPurifier_ChildDef_List(), 'Common');
            $list->wrap = 'li';
        }
        $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

        // item elements are not members of any content set
        $this->addElement('li', false, 'Flow', 'Common');

        $this->addElement('dd', false, 'Flow', 'Common');
        $this->addElement('dt', false, 'Inline', 'Common');
    }
}
16218: 
16219: 
16220: 
16221: 
16222: 
/**
 * Module that adds a CDATA-typed 'name' attribute to a, applet, form,
 * frame, iframe, img and map. Unless %HTML.Attr.Name.UseCDATA is enabled,
 * each of those elements also receives a NameSync post-transform.
 */
class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Name';

    /**
     * Registers the 'name' attribute on every supported element.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The directive cannot change while this method runs, so it is
        // read once up front instead of once per element.
        $sync_names = !$config->get('HTML.Attr.Name.UseCDATA');

        $elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
        foreach ($elements as $name) {
            $element = $this->addBlankElement($name);
            $element->attr['name'] = 'CDATA';
            if ($sync_names) {
                // a separate transform instance per element
                $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_NameSync();
            }
        }
    }
}
16245: 
16246: 
16247: 
16248: 
16249: 
/**
 * Module that attaches the nofollow attribute transformation to <a>
 * elements. It is enabled by HTML.Nofollow.
 */
class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Nofollow';

    /**
     * Appends a Nofollow transform to the anchor's post-transform queue.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow();
    }
}
16271: 
16272: 
16273: 
16274: 
16275: 
/**
 * Module that contributes the non-XML 'lang' attribute, typed as
 * LanguageCode, to the 'Lang' attribute collection.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'NonXMLCommonAttributes';

    /**
     * Attribute collections supplied by this module.
     * @type array
     */
    public $attr_collections = array(
        'Lang' => array('lang' => 'LanguageCode'),
    );
}
16292: 
16293: 
16294: 
16295: 
16296: 
/**
 * XHTML 1.1 Object Module. Registers <object> and <param> for generic
 * object inclusion.
 * @warning <embed>, which users commonly add to cater to legacy browsers,
 *      is not allowed by this module
 */
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Object';

    /**
     * This module is flagged as unsafe.
     * @type bool
     */
    public $safe = false;

    /**
     * Registers the object element and its param child element.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $object_attrs = array(
            'archive' => 'URI',
            'classid' => 'URI',
            'codebase' => 'URI',
            'codetype' => 'Text',
            'data' => 'URI',
            'declare' => 'Bool#declare',
            'height' => 'Length',
            'name' => 'CDATA',
            'standby' => 'Text',
            'tabindex' => 'Number',
            'type' => 'ContentType',
            'width' => 'Length'
        );
        $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', $object_attrs);

        // param belongs to no content set and uses no attribute collection
        $param_attrs = array(
            'id' => 'ID',
            'name*' => 'Text',
            'type' => 'Text',
            'value' => 'Text',
            'valuetype' => 'Enum#data,ref,object'
        );
        $this->addElement('param', false, 'Empty', null, $param_attrs);
    }
}
16355: 
16356: 
16357: 
16358: 
16359: 
/**
 * XHTML 1.1 Presentation Module. Registers simple presentation-related
 * markup. Text Extension Module.
 * @note The official XML Schema and DTD specs split this further into
 *       two modules:
 *          - Block Presentation (hr)
 *          - Inline Presentation (b, big, i, small, sub, sup, tt)
 *       That distinction is not followed here, because content_sets
 *       already disambiguates the two satisfactorily.
 */
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Presentation';

    /**
     * Registers hr, sub and sup, followed by the inline elements that
     * also carry the formatting flag.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->addElement('hr', 'Block', 'Empty', 'Common');
        $this->addElement('sub', 'Inline', 'Inline', 'Common');
        $this->addElement('sup', 'Inline', 'Inline', 'Common');

        foreach (array('b', 'big', 'i', 'small', 'tt') as $tag) {
            $element = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $element->formatting = true;
        }
    }
}
16398: 
16399: 
16400: 
16401: 
16402: 
/**
 * Module for proprietary HTML tags and attributes; it currently
 * registers <marquee>.
 * @warning Enabling this module turns standards-compliance off!
 */
class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Proprietary';

    /**
     * Registers the marquee element with its attributes.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $marquee_attrs = array(
            'direction' => 'Enum#left,right,up,down',
            'behavior' => 'Enum#alternate',
            'width' => 'Length',
            'height' => 'Length',
            'scrolldelay' => 'Number',
            'scrollamount' => 'Number',
            'loop' => 'Number',
            'bgcolor' => 'Color',
            'hspace' => 'Pixels',
            'vspace' => 'Pixels',
        );
        $this->addElement('marquee', 'Inline', 'Flow', 'Common', $marquee_attrs);
    }
}
16439: 
16440: 
16441: 
16442: 
16443: 
/**
 * XHTML 1.1 Ruby Annotation Module. Registers the elements that mark
 * short runs of text placed alongside base text for annotation or
 * pronunciation.
 */
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Ruby';

    /**
     * Registers ruby together with its rbc, rtc, rb, rt and rp children.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $ruby_model = 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))';
        $this->addElement('ruby', 'Inline', $ruby_model, 'Common');

        // container elements
        $this->addElement('rbc', false, 'Required: rb', 'Common');
        $this->addElement('rtc', false, 'Required: rt', 'Common');

        // base and annotation text both list 'ruby' as an excluded element
        $base = $this->addElement('rb', false, 'Inline', 'Common');
        $base->excludes = array('ruby' => true);

        $text = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
        $text->excludes = array('ruby' => true);

        $this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
    }
}
16476: 
16477: 
16478: 
16479: 
16480: 
/**
 * A "safe" embed module; see SafeObject. <embed> is a proprietary element.
 */
class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'SafeEmbed';

    /**
     * Registers <embed> with a restricted attribute set and attaches the
     * SafeEmbed post-transform to it.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // width and height are pixel values parameterised by %HTML.MaxImgLength
        $dimension = 'Pixels#' . $config->get('HTML.MaxImgLength');

        $embed_attrs = array(
            'src*' => 'URI#embedded',
            'type' => 'Enum#application/x-shockwave-flash',
            'width' => $dimension,
            'height' => $dimension,
            'allowscriptaccess' => 'Enum#never',
            'allownetworking' => 'Enum#internal',
            'flashvars' => 'Text',
            'wmode' => 'Enum#window,transparent,opaque',
            'name' => 'ID',
        );
        $element = $this->addElement('embed', 'Inline', 'Empty', 'Common', $embed_attrs);
        $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed();
    }
}
16517: 
16518: 
16519: 
16520: 
16521: 
/**
 * A "safe" object module. In theory the objects it permits are safe, so
 * untrusted users may embed arbitrary Flash objects (other types may
 * follow, but only Flash is supported right now).
 * Highly experimental.
 */
class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'SafeObject';

    /**
     * Registers restricted <object> and <param> definitions, their
     * attribute transforms, and the SafeObject injector.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The definitions below are not safe on their own merits: the
        // attribute transforms are a vital part of ensuring safety.

        $dimension = 'Pixels#' . $config->get('HTML.MaxImgLength');
        $flash_cab = 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0';

        $object_attrs = array(
            // The spec does not strictly require it, but type is forced
            // to this single value.
            'type' => 'Enum#application/x-shockwave-flash',
            'width' => $dimension,
            'height' => $dimension,
            'data' => 'URI#embedded',
            'codebase' => new HTMLPurifier_AttrDef_Enum(array($flash_cab)),
        );
        $object_def = $this->addElement(
            'object',
            'Inline',
            'Optional: param | Flow | #PCDATA',
            'Common',
            $object_attrs
        );
        $object_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

        $param_attrs = array(
            'id' => 'ID',
            'name*' => 'Text',
            'value' => 'Text'
        );
        $param_def = $this->addElement('param', false, 'Empty', false, $param_attrs);
        $param_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam();

        $this->info_injector[] = 'SafeObject';
    }
}
16580: 
16581: 
16582: 
16583: 
16584: 
/**
 * A "safe" script module. Inline JS is not allowed, and the JS files
 * pointed to must match the whitelist.
 */
class HTMLPurifier_HTMLModule_SafeScripting extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'SafeScripting';

    /**
     * Registers an empty <script> element whose src is limited to the
     * keys of %HTML.SafeScripting.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The definition below is not safe on its own merits: the
        // attribute transforms are a vital part of ensuring safety.

        $whitelist = array_keys($config->get('HTML.SafeScripting'));

        $script_attrs = array(
            // The spec does not strictly require it, but type is forced
            // to this single value.
            'type' => 'Enum#text/javascript',
            'src*' => new HTMLPurifier_AttrDef_Enum($whitelist)
        );
        $script_def = $this->addElement('script', 'Inline', 'Empty', null, $script_attrs);

        // a single transform instance serves both the pre and post queues
        $required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $script_def->attr_transform_post[] = $required;
        $script_def->attr_transform_pre[] = $required;
    }
}
16621: 
16622: 
16623: 
16624: 
16625: 
16626: /*
16627: 
16628: WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
16629: INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
16630: 
16631: */
16632: 
/**
 * XHTML 1.1 Scripting module. Defines the elements that contain
 * information pertaining to executable scripts or to the lack of support
 * for executable scripts (script and noscript).
 * @note The script element defined here accepts optional #PCDATA content,
 *       and the module is flagged as unsafe.
 */
class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Scripting';

    /**
     * Elements provided by this module.
     * @type array
     */
    public $elements = array('script', 'noscript');

    /**
     * Both elements are added to the Block and the Inline content sets.
     * @type array
     */
    public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');

    /**
     * This module is flagged as unsafe.
     * @type bool
     */
    public $safe = false;

    /**
     * Builds the noscript and script element definitions by hand.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // TODO: give noscript a custom child-definition that auto-wraps
        // stray #PCDATA the way blockquote's custom definition does
        // (blockquote's cannot be reused: its contents are optional
        // whereas noscript's contents are required)

        // TODO: move to the new syntax; the main obstacle is getting
        // both content sets to work

        // In theory, this could be safe, but I don't see any reason to
        // allow it.
        $noscript = new HTMLPurifier_ElementDef();
        $noscript->attr = array(0 => array('Common'));
        $noscript->content_model = 'Heading | List | Block';
        $noscript->content_model_type = 'required';
        $this->info['noscript'] = $noscript;

        $script = new HTMLPurifier_ElementDef();
        $script->attr = array(
            'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')),
            'src' => new HTMLPurifier_AttrDef_URI(true),
            'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript'))
        );
        $script->content_model = '#PCDATA';
        $script->content_model_type = 'optional';

        // a single transform instance serves both the pre and post queues
        $required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $script->attr_transform_post[] = $required;
        $script->attr_transform_pre[] = $required;
        $this->info['script'] = $script;
    }
}
16695: 
16696: 
16697: 
16698: 
16699: 
/**
 * Style Attribute Module. Defines a 'Style' attribute collection holding
 * the 'style' attribute and includes that collection in 'Core'.
 */
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'StyleAttribute';

    /**
     * Attribute collections supplied by this module.
     * @type array
     */
    public $attr_collections = array(
        // This inclusion routine differs from the Abstract Modules, but
        // it agrees with the DTD and XML Schemas.
        'Style' => array('style' => false), // placeholder, filled in by setup()
        'Core' => array(0 => array('Style'))
    );

    /**
     * Replaces the placeholder for 'style' with a CSS attribute
     * definition object.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $css = new HTMLPurifier_AttrDef_CSS();
        $this->attr_collections['Style']['style'] = $css;
    }
}
16729: 
16730: 
16731: 
16732: 
16733: 
/**
 * XHTML 1.1 Tables Module. Fully defines the accessible table elements.
 */
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tables';

    /**
     * Registers caption, table, the cell and row elements, the column
     * elements and the row-group elements.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->addElement('caption', false, 'Inline', 'Common');

        $table_attrs = array(
            'border' => 'Pixels',
            'cellpadding' => 'Length',
            'cellspacing' => 'Length',
            'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border',
            'rules' => 'Enum#none,groups,rows,cols,all',
            'summary' => 'Text',
            'width' => 'Length'
        );
        $this->addElement('table', 'Block', new HTMLPurifier_ChildDef_Table(), 'Common', $table_attrs);

        // alignment attributes shared by cells, rows, columns and row groups
        $alignment = array(
            'align' => 'Enum#left,center,right,justify,char',
            'charoff' => 'Length',
            'valign' => 'Enum#top,middle,bottom,baseline',
        );

        $cell_attrs = array_merge(
            array(
                'abbr' => 'Text',
                'colspan' => 'Number',
                'rowspan' => 'Number',
                // Apparently, as of HTML5 this attribute only applies
                // to 'th' elements.
                'scope' => 'Enum#row,col,rowgroup,colgroup',
            ),
            $alignment
        );
        foreach (array('td', 'th') as $cell) {
            $this->addElement($cell, false, 'Flow', 'Common', $cell_attrs);
        }

        $this->addElement('tr', false, 'Required: td | th', 'Common', $alignment);

        $column_attrs = array_merge(
            array(
                'span' => 'Number',
                'width' => 'MultiLength',
            ),
            $alignment
        );
        $this->addElement('col', false, 'Empty', 'Common', $column_attrs);
        $this->addElement('colgroup', false, 'Optional: col', 'Common', $column_attrs);

        foreach (array('tbody', 'thead', 'tfoot') as $row_group) {
            $this->addElement($row_group, false, 'Required: tr', 'Common', $alignment);
        }
    }
}
16805: 
16806: 
16807: 
16808: 
16809: 
/**
 * XHTML 1.1 Target Module, defines target attribute in link elements.
 */
class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Target';

    /**
     * Adds a blank 'a' element whose attribute list consists of a single
     * 'target' attribute validated by a FrameTarget definition.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Only the anchor element receives the target attribute.
        $anchor = $this->addBlankElement('a');
        $anchor->attr = array(
            'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget()
        );
    }
}
16834: 
16835: 
16836: 
16837: 
16838: 
/**
 * Module that attaches the TargetBlank attribute transformation to 'a'
 * tags. It is enabled by HTML.TargetBlank.
 */
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'TargetBlank';

    /**
     * Appends a TargetBlank transform to the post-validation attribute
     * transforms of the 'a' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $anchor = $this->addBlankElement('a');
        $transform = new HTMLPurifier_AttrTransform_TargetBlank();
        $anchor->attr_transform_post[] = $transform;
    }
}
16859: 
16860: 
16861: 
16862: 
16863: 
/**
 * XHTML 1.1 Text Module, defines basic text containers. Core Module.
 * @note In the normative XML Schema specification, this module
 *       is further abstracted into the following modules:
 *          - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
 *          - Block Structural (div, p)
 *          - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
 *          - Inline Structural (br, span)
 *       This module, functionally, does not distinguish between these
 *       sub-modules, but the code is internally structured to reflect
 *       these distinctions.
 */
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Text';

    /**
     * Defines the 'Flow' content set as the union of headings, block and
     * inline content.
     * @type array
     */
    public $content_sets = array(
        'Flow' => 'Heading | Block | Inline'
    );

    /**
     * Registers every text element of the module; elements are added in
     * the order inline phrasal, inline structural, block phrasal, block
     * structural.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Inline Phrasal -------------------------------------------------
        foreach (array('abbr', 'acronym', 'cite', 'dfn', 'kbd') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }
        $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
        foreach (array('samp', 'var') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }

        // These three are additionally flagged as formatting elements.
        foreach (array('em', 'strong', 'code') as $tag) {
            $element = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $element->formatting = true;
        }

        // Inline Structural ----------------------------------------------
        $this->addElement('span', 'Inline', 'Inline', 'Common');
        $this->addElement('br', 'Inline', 'Empty', 'Core');

        // Block Phrasal --------------------------------------------------
        $this->addElement('address', 'Block', 'Inline', 'Common');
        $this->addElement(
            'blockquote',
            'Block',
            'Optional: Heading | Block | List',
            'Common',
            array('cite' => 'URI')
        );

        // Elements listed here are excluded from the content of <pre>.
        $preformatted = $this->addElement('pre', 'Block', 'Inline', 'Common');
        $preformatted->excludes = $this->makeLookup(
            'img',
            'big',
            'small',
            'object',
            'applet',
            'font',
            'basefont'
        );

        // h1 through h6
        for ($rank = 1; $rank <= 6; $rank++) {
            $this->addElement('h' . $rank, 'Heading', 'Inline', 'Common');
        }

        // Block Structural -----------------------------------------------
        $paragraph = $this->addElement('p', 'Block', 'Inline', 'Common');
        // Lookup (name => index) of the element names stored as the
        // paragraph's autoclose list.
        $autoclosing = array(
            "address", "blockquote", "center", "dir", "div",
            "dl", "fieldset", "ol", "p", "ul"
        );
        $paragraph->autoclose = array_flip($autoclosing);

        $this->addElement('div', 'Block', 'Flow', 'Common');
    }
}
16946: }
16947: 
16948: 
16949: 
16950: 
16951: 
/**
 * Abstract class for a set of proprietary modules that clean up (tidy)
 * poorly written HTML.
 *
 * Subclasses override makeFixes() to supply a map of fix name to fix
 * implementation; setup() filters that map by tidy level and by the
 * HTML.TidyAdd / HTML.TidyRemove directives and hands the survivors to
 * populate().
 * @todo Figure out how to protect some of these methods/properties
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
    /**
     * List of supported levels.
     * Index zero is a special case "no fixes" level.
     * @type array
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in.
     * Disabled by default (null means makeFixesForLevel() does nothing).
     * @type string|null
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel().
     * Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     * @type array
     */
    public $fixesForLevel = array(
        'light' => array(),
        'medium' => array(),
        'heavy' => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     * @param HTMLPurifier_Config $config
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            // A fix is dropped when it is explicitly removed, or when it
            // is neither explicitly added nor enabled by the level.
            if (isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * Emits an E_USER_WARNING and returns an empty array when the level
     * is not one of $levels.
     * @param string $level level identifier, see $levels for valid values
     * @return array Lookup up table of fixes
     */
    public function getFixesForLevel($level)
    {
        if ($level == $this->levels[0]) {
            return array();
        }
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }
        // The loop only runs to completion ($i == $c) when no level matched.
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, every fix name is assigned to that level;
     * an unknown $defaultLevel raises an E_USER_ERROR.
     * @param array $fixes
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it
     * @param array $fixes Lookup table of fixes to activate
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: store on the element
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // no element given: store on the module-level
                        // info_attr_transform_pre/post property instead
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * Names have the shape "element@attr#property", where every part is
     * optional: "a@name" is an attribute pre-transform on a, "@lang" is an
     * attribute pre-transform without an element, "font" is a tag
     * transform, and "blockquote#content_model_type" sets that property.
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependant, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);

    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes. It returns an empty array
     * (rather than nothing) so that setup() and makeFixesForLevel() can
     * safely iterate over and take the keys of the result.
     * @return array
     */
    public function makeFixes()
    {
        return array();
    }
}
17178: 
17179: 
17180: 
17181: 
17182: 
/**
 * Module that contributes the xml:lang attribute to the 'Lang' attribute
 * collection. It defines no elements of its own.
 */
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'XMLCommonAttributes';

    /**
     * Adds xml:lang, validated as a language code, to the 'Lang'
     * collection.
     * @type array
     */
    public $attr_collections = array(
        'Lang' => array('xml:lang' => 'LanguageCode')
    );
}
17199: 
17200: 
17201: 
17202: 
17203: 
/**
 * Tidy module for the name attribute on img and a. Name is deprecated,
 * but still allowed in strict doctypes, so its fixes are placed in the
 * 'heavy' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tidy_Name';

    /**
     * Level that all fixes of this module are assigned to.
     * @type string
     */
    public $defaultLevel = 'heavy';

    /**
     * Builds the name-attribute fixes for a and img; both entries share a
     * single transform instance.
     * @return array
     */
    public function makeFixes()
    {
        // @name for img, a -----------------------------------------------
        // Technically, it's allowed even on strict, so we allow authors to use
        // it. However, it's deprecated in future versions of XHTML.
        $nameTransform = new HTMLPurifier_AttrTransform_Name();
        return array(
            'a@name' => $nameTransform,
            'img@name' => $nameTransform
        );
    }
}
17233: 
17234: 
17235: 
17236: 
17237: 
/**
 * Tidy module with fixes for the background attribute on table elements
 * and the height attribute on table. All fixes are placed in the 'light'
 * level.
 */
class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tidy_Proprietary';

    /**
     * Level that all fixes of this module are assigned to.
     * @type string
     */
    public $defaultLevel = 'light';

    /**
     * Builds one Background transform per table-related element plus a
     * Length transform for table@height.
     * @return array
     */
    public function makeFixes()
    {
        $fixes = array();

        // Each element gets its own transform instance.
        $backgroundElements = array('table', 'td', 'th', 'tr', 'thead', 'tfoot', 'tbody');
        foreach ($backgroundElements as $tag) {
            $fixes[$tag . '@background'] = new HTMLPurifier_AttrTransform_Background();
        }

        $fixes['table@height'] = new HTMLPurifier_AttrTransform_Length('height');

        return $fixes;
    }
}
17268: 
17269: 
17270: 
17271: 
17272: 
/**
 * Shared base for the Strict and Transitional tidy modules: defines the
 * transforms for deprecated tags and attributes. It declares neither a
 * module name nor a default level; subclasses supply both.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Builds the map of fix name to transform object. Fix names follow
     * the "element" / "element@attribute" convention parsed by
     * getFixType().
     * @return array
     */
    public function makeFixes()
    {
        $fixes = array();

        // == deprecated tag transforms ===================================

        $fixes['font'] = new HTMLPurifier_TagTransform_Font();
        $fixes['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
        $fixes['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
        $fixes['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;');
        $fixes['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;');
        $fixes['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
        $fixes['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');

        // == deprecated attribute transforms =============================

        // @align for caption ---------------------------------------------
        // we're following IE's behavior, not Firefox's, due
        // to the fact that no one supports caption-side:right,
        // W3C included (with CSS 2.1). This is a slightly
        // unreasonable attribute!
        $captionAlign = array(
            'left' => 'text-align:left;',
            'right' => 'text-align:right;',
            'top' => 'caption-side:top;',
            'bottom' => 'caption-side:bottom;' // not supported by IE
        );
        $fixes['caption@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $captionAlign);

        // @align for img -------------------------------------------------
        $imageAlign = array(
            'left' => 'float:left;',
            'right' => 'float:right;',
            'top' => 'vertical-align:top;',
            'middle' => 'vertical-align:middle;',
            'bottom' => 'vertical-align:baseline;',
        );
        $fixes['img@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $imageAlign);

        // @align for table -----------------------------------------------
        $tableAlign = array(
            'left' => 'float:left;',
            'center' => 'margin-left:auto;margin-right:auto;',
            'right' => 'float:right;'
        );
        $fixes['table@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $tableAlign);

        // @align for hr --------------------------------------------------
        // we use both text-align and margin because these work
        // for different browsers (IE and Firefox, respectively)
        // and the melange makes for a pretty cross-compatible
        // solution
        $ruleAlign = array(
            'left' => 'margin-left:0;margin-right:auto;text-align:left;',
            'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
            'right' => 'margin-left:auto;margin-right:0;text-align:right;'
        );
        $fixes['hr@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $ruleAlign);

        // @align for h1, h2, h3, h4, h5, h6, p, div ----------------------
        $textAlign = array();
        foreach (array('left', 'right', 'center', 'justify') as $keyword) {
            $textAlign[$keyword] = "text-align:$keyword;";
        }
        // One transform instance is shared by all block-level elements.
        $blockAlign = new HTMLPurifier_AttrTransform_EnumToCSS('align', $textAlign);
        foreach (array('div', 'p', 'h6', 'h5', 'h4', 'h3', 'h2', 'h1') as $tag) {
            $fixes[$tag . '@align'] = $blockAlign;
        }

        // @bgcolor for table, td, th -------------------------------------
        // NOTE(review): the previous heading of this section also listed
        // tr, but no tr@bgcolor fix is registered here -- confirm whether
        // that omission is intended.
        $backgroundColor = new HTMLPurifier_AttrTransform_BgColor();
        $fixes['th@bgcolor'] = $backgroundColor;
        $fixes['td@bgcolor'] = $backgroundColor;
        $fixes['table@bgcolor'] = $backgroundColor;

        // @border for img ------------------------------------------------
        $fixes['img@border'] = new HTMLPurifier_AttrTransform_Border();

        // @clear for br --------------------------------------------------
        $breakClear = array(
            'left' => 'clear:left;',
            'right' => 'clear:right;',
            'all' => 'clear:both;',
            'none' => 'clear:none;',
        );
        $fixes['br@clear'] = new HTMLPurifier_AttrTransform_EnumToCSS('clear', $breakClear);

        // @height for td, th ---------------------------------------------
        $cellHeight = new HTMLPurifier_AttrTransform_Length('height');
        $fixes['th@height'] = $cellHeight;
        $fixes['td@height'] = $cellHeight;

        // @hspace for img ------------------------------------------------
        $fixes['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');

        // @noshade for hr ------------------------------------------------
        // this transformation is not precise but often good enough.
        // different browsers use different styles to designate noshade
        $fixes['hr@noshade'] = new HTMLPurifier_AttrTransform_BoolToCSS(
            'noshade',
            'color:#808080;background-color:#808080;border:0;'
        );

        // @nowrap for td, th ---------------------------------------------
        $cellNoWrap = new HTMLPurifier_AttrTransform_BoolToCSS(
            'nowrap',
            'white-space:nowrap;'
        );
        $fixes['th@nowrap'] = $cellNoWrap;
        $fixes['td@nowrap'] = $cellNoWrap;

        // @size for hr  --------------------------------------------------
        $fixes['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');

        // @type for li, ol, ul -------------------------------------------
        $unorderedTypes = array(
            'disc' => 'list-style-type:disc;',
            'square' => 'list-style-type:square;',
            'circle' => 'list-style-type:circle;'
        );
        $orderedTypes = array(
            '1' => 'list-style-type:decimal;',
            'i' => 'list-style-type:lower-roman;',
            'I' => 'list-style-type:upper-roman;',
            'a' => 'list-style-type:lower-alpha;',
            'A' => 'list-style-type:upper-alpha;'
        );
        // li accepts the values of both list kinds
        $itemTypes = $unorderedTypes + $orderedTypes;

        $fixes['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $unorderedTypes);
        $fixes['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $orderedTypes, true);
        $fixes['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $itemTypes, true);

        // @vspace for img ------------------------------------------------
        $fixes['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');

        // @width for hr, td, th ------------------------------------------
        $widthTransform = new HTMLPurifier_AttrTransform_Length('width');
        $fixes['hr@width'] = $widthTransform;
        $fixes['th@width'] = $widthTransform;
        $fixes['td@width'] = $widthTransform;

        return $fixes;
    }
}
17448: 
17449: 
17450: 
17451: 
17452: 
/**
 * Tidy module for strict doctypes: the shared XHTML/HTML4 fixes plus a
 * custom content model type for blockquote. All fixes are placed in the
 * 'light' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tidy_Strict';

    /**
     * Level that all fixes of this module are assigned to.
     * @type string
     */
    public $defaultLevel = 'light';

    /**
     * Extends the inherited fixes with one that sets the blockquote
     * content model type to 'strictblockquote'.
     * @return array
     */
    public function makeFixes()
    {
        $fixes = parent::makeFixes();
        $fixes['blockquote#content_model_type'] = 'strictblockquote';
        return $fixes;
    }

    /**
     * Signals that this module provides its own getChildDef().
     * @type bool
     */
    public $defines_child_def = true;

    /**
     * Returns a StrictBlockquote child definition for elements whose
     * content model type is 'strictblockquote'; every other type is
     * delegated to the parent implementation.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef_StrictBlockquote
     */
    public function getChildDef($def)
    {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return parent::getChildDef($def);
    }
}
17492: 
17493: 
17494: 
17495: 
17496: 
/**
 * Tidy module for transitional doctypes: uses the shared XHTML/HTML4
 * fixes unchanged and places all of them in the 'heavy' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tidy_Transitional';

    /**
     * Level that all fixes of this module are assigned to.
     * @type string
     */
    public $defaultLevel = 'heavy';
}
17509: 
17510: 
17511: 
17512: 
17513: 
/**
 * Tidy module for XHTML: registers a Lang attribute transform under the
 * element-less fix name '@lang'. The fix is placed in the 'medium' level.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tidy_XHTML';

    /**
     * Level that all fixes of this module are assigned to.
     * @type string
     */
    public $defaultLevel = 'medium';

    /**
     * Builds the single '@lang' fix.
     * @return array
     */
    public function makeFixes()
    {
        return array(
            '@lang' => new HTMLPurifier_AttrTransform_Lang()
        );
    }
}
17536: 
17537: 
17538: 
17539: 
17540: 
17541: /**
17542:  * Injector that auto paragraphs text in the root node based on
17543:  * double-spacing.
17544:  * @todo Ensure all states are unit tested, including variations as well.
17545:  * @todo Make a graph of the flow control for this Injector.
17546:  */
17547: class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
17548: {
17549:     /**
17550:      * @type string
17551:      */
17552:     public $name = 'AutoParagraph';
17553: 
17554:     /**
17555:      * @type array
17556:      */
17557:     public $needed = array('p');
17558: 
17559:     /**
17560:      * @return HTMLPurifier_Token_Start
17561:      */
17562:     private function _pStart()
17563:     {
17564:         $par = new HTMLPurifier_Token_Start('p');
17565:         $par->armor['MakeWellFormed_TagClosedError'] = true;
17566:         return $par;
17567:     }
17568: 
    /**
     * Decides whether a text token has to be wrapped in, or split into,
     * paragraphs.
     *
     * $token is passed by reference: when paragraphs are needed it is
     * replaced by an array of tokens (a <p> start token and/or the pieces
     * produced by _splitText()); in every other state it is left untouched.
     *
     * @param HTMLPurifier_Token_Text $token
     */
    public function handleText(&$token)
    {
        $text = $token->data;
        // Does the current parent allow <p> tags?
        if ($this->allowsElement('p')) {
            if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
                // Note that we have differing behavior when dealing with text
                // in the anonymous root node, or a node inside the document.
                // If the text has a double-newline, the treatment is the same;
                // if it doesn't, see the next if-block if you're in the document.

                $i = $nesting = null;
                if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
                    // State 1.1: ...    ^ (whitespace, then document end)
                    //               ----
                    // This is a degenerate case
                } else {
                    if (!$token->is_whitespace || $this->_isInline($current)) {
                        // State 1.2: PAR1
                        //            ----

                        // State 1.3: PAR1\n\nPAR2
                        //            ------------

                        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
                        //                 ------------
                        // Seed the replacement with an opening <p>; _splitText()
                        // appends the text pieces after it.
                        $token = array($this->_pStart());
                        $this->_splitText($text, $token);
                    } else {
                        // State 1.5: \n<hr />
                        //            --
                    }
                }
            } else {
                // State 2:   <div>PAR1... (similar to 1.4)
                //                 ----

                // We're in an element that allows paragraph tags, but we're not
                // sure if we're going to need them.
                if ($this->_pLookAhead()) {
                    // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
                    //                 ----
                    // Note: This will always be the first child, since any
                    // previous inline element would have triggered this very
                    // same routine, and found the double newline. One possible
                    // exception would be a comment.
                    $token = array($this->_pStart(), $token);
                } else {
                    // State 2.2.1: <div>PAR1<div>
                    //                   ----

                    // State 2.2.2: <div>PAR1<b>PAR1</b></div>
                    //                   ----
                }
            }
            // Is the current parent a <p> tag?
        } elseif (!empty($this->currentNesting) &&
            $this->currentNesting[count($this->currentNesting) - 1]->name == 'p') {
            // State 3.1: ...<p>PAR1
            //                  ----

            // State 3.2: ...<p>PAR1\n\nPAR2
            //                  ------------
            // Start from an empty replacement: _splitText() treats an empty
            // result as "already inside a paragraph".
            $token = array();
            $this->_splitText($text, $token);
            // Abort!
        } else {
            // State 4.1: ...<b>PAR1
            //                  ----

            // State 4.2: ...<b>PAR1\n\nPAR2
            //                  ------------
        }
    }
17646: 
    /**
     * Decides whether an element token needs an opening <p> (and possibly a
     * separating "\n\n" text token) inserted in front of it.
     *
     * $token is passed by reference: when something has to be inserted it is
     * replaced by an array whose last entry is the original token; otherwise
     * it is left untouched.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        // We don't have to check if we're already in a <p> tag for block
        // tokens, because the tag would have been autoclosed by MakeWellFormed.
        if ($this->allowsElement('p')) {
            if (!empty($this->currentNesting)) {
                if ($this->_isInline($token)) {
                    // State 1: <div>...<b>
                    //                  ---
                    // Check if this token is adjacent to the parent token
                    // (seek backwards until token isn't whitespace)
                    $i = null;
                    $this->backward($i, $prev);

                    if (!$prev instanceof HTMLPurifier_Token_Start) {
                        // Token wasn't adjacent
                        if ($prev instanceof HTMLPurifier_Token_Text &&
                            substr($prev->data, -2) === "\n\n"
                        ) {
                            // State 1.1.4: <div><p>PAR1</p>\n\n<b>
                            //                                  ---
                            // Quite frankly, this should be handled by splitText
                            $token = array($this->_pStart(), $token);
                        } else {
                            // State 1.1.1: <div><p>PAR1</p><b>
                            //                              ---
                            // State 1.1.2: <div><br /><b>
                            //                         ---
                            // State 1.1.3: <div>PAR<b>
                            //                      ---
                        }
                    } else {
                        // State 1.2.1: <div><b>
                        //                   ---
                        // Lookahead to see if <p> is needed.
                        if ($this->_pLookAhead()) {
                            // State 1.3.1: <div><b>PAR1\n\nPAR2
                            //                   ---
                            $token = array($this->_pStart(), $token);
                        } else {
                            // State 1.3.2: <div><b>PAR1</b></div>
                            //                   ---

                            // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
                            //                   ---
                        }
                    }
                } else {
                    // State 2.3: ...<div>
                    //               -----
                }
            } else {
                // No open elements: we are at the root of the fragment.
                if ($this->_isInline($token)) {
                    // State 3.1: <b>
                    //            ---
                    // This is where the {p} tag is inserted, not reflected in
                    // inputTokens yet, however.
                    $token = array($this->_pStart(), $token);
                } else {
                    // State 3.2: <div>
                    //            -----
                }

                $i = null;
                if ($this->backward($i, $prev)) {
                    if (!$prev instanceof HTMLPurifier_Token_Text) {
                        // State 3.1.1: ...</p>{p}<b>
                        //                        ---
                        // State 3.2.1: ...</p><div>
                        //                     -----
                        // Normalize to an array so the "\n\n" separator can be
                        // put in front of whatever is being emitted.
                        if (!is_array($token)) {
                            $token = array($token);
                        }
                        array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
                    } else {
                        // State 3.1.2: ...</p>\n\n{p}<b>
                        //                            ---
                        // State 3.2.2: ...</p>\n\n<div>
                        //                         -----
                        // Note: PAR<ELEM> cannot occur because PAR would have been
                        // wrapped in <p> tags.
                    }
                }
            }
        } else {
            // State 2.2: <ul><li>
            //                ----
            // State 2.4: <p><b>
            //               ---
        }
    }
17741: 
    /**
     * Splits up a text in paragraph tokens and appends them
     * to the result stream that will replace the original.
     *
     * The text is cut on "\n\n". Whitespace-only pieces are not emitted as
     * paragraphs; a whitespace-only first or last piece instead controls
     * whether a leading </p>/<p> pair or a trailing </p> is produced.
     * If no non-blank piece exists, nothing is appended after the leading
     * handling and the method returns.
     *
     * @param string $data String text data that will be processed
     *    into paragraphs
     * @param HTMLPurifier_Token[] $result Reference to array of tokens that the
     *    tags will be appended onto. An empty array on entry is interpreted
     *    as "the caller did not open a paragraph".
     */
    private function _splitText($data, &$result)
    {
        $raw_paragraphs = explode("\n\n", $data);
        $paragraphs = array(); // without empty paragraphs
        $needs_start = false;
        $needs_end = false;

        $c = count($raw_paragraphs);
        if ($c == 1) {
            // There were no double-newlines, abort quickly. In theory this
            // should never happen.
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }
        for ($i = 0; $i < $c; $i++) {
            $par = $raw_paragraphs[$i];
            if (trim($par) !== '') {
                $paragraphs[] = $par;
            } else {
                if ($i == 0) {
                    // Double newline at the front
                    if (empty($result)) {
                        // The empty result indicates that the AutoParagraph
                        // injector did not add any start paragraph tokens.
                        // This means that we have been in a paragraph for
                        // a while, and the newline means we should start a new one.
                        $result[] = new HTMLPurifier_Token_End('p');
                        $result[] = new HTMLPurifier_Token_Text("\n\n");
                        // However, the start token should only be added if
                        // there is more processing to be done (i.e. there are
                        // real paragraphs in here). If there are none, the
                        // next start paragraph tag will be handled by the
                        // next call to the injector
                        $needs_start = true;
                    } else {
                        // We just started a new paragraph!
                        // Reinstate a double-newline for presentation's sake, since
                        // it was in the source code.
                        array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
                    }
                } elseif ($i + 1 == $c) {
                    // Double newline at the end
                    // There should be a trailing </p> when we're finally done.
                    $needs_end = true;
                }
                // Blank pieces in the middle are simply dropped.
            }
        }

        // Check if this was just a giant blob of whitespace. Move this earlier,
        // perhaps?
        if (empty($paragraphs)) {
            return;
        }

        // Add the start tag indicated by \n\n at the beginning of $data
        if ($needs_start) {
            $result[] = $this->_pStart();
        }

        // Append the paragraphs onto the result
        // (each one as: text, </p>, "\n\n", <p>)
        foreach ($paragraphs as $par) {
            $result[] = new HTMLPurifier_Token_Text($par);
            $result[] = new HTMLPurifier_Token_End('p');
            $result[] = new HTMLPurifier_Token_Text("\n\n");
            $result[] = $this->_pStart();
        }

        // Remove trailing start token; Injector will handle this later if
        // it was indeed needed. This prevents from needing to do a lookahead,
        // at the cost of a lookbehind later.
        array_pop($result);

        // If there is no need for an end tag, remove all of it and let
        // MakeWellFormed close it later.
        if (!$needs_end) {
            array_pop($result); // removes \n\n
            array_pop($result); // removes </p>
        }
    }
17829: 
    /**
     * Returns true if passed token is inline (and, ergo, allowed in
     * paragraph tags).
     *
     * The check is a lookup of the token's name in the element list of the
     * <p> definition's child content model. Because the whole lookup sits
     * inside a single isset(), a missing <p> definition yields false rather
     * than an error.
     *
     * @param HTMLPurifier_Token $token
     * @return bool
     */
    private function _isInline($token)
    {
        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
    }
17840: 
17841:     /**
17842:      * Looks ahead in the token list and determines whether or not we need
17843:      * to insert a <p> tag.
17844:      * @return bool
17845:      */
17846:     private function _pLookAhead()
17847:     {
17848:         if ($this->currentToken instanceof HTMLPurifier_Token_Start) {
17849:             $nesting = 1;
17850:         } else {
17851:             $nesting = 0;
17852:         }
17853:         $ok = false;
17854:         $i = null;
17855:         while ($this->forwardUntilEndToken($i, $current, $nesting)) {
17856:             $result = $this->_checkNeedsP($current);
17857:             if ($result !== null) {
17858:                 $ok = $result;
17859:                 break;
17860:             }
17861:         }
17862:         return $ok;
17863:     }
17864: 
17865:     /**
17866:      * Determines if a particular token requires an earlier inline token
17867:      * to get a paragraph. This should be used with _forwardUntilEndToken
17868:      * @param HTMLPurifier_Token $current
17869:      * @return bool
17870:      */
17871:     private function _checkNeedsP($current)
17872:     {
17873:         if ($current instanceof HTMLPurifier_Token_Start) {
17874:             if (!$this->_isInline($current)) {
17875:                 // <div>PAR1<div>
17876:                 //      ----
17877:                 // Terminate early, since we hit a block element
17878:                 return false;
17879:             }
17880:         } elseif ($current instanceof HTMLPurifier_Token_Text) {
17881:             if (strpos($current->data, "\n\n") !== false) {
17882:                 // <div>PAR1<b>PAR1\n\nPAR2
17883:                 //      ----
17884:                 return true;
17885:             } else {
17886:                 // <div>PAR1<b>PAR1...
17887:                 //      ----
17888:             }
17889:         }
17890:         return null;
17891:     }
17892: }
17893: 
17894: 
17895: 
17896: 
17897: 
/**
 * Injector that displays the URL of an anchor instead of linking to it, in addition to showing the text of the link.
 */
class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'DisplayLinkURI';

    /**
     * Elements this injector depends on.
     * @type array
     */
    public $needed = array('a');

    /**
     * Start/empty elements are left alone; all work happens in handleEnd().
     * @param $token
     */
    public function handleElement(&$token)
    {
    }

    /**
     * When the matching start tag carries an href, removes that attribute
     * and appends " (href)" as a text token right after the end tag.
     * End tags whose start tag has no href are left untouched.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (!isset($token->start->attr['href'])) {
            // nothing to display
            return;
        }

        $uri = $token->start->attr['href'];
        unset($token->start->attr['href']);

        $suffix = new HTMLPurifier_Token_Text(" ($uri)");
        $token = array($token, $suffix);
    }
}
17934: 
17935: 
17936: 
17937: 
17938: 
/**
 * Injector that converts http, https and ftp text URLs to actual links.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'Linkify';

    /**
     * Elements (and their attributes) this injector depends on.
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Replaces a text token containing URLs with a sequence of text tokens
     * and <a href="URL">URL</a> token triples.
     *
     * The token is left untouched when <a> is not allowed here, when the
     * text contains no "://", or when the regular expression engine fails.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }

        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }

        // there is/are URL(s). Let's split the string:
        // Note: this regex is extremely permissive
        $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);

        if ($bits === false) {
            // preg_split() reports failure (for instance malformed UTF-8
            // under the /u modifier) by returning false. Keep the original
            // text token instead of replacing it with a broken result.
            return;
        }

        // With PREG_SPLIT_DELIM_CAPTURE and a single capture group the
        // result alternates: even indexes are plain text, odd indexes are
        // the captured URLs.
        $replacement = array();
        foreach ($bits as $index => $bit) {
            if ($index % 2 === 0) {
                if ($bit === '') {
                    continue;
                }
                $replacement[] = new HTMLPurifier_Token_Text($bit);
            } else {
                $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $bit));
                $replacement[] = new HTMLPurifier_Token_Text($bit);
                $replacement[] = new HTMLPurifier_Token_End('a');
            }
        }

        $token = $replacement;
    }
}
17994: 
17995: 
17996: 
17997: 
17998: 
/**
 * Injector that converts configuration directive syntax %Namespace.Directive
 * to links
 */
class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'PurifierLinkify';

    /**
     * URL template read from AutoFormat.PurifierLinkify.DocURL; every "%s"
     * in it is replaced by the matched Namespace.Directive.
     * @type string
     */
    public $docURL;

    /**
     * Elements (and their attributes) this injector depends on.
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Loads the documentation URL template, then defers to the parent.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string
     */
    public function prepare($config, $context)
    {
        $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL');

        return parent::prepare($config, $context);
    }

    /**
     * Replaces %Namespace.Directive occurrences in a text token with links
     * built from the documentation URL template.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        // Bail out when links are not allowed here or there is nothing
        // that could be a directive reference.
        if (!$this->allowsElement('a') || strpos($token->data, '%') === false) {
            return;
        }

        $pieces = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        $token = array();

        // Even positions hold plain text, odd positions hold the captured
        // directive name (without the leading '%').
        for ($pos = 0, $total = count($pieces); $pos < $total; $pos++) {
            $isDirective = ($pos % 2 === 1);

            if ($isDirective) {
                $href = str_replace('%s', $pieces[$pos], $this->docURL);
                $token[] = new HTMLPurifier_Token_Start('a', array('href' => $href));
                $token[] = new HTMLPurifier_Token_Text('%' . $pieces[$pos]);
                $token[] = new HTMLPurifier_Token_End('a');
                continue;
            }

            if ($pieces[$pos] !== '') {
                $token[] = new HTMLPurifier_Token_Text($pieces[$pos]);
            }
        }
    }
}
18066: 
18067: 
18068: 
18069: 
18070: 
/**
 * Injector that removes elements which contain nothing but whitespace
 * (and, optionally, non-breaking spaces).
 */
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{
    /**
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Value of AutoFormat.RemoveEmpty.RemoveNbsp.
     * @type bool
     */
    private $removeNbsp;

    /**
     * Value of AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions; looked up by
     * element name with isset().
     * @type array
     */
    private $removeNbspExceptions;

    /**
     * Element names that are never removed, even when empty.
     * @type array
     * TODO: make me configurable
     */
    private $_exclude = array('colgroup' => 1, 'th' => 1, 'td' => 1, 'iframe' => 1);

    /**
     * Caches config, context and the relevant directives, and creates the
     * attribute validator used by handleElement().
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return void
     */
    public function prepare($config, $context)
    {
        parent::prepare($config, $context);
        $this->config = $config;
        $this->context = $context;
        $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
        $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
        $this->attrValidator = new HTMLPurifier_AttrValidator();
    }

    /**
     * Removes a start tag together with its matching end tag when only
     * ignorable text lies between them.
     *
     * Elements listed in $_exclude, and elements that still carry an id or
     * name attribute after validation, are kept.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if (!$token instanceof HTMLPurifier_Token_Start) {
            return;
        }
        $next = false;
        $deleted = 1; // the current tag
        // Walk inputZipper->back from its last entry downwards, skipping
        // ignorable text; $deleted counts the current tag plus every skipped
        // token. NOTE(review): presumably the last entry of `back` is the
        // token that immediately follows the current one - confirm against
        // the zipper implementation.
        for ($i = count($this->inputZipper->back) - 1; $i >= 0; $i--, $deleted++) {
            $next = $this->inputZipper->back[$i];
            if ($next instanceof HTMLPurifier_Token_Text) {
                if ($next->is_whitespace) {
                    continue;
                }
                if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
                    // "\xC2\xA0" is the UTF-8 encoding of U+00A0 (nbsp).
                    $plain = str_replace("\xC2\xA0", "", $next->data);
                    $isWsOrNbsp = $plain === '' || ctype_space($plain);
                    if ($isWsOrNbsp) {
                        continue;
                    }
                }
            }
            break;
        }
        // Empty means: nothing follows at all, or the first token that was
        // not skipped is the end tag of this very element.
        if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
            if (isset($this->_exclude[$token->name])) {
                return;
            }
            // Validate attributes now so that the id/name check below sees
            // the validated attribute set; the armor flag records that
            // validation has already run for this token.
            $this->attrValidator->validateToken($token, $this->config, $this->context);
            $token->armor['ValidateAttributes'] = true;
            if (isset($token->attr['id']) || isset($token->attr['name'])) {
                return;
            }
            // NOTE(review): an integer replacement presumably tells the
            // caller how many tokens to delete (start tag, skipped text and
            // the end tag) - confirm against MakeWellFormed.
            $token = $deleted + 1;
            // Count the whitespace-only text tokens at the start of
            // inputZipper->front.
            for ($b = 0, $c = count($this->inputZipper->front); $b < $c; $b++) {
                $prev = $this->inputZipper->front[$b];
                if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) {
                    continue;
                }
                break;
            }
            // This is safe because we removed the token that triggered this.
            $this->rewindOffset($b+$deleted);
            return;
        }
    }
}
18168: 
18169: 
18170: 
18171: 
18172: 
/**
 * Injector that removes spans with no attributes
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'RemoveSpansWithoutAttributes';

    /**
     * @type array
     */
    public $needed = array('span');

    /**
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Used by AttrValidator.
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * End tokens whose start tag was removed and which must therefore be
     * removed as well. Kept here rather than as an ad-hoc property on the
     * token objects, which would create dynamic properties (deprecated
     * since PHP 8.2).
     * @type SplObjectStorage|null
     */
    private $markForDeletion;

    /**
     * Stores config and context for the attribute validator, resets the set
     * of end tokens marked for deletion, then defers to the parent.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return mixed Whatever the parent implementation returns.
     */
    public function prepare($config, $context)
    {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        $this->markForDeletion = new SplObjectStorage();
        return parent::prepare($config, $context);
    }

    /**
     * Removes a <span> start tag that has no attributes left after
     * validation, and marks its matching end tag for removal.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // Run the iterator to exhaustion; afterwards $current holds the last
        // token it produced. All three variables are passed by reference, so
        // initialise them explicitly.
        $i = null;
        $current = null;
        $nesting = 0;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        }

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $marks = $this->deletionMarks();
            $marks[$current] = true;
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Removes an end tag that handleElement() marked for deletion.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        $marks = $this->deletionMarks();
        if (isset($marks[$token])) {
            // Forget the token so the storage does not grow over time.
            unset($marks[$token]);
            $token = false;
        }
    }

    /**
     * Returns the storage of marked end tokens, creating it on first use so
     * the handlers also work if prepare() has not initialised it.
     *
     * @return SplObjectStorage
     */
    private function deletionMarks()
    {
        if ($this->markForDeletion === null) {
            $this->markForDeletion = new SplObjectStorage();
        }
        return $this->markForDeletion;
    }
}
18253: 
18254: 
18255: 
18256: 
18257: 
/**
 * Adds important param elements to inside of object in order to make
 * things safe.
 */
class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'SafeObject';

    /**
     * @type array
     */
    public $needed = array('object', 'param');

    /**
     * Stack of the currently open <object> start tokens.
     * @type array
     */
    protected $objectStack = array();

    /**
     * Parallel to $objectStack: per object, the names of forced params
     * already seen.
     * @type array
     */
    protected $paramStack = array();

    /**
     * Params (name => value) injected into every <object>.
     * Keep this synchronized with AttrTransform/SafeParam.php.
     * @type array
     */
    protected $addParam = array(
        'allowScriptAccess' => 'never',
        'allowNetworking' => 'internal',
    );

    /**
     * Param names that are passed through unchanged.
     * @type array
     */
    protected $allowedParam = array(
        'wmode' => true,
        'movie' => true,
        'flashvars' => true,
        'src' => true,
        'allowFullScreen' => true, // if omitted, assume to be 'false'
    );

    /**
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return void
     */
    public function prepare($config, $context)
    {
        parent::prepare($config, $context);
    }

    /**
     * For <object>: pushes the token on the stacks and appends one <param>
     * per entry of $addParam right after it.
     * For <param>: keeps it only when it sits directly inside an <object>,
     * has a name, and that name is acceptable; otherwise the token is
     * removed by setting it to false.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name == 'object') {
            $this->objectStack[] = $token;
            $this->paramStack[] = array();
            $new = array($token);
            foreach ($this->addParam as $name => $value) {
                $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value));
            }
            $token = $new;
        } elseif ($token->name == 'param') {
            $nest = count($this->currentNesting) - 1;
            if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') {
                $i = count($this->objectStack) - 1;
                if (!isset($token->attr['name'])) {
                    $token = false;
                    return;
                }
                $n = $token->attr['name'];
                // We need this fix because YouTube doesn't supply a data
                // attribute, which we need if a type is specified. This is
                // *very* Flash specific.
                // Only copy when the param actually has a value: reading a
                // missing 'value' would raise an undefined-index notice and
                // store null as the object's data attribute.
                if (!isset($this->objectStack[$i]->attr['data']) &&
                    ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src') &&
                    isset($token->attr['value'])
                ) {
                    $this->objectStack[$i]->attr['data'] = $token->attr['value'];
                }
                // Check if the parameter is the correct value but has not
                // already been added
                // NOTE(review): the last condition compares the param *name*
                // with the expected *value* from $addParam. With the default
                // $addParam those never match, so this branch is not taken
                // and such params fall through to the checks below (the
                // copies injected for <object> remain in place). Left
                // unchanged because altering it would change the output.
                if (!isset($this->paramStack[$i][$n]) &&
                    isset($this->addParam[$n]) &&
                    $token->attr['name'] === $this->addParam[$n]) {
                    // keep token, and add to param stack
                    $this->paramStack[$i][$n] = true;
                } elseif (isset($this->allowedParam[$n])) {
                    // keep token, don't do anything to it
                    // (could possibly check for duplicates here)
                } else {
                    $token = false;
                }
            } else {
                // not directly inside an object, DENY!
                $token = false;
            }
        }
    }

    /**
     * Pops the object and param stacks when an </object> is reached.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        // This is the WRONG way of handling the object and param stacks;
        // we should be inserting them directly on the relevant object tokens
        // so that the global stack handling handles it.
        if ($token->name == 'object') {
            array_pop($this->objectStack);
            array_pop($this->paramStack);
        }
    }
}
18375: 
18376: 
18377: 
18378: 
18379: 
/**
 * Parser that uses PHP 5's DOM extension (part of the core).
 *
 * In PHP 5, the DOM XML extension was revamped into DOM and added to the core.
 * It gives us a forgiving HTML parser, which we use to transform the HTML
 * into a DOM, and then into the tokens.  It is blazingly fast (for large
 * documents, it performs twenty times faster than
 * HTMLPurifier_Lexer_DirectLex), and is the default choice for PHP 5.
 *
 * @note Any empty elements will have empty tokens associated with them, even if
 * this is prohibited by the spec. This cannot be fixed until the spec
 * comes into play.
 *
 * @note PHP's DOM extension does not actually parse any entities, we use
 *       our own function to do that.
 *
 * @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
 *          If this is a huge problem, due to the fact that HTML is hand
 *          edited and you are unable to get a parser cache that caches the
 *          output of HTML Purifier while keeping the original HTML lying
 *          around, you may want to run Tidy on the resulting output or use
 *          HTMLPurifier_Lexer_DirectLex
 */
18403: 
18404: class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
18405: {
18406: 
18407:     /**
18408:      * @type HTMLPurifier_TokenFactory
18409:      */
18410:     private $factory;
18411: 
    /**
     * Runs the parent lexer constructor, then creates the token factory
     * used by this lexer.
     */
    public function __construct()
    {
        // setup the factory
        parent::__construct();
        $this->factory = new HTMLPurifier_TokenFactory();
    }
18418: 
    /**
     * Turns an HTML string into a flat list of tokens by parsing it with
     * DOMDocument and walking the resulting tree.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $html = $this->normalize($html, $config, $context);

        // attempt to armor stray angled brackets that cannot possibly
        // form tags and thus are probably being used as emoticons
        if ($config->get('Core.AggressivelyFixLt')) {
            // A '<' followed by anything other than a letter, '!' or '/'
            // cannot start a tag, comment or end tag.
            $char = '[^a-z!\/]';
            // Matches a comment up to its terminator or the end of input.
            $comment = "/<!--(.*?)(-->|\z)/is";
            $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
            // Repeat until nothing changes, so runs such as "<<<" are fully
            // escaped.
            do {
                $old = $html;
                $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
            } while ($html !== $old);
            $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
        }

        // preprocess html, essential for UTF-8
        // NOTE(review): wrapHTML() presumably wraps the fragment in
        // <html><body><div>, since the result is read back from that path
        // below - confirm.
        $html = $this->wrapHTML($html, $config, $context);

        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // theoretically, the above has this covered

        // Route errors raised while parsing to muteErrorHandler for the
        // duration of loadHTML() only.
        set_error_handler(array($this, 'muteErrorHandler'));
        $doc->loadHTML($html);
        restore_error_handler();

        $tokens = array();
        $this->tokenizeDOM(
            $doc->getElementsByTagName('html')->item(0)-> // <html>
            getElementsByTagName('body')->item(0)-> //   <body>
            getElementsByTagName('div')->item(0), //     <div>
            $tokens
        );
        return $tokens;
    }
18461: 
18462:     /**
18463:      * Iterative function that tokenizes a node, putting it into an accumulator.
18464:      * To iterate is human, to recurse divine - L. Peter Deutsch
18465:      * @param DOMNode $node DOMNode to be tokenized.
18466:      * @param HTMLPurifier_Token[] $tokens   Array-list of already tokenized tokens.
18467:      * @return HTMLPurifier_Token of node appended to previously passed tokens.
18468:      */
18469:     protected function tokenizeDOM($node, &$tokens)
18470:     {
18471:         $level = 0;
18472:         $nodes = array($level => new HTMLPurifier_Queue(array($node)));
18473:         $closingNodes = array();
18474:         do {
18475:             while (!$nodes[$level]->isEmpty()) {
18476:                 $node = $nodes[$level]->shift(); // FIFO
18477:                 $collect = $level > 0 ? true : false;
18478:                 $needEndingTag = $this->createStartNode($node, $tokens, $collect);
18479:                 if ($needEndingTag) {
18480:                     $closingNodes[$level][] = $node;
18481:                 }
18482:                 if ($node->childNodes && $node->childNodes->length) {
18483:                     $level++;
18484:                     $nodes[$level] = new HTMLPurifier_Queue();
18485:                     foreach ($node->childNodes as $childNode) {
18486:                         $nodes[$level]->push($childNode);
18487:                     }
18488:                 }
18489:             }
18490:             $level--;
18491:             if ($level && isset($closingNodes[$level])) {
18492:                 while ($node = array_pop($closingNodes[$level])) {
18493:                     $this->createEndNode($node, $tokens);
18494:                 }
18495:             }
18496:         } while ($level > 0);
18497:     }
18498: 
18499:     /**
18500:      * @param DOMNode $node DOMNode to be tokenized.
18501:      * @param HTMLPurifier_Token[] $tokens   Array-list of already tokenized tokens.
18502:      * @param bool $collect  Says whether or start and close are collected, set to
18503:      *                    false at first recursion because it's the implicit DIV
18504:      *                    tag you're dealing with.
18505:      * @return bool if the token needs an endtoken
18506:      * @todo data and tagName properties don't seem to exist in DOMNode?
18507:      */
18508:     protected function createStartNode($node, &$tokens, $collect)
18509:     {
18510:         // intercept non element nodes. WE MUST catch all of them,
18511:         // but we're not getting the character reference nodes because
18512:         // those should have been preprocessed
18513:         if ($node->nodeType === XML_TEXT_NODE) {
18514:             $tokens[] = $this->factory->createText($node->data);
18515:             return false;
18516:         } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
18517:             // undo libxml's special treatment of <script> and <style> tags
18518:             $last = end($tokens);
18519:             $data = $node->data;
18520:             // (note $node->tagname is already normalized)
18521:             if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
18522:                 $new_data = trim($data);
18523:                 if (substr($new_data, 0, 4) === '<!--') {
18524:                     $data = substr($new_data, 4);
18525:                     if (substr($data, -3) === '-->') {
18526:                         $data = substr($data, 0, -3);
18527:                     } else {
18528:                         // Highly suspicious! Not sure what to do...
18529:                     }
18530:                 }
18531:             }
18532:             $tokens[] = $this->factory->createText($this->parseData($data));
18533:             return false;
18534:         } elseif ($node->nodeType === XML_COMMENT_NODE) {
18535:             // this is code is only invoked for comments in script/style in versions
18536:             // of libxml pre-2.6.28 (regular comments, of course, are still
18537:             // handled regularly)
18538:             $tokens[] = $this->factory->createComment($node->data);
18539:             return false;
18540:         } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
18541:             // not-well tested: there may be other nodes we have to grab
18542:             return false;
18543:         }
18544: 
18545:         $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
18546: 
18547:         // We still have to make sure that the element actually IS empty
18548:         if (!$node->childNodes->length) {
18549:             if ($collect) {
18550:                 $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
18551:             }
18552:             return false;
18553:         } else {
18554:             if ($collect) {
18555:                 $tokens[] = $this->factory->createStart(
18556:                     $tag_name = $node->tagName, // somehow, it get's dropped
18557:                     $attr
18558:                 );
18559:             }
18560:             return true;
18561:         }
18562:     }
18563: 
18564:     /**
18565:      * @param DOMNode $node
18566:      * @param HTMLPurifier_Token[] $tokens
18567:      */
18568:     protected function createEndNode($node, &$tokens)
18569:     {
18570:         $tokens[] = $this->factory->createEnd($node->tagName);
18571:     }
18572: 
18573: 
18574:     /**
18575:      * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
18576:      *
18577:      * @param DOMNamedNodeMap $node_map DOMNamedNodeMap of DOMAttr objects.
18578:      * @return array Associative array of attributes.
18579:      */
18580:     protected function transformAttrToAssoc($node_map)
18581:     {
18582:         // NamedNodeMap is documented very well, so we're using undocumented
18583:         // features, namely, the fact that it implements Iterator and
18584:         // has a ->length attribute
18585:         if ($node_map->length === 0) {
18586:             return array();
18587:         }
18588:         $array = array();
18589:         foreach ($node_map as $attr) {
18590:             $array[$attr->name] = $attr->value;
18591:         }
18592:         return $array;
18593:     }
18594: 
18595:     /**
18596:      * An error handler that mutes all errors
18597:      * @param int $errno
18598:      * @param string $errstr
18599:      */
18600:     public function muteErrorHandler($errno, $errstr)
18601:     {
18602:     }
18603: 
18604:     /**
18605:      * Callback function for undoing escaping of stray angled brackets
18606:      * in comments
18607:      * @param array $matches
18608:      * @return string
18609:      */
18610:     public function callbackUndoCommentSubst($matches)
18611:     {
18612:         return '<!--' . strtr($matches[1], array('&amp;' => '&', '&lt;' => '<')) . $matches[2];
18613:     }
18614: 
18615:     /**
18616:      * Callback function that entity-izes ampersands in comments so that
18617:      * callbackUndoCommentSubst doesn't clobber them
18618:      * @param array $matches
18619:      * @return string
18620:      */
18621:     public function callbackArmorCommentEntities($matches)
18622:     {
18623:         return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
18624:     }
18625: 
18626:     /**
18627:      * Wraps an HTML fragment in the necessary HTML
18628:      * @param string $html
18629:      * @param HTMLPurifier_Config $config
18630:      * @param HTMLPurifier_Context $context
18631:      * @return string
18632:      */
18633:     protected function wrapHTML($html, $config, $context)
18634:     {
18635:         $def = $config->getDefinition('HTML');
18636:         $ret = '';
18637: 
18638:         if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
18639:             $ret .= '<!DOCTYPE html ';
18640:             if (!empty($def->doctype->dtdPublic)) {
18641:                 $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" ';
18642:             }
18643:             if (!empty($def->doctype->dtdSystem)) {
18644:                 $ret .= '"' . $def->doctype->dtdSystem . '" ';
18645:             }
18646:             $ret .= '>';
18647:         }
18648: 
18649:         $ret .= '<html><head>';
18650:         $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
18651:         // No protection if $html contains a stray </div>!
18652:         $ret .= '</head><body><div>' . $html . '</div></body></html>';
18653:         return $ret;
18654:     }
18655: }
18656: 
18657: 
18658: 
18659: 
18660: 
18661: /**
18662:  * Our in-house implementation of a parser.
18663:  *
18664:  * A pure PHP parser, DirectLex has absolutely no dependencies, making
18665:  * it a reasonably good default for PHP4.  Written with efficiency in mind,
18666:  * it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
18667:  * pales in comparison to HTMLPurifier_Lexer_DOMLex.
18668:  *
18669:  * @todo Reread XML spec and document differences.
18670:  */
18671: class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
18672: {
18673:     /**
18674:      * @type bool
18675:      */
18676:     public $tracksLineNumbers = true;
18677: 
18678:     /**
18679:      * Whitespace characters for str(c)spn.
18680:      * @type string
18681:      */
18682:     protected $_whitespace = "\x20\x09\x0D\x0A";
18683: 
18684:     /**
18685:      * Callback function for script CDATA fudge
18686:      * @param array $matches, in form of array(opening tag, contents, closing tag)
18687:      * @return string
18688:      */
18689:     protected function scriptCallback($matches)
18690:     {
18691:         return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3];
18692:     }
18693: 
18694:     /**
18695:      * @param String $html
18696:      * @param HTMLPurifier_Config $config
18697:      * @param HTMLPurifier_Context $context
18698:      * @return array|HTMLPurifier_Token[]
18699:      */
18700:     public function tokenizeHTML($html, $config, $context)
18701:     {
18702:         // special normalization for script tags without any armor
18703:         // our "armor" heurstic is a < sign any number of whitespaces after
18704:         // the first script tag
18705:         if ($config->get('HTML.Trusted')) {
18706:             $html = preg_replace_callback(
18707:                 '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
18708:                 array($this, 'scriptCallback'),
18709:                 $html
18710:             );
18711:         }
18712: 
18713:         $html = $this->normalize($html, $config, $context);
18714: 
18715:         $cursor = 0; // our location in the text
18716:         $inside_tag = false; // whether or not we're parsing the inside of a tag
18717:         $array = array(); // result array
18718: 
18719:         // This is also treated to mean maintain *column* numbers too
18720:         $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
18721: 
18722:         if ($maintain_line_numbers === null) {
18723:             // automatically determine line numbering by checking
18724:             // if error collection is on
18725:             $maintain_line_numbers = $config->get('Core.CollectErrors');
18726:         }
18727: 
18728:         if ($maintain_line_numbers) {
18729:             $current_line = 1;
18730:             $current_col = 0;
18731:             $length = strlen($html);
18732:         } else {
18733:             $current_line = false;
18734:             $current_col = false;
18735:             $length = false;
18736:         }
18737:         $context->register('CurrentLine', $current_line);
18738:         $context->register('CurrentCol', $current_col);
18739:         $nl = "\n";
18740:         // how often to manually recalculate. This will ALWAYS be right,
18741:         // but it's pretty wasteful. Set to 0 to turn off
18742:         $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
18743: 
18744:         $e = false;
18745:         if ($config->get('Core.CollectErrors')) {
18746:             $e =& $context->get('ErrorCollector');
18747:         }
18748: 
18749:         // for testing synchronization
18750:         $loops = 0;
18751: 
18752:         while (++$loops) {
18753:             // $cursor is either at the start of a token, or inside of
18754:             // a tag (i.e. there was a < immediately before it), as indicated
18755:             // by $inside_tag
18756: 
18757:             if ($maintain_line_numbers) {
18758:                 // $rcursor, however, is always at the start of a token.
18759:                 $rcursor = $cursor - (int)$inside_tag;
18760: 
18761:                 // Column number is cheap, so we calculate it every round.
18762:                 // We're interested at the *end* of the newline string, so
18763:                 // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
18764:                 // from our "rcursor" position.
18765:                 $nl_pos = strrpos($html, $nl, $rcursor - $length);
18766:                 $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
18767: 
18768:                 // recalculate lines
18769:                 if ($synchronize_interval && // synchronization is on
18770:                     $cursor > 0 && // cursor is further than zero
18771:                     $loops % $synchronize_interval === 0) { // time to synchronize!
18772:                     $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
18773:                 }
18774:             }
18775: 
18776:             $position_next_lt = strpos($html, '<', $cursor);
18777:             $position_next_gt = strpos($html, '>', $cursor);
18778: 
18779:             // triggers on "<b>asdf</b>" but not "asdf <b></b>"
18780:             // special case to set up context
18781:             if ($position_next_lt === $cursor) {
18782:                 $inside_tag = true;
18783:                 $cursor++;
18784:             }
18785: 
18786:             if (!$inside_tag && $position_next_lt !== false) {
18787:                 // We are not inside tag and there still is another tag to parse
18788:                 $token = new
18789:                 HTMLPurifier_Token_Text(
18790:                     $this->parseData(
18791:                         substr(
18792:                             $html,
18793:                             $cursor,
18794:                             $position_next_lt - $cursor
18795:                         )
18796:                     )
18797:                 );
18798:                 if ($maintain_line_numbers) {
18799:                     $token->rawPosition($current_line, $current_col);
18800:                     $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
18801:                 }
18802:                 $array[] = $token;
18803:                 $cursor = $position_next_lt + 1;
18804:                 $inside_tag = true;
18805:                 continue;
18806:             } elseif (!$inside_tag) {
18807:                 // We are not inside tag but there are no more tags
18808:                 // If we're already at the end, break
18809:                 if ($cursor === strlen($html)) {
18810:                     break;
18811:                 }
18812:                 // Create Text of rest of string
18813:                 $token = new
18814:                 HTMLPurifier_Token_Text(
18815:                     $this->parseData(
18816:                         substr(
18817:                             $html,
18818:                             $cursor
18819:                         )
18820:                     )
18821:                 );
18822:                 if ($maintain_line_numbers) {
18823:                     $token->rawPosition($current_line, $current_col);
18824:                 }
18825:                 $array[] = $token;
18826:                 break;
18827:             } elseif ($inside_tag && $position_next_gt !== false) {
18828:                 // We are in tag and it is well formed
18829:                 // Grab the internals of the tag
18830:                 $strlen_segment = $position_next_gt - $cursor;
18831: 
18832:                 if ($strlen_segment < 1) {
18833:                     // there's nothing to process!
18834:                     $token = new HTMLPurifier_Token_Text('<');
18835:                     $cursor++;
18836:                     continue;
18837:                 }
18838: 
18839:                 $segment = substr($html, $cursor, $strlen_segment);
18840: 
18841:                 if ($segment === false) {
18842:                     // somehow, we attempted to access beyond the end of
18843:                     // the string, defense-in-depth, reported by Nate Abele
18844:                     break;
18845:                 }
18846: 
18847:                 // Check if it's a comment
18848:                 if (substr($segment, 0, 3) === '!--') {
18849:                     // re-determine segment length, looking for -->
18850:                     $position_comment_end = strpos($html, '-->', $cursor);
18851:                     if ($position_comment_end === false) {
18852:                         // uh oh, we have a comment that extends to
18853:                         // infinity. Can't be helped: set comment
18854:                         // end position to end of string
18855:                         if ($e) {
18856:                             $e->send(E_WARNING, 'Lexer: Unclosed comment');
18857:                         }
18858:                         $position_comment_end = strlen($html);
18859:                         $end = true;
18860:                     } else {
18861:                         $end = false;
18862:                     }
18863:                     $strlen_segment = $position_comment_end - $cursor;
18864:                     $segment = substr($html, $cursor, $strlen_segment);
18865:                     $token = new
18866:                     HTMLPurifier_Token_Comment(
18867:                         substr(
18868:                             $segment,
18869:                             3,
18870:                             $strlen_segment - 3
18871:                         )
18872:                     );
18873:                     if ($maintain_line_numbers) {
18874:                         $token->rawPosition($current_line, $current_col);
18875:                         $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
18876:                     }
18877:                     $array[] = $token;
18878:                     $cursor = $end ? $position_comment_end : $position_comment_end + 3;
18879:                     $inside_tag = false;
18880:                     continue;
18881:                 }
18882: 
18883:                 // Check if it's an end tag
18884:                 $is_end_tag = (strpos($segment, '/') === 0);
18885:                 if ($is_end_tag) {
18886:                     $type = substr($segment, 1);
18887:                     $token = new HTMLPurifier_Token_End($type);
18888:                     if ($maintain_line_numbers) {
18889:                         $token->rawPosition($current_line, $current_col);
18890:                         $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
18891:                     }
18892:                     $array[] = $token;
18893:                     $inside_tag = false;
18894:                     $cursor = $position_next_gt + 1;
18895:                     continue;
18896:                 }
18897: 
18898:                 // Check leading character is alnum, if not, we may
18899:                 // have accidently grabbed an emoticon. Translate into
18900:                 // text and go our merry way
18901:                 if (!ctype_alpha($segment[0])) {
18902:                     // XML:  $segment[0] !== '_' && $segment[0] !== ':'
18903:                     if ($e) {
18904:                         $e->send(E_NOTICE, 'Lexer: Unescaped lt');
18905:                     }
18906:                     $token = new HTMLPurifier_Token_Text('<');
18907:                     if ($maintain_line_numbers) {
18908:                         $token->rawPosition($current_line, $current_col);
18909:                         $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
18910:                     }
18911:                     $array[] = $token;
18912:                     $inside_tag = false;
18913:                     continue;
18914:                 }
18915: 
18916:                 // Check if it is explicitly self closing, if so, remove
18917:                 // trailing slash. Remember, we could have a tag like <br>, so
18918:                 // any later token processing scripts must convert improperly
18919:                 // classified EmptyTags from StartTags.
18920:                 $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1);
18921:                 if ($is_self_closing) {
18922:                     $strlen_segment--;
18923:                     $segment = substr($segment, 0, $strlen_segment);
18924:                 }
18925: 
18926:                 // Check if there are any attributes
18927:                 $position_first_space = strcspn($segment, $this->_whitespace);
18928: 
18929:                 if ($position_first_space >= $strlen_segment) {
18930:                     if ($is_self_closing) {
18931:                         $token = new HTMLPurifier_Token_Empty($segment);
18932:                     } else {
18933:                         $token = new HTMLPurifier_Token_Start($segment);
18934:                     }
18935:                     if ($maintain_line_numbers) {
18936:                         $token->rawPosition($current_line, $current_col);
18937:                         $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
18938:                     }
18939:                     $array[] = $token;
18940:                     $inside_tag = false;
18941:                     $cursor = $position_next_gt + 1;
18942:                     continue;
18943:                 }
18944: 
18945:                 // Grab out all the data
18946:                 $type = substr($segment, 0, $position_first_space);
18947:                 $attribute_string =
18948:                     trim(
18949:                         substr(
18950:                             $segment,
18951:                             $position_first_space
18952:                         )
18953:                     );
18954:                 if ($attribute_string) {
18955:                     $attr = $this->parseAttributeString(
18956:                         $attribute_string,
18957:                         $config,
18958:                         $context
18959:                     );
18960:                 } else {
18961:                     $attr = array();
18962:                 }
18963: 
18964:                 if ($is_self_closing) {
18965:                     $token = new HTMLPurifier_Token_Empty($type, $attr);
18966:                 } else {
18967:                     $token = new HTMLPurifier_Token_Start($type, $attr);
18968:                 }
18969:                 if ($maintain_line_numbers) {
18970:                     $token->rawPosition($current_line, $current_col);
18971:                     $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
18972:                 }
18973:                 $array[] = $token;
18974:                 $cursor = $position_next_gt + 1;
18975:                 $inside_tag = false;
18976:                 continue;
18977:             } else {
18978:                 // inside tag, but there's no ending > sign
18979:                 if ($e) {
18980:                     $e->send(E_WARNING, 'Lexer: Missing gt');
18981:                 }
18982:                 $token = new
18983:                 HTMLPurifier_Token_Text(
18984:                     '<' .
18985:                     $this->parseData(
18986:                         substr($html, $cursor)
18987:                     )
18988:                 );
18989:                 if ($maintain_line_numbers) {
18990:                     $token->rawPosition($current_line, $current_col);
18991:                 }
18992:                 // no cursor scroll? Hmm...
18993:                 $array[] = $token;
18994:                 break;
18995:             }
18996:             break;
18997:         }
18998: 
18999:         $context->destroy('CurrentLine');
19000:         $context->destroy('CurrentCol');
19001:         return $array;
19002:     }
19003: 
19004:     /**
19005:      * PHP 5.0.x compatible substr_count that implements offset and length
19006:      * @param string $haystack
19007:      * @param string $needle
19008:      * @param int $offset
19009:      * @param int $length
19010:      * @return int
19011:      */
19012:     protected function substrCount($haystack, $needle, $offset, $length)
19013:     {
19014:         static $oldVersion;
19015:         if ($oldVersion === null) {
19016:             $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
19017:         }
19018:         if ($oldVersion) {
19019:             $haystack = substr($haystack, $offset, $length);
19020:             return substr_count($haystack, $needle);
19021:         } else {
19022:             return substr_count($haystack, $needle, $offset, $length);
19023:         }
19024:     }
19025: 
19026:     /**
19027:      * Takes the inside of an HTML tag and makes an assoc array of attributes.
19028:      *
19029:      * @param string $string Inside of tag excluding name.
19030:      * @param HTMLPurifier_Config $config
19031:      * @param HTMLPurifier_Context $context
19032:      * @return array Assoc array of attributes.
19033:      */
19034:     public function parseAttributeString($string, $config, $context)
19035:     {
19036:         $string = (string)$string; // quick typecast
19037: 
19038:         if ($string == '') {
19039:             return array();
19040:         } // no attributes
19041: 
19042:         $e = false;
19043:         if ($config->get('Core.CollectErrors')) {
19044:             $e =& $context->get('ErrorCollector');
19045:         }
19046: 
19047:         // let's see if we can abort as quickly as possible
19048:         // one equal sign, no spaces => one attribute
19049:         $num_equal = substr_count($string, '=');
19050:         $has_space = strpos($string, ' ');
19051:         if ($num_equal === 0 && !$has_space) {
19052:             // bool attribute
19053:             return array($string => $string);
19054:         } elseif ($num_equal === 1 && !$has_space) {
19055:             // only one attribute
19056:             list($key, $quoted_value) = explode('=', $string);
19057:             $quoted_value = trim($quoted_value);
19058:             if (!$key) {
19059:                 if ($e) {
19060:                     $e->send(E_ERROR, 'Lexer: Missing attribute key');
19061:                 }
19062:                 return array();
19063:             }
19064:             if (!$quoted_value) {
19065:                 return array($key => '');
19066:             }
19067:             $first_char = @$quoted_value[0];
19068:             $last_char = @$quoted_value[strlen($quoted_value) - 1];
19069: 
19070:             $same_quote = ($first_char == $last_char);
19071:             $open_quote = ($first_char == '"' || $first_char == "'");
19072: 
19073:             if ($same_quote && $open_quote) {
19074:                 // well behaved
19075:                 $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
19076:             } else {
19077:                 // not well behaved
19078:                 if ($open_quote) {
19079:                     if ($e) {
19080:                         $e->send(E_ERROR, 'Lexer: Missing end quote');
19081:                     }
19082:                     $value = substr($quoted_value, 1);
19083:                 } else {
19084:                     $value = $quoted_value;
19085:                 }
19086:             }
19087:             if ($value === false) {
19088:                 $value = '';
19089:             }
19090:             return array($key => $this->parseData($value));
19091:         }
19092: 
19093:         // setup loop environment
19094:         $array = array(); // return assoc array of attributes
19095:         $cursor = 0; // current position in string (moves forward)
19096:         $size = strlen($string); // size of the string (stays the same)
19097: 
19098:         // if we have unquoted attributes, the parser expects a terminating
19099:         // space, so let's guarantee that there's always a terminating space.
19100:         $string .= ' ';
19101: 
19102:         $old_cursor = -1;
19103:         while ($cursor < $size) {
19104:             if ($old_cursor >= $cursor) {
19105:                 throw new Exception("Infinite loop detected");
19106:             }
19107:             $old_cursor = $cursor;
19108: 
19109:             $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
19110:             // grab the key
19111: 
19112:             $key_begin = $cursor; //we're currently at the start of the key
19113: 
19114:             // scroll past all characters that are the key (not whitespace or =)
19115:             $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
19116: 
19117:             $key_end = $cursor; // now at the end of the key
19118: 
19119:             $key = substr($string, $key_begin, $key_end - $key_begin);
19120: 
19121:             if (!$key) {
19122:                 if ($e) {
19123:                     $e->send(E_ERROR, 'Lexer: Missing attribute key');
19124:                 }
19125:                 $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
19126:                 continue; // empty key
19127:             }
19128: 
19129:             // scroll past all whitespace
19130:             $cursor += strspn($string, $this->_whitespace, $cursor);
19131: 
19132:             if ($cursor >= $size) {
19133:                 $array[$key] = $key;
19134:                 break;
19135:             }
19136: 
19137:             // if the next character is an equal sign, we've got a regular
19138:             // pair, otherwise, it's a bool attribute
19139:             $first_char = @$string[$cursor];
19140: 
19141:             if ($first_char == '=') {
19142:                 // key="value"
19143: 
19144:                 $cursor++;
19145:                 $cursor += strspn($string, $this->_whitespace, $cursor);
19146: 
19147:                 if ($cursor === false) {
19148:                     $array[$key] = '';
19149:                     break;
19150:                 }
19151: 
19152:                 // we might be in front of a quote right now
19153: 
19154:                 $char = @$string[$cursor];
19155: 
19156:                 if ($char == '"' || $char == "'") {
19157:                     // it's quoted, end bound is $char
19158:                     $cursor++;
19159:                     $value_begin = $cursor;
19160:                     $cursor = strpos($string, $char, $cursor);
19161:                     $value_end = $cursor;
19162:                 } else {
19163:                     // it's not quoted, end bound is whitespace
19164:                     $value_begin = $cursor;
19165:                     $cursor += strcspn($string, $this->_whitespace, $cursor);
19166:                     $value_end = $cursor;
19167:                 }
19168: 
19169:                 // we reached a premature end
19170:                 if ($cursor === false) {
19171:                     $cursor = $size;
19172:                     $value_end = $cursor;
19173:                 }
19174: 
19175:                 $value = substr($string, $value_begin, $value_end - $value_begin);
19176:                 if ($value === false) {
19177:                     $value = '';
19178:                 }
19179:                 $array[$key] = $this->parseData($value);
19180:                 $cursor++;
19181:             } else {
19182:                 // boolattr
19183:                 if ($key !== '') {
19184:                     $array[$key] = $key;
19185:                 } else {
19186:                     // purely theoretical
19187:                     if ($e) {
19188:                         $e->send(E_ERROR, 'Lexer: Missing attribute key');
19189:                     }
19190:                 }
19191:             }
19192:         }
19193:         return $array;
19194:     }
19195: }
19196: 
19197: 
19198: 
19199: 
19200: 
/**
 * Tree node that represents an HTML comment.
 */
class HTMLPurifier_Node_Comment extends HTMLPurifier_Node
{
    /**
     * Text carried by the comment.
     * @type string
     */
    public $data;

    /**
     * Comment nodes are always flagged as whitespace.
     * @type bool
     */
    public $is_whitespace = true;

    /**
     * Stores the given values on the node without transforming them.
     *
     * @param string $data String comment data.
     * @param int $line Stored as-is in $this->line.
     * @param int $col Stored as-is in $this->col.
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->col = $col;
        $this->line = $line;
        $this->data = $data;
    }

    /**
     * Builds the token representation of this node.
     *
     * @return array Two-element array: a HTMLPurifier_Token_Comment built
     *               from this node's data, line and col, followed by null.
     */
    public function toTokenPair()
    {
        $comment = new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col);
        return array($comment, null);
    }
}
19235: 
19236: 
19237: 
/**
 * Tree node that represents an HTML element.
 */
class HTMLPurifier_Node_Element extends HTMLPurifier_Node
{
    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * @type string
     */
    public $name;

    /**
     * Associative array of the node's attributes.
     * @type array
     */
    public $attr = array();

    /**
     * List of child elements.
     * @type array
     */
    public $children = array();

    /**
     * When true, toTokenPair() emits a single HTMLPurifier_Token_Empty;
     * when false, it emits a start/end token pair.
     * @type bool
     */
    public $empty = false;

    /**
     * Column passed to the end token built by toTokenPair().
     * @type int|null
     */
    public $endCol = null;

    /**
     * Line passed to the end token built by toTokenPair().
     * @type int|null
     */
    public $endLine = null;

    /**
     * Armor passed to the end token built by toTokenPair().
     * @type array
     */
    public $endArmor = array();

    /**
     * Stores the given values on the node without transforming them.
     *
     * @param string $name Tag name.
     * @param array $attr Associative array of attributes.
     * @param int $line Stored as-is in $this->line.
     * @param int $col Stored as-is in $this->col.
     * @param array $armor Stored as-is in $this->armor.
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->armor = $armor;
        $this->col = $col;
        $this->line = $line;
        $this->attr = $attr;
        $this->name = $name;
    }

    /**
     * Builds the token representation of this node.
     *
     * @return array Two-element array: either (start token, end token) or,
     *               for an empty element, (empty token, null).
     */
    public function toTokenPair()
    {
        // XXX inefficiency here, normalization is not necessary
        if (!$this->empty) {
            $open = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
            $close = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
            return array($open, $close);
        }

        $single = new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor);
        return array($single, null);
    }
}
19294: 
19295: 
19296: 
19297: 
/**
 * Tree node that represents text.
 *
 * Text comprises regular parsed character data (PCDATA) and raw
 * character data (from CDATA sections). Internally, the data is held
 * with all entities expanded. The node still carries a "tag name",
 * #PCDATA, which is how the DTD refers to text in permissible child nodes.
 */
class HTMLPurifier_Node_Text extends HTMLPurifier_Node
{

    /**
     * PCDATA tag name compatible with DTD, see
     * HTMLPurifier_ChildDef_Custom for details.
     * @type string
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     * @type string
     */
    public $data;

    /**
     * Bool indicating if node is whitespace.
     * @type bool
     */
    public $is_whitespace;

    /**
     * Stores the given values on the node; the whitespace flag is taken
     * from the caller and is not computed here.
     *
     * @param string $data String parsed character data.
     * @param bool $is_whitespace Whether the caller considers the data whitespace.
     * @param int $line Stored as-is in $this->line.
     * @param int $col Stored as-is in $this->col.
     */
    public function __construct($data, $is_whitespace, $line = null, $col = null)
    {
        $this->col = $col;
        $this->line = $line;
        $this->is_whitespace = $is_whitespace;
        $this->data = $data;
    }

    /**
     * Builds the token representation of this node.
     *
     * @return array Two-element array: a HTMLPurifier_Token_Text built
     *               from this node's data, line and col, followed by null.
     */
    public function toTokenPair()
    {
        $text = new HTMLPurifier_Token_Text($this->data, $this->line, $this->col);
        return array($text, null);
    }
}
19348: 
19349: 
19350: 
19351: 
19352: 
/**
 * Strategy that delegates to an ordered list of other strategies.
 */
abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
{

    /**
     * Strategies applied by execute(), in array order.
     * @type HTMLPurifier_Strategy[]
     */
    protected $strategies = array();

    /**
     * Feeds the tokens through every registered strategy in turn; each
     * strategy receives the output of the one before it.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] Output of the last strategy, or the
     *                              input unchanged when none are registered.
     */
    public function execute($tokens, $config, $context)
    {
        $stream = $tokens;
        foreach ($this->strategies as $stage) {
            $stream = $stage->execute($stream, $config, $context);
        }
        return $stream;
    }
}
19379: 
19380: 
19381: 
19382: 
19383: 
/**
 * Composite strategy made of the four core strategies, in fixed order.
 */
class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
{
    /**
     * Appends the four core strategies to the strategy list. The order
     * below is the order in which they will be executed.
     */
    public function __construct()
    {
        $pipeline = array(
            new HTMLPurifier_Strategy_RemoveForeignElements(),
            new HTMLPurifier_Strategy_MakeWellFormed(),
            new HTMLPurifier_Strategy_FixNesting(),
            new HTMLPurifier_Strategy_ValidateAttributes(),
        );
        foreach ($pipeline as $stage) {
            $this->strategies[] = $stage;
        }
    }
}
19397: 
19398: 
19399: 
19400: 
19401: 
/**
 * Takes a well formed list of tokens and fixes their nesting.
 *
 * HTML elements dictate which elements are allowed to be their children,
 * for example, you can't have a p tag in a span tag.  Other elements have
 * much more rigorous definitions: tables, for instance, require a specific
 * order for their elements.  There are also constraints not expressible by
 * document type definitions, such as the chameleon nature of ins/del
 * tags and global child exclusions.
 *
 * The first major objective of this strategy is to iterate through all
 * the nodes and determine whether or not their children conform to the
 * element's definition.  If they do not, the child definition may
 * optionally supply an amended list of elements that is valid or
 * require that the entire node be deleted (and the previous node
 * rescanned).
 *
 * The second objective is to ensure that explicitly excluded elements of
 * an element do not appear in its children.  Code that accomplishes this
 * task is pervasive through the strategy, though the two are distinct tasks
 * and could, theoretically, be separated (although it's not recommended).
 *
 * @note Whether or not unrecognized children are silently dropped or
 *       translated into text depends on the child definitions.
 *
 * @todo Enable nodes to be bubbled out of the structure.  This is
 *       easier with our new algorithm.
 */
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{

    /**
     * Converts the tokens to a tree, validates every node's children
     * in post-order (marking invalid or excluded nodes as dead), and
     * flattens the tree back into a token list.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {

        //####################################################################//
        // Pre-processing

        // O(n) pass to convert to a tree, so that we can efficiently
        // refer to substrings
        $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);

        // get a copy of the HTML definition
        $definition = $config->getHTMLDefinition();
        $parent_def = $definition->info_parent_def;

        $excludes_enabled = !$config->get('Core.DisableExcludes');

        // setup the context variable 'IsInline', for chameleon processing.
        // It starts out as the parent definition's setting and is then
        // overwritten with the value stored in each stack frame as that
        // frame is popped.
        // NOTE(review): $is_inline, $node and $token are handed to
        // $context->register(); presumably that binds them by reference,
        // so these locals must keep their names and be reassigned in
        // place -- confirm against HTMLPurifier_Context before renaming.
        $is_inline = $parent_def->descendants_are_inline;
        $context->register('IsInline', $is_inline);

        // setup error collector
        $e =& $context->get('ErrorCollector', true);

        //####################################################################//
        // Loop initialization

        // variable that contains the start token while we are processing
        // nodes. This enables error reporting to do its job
        $node = $top_node;
        // dummy token
        list($token, $d) = $node->toTokenPair();
        $context->register('CurrentNode', $node);
        $context->register('CurrentToken', $token);

        //####################################################################//
        // Loop

        // We need to implement a post-order traversal iteratively, to
        // avoid running into stack space limits.  This is pretty tricky
        // to reason about, so we just manually stack-ify the recursive
        // variant:
        //
        //  function f($node) {
        //      foreach ($node->children as $child) {
        //          f($child);
        //      }
        //      validate($node);
        //  }
        //
        // A stack frame is array($node, $is_inline, $excludes, $ix):
        // the node being visited, the inline flag in effect for it, the
        // exclusions in effect for it, and the index of the next child
        // that has not been descended into yet.

        $stack = array(
            array(
                $top_node,
                $parent_def->descendants_are_inline,
                $parent_def->excludes, // exclusions
                0
            )
        );

        while (!empty($stack)) {
            list($node, $is_inline, $excludes, $ix) = array_pop($stack);

            // the stack is empty after the pop only for the root frame,
            // whose definition is the parent definition
            $def = empty($stack) ? $parent_def : $definition->info[$node->name];

            // recursive call: descend into the next unvisited element child
            $go = false;
            while (isset($node->children[$ix])) {
                $child = $node->children[$ix++];
                if ($child instanceof HTMLPurifier_Node_Element) {
                    $go = true;
                    // resume this node later, starting after $child
                    $stack[] = array($node, $is_inline, $excludes, $ix);
                    $stack[] = array(
                        $child,
                        // ToDo: I don't think it matters if it's def or
                        // child_def, but double check this...
                        $is_inline || $def->descendants_are_inline,
                        empty($def->excludes) ? $excludes
                                              : array_merge($excludes, $def->excludes),
                        0
                    );
                    break;
                }
            }
            if ($go) {
                continue;
            }

            // refresh the current token for error reporting
            list($token, $d) = $node->toTokenPair();

            // base case
            if ($excludes_enabled && isset($excludes[$node->name])) {
                $node->dead = true;
                if ($e) {
                    $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
                }
            } else {
                // XXX I suppose it would be slightly more efficient to
                // avoid the allocation here and have children
                // strategies handle it
                $children = array();
                foreach ($node->children as $child) {
                    if (!$child->dead) {
                        $children[] = $child;
                    }
                }
                $result = $def->child->validateChildren($children, $config, $context);
                if ($result === true) {
                    // children are valid as-is; only the dead ones are dropped
                    $node->children = $children;
                } elseif ($result === false) {
                    $node->dead = true;
                    if ($e) {
                        $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
                    }
                } else {
                    // the child definition supplied a replacement list
                    $node->children = $result;
                    if ($e) {
                        // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
                        if (empty($result) && !empty($children)) {
                            $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
                        } elseif ($result != $children) {
                            $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
                        }
                    }
                }
            }
        }

        //####################################################################//
        // Post-processing

        // remove context variables
        $context->destroy('IsInline');
        $context->destroy('CurrentNode');
        $context->destroy('CurrentToken');

        //####################################################################//
        // Return

        // the root frame is always the last one popped, so $node is the
        // root of the tree again at this point
        return HTMLPurifier_Arborize::flatten($node, $config, $context);
    }
}
19579: 
19580: 
19581: 
19582: 
19583: 
19584: /**
19585:  * Takes tokens makes them well-formed (balance end tags, etc.)
19586:  *
19587:  * Specification of the armor attributes this strategy uses:
19588:  *
19589:  *      - MakeWellFormed_TagClosedError: This armor field is used to
19590:  *        suppress tag closed errors for certain tokens [TagClosedSuppress],
19591:  *        in particular, if a tag was generated automatically by HTML
19592:  *        Purifier, we may rely on our infrastructure to close it for us
19593:  *        and shouldn't report an error to the user [TagClosedAuto].
19594:  */
19595: class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
19596: {
19597: 
19598:     /**
19599:      * Array stream of tokens being processed.
19600:      * @type HTMLPurifier_Token[]
19601:      */
19602:     protected $tokens;
19603: 
19604:     /**
19605:      * Current token.
19606:      * @type HTMLPurifier_Token
19607:      */
19608:     protected $token;
19609: 
19610:     /**
19611:      * Zipper managing the true state.
19612:      * @type HTMLPurifier_Zipper
19613:      */
19614:     protected $zipper;
19615: 
19616:     /**
19617:      * Current nesting of elements.
19618:      * @type array
19619:      */
19620:     protected $stack;
19621: 
19622:     /**
19623:      * Injectors active in this stream processing.
19624:      * @type HTMLPurifier_Injector[]
19625:      */
19626:     protected $injectors;
19627: 
19628:     /**
19629:      * Current instance of HTMLPurifier_Config.
19630:      * @type HTMLPurifier_Config
19631:      */
19632:     protected $config;
19633: 
19634:     /**
19635:      * Current instance of HTMLPurifier_Context.
19636:      * @type HTMLPurifier_Context
19637:      */
19638:     protected $context;
19639: 
19640:     /**
19641:      * @param HTMLPurifier_Token[] $tokens
19642:      * @param HTMLPurifier_Config $config
19643:      * @param HTMLPurifier_Context $context
19644:      * @return HTMLPurifier_Token[]
19645:      * @throws HTMLPurifier_Exception
19646:      */
19647:     public function execute($tokens, $config, $context)
19648:     {
19649:         $definition = $config->getHTMLDefinition();
19650: 
19651:         // local variables
19652:         $generator = new HTMLPurifier_Generator($config, $context);
19653:         $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
19654:         // used for autoclose early abortion
19655:         $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
19656:         $e = $context->get('ErrorCollector', true);
19657:         $i = false; // injector index
19658:         list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
19659:         if ($token === NULL) {
19660:             return array();
19661:         }
19662:         $reprocess = false; // whether or not to reprocess the same token
19663:         $stack = array();
19664: 
19665:         // member variables
19666:         $this->stack =& $stack;
19667:         $this->tokens =& $tokens;
19668:         $this->token =& $token;
19669:         $this->zipper =& $zipper;
19670:         $this->config = $config;
19671:         $this->context = $context;
19672: 
19673:         // context variables
19674:         $context->register('CurrentNesting', $stack);
19675:         $context->register('InputZipper', $zipper);
19676:         $context->register('CurrentToken', $token);
19677: 
19678:         // -- begin INJECTOR --
19679: 
19680:         $this->injectors = array();
19681: 
19682:         $injectors = $config->getBatch('AutoFormat');
19683:         $def_injectors = $definition->info_injector;
19684:         $custom_injectors = $injectors['Custom'];
19685:         unset($injectors['Custom']); // special case
19686:         foreach ($injectors as $injector => $b) {
19687:             // XXX: Fix with a legitimate lookup table of enabled filters
19688:             if (strpos($injector, '.') !== false) {
19689:                 continue;
19690:             }
19691:             $injector = "HTMLPurifier_Injector_$injector";
19692:             if (!$b) {
19693:                 continue;
19694:             }
19695:             $this->injectors[] = new $injector;
19696:         }
19697:         foreach ($def_injectors as $injector) {
19698:             // assumed to be objects
19699:             $this->injectors[] = $injector;
19700:         }
19701:         foreach ($custom_injectors as $injector) {
19702:             if (!$injector) {
19703:                 continue;
19704:             }
19705:             if (is_string($injector)) {
19706:                 $injector = "HTMLPurifier_Injector_$injector";
19707:                 $injector = new $injector;
19708:             }
19709:             $this->injectors[] = $injector;
19710:         }
19711: 
19712:         // give the injectors references to the definition and context
19713:         // variables for performance reasons
19714:         foreach ($this->injectors as $ix => $injector) {
19715:             $error = $injector->prepare($config, $context);
19716:             if (!$error) {
19717:                 continue;
19718:             }
19719:             array_splice($this->injectors, $ix, 1); // rm the injector
19720:             trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
19721:         }
19722: 
19723:         // -- end INJECTOR --
19724: 
19725:         // a note on reprocessing:
19726:         //      In order to reduce code duplication, whenever some code needs
19727:         //      to make HTML changes in order to make things "correct", the
19728:         //      new HTML gets sent through the purifier, regardless of its
19729:         //      status. This means that if we add a start token, because it
19730:         //      was totally necessary, we don't have to update nesting; we just
19731:         //      punt ($reprocess = true; continue;) and it does that for us.
19732: 
19733:         // isset is in loop because $tokens size changes during loop exec
19734:         for (;;
19735:              // only increment if we don't need to reprocess
19736:              $reprocess ? $reprocess = false : $token = $zipper->next($token)) {
19737: 
19738:             // check for a rewind
19739:             if (is_int($i)) {
19740:                 // possibility: disable rewinding if the current token has a
19741:                 // rewind set on it already. This would offer protection from
19742:                 // infinite loop, but might hinder some advanced rewinding.
19743:                 $rewind_offset = $this->injectors[$i]->getRewindOffset();
19744:                 if (is_int($rewind_offset)) {
19745:                     for ($j = 0; $j < $rewind_offset; $j++) {
19746:                         if (empty($zipper->front)) break;
19747:                         $token = $zipper->prev($token);
19748:                         // indicate that other injectors should not process this token,
19749:                         // but we need to reprocess it
19750:                         unset($token->skip[$i]);
19751:                         $token->rewind = $i;
19752:                         if ($token instanceof HTMLPurifier_Token_Start) {
19753:                             array_pop($this->stack);
19754:                         } elseif ($token instanceof HTMLPurifier_Token_End) {
19755:                             $this->stack[] = $token->start;
19756:                         }
19757:                     }
19758:                 }
19759:                 $i = false;
19760:             }
19761: 
19762:             // handle case of document end
19763:             if ($token === NULL) {
19764:                 // kill processing if stack is empty
19765:                 if (empty($this->stack)) {
19766:                     break;
19767:                 }
19768: 
19769:                 // peek
19770:                 $top_nesting = array_pop($this->stack);
19771:                 $this->stack[] = $top_nesting;
19772: 
19773:                 // send error [TagClosedSuppress]
19774:                 if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
19775:                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
19776:                 }
19777: 
19778:                 // append, don't splice, since this is the end
19779:                 $token = new HTMLPurifier_Token_End($top_nesting->name);
19780: 
19781:                 // punt!
19782:                 $reprocess = true;
19783:                 continue;
19784:             }
19785: 
19786:             //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
19787:             //flush();
19788: 
19789:             // quick-check: if it's not a tag, no need to process
19790:             if (empty($token->is_tag)) {
19791:                 if ($token instanceof HTMLPurifier_Token_Text) {
19792:                     foreach ($this->injectors as $i => $injector) {
19793:                         if (isset($token->skip[$i])) {
19794:                             continue;
19795:                         }
19796:                         if ($token->rewind !== null && $token->rewind !== $i) {
19797:                             continue;
19798:                         }
19799:                         // XXX fuckup
19800:                         $r = $token;
19801:                         $injector->handleText($r);
19802:                         $token = $this->processToken($r, $i);
19803:                         $reprocess = true;
19804:                         break;
19805:                     }
19806:                 }
19807:                 // another possibility is a comment
19808:                 continue;
19809:             }
19810: 
19811:             if (isset($definition->info[$token->name])) {
19812:                 $type = $definition->info[$token->name]->child->type;
19813:             } else {
19814:                 $type = false; // Type is unknown, treat accordingly
19815:             }
19816: 
19817:             // quick tag checks: anything that's *not* an end tag
19818:             $ok = false;
19819:             if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
19820:                 // claims to be a start tag but is empty
19821:                 $token = new HTMLPurifier_Token_Empty(
19822:                     $token->name,
19823:                     $token->attr,
19824:                     $token->line,
19825:                     $token->col,
19826:                     $token->armor
19827:                 );
19828:                 $ok = true;
19829:             } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
19830:                 // claims to be empty but really is a start tag
19831:                 // NB: this assignment is required
19832:                 $old_token = $token;
19833:                 $token = new HTMLPurifier_Token_End($token->name);
19834:                 $token = $this->insertBefore(
19835:                     new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
19836:                 );
19837:                 // punt (since we had to modify the input stream in a non-trivial way)
19838:                 $reprocess = true;
19839:                 continue;
19840:             } elseif ($token instanceof HTMLPurifier_Token_Empty) {
19841:                 // real empty token
19842:                 $ok = true;
19843:             } elseif ($token instanceof HTMLPurifier_Token_Start) {
19844:                 // start tag
19845: 
19846:                 // ...unless they also have to close their parent
19847:                 if (!empty($this->stack)) {
19848: 
19849:                     // Performance note: you might think that it's rather
19850:                     // inefficient, recalculating the autoclose information
19851:                     // for every tag that a token closes (since when we
19852:                     // do an autoclose, we push a new token into the
19853:                     // stream and then /process/ that, before
19854:                     // re-processing this token.)  But this is
19855:                     // necessary, because an injector can make an
19856:                     // arbitrary transformations to the autoclosing
19857:                     // tokens we introduce, so things may have changed
19858:                     // in the meantime.  Also, doing the inefficient thing is
19859:                     // "easy" to reason about (for certain perverse definitions
19860:                     // of "easy")
19861: 
19862:                     $parent = array_pop($this->stack);
19863:                     $this->stack[] = $parent;
19864: 
19865:                     $parent_def = null;
19866:                     $parent_elements = null;
19867:                     $autoclose = false;
19868:                     if (isset($definition->info[$parent->name])) {
19869:                         $parent_def = $definition->info[$parent->name];
19870:                         $parent_elements = $parent_def->child->getAllowedElements($config);
19871:                         $autoclose = !isset($parent_elements[$token->name]);
19872:                     }
19873: 
19874:                     if ($autoclose && $definition->info[$token->name]->wrap) {
19875:                         // Check if an element can be wrapped by another
19876:                         // element to make it valid in a context (for
19877:                         // example, <ul><ul> needs a <li> in between)
19878:                         $wrapname = $definition->info[$token->name]->wrap;
19879:                         $wrapdef = $definition->info[$wrapname];
19880:                         $elements = $wrapdef->child->getAllowedElements($config);
19881:                         if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
19882:                             $newtoken = new HTMLPurifier_Token_Start($wrapname);
19883:                             $token = $this->insertBefore($newtoken);
19884:                             $reprocess = true;
19885:                             continue;
19886:                         }
19887:                     }
19888: 
19889:                     $carryover = false;
19890:                     if ($autoclose && $parent_def->formatting) {
19891:                         $carryover = true;
19892:                     }
19893: 
19894:                     if ($autoclose) {
19895:                         // check if this autoclose is doomed to fail
19896:                         // (this rechecks $parent, which is harmless)
19897:                         $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
19898:                         if (!$autoclose_ok) {
19899:                             foreach ($this->stack as $ancestor) {
19900:                                 $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
19901:                                 if (isset($elements[$token->name])) {
19902:                                     $autoclose_ok = true;
19903:                                     break;
19904:                                 }
19905:                                 if ($definition->info[$token->name]->wrap) {
19906:                                     $wrapname = $definition->info[$token->name]->wrap;
19907:                                     $wrapdef = $definition->info[$wrapname];
19908:                                     $wrap_elements = $wrapdef->child->getAllowedElements($config);
19909:                                     if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
19910:                                         $autoclose_ok = true;
19911:                                         break;
19912:                                     }
19913:                                 }
19914:                             }
19915:                         }
19916:                         if ($autoclose_ok) {
19917:                             // errors need to be updated
19918:                             $new_token = new HTMLPurifier_Token_End($parent->name);
19919:                             $new_token->start = $parent;
19920:                             // [TagClosedSuppress]
19921:                             if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
19922:                                 if (!$carryover) {
19923:                                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
19924:                                 } else {
19925:                                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
19926:                                 }
19927:                             }
19928:                             if ($carryover) {
19929:                                 $element = clone $parent;
19930:                                 // [TagClosedAuto]
19931:                                 $element->armor['MakeWellFormed_TagClosedError'] = true;
19932:                                 $element->carryover = true;
19933:                                 $token = $this->processToken(array($new_token, $token, $element));
19934:                             } else {
19935:                                 $token = $this->insertBefore($new_token);
19936:                             }
19937:                         } else {
19938:                             $token = $this->remove();
19939:                         }
19940:                         $reprocess = true;
19941:                         continue;
19942:                     }
19943: 
19944:                 }
19945:                 $ok = true;
19946:             }
19947: 
19948:             if ($ok) {
19949:                 foreach ($this->injectors as $i => $injector) {
19950:                     if (isset($token->skip[$i])) {
19951:                         continue;
19952:                     }
19953:                     if ($token->rewind !== null && $token->rewind !== $i) {
19954:                         continue;
19955:                     }
19956:                     $r = $token;
19957:                     $injector->handleElement($r);
19958:                     $token = $this->processToken($r, $i);
19959:                     $reprocess = true;
19960:                     break;
19961:                 }
19962:                 if (!$reprocess) {
19963:                     // ah, nothing interesting happened; do normal processing
19964:                     if ($token instanceof HTMLPurifier_Token_Start) {
19965:                         $this->stack[] = $token;
19966:                     } elseif ($token instanceof HTMLPurifier_Token_End) {
19967:                         throw new HTMLPurifier_Exception(
19968:                             'Improper handling of end tag in start code; possible error in MakeWellFormed'
19969:                         );
19970:                     }
19971:                 }
19972:                 continue;
19973:             }
19974: 
19975:             // sanity check: we should be dealing with a closing tag
19976:             if (!$token instanceof HTMLPurifier_Token_End) {
19977:                 throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
19978:             }
19979: 
19980:             // make sure that we have something open
19981:             if (empty($this->stack)) {
19982:                 if ($escape_invalid_tags) {
19983:                     if ($e) {
19984:                         $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
19985:                     }
19986:                     $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
19987:                 } else {
19988:                     if ($e) {
19989:                         $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
19990:                     }
19991:                     $token = $this->remove();
19992:                 }
19993:                 $reprocess = true;
19994:                 continue;
19995:             }
19996: 
19997:             // first, check for the simplest case: everything closes neatly.
19998:             // Eventually, everything passes through here; if there are problems
19999:             // we modify the input stream accordingly and then punt, so that
20000:             // the tokens get processed again.
20001:             $current_parent = array_pop($this->stack);
20002:             if ($current_parent->name == $token->name) {
20003:                 $token->start = $current_parent;
20004:                 foreach ($this->injectors as $i => $injector) {
20005:                     if (isset($token->skip[$i])) {
20006:                         continue;
20007:                     }
20008:                     if ($token->rewind !== null && $token->rewind !== $i) {
20009:                         continue;
20010:                     }
20011:                     $r = $token;
20012:                     $injector->handleEnd($r);
20013:                     $token = $this->processToken($r, $i);
20014:                     $this->stack[] = $current_parent;
20015:                     $reprocess = true;
20016:                     break;
20017:                 }
20018:                 continue;
20019:             }
20020: 
20021:             // okay, so we're trying to close the wrong tag
20022: 
20023:             // undo the previous pop
20024:             $this->stack[] = $current_parent;
20025: 
20026:             // scroll back the entire nest, trying to find our tag.
20027:             // (feature could be to specify how far you'd like to go)
20028:             $size = count($this->stack);
20029:             // -2 because -1 is the last element, but we already checked that
20030:             $skipped_tags = false;
20031:             for ($j = $size - 2; $j >= 0; $j--) {
20032:                 if ($this->stack[$j]->name == $token->name) {
20033:                     $skipped_tags = array_slice($this->stack, $j);
20034:                     break;
20035:                 }
20036:             }
20037: 
20038:             // we didn't find the tag, so remove
20039:             if ($skipped_tags === false) {
20040:                 if ($escape_invalid_tags) {
20041:                     if ($e) {
20042:                         $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
20043:                     }
20044:                     $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
20045:                 } else {
20046:                     if ($e) {
20047:                         $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
20048:                     }
20049:                     $token = $this->remove();
20050:                 }
20051:                 $reprocess = true;
20052:                 continue;
20053:             }
20054: 
20055:             // do errors, in REVERSE $j order: a,b,c with </a></b></c>
20056:             $c = count($skipped_tags);
20057:             if ($e) {
20058:                 for ($j = $c - 1; $j > 0; $j--) {
20059:                     // notice we exclude $j == 0, i.e. the current ending tag, from
20060:                     // the errors... [TagClosedSuppress]
20061:                     if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
20062:                         $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
20063:                     }
20064:                 }
20065:             }
20066: 
20067:             // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
20068:             $replace = array($token);
20069:             for ($j = 1; $j < $c; $j++) {
20070:                 // ...as well as from the insertions
20071:                 $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
20072:                 $new_token->start = $skipped_tags[$j];
20073:                 array_unshift($replace, $new_token);
20074:                 if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
20075:                     // [TagClosedAuto]
20076:                     $element = clone $skipped_tags[$j];
20077:                     $element->carryover = true;
20078:                     $element->armor['MakeWellFormed_TagClosedError'] = true;
20079:                     $replace[] = $element;
20080:                 }
20081:             }
20082:             $token = $this->processToken($replace);
20083:             $reprocess = true;
20084:             continue;
20085:         }
20086: 
20087:         $context->destroy('CurrentToken');
20088:         $context->destroy('CurrentNesting');
20089:         $context->destroy('InputZipper');
20090: 
20091:         unset($this->injectors, $this->stack, $this->tokens);
20092:         return $zipper->toArray($token);
20093:     }
20094: 
20095:     /**
20096:      * Processes arbitrary token values for complicated substitution patterns.
20097:      * In general:
20098:      *
20099:      * If $token is an array, it is a list of tokens to substitute for the
20100:      * current token. These tokens then get individually processed. If there
20101:      * is a leading integer in the list, that integer determines how many
20102:      * tokens from the stream should be removed.
20103:      *
20104:      * If $token is a regular token, it is swapped with the current token.
20105:      *
20106:      * If $token is false, the current token is deleted.
20107:      *
20108:      * If $token is an integer, that number of tokens (with the first token
20109:      * being the current one) will be deleted.
20110:      *
20111:      * @param HTMLPurifier_Token|array|int|bool $token Token substitution value
20112:      * @param HTMLPurifier_Injector|int $injector Injector that performed the substitution; defaults to -1
20113:      *        if this is not an injector-related operation.
20114:      * @throws HTMLPurifier_Exception
20115:      */
20116:     protected function processToken($token, $injector = -1)
20117:     {
20118:         // normalize forms of token
20119:         if (is_object($token)) {
20120:             $token = array(1, $token);
20121:         }
20122:         if (is_int($token)) {
20123:             $token = array($token);
20124:         }
20125:         if ($token === false) {
20126:             $token = array(1);
20127:         }
20128:         if (!is_array($token)) {
20129:             throw new HTMLPurifier_Exception('Invalid token type from injector');
20130:         }
20131:         if (!is_int($token[0])) {
20132:             array_unshift($token, 1);
20133:         }
20134:         if ($token[0] === 0) {
20135:             throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
20136:         }
20137: 
20138:         // $token is now an array with the following form:
20139:         // array(number nodes to delete, new node 1, new node 2, ...)
20140: 
20141:         $delete = array_shift($token);
20142:         list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
20143: 
20144:         if ($injector > -1) {
20145:             // determine appropriate skips
20146:             $oldskip = isset($old[0]) ? $old[0]->skip : array();
20147:             foreach ($token as $object) {
20148:                 $object->skip = $oldskip;
20149:                 $object->skip[$injector] = true;
20150:             }
20151:         }
20152: 
20153:         return $r;
20154: 
20155:     }
20156: 
20157:     /**
20158:      * Inserts a token before the current token. Cursor now points to
20159:      * this token.  You must reprocess after this.
20160:      * @param HTMLPurifier_Token $token
20161:      */
20162:     private function insertBefore($token)
20163:     {
20164:         // NB not $this->zipper->insertBefore(), due to positioning
20165:         // differences
20166:         $splice = $this->zipper->splice($this->token, 0, array($token));
20167: 
20168:         return $splice[1];
20169:     }
20170: 
20171:     /**
20172:      * Removes current token. Cursor now points to new token occupying previously
20173:      * occupied space.  You must reprocess after this.
20174:      */
20175:     private function remove()
20176:     {
20177:         return $this->zipper->delete();
20178:     }
20179: }
20180: 
20181: 
20182: 
20183: 
20184: 
20185: /**
20186:  * Removes all unrecognized tags from the list of tokens.
20187:  *
20188:  * This strategy iterates through all the tokens and removes unrecognized
20189:  * tokens. If a token is not recognized but a TagTransform is defined for
20190:  * that element, the element will be transformed accordingly.
20191:  */
20192: 
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Walks the token stream once and builds a new list holding only text
     * tokens, permitted comments, and tags known to the HTML definition.
     *
     * Tags with a registered tag transform are transformed first. Tags that
     * are still unknown afterwards are either converted to text (when
     * Core.EscapeInvalidTags is set) or dropped; for unknown tags listed in
     * Core.HiddenElements the tokens following the start tag are dropped too.
     * Known start/empty tags that declare required attributes are validated
     * here and dropped when a required attribute is missing.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        // comments are only inspected individually when an allow-list or an
        // allow-regexp has been configured
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // remove script contents compatibility: an explicit true/false for
        // Core.RemoveScriptContents overrides the 'script' entry of
        // Core.HiddenElements; any other value leaves the list untouched
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // while this holds a tag name, every token is dropped until a tag
        // token with that name is encountered
        $remove_until = false;

        // holds the name of the known hidden element we are currently inside;
        // while it is not false, comments are converted into text tokens
        $textify_comments = false;

        // the loop variable is registered with the context under
        // 'CurrentToken' before the loop starts
        $token = false;
        $context->register('CurrentToken', $token);

        // $e stays false unless error collection is enabled
        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                // skip everything that is not a tag named $remove_until; a
                // matching tag falls through to the normal handling below,
                // which is where $remove_until gets updated or cleared
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }
            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    // remember the pre-transform name for the error report
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                        info_tag_transform[$token->name]->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    // (img is only checked when Core.RemoveInvalidImg is set)
                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        // validate first, then look for required attributes
                        // that are absent from the validated token
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            if ($e) {
                                // $name is the first required attribute found missing
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $name
                                );
                            }
                            // drop the token
                            continue;
                        }
                        // mark the token as already validated
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track entering and leaving a known hidden element so
                    // that comments inside it can be turned into text
                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag: replace it with a text token holding its
                    // generated HTML representation
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            // start dropping tokens until a tag with this name
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            // any other tag token of this name ends the removal
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    // the unknown tag itself is never emitted
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                // textify comments in script tags when they are allowed
                if ($textify_comments !== false) {
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted || $check_comments) {
                    // always cleanup comments
                    $trailing_hyphen = false;
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        // (only needed for the error report, so it is skipped
                        // when errors are not being collected)
                        if (substr($token->data, -1) == '-') {
                            $trailing_hyphen = true;
                        }
                    }
                    // strip all trailing hyphens, then collapse every run of
                    // hyphens down to a single hyphen
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    while (strpos($token->data, '--') !== false) {
                        $found_double_hyphen = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }
                    // keep the comment when trusted, or when its trimmed text
                    // is allow-listed or matches the allow-regexp
                    if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
                        ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
                        // OK good
                        if ($e) {
                            if ($trailing_hyphen) {
                                $e->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($found_double_hyphen) {
                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    } else {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Text) {
                // text tokens pass through unchanged
            } else {
                // any other kind of token is dropped silently
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }
        $context->destroy('CurrentToken');
        return $result;
    }
}
20388: 
20389: 
20390: 
20391: 
20392: 
20393: /**
20394:  * Validate all attributes in the tokens.
20395:  */
20396: 
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{

    /**
     * Runs the attribute validator over every start and empty tag in the
     * stream, except tokens armored against 'ValidateAttributes'.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] The same list that was passed in
     */
    public function execute($tokens, $config, $context)
    {
        $attrValidator = new HTMLPurifier_AttrValidator();

        // the loop variable is registered with the context as 'CurrentToken'
        $token = false;
        $context->register('CurrentToken', $token);

        foreach ($tokens as $token) {
            // attributes only exist on start and empty tags
            $carriesAttributes = $token instanceof HTMLPurifier_Token_Start
                || $token instanceof HTMLPurifier_Token_Empty;

            // armored tokens are left alone
            if ($carriesAttributes && empty($token->armor['ValidateAttributes'])) {
                // validation happens on the token itself; there is no way
                // to remove a token from the stream at this stage
                $attrValidator->validateToken($token, $config, $context);
            }
        }

        $context->destroy('CurrentToken');
        return $tokens;
    }
}
20434: 
20435: 
20436: 
20437: 
20438: 
20439: /**
20440:  * Transforms FONT tags to the proper form (SPAN with CSS styling)
20441:  *
20442:  * This transformation takes the three proprietary attributes of FONT and
20443:  * transforms them into their corresponding CSS attributes.  These are color,
20444:  * face, and size.
20445:  *
20446:  * @note Size is an interesting case because it doesn't map cleanly to CSS.
20447:  *       Thanks to
20448:  *       http://style.cleverchimp.com/font_size_intervals/altintervals.html
20449:  *       for reasonable mappings.
20450:  * @warning This doesn't work completely correctly; specifically, this
20451:  *          TagTransform operates before well-formedness is enforced, so
20452:  *          the "active formatting elements" algorithm doesn't get applied.
20453:  */
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{
    /**
     * Name of the element that FONT tags are rewritten to.
     * @type string
     */
    public $transform_to = 'span';

    /**
     * Maps a (clamped) FONT size attribute value to a CSS font-size value.
     * Unsigned entries are absolute sizes, signed entries are relative ones.
     * @type array
     */
    protected $_size_lookup = array(
        '0' => 'xx-small',
        '1' => 'xx-small',
        '2' => 'small',
        '3' => 'medium',
        '4' => 'large',
        '5' => 'x-large',
        '6' => 'xx-large',
        '7' => '300%',
        '-1' => 'smaller',
        '-2' => '60%',
        '+1' => 'larger',
        '+2' => '150%',
        '+3' => '200%',
        '+4' => '300%'
    );

    /**
     * Returns a renamed copy of the tag; for non-end tags the color, face
     * and size attributes are removed and their CSS equivalents are
     * prepended to the style attribute.
     *
     * @param HTMLPurifier_Token_Tag $tag
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token_Tag Clone of $tag named after $transform_to
     */
    public function transform($tag, $config, $context)
    {
        // end tags carry no attributes: renaming is all that is needed
        if ($tag instanceof HTMLPurifier_Token_End) {
            $new_tag = clone $tag;
            $new_tag->name = $this->transform_to;
            return $new_tag;
        }

        $attr = $tag->attr;
        $prepend_style = '';

        // handle color transform
        if (isset($attr['color'])) {
            $prepend_style .= 'color:' . $attr['color'] . ';';
            unset($attr['color']);
        }

        // handle face transform
        if (isset($attr['face'])) {
            $prepend_style .= 'font-family:' . $attr['face'] . ';';
            unset($attr['face']);
        }

        // handle size transform
        if (isset($attr['size'])) {
            // normalize large numbers so they land on a lookup entry
            if ($attr['size'] !== '') {
                $size = (int)$attr['size'];
                // String offsets must use square brackets: the former
                // $attr['size']{0} form is deprecated since PHP 7.4 and is
                // a parse error on PHP 8.
                if ($attr['size'][0] == '+' || $attr['size'][0] == '-') {
                    // relative size: clamp to the -2 .. +4 range
                    if ($size < -2) {
                        $attr['size'] = '-2';
                    }
                    if ($size > 4) {
                        $attr['size'] = '+4';
                    }
                } else {
                    // absolute size: anything above 7 is treated as 7
                    if ($size > 7) {
                        $attr['size'] = '7';
                    }
                }
            }
            // values without a lookup entry produce no font-size, but the
            // attribute is removed either way
            if (isset($this->_size_lookup[$attr['size']])) {
                $prepend_style .= 'font-size:' .
                    $this->_size_lookup[$attr['size']] . ';';
            }
            unset($attr['size']);
        }

        // generated declarations go in front of any existing style value
        if ($prepend_style) {
            $attr['style'] = isset($attr['style']) ?
                $prepend_style . $attr['style'] :
                $prepend_style;
        }

        $new_tag = clone $tag;
        $new_tag->name = $this->transform_to;
        $new_tag->attr = $attr;

        return $new_tag;
    }
}
20549: 
20550: 
20551: 
20552: 
20553: 
20554: /**
20555:  * Simple transformation, just change tag name to something else,
20556:  * and possibly add some styling. This will cover most of the deprecated
20557:  * tag cases.
20558:  */
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{
    /**
     * CSS to prepend to transformed tags, or null for none.
     * @type string
     */
    protected $style;

    /**
     * @param string $transform_to Name the tag is renamed to.
     * @param string $style Optional CSS to prepend to the tag's style.
     */
    public function __construct($transform_to, $style = null)
    {
        $this->transform_to = $transform_to;
        $this->style = $style;
    }

    /**
     * Produces a renamed copy of the tag and, when a style was configured
     * and the copy is a start or empty tag, prepends that style to it.
     *
     * @param HTMLPurifier_Token_Tag $tag
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token_Tag Clone of $tag named after $transform_to
     */
    public function transform($tag, $config, $context)
    {
        $renamed = clone $tag;
        $renamed->name = $this->transform_to;

        // nothing to style: the rename is the whole transformation
        if ($this->style === null) {
            return $renamed;
        }

        // only start and empty tags receive the CSS
        $takesAttributes = $renamed instanceof HTMLPurifier_Token_Start
            || $renamed instanceof HTMLPurifier_Token_Empty;
        if ($takesAttributes) {
            $this->prependCSS($renamed->attr, $this->style);
        }

        return $renamed;
    }
}
20594: 
20595: 
20596: 
20597: 
20598: 
/**
 * Token holding the contents of a comment. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /**
     * Text found inside the comment.
     * @type string
     */
    public $data;

    /**
     * Comment tokens are always flagged as whitespace.
     * @type bool
     */
    public $is_whitespace = true;

    /**
     * Stores the given values as-is; no processing is applied.
     *
     * @param string $data Comment text.
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
    }

    /**
     * Builds the comment node carrying this token's data and position.
     *
     * @return HTMLPurifier_Node_Comment
     */
    public function toNode() {
        $node = new HTMLPurifier_Node_Comment($this->data, $this->line, $this->col);
        return $node;
    }
}
20633: 
20634: 
20635: 
20636: 
20637: 
/**
 * Abstract class of a tag token (start, end or empty), and its behavior.
 */
abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     * @type bool
     */
    public $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * @type string
     */
    public $name;

    /**
     * Associative array of the tag's attributes.
     * @type array
     */
    public $attr = array();

    /**
     * Non-overloaded constructor, which lower-cases the passed tag name and
     * attribute names.
     *
     * When two attribute names differ only in case, the value stored under
     * the already lower-case name wins and the other entry is discarded.
     *
     * @param string $name String name.
     * @param array $attr Associative array of attributes.
     * @param int $line
     * @param int $col
     * @param array $armor
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        foreach ($attr as $key => $value) {
            // PHP turns numeric array keys into integers. Handing an int to
            // ctype_lower() is deprecated since PHP 8.1 and small ints are
            // interpreted as ASCII codes, so always work on the string form.
            $key_str = (string)$key;
            // normalization only necessary when key is not lowercase
            if (!ctype_lower($key_str)) {
                $new_key = strtolower($key_str);
                if (!isset($attr[$new_key])) {
                    $attr[$new_key] = $attr[$key];
                }
                // Compare string to string: comparing against an int key
                // would always differ and silently drop the attribute.
                if ($new_key !== $key_str) {
                    unset($attr[$key]);
                }
            }
        }
        $this->attr = $attr;
        $this->line = $line;
        $this->col = $col;
        $this->armor = $armor;
    }

    /**
     * Builds the element node carrying this token's name, attributes,
     * position and armor.
     *
     * @return HTMLPurifier_Node_Element
     */
    public function toNode() {
        return new HTMLPurifier_Node_Element($this->name, $this->attr, $this->line, $this->col, $this->armor);
    }
}
20702: 
20703: 
20704: 
20705: 
20706: 
/**
 * Token for an empty tag.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
    /**
     * Builds the node via the parent implementation and flags it as empty.
     */
    public function toNode() {
        $node = parent::toNode();
        $node->empty = true;
        return $node;
    }
}
20718: 
20719: 
20720: 
20721: 
20722: 
/**
 * Token for an end tag.
 *
 * @warning Attributes are accepted by this class although end tags cannot
 * carry any. This is an optimization: under normal circumstances the Lexers
 * never pass attributes.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    /**
     * Token that started this node.
     * Added by MakeWellFormed. Please do not edit this!
     * @type HTMLPurifier_Token
     */
    public $start;

    /**
     * End tokens cannot be converted to nodes; calling this always throws.
     *
     * @throws Exception
     */
    public function toNode() {
        $message = "HTMLPurifier_Token_End->toNode not supported!";
        throw new Exception($message);
    }
}
20743: 
20744: 
20745: 
20746: 
20747: 
/**
 * Concrete start token class.
 *
 * Declares no members of its own; everything is inherited from
 * HTMLPurifier_Token_Tag.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
}
20754: 
20755: 
20756: 
20757: 
20758: 
/**
 * Token for text content.
 *
 * Covers both regular parsed character data (PCDATA) and raw character data
 * coming from CDATA sections. The stored data has all entities expanded.
 * The token also exposes a "tag name", #PCDATA, matching the way the DTD
 * refers to text in lists of permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{

    /**
     * PCDATA tag name compatible with DTD.
     * @type string
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     * @type string
     */
    public $data;

    /**
     * Bool indicating if node is whitespace.
     * @type bool
     */
    public $is_whitespace;

    /**
     * Stores the text and records whether ctype_space() accepts it.
     *
     * @param string $data String parsed character data.
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
        $this->is_whitespace = ctype_space($data);
    }

    /**
     * Builds the text node carrying this token's data, whitespace flag and
     * position.
     *
     * @return HTMLPurifier_Node_Text
     */
    public function toNode() {
        $node = new HTMLPurifier_Node_Text($this->data, $this->is_whitespace, $this->line, $this->col);
        return $node;
    }
}
20808: 
20809: 
20810: 
20811: 
20812: 
/**
 * URI filter that rejects URIs whose host is not the configured host or one
 * of its subdomains. URIs without a host always pass.
 */
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableExternal';

    /**
     * Labels of our own host in reverse order (TLD first), or false when no
     * host has been configured.
     * @type array|bool
     */
    protected $ourHostParts = false;

    /**
     * Reads the host from the URI definition and caches its reversed labels.
     *
     * @param HTMLPurifier_Config $config
     * @return void
     */
    public function prepare($config)
    {
        $our_host = $config->getDefinition('URI')->host;
        if ($our_host !== null) {
            $this->ourHostParts = array_reverse(explode('.', $our_host));
        }
    }

    /**
     * @param HTMLPurifier_URI $uri Reference
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the URI has no host, or its host ends with all
     *              labels of our host; false otherwise (including when no
     *              host of our own is configured).
     */
    public function filter(&$uri, $config, $context)
    {
        if (is_null($uri->host)) {
            return true;
        }
        if ($this->ourHostParts === false) {
            return false;
        }
        $host_parts = array_reverse(explode('.', $uri->host));
        foreach ($this->ourHostParts as $i => $our_part) {
            if (!isset($host_parts[$i])) {
                return false;
            }
            // Strict comparison: with a loose "!=" PHP compares
            // numeric-looking labels as numbers, so e.g. "1" and "01" (or
            // "1e0") would be treated as the same label.
            if ($host_parts[$i] !== $our_part) {
                return false;
            }
        }
        return true;
    }
}
20863: 
20864: 
20865: 
20866: 
20867: 
/**
 * Applies the DisableExternal host check to embedded URIs only; every other
 * URI passes unchecked.
 */
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
    /**
     * @type string
     */
    public $name = 'DisableExternalResources';

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the context's EmbeddedURI value is falsy,
     *              otherwise the parent filter's verdict.
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);
        if ($is_embedded) {
            return parent::filter($uri, $config, $context);
        }
        return true;
    }
}
20889: 
20890: 
20891: 
20892: 
20893: 
/**
 * Rejects every URI for which the context reports a truthy EmbeddedURI value.
 */
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableResources';

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False for embedded URIs, true otherwise.
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);
        return !$is_embedded;
    }
}
20912: 
20913: 
20914: 
20915: 
20916: 
// It's not clear to me whether or not Punycode means that hostnames
// do not have canonical forms anymore. As far as I can tell, it's
// not a problem (punycoding should be identity when no Unicode
// points are involved), but I'm not 100% sure
/**
 * URI filter that rejects any URI whose host contains one of the fragments
 * listed in %URI.HostBlacklist. Matching is by plain substring.
 */
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'HostBlacklist';

    /**
     * Host fragments to reject, as read from %URI.HostBlacklist.
     * @type array
     */
    protected $blacklist = array();

    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    public function prepare($config)
    {
        $this->blacklist = $config->get('URI.HostBlacklist');
        return true;
    }

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False when the host contains a blacklisted fragment,
     *              true otherwise (including URIs that have no host).
     */
    public function filter(&$uri, $config, $context)
    {
        // A URI without a host cannot match any fragment. Bail out early
        // instead of handing null to strpos(), which is deprecated as of
        // PHP 8.1.
        if ($uri->host === null) {
            return true;
        }
        foreach ($this->blacklist as $blacklisted_host_fragment) {
            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}
20959: 
20960: 
20961: 
20962: 
20963: 
// does not support network paths

/**
 * URI filter that resolves relative URIs against the base URI taken from the
 * URI definition, filling in scheme, userinfo, host and port and collapsing
 * "." and ".." path segments.
 */
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'MakeAbsolute';

    /**
     * Base URI read from the URI definition in prepare().
     * NOTE(review): presumably an HTMLPurifier_URI (it is cloned into $uri
     * in filter()) - confirm against the URI definition.
     * @type
     */
    protected $base;

    /**
     * Collapsed path segments of the base URI, without its last segment.
     * @type array
     */
    protected $basePathStack = array();

    /**
     * Loads the base URI and pre-computes its path stack.
     *
     * @param HTMLPurifier_Config $config
     * @return bool False (after raising a warning) when no base URI is
     *              available, true otherwise.
     */
    public function prepare($config)
    {
        $def = $config->getDefinition('URI');
        $this->base = $def->base;
        if (is_null($this->base)) {
            trigger_error(
                'URI.MakeAbsolute is being ignored due to lack of ' .
                'value for URI.Base configuration',
                E_USER_WARNING
            );
            return false;
        }
        $this->base->fragment = null; // fragment is invalid for base URI
        $stack = explode('/', $this->base->path);
        array_pop($stack); // discard last segment
        $stack = $this->_collapseStack($stack); // do pre-parsing
        $this->basePathStack = $stack;
        return true;
    }

    /**
     * Rewrites $uri in place into an absolute URI where applicable.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False only when the URI has a scheme but no host and the
     *              scheme object cannot be obtained; true otherwise.
     */
    public function filter(&$uri, $config, $context)
    {
        if (is_null($this->base)) {
            return true;
        } // abort early
        if ($uri->path === '' && is_null($uri->scheme) &&
            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)) {
            // reference to current document
            $uri = clone $this->base;
            return true;
        }
        if (!is_null($uri->scheme)) {
            // absolute URI already: don't change
            if (!is_null($uri->host)) {
                return true;
            }
            $scheme_obj = $uri->getSchemeObj($config, $context);
            if (!$scheme_obj) {
                // scheme not recognized
                return false;
            }
            if (!$scheme_obj->hierarchical) {
                // non-hierarchal URI with explicit scheme, don't change
                return true;
            }
            // special case: had a scheme but always is hierarchical and had no authority
        }
        if (!is_null($uri->host)) {
            // network path, don't bother
            return true;
        }
        if ($uri->path === '') {
            // no path given: reuse the base path unchanged
            $uri->path = $this->base->path;
        } elseif ($uri->path[0] !== '/') {
            // relative path, needs more complicated processing
            $stack = explode('/', $uri->path);
            $new_stack = array_merge($this->basePathStack, $stack);
            // a leading empty segment becomes the leading slash on implode;
            // add one when the base has a host but the stack lacks it
            if ($new_stack[0] !== '' && !is_null($this->base->host)) {
                array_unshift($new_stack, '');
            }
            $new_stack = $this->_collapseStack($new_stack);
            $uri->path = implode('/', $new_stack);
        } else {
            // absolute path, but still we should collapse
            $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
        }
        // re-combine
        // the scheme is always taken from the base; userinfo, host and port
        // are only filled in when the URI does not carry its own
        $uri->scheme = $this->base->scheme;
        if (is_null($uri->userinfo)) {
            $uri->userinfo = $this->base->userinfo;
        }
        if (is_null($uri->host)) {
            $uri->host = $this->base->host;
        }
        if (is_null($uri->port)) {
            $uri->port = $this->base->port;
        }
        return true;
    }

    /**
     * Resolve dots and double-dots in a path stack
     * @param array $stack List of path segments (as produced by explode('/')).
     * @return array Segments with "." removed, ".." resolved where possible
     *               and internal empty segments dropped.
     */
    private function _collapseStack($stack)
    {
        $result = array();
        // true when the most recently processed segment was "." or "..";
        // such a path refers to a folder and gets a trailing empty segment
        $is_folder = false;
        for ($i = 0; isset($stack[$i]); $i++) {
            $is_folder = false;
            // absorb an internally duplicated slash
            // (the first and the last segment are never absorbed here)
            if ($stack[$i] == '' && $i && isset($stack[$i + 1])) {
                continue;
            }
            if ($stack[$i] == '..') {
                if (!empty($result)) {
                    $segment = array_pop($result);
                    if ($segment === '' && empty($result)) {
                        // error case: attempted to back out too far:
                        // restore the leading slash
                        $result[] = '';
                    } elseif ($segment === '..') {
                        $result[] = '..'; // cannot remove .. with ..
                    }
                } else {
                    // relative path, preserve the double-dots
                    $result[] = '..';
                }
                $is_folder = true;
                continue;
            }
            if ($stack[$i] == '.') {
                // silently absorb
                $is_folder = true;
                continue;
            }
            $result[] = $stack[$i];
        }
        if ($is_folder) {
            // yields a trailing slash once the segments are imploded
            $result[] = '';
        }
        return $result;
    }
}
21118: 
21119: 
21120: 
21121: 
21122: 
/**
 * Post-filter that rewrites browsable, non-benign URIs into the target
 * pattern configured under %URI.Munge, substituting placeholders such as
 * %s (the original URI) and %t (an HMAC of it).
 */
class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'Munge';

    /**
     * @type bool
     */
    public $post = true;

    /**
     * Target pattern containing the placeholders.
     * @type string
     */
    private $target;

    /**
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    /**
     * Whether embedded URIs are munged too (%URI.MungeResources).
     * @type bool
     */
    private $doEmbed;

    /**
     * Key for the %t HMAC (%URI.MungeSecretKey).
     * @type string
     */
    private $secretKey;

    /**
     * Placeholder => replacement map, rebuilt on every filter() call.
     * @type array
     */
    protected $replace = array();

    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     * @throws Exception When a secret key is configured but hash_hmac() is
     *                   unavailable.
     */
    public function prepare($config)
    {
        $this->target = $config->get('URI.' . $this->name);
        $this->parser = new HTMLPurifier_URIParser();
        $this->doEmbed = $config->get('URI.MungeResources');
        $this->secretKey = $config->get('URI.MungeSecretKey');
        if ($this->secretKey && !function_exists('hash_hmac')) {
            throw new Exception("Cannot use %URI.MungeSecretKey without hash_hmac support.");
        }
        return true;
    }

    /**
     * Replaces $uri with the munged URI where applicable.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true; URIs that are not munged pass unchanged.
     */
    public function filter(&$uri, $config, $context)
    {
        if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
            return true;
        }

        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return true;
        } // ignore unknown schemes, maybe another postfilter did it
        if (!$scheme_obj->browsable) {
            return true;
        } // ignore non-browseable schemes, since we can't munge those in a reasonable way
        if ($uri->isBenign($config, $context)) {
            return true;
        } // don't redirect if a benign URL

        $this->makeReplace($uri, $config, $context);
        // Context lookups may yield null or booleans. Cast explicitly so
        // rawurlencode() always receives a string; passing null to it is
        // deprecated as of PHP 8.1. The encoded output is unchanged.
        foreach ($this->replace as $placeholder => $value) {
            $this->replace[$placeholder] = rawurlencode((string)$value);
        }

        $new_uri = strtr($this->target, $this->replace);
        $new_uri = $this->parser->parse($new_uri);
        // don't redirect if the target host is the same as the
        // starting host
        if ($uri->host === $new_uri->host) {
            return true;
        }
        $uri = $new_uri; // overwrite
        return true;
    }

    /**
     * Fills $this->replace with the raw (not yet URL-encoded) placeholder
     * values for the given URI.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     */
    protected function makeReplace($uri, $config, $context)
    {
        $string = $uri->toString();
        // always available
        $this->replace['%s'] = $string;
        $this->replace['%r'] = $context->get('EmbeddedURI', true);
        $token = $context->get('CurrentToken', true);
        $this->replace['%n'] = $token ? $token->name : null;
        $this->replace['%m'] = $context->get('CurrentAttr', true);
        $this->replace['%p'] = $context->get('CurrentCSSProperty', true);
        // not always available
        if ($this->secretKey) {
            $this->replace['%t'] = hash_hmac("sha256", $string, $this->secretKey);
        }
    }
}
21234: 
21235: 
21236: 
21237: 
21238: 
/**
 * Implements safety checks for safe iframes.
 *
 * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
 * works safely.
 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'SafeIframe';

    /**
     * @type bool
     */
    public $always_load = true;

    /**
     * Whitelist pattern from %URI.SafeIframeRegexp, or null when none is set.
     * @type string
     */
    protected $regexp = null;

    // XXX: The not so good bit about how this is all set up now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.
    /**
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    public function prepare($config)
    {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the filter does not apply or the URI matches
     *              the whitelist; false when an iframe URI does not match,
     *              no whitelist is configured, or the pattern fails.
     */
    public function filter(&$uri, $config, $context)
    {
        // check if filter not applicable
        if (!$config->get('HTML.SafeIframe')) {
            return true;
        }
        // check if the filter should actually trigger
        if (!$context->get('EmbeddedURI', true)) {
            return true;
        }
        $token = $context->get('CurrentToken', true);
        if (!($token && $token->name == 'iframe')) {
            return true;
        }
        // check if we actually have some whitelists enabled
        if ($this->regexp === null) {
            return false;
        }
        // actually check the whitelists
        // preg_match() yields 1, 0 or false (on a pattern error); normalise
        // to the documented bool so that an error counts as "no match".
        return preg_match($this->regexp, $uri->toString()) === 1;
    }
}
21303: 
21304: 
21305: 
21306: 
21307: 
/**
 * Implements data: URI for base64 encoded images supported by GD.
 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = true;

    /**
     * MIME types accepted in a data: URI.
     * @type array
     */
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
    );
    // this is actually irrelevant since we only write out the path
    // component
    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Validates the payload as an image of an allowed type and rewrites the
     * URI path to "<detected type>;base64,<data>".
     *
     * The payload is written to a temporary file so its real type can be
     * sniffed with exif_imagetype() or getimagesize(); the file is removed
     * again on every path.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False when the declared or detected content type is not
     *              allowed, or the payload cannot be inspected.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $result = explode(',', $uri->path, 2);
        $is_base64 = false;
        $charset = null;
        $content_type = null;
        if (count($result) == 2) {
            list($metadata, $data) = $result;
            // do some legwork on the metadata
            $metas = explode(';', $metadata);
            while (!empty($metas)) {
                $cur = array_shift($metas);
                if ($cur == 'base64') {
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) == 'charset=') {
                    // doesn't match if there are arbitrary spaces, but
                    // whatever dude
                    if ($charset !== null) {
                        continue;
                    } // garbage
                    // The value is never used (plaintext is not allowed); it
                    // is only tracked so a charset is not mistaken for the
                    // content type.
                    $charset = substr($cur, 8);
                } else {
                    if ($content_type !== null) {
                        continue;
                    } // garbage
                    $content_type = $cur;
                }
            }
        } else {
            $data = $result[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        $data = rawurldecode($data);
        if ($is_base64) {
            $raw_data = base64_decode($data);
        } else {
            $raw_data = $data;
        }
        if (strlen($raw_data) < 12) {
            // Too short to be any supported image. exif_imagetype() raises
            // a read error on such tiny files, so reject up front.
            return false;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
        // Use the platform's temp directory rather than a hard-coded /tmp.
        $tmp_dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
        $file = tempnam($tmp_dir, "");
        if ($file === false) {
            // could not create a temporary file; payload cannot be verified
            return false;
        }
        if (file_put_contents($file, $raw_data) === false) {
            unlink($file);
            return false;
        }
        if (function_exists('exif_imagetype')) {
            $image_code = exif_imagetype($file);
            unlink($file);
            if ($image_code === false) {
                // not a recognisable image
                return false;
            }
        } elseif (function_exists('getimagesize')) {
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            unlink($file);
            if ($info == false) {
                return false;
            }
            $image_code = $info[2];
        } else {
            // Remove the temp file before reporting, and never fall through
            // with an undefined image code should a custom error handler
            // let execution continue.
            unlink($file);
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            return false;
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) {
                return false;
            }
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * No-op error handler installed around getimagesize() to silence its
     * warnings.
     *
     * @param int $errno
     * @param string $errstr
     */
    public function muteErrorHandler($errno, $errstr)
    {
    }
}
21433: 
21434: 
21435: 
/**
 * Scheme validator for file URIs, as defined by RFC 1630 and RFC 1738.
 */
class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme
{
    /**
     * Most machines cannot reach file:// URLs, so using one as an
     * img src is incorrect.
     * @type bool
     */
    public $browsable = false;

    /**
     * Essentially the *only* URI scheme where this holds, because pointing
     * at files on the local machine is very common. Browsers on some
     * operating systems do not even understand the authority, although it
     * is reportedly used on Windows to refer to network shares.
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears the components that carry no meaning for file URIs.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        // The query string has no possible effect (even though Firefox
        // appears to tolerate it), so it is stripped.
        $uri->query = null;
        // file:// makes no provisions for accessing the resource.
        $uri->port = null;
        // No authentication method is supported.
        $uri->userinfo = null;
        return true;
    }
}
21476: 
21477: 
21478: 
21479: 
21480: 
/**
 * Scheme validator for ftp (File Transfer Protocol) URIs, following the
 * generic RFC 1738 definition.
 */
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme
{
    /**
     * @type int
     */
    public $default_port = 21;

    /**
     * @type bool
     */
    public $browsable = true; // usually

    /**
     * @type bool
     */
    public $hierarchical = true;

    /**
     * Drops the query and normalises a trailing ";type=<code>" suffix.
     *
     * Only the type codes a, i and d are kept as a real suffix. Any other
     * semicolon in the path ends up percent-encoded as %3B.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->query = null;

        // typecode check: look at whatever follows the last semicolon
        $split_at = strrpos($uri->path, ';');
        if ($split_at === false) {
            return true;
        }

        $suffix = substr($uri->path, $split_at + 1); // without the semicolon
        $uri->path = substr($uri->path, 0, $split_at);
        $kept_type = '';

        if (strpos($suffix, '=') === false) {
            // not a key=value pair, tack it back on encoded
            $uri->path .= '%3B' . $suffix;
        } else {
            list($key, $typecode) = explode('=', $suffix, 2);
            if ($key !== 'type') {
                // invalid key, tack it back on encoded
                $uri->path .= '%3B' . $suffix;
            } elseif (in_array($typecode, array('a', 'i', 'd'), true)) {
                $kept_type = ";type=$typecode";
            }
            // a "type" key with any other code is dropped
        }

        // encode the remaining semicolons, then re-attach the valid suffix
        $uri->path = str_replace(';', '%3B', $uri->path) . $kept_type;
        return true;
    }
}
21535: 
21536: 
21537: 
21538: 
21539: 
/**
 * Scheme validator for http (HyperText Transfer Protocol) URIs, as defined
 * by RFC 2616.
 */
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $hierarchical = true;

    /**
     * @type bool
     */
    public $browsable = true;

    /**
     * @type int
     */
    public $default_port = 80;

    /**
     * Removes the userinfo component; nothing else is changed.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->userinfo = null;

        return true;
    }
}
21572: 
21573: 
21574: 
21575: 
21576: 
/**
 * URI scheme handler for https (Secure HTTP).
 *
 * Inherits all validation from the http scheme and only overrides the
 * default port and the secure flag.
 */
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http
{
    /**
     * Default port for this scheme.
     * @type int
     */
    public $default_port = 443;

    /**
     * Secure flag for this scheme; set to true for https.
     * @type bool
     */
    public $secure = true;
}
21591: 
21592: 
21593: 
21594: 
21595: 
// VERY RELAXED! This shouldn't cause problems (not even Firefox checks
// whether the email is valid), but be careful!
21598: 
/**
 * URI scheme handler for mailto (e-mail), following RFC 2368.
 *
 * Validation is deliberately minimal: the path (the address) and the
 * query are passed through untouched.
 *
 * @todo Validate the email address
 * @todo Filter allowed query parameters
 */

class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme
{
    /**
     * Browsable flag for this scheme; set to false for mailto.
     * @type bool
     */
    public $browsable = false;

    /**
     * Flag indicating the host component may be omitted; true for mailto.
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears the userinfo, host and port components of the URI.
     *
     * The path is not checked; it still needs to be validated against
     * RFC 2368's addr-spec.
     *
     * @param HTMLPurifier_URI $uri URI to modify in place
     * @param HTMLPurifier_Config $config not used by this method
     * @param HTMLPurifier_Context $context not used by this method
     * @return bool always true
     */
    public function doValidate(&$uri, $config, $context)
    {
        // none of the authority components are kept for mailto URIs
        $uri->userinfo = $uri->host = $uri->port = null;

        return true;
    }
}
21632: 
21633: 
21634: 
21635: 
21636: 
/**
 * URI scheme handler for news (Usenet), following the generic rules of
 * RFC 1738.
 */
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme
{
    /**
     * Browsable flag for this scheme; set to false for news.
     * @type bool
     */
    public $browsable = false;

    /**
     * Flag indicating the host component may be omitted; true for news.
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears the userinfo, host, port and query components of the URI.
     *
     * The path is left as-is; a typecode check on it is still missing.
     *
     * @param HTMLPurifier_URI $uri URI to modify in place
     * @param HTMLPurifier_Config $config not used by this method
     * @param HTMLPurifier_Context $context not used by this method
     * @return bool always true
     */
    public function doValidate(&$uri, $config, $context)
    {
        // only the path survives for news URIs
        $uri->userinfo = $uri->host = $uri->port = $uri->query = null;

        return true;
    }
}
21668: 
21669: 
21670: 
21671: 
21672: 
/**
 * URI scheme handler for nntp (Network News Transfer Protocol), following
 * the generic rules of RFC 1738.
 */
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme
{
    /**
     * Default port for this scheme.
     * @type int
     */
    public $default_port = 119;

    /**
     * Browsable flag for this scheme; set to false for nntp.
     * @type bool
     */
    public $browsable = false;

    /**
     * Clears the userinfo and query components of the URI; host, port and
     * path are left untouched.
     *
     * @param HTMLPurifier_URI $uri URI to modify in place
     * @param HTMLPurifier_Config $config not used by this method
     * @param HTMLPurifier_Context $context not used by this method
     * @return bool always true
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->userinfo = $uri->query = null;

        return true;
    }
}
21701: 
21702: 
21703: 
21704: 
21705: 
/**
 * Variable parser that leniently converts user-supplied values (mostly
 * strings) into the requested type. It is safe to expose to users, but
 * it may not be able to represent every possible data input.
 */
class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
{
    /**
     * Converts $var towards the type identified by $type.
     *
     * Conversions performed here:
     *  - string-like types and MIXED: returned unchanged;
     *  - INT: all-digit strings are cast to int;
     *  - FLOAT: numeric strings and ints are cast to float;
     *  - BOOL: ints 0/1 and the strings on/true/1 and off/false/0
     *    (compared loosely) become booleans; any other string throws;
     *  - ALIST/HASH/LOOKUP: strings are split on commas (and on line
     *    breaks when present) and trimmed; for HASH each item is further
     *    split into "key:value". Arrays are then normalised to the shape
     *    the type requires.
     * Values that match none of the conversions for INT, FLOAT and BOOL
     * are returned as they came in.
     *
     * Leaving the switch via "break" hands the value to errorGeneric();
     * an unknown $type is first reported through errorInconsistent().
     *
     * @param mixed $var
     * @param int $type
     * @param bool $allow_null
     * @return array|bool|float|int|mixed|null|string
     * @throws HTMLPurifier_VarParserException
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        if ($var === null && $allow_null) {
            return null;
        }
        switch ($type) {
            case self::MIXED:
            case self::ISTRING:
            case self::STRING:
            case self::TEXT:
            case self::ITEXT:
                return $var;

            case self::INT:
                return (is_string($var) && ctype_digit($var)) ? (int)$var : $var;

            case self::FLOAT:
                $castable = is_int($var) || (is_string($var) && is_numeric($var));
                return $castable ? (float)$var : $var;

            case self::BOOL:
                if ($var === 0 || $var === 1) {
                    return (bool)$var;
                }
                if (!is_string($var)) {
                    return $var;
                }
                // deliberately loose (==) membership tests
                if (in_array($var, array('on', 'true', '1'))) {
                    return true;
                }
                if (in_array($var, array('off', 'false', '0'))) {
                    return false;
                }
                throw new HTMLPurifier_VarParserException("Unrecognized value '$var' for $type");

            case self::ALIST:
            case self::HASH:
            case self::LOOKUP:
                if (is_string($var)) {
                    // strictly speaking an empty string is a list holding
                    // one empty item, but an empty array is more intuitive
                    if ($var == '') {
                        return array();
                    }
                    $has_line_break = strpos($var, "\n") !== false || strpos($var, "\r") !== false;
                    // the plain comma split only suits simple lists of tag
                    // names or alphanumeric items; with line breaks present
                    // both commas and runs of line breaks separate items
                    $items = $has_line_break
                        ? preg_split('/(,|[\n\r]+)/', $var)
                        : explode(',', $var);
                    // strip surrounding whitespace from every item
                    foreach ($items as $index => $item) {
                        $items[$index] = trim($item);
                    }
                    if ($type === self::HASH) {
                        // expected form: key:value,key2:value2
                        $pairs = array();
                        foreach ($items as $item) {
                            $parts = explode(':', $item, 2);
                            // items without a colon are skipped
                            if (isset($parts[1])) {
                                $pairs[trim($parts[0])] = trim($parts[1]);
                            }
                        }
                        $items = $pairs;
                    }
                    $var = $items;
                }
                if (!is_array($var)) {
                    break;
                }
                $keys = array_keys($var);
                $is_sequential = ($keys === array_keys($keys));
                if ($is_sequential) {
                    // keys are exactly 0..n-1
                    if ($type == self::ALIST) {
                        return $var;
                    }
                    if ($type != self::LOOKUP) {
                        // a HASH with purely sequential keys is rejected
                        break;
                    }
                    // turn the list of names into name => true
                    $lookup = array();
                    foreach ($var as $member) {
                        $lookup[$member] = true;
                    }
                    return $lookup;
                }
                if ($type === self::ALIST) {
                    trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
                    return array_values($var);
                }
                if ($type === self::LOOKUP) {
                    foreach ($var as $key => $value) {
                        if ($value !== true) {
                            trigger_error(
                                "Lookup array has non-true value at key '$key'; " .
                                "maybe your input array was not indexed numerically",
                                E_USER_WARNING
                            );
                        }
                        // force every entry to true regardless
                        $var[$key] = true;
                    }
                }
                return $var;

            default:
                $this->errorInconsistent(__CLASS__, $type);
        }
        $this->errorGeneric($var, $type);
    }
}
21832: 
21833: 
21834: 
21835: 
21836: 
/**
 * This variable parser uses PHP's internal code engine. Because it does
 * this, it can represent all inputs; however, it is dangerous and cannot
 * be used by users.
 *
 * SECURITY: the input is executed with eval(). Never feed this parser
 * anything that originates from an untrusted source.
 */
class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
{

    /**
     * Evaluates $var as a PHP expression and returns its value.
     *
     * @param mixed $var PHP expression source to evaluate
     * @param int $type not consulted by this implementation
     * @param bool $allow_null not consulted by this implementation
     * @return mixed value the expression evaluated to
     * @throws HTMLPurifier_VarParserException
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        return $this->evalExpression($var);
    }

    /**
     * Runs "$var = <expr>;" through eval() and returns the assigned value.
     *
     * @param string $expr PHP expression source (without trailing semicolon)
     * @return mixed
     * @throws HTMLPurifier_VarParserException if the expression cannot be parsed
     */
    protected function evalExpression($expr)
    {
        $var = null;
        try {
            // SECURITY: executes arbitrary PHP; $expr must be trusted.
            $result = eval("\$var = $expr;");
        } catch (ParseError $e) {
            // PHP 7+ reports a syntax error in evaluated code by throwing
            // ParseError instead of returning false; translate it so that
            // callers still receive the documented exception type. On
            // PHP 5 this class does not exist and the clause never matches.
            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
        }
        if ($result === false) {
            // PHP 5 behaviour: eval() returns false on a parse error
            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
        }
        return $var;
    }
}
21871: 
21872: 
21873: 
21874: 
API documentation generated by ApiGen 2.8.0