<?php

// Overload the default configuration schema temporarily: a fresh schema is
// handed to HTMLPurifier_ConfigSchema::instance() and the previous instance
// is kept in $old.
// NOTE(review): $old is never read in this section -- presumably a matching
// teardown script restores it; confirm.
$custom_schema = new HTMLPurifier_ConfigSchema();
$old = HTMLPurifier_ConfigSchema::instance();
$custom_schema =& HTMLPurifier_ConfigSchema::instance($custom_schema);

// ---------------------------------------------------------------------------
// Namespace "Element": one directive for each value type used in this section
// (string, istring, int, float, bool, lookup, list, hash).
// ---------------------------------------------------------------------------
HTMLPurifier_ConfigSchema::defineNamespace(
    'Element',
    'Chemical substances that cannot be further decomposed'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Abbr', 'H', 'string',
    'Abbreviation of element name.'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Name', 'hydrogen', 'istring',
    'Full name of atoms.'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Number', 1, 'int',
    'Atomic number, is identity.'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Mass', 1.00794, 'float',
    'Atomic mass.'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Radioactive', false, 'bool',
    'Does it have rapid decay?'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Isotopes',
    array('1' => true, '2' => true, '3' => true),
    'lookup',
    'What numbers of neutrons for this element have been observed?'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'Traits',
    array('nonmetallic', 'odorless', 'flammable'),
    'list',
    'What are general properties of the element?'
);
HTMLPurifier_ConfigSchema::define(
    'Element', 'IsotopeNames',
    array('1' => 'protium', '2' => 'deuterium', '3' => 'tritium'),
    'hash',
    'Lookup hash of neutron counts to formal names.'
);

// ---------------------------------------------------------------------------
// Namespace "Instrument": directives restricted to a fixed set of allowed
// values, each with one alias mapping onto an allowed value.
// ---------------------------------------------------------------------------
HTMLPurifier_ConfigSchema::defineNamespace(
    'Instrument',
    'Of the musical type.'
);

HTMLPurifier_ConfigSchema::define(
    'Instrument', 'Manufacturer', 'Yamaha', 'string',
    'Who made it?'
);
HTMLPurifier_ConfigSchema::defineAllowedValues(
    'Instrument', 'Manufacturer',
    array('Yamaha', 'Conn-Selmer', 'Vandoren', 'Laubin', 'Buffet', 'other')
);
HTMLPurifier_ConfigSchema::defineValueAliases(
    'Instrument', 'Manufacturer',
    array('Selmer' => 'Conn-Selmer')
);

HTMLPurifier_ConfigSchema::define(
    'Instrument', 'Family', 'woodwind', 'istring',
    'What family is it?'
);
HTMLPurifier_ConfigSchema::defineAllowedValues(
    'Instrument', 'Family',
    array('brass', 'woodwind', 'percussion', 'string', 'keyboard', 'electronic')
);
HTMLPurifier_ConfigSchema::defineValueAliases(
    'Instrument', 'Family',
    array('synth' => 'electronic')
);

// ---------------------------------------------------------------------------
// Namespace "ReportCard": includes a directive typed 'string/null' whose
// default is null.
// ---------------------------------------------------------------------------
HTMLPurifier_ConfigSchema::defineNamespace(
    'ReportCard',
    'It is for grades.'
);
HTMLPurifier_ConfigSchema::define(
    'ReportCard', 'English', null, 'string/null',
    'Grade from English class.'
);
HTMLPurifier_ConfigSchema::define(
    'ReportCard', 'Absences', 0, 'int',
    'How many times missing from school?'
);

// ---------------------------------------------------------------------------
// Namespace "Text": long-form values using the 'text' and 'itext' types.
// ---------------------------------------------------------------------------
HTMLPurifier_ConfigSchema::defineNamespace(
    'Text',
    'This stuff is long, boring, and English.'
);
HTMLPurifier_ConfigSchema::define(
    'Text', 'AboutUs',
    'Nothing much, but this should be decently long so that a textarea would be better',
    'text',
    'Who are we? What are we up to?'
);
HTMLPurifier_ConfigSchema::define(
    'Text', 'Hash',
    "not-case-sensitive\nstill-not-case-sensitive\nsuper-not-case-sensitive",
    'itext',
    'This is of limited utility, but of course it ends up being used.'
);
<?php

// Core.Encoding: case-insensitive string directive, default 'utf-8'.
HTMLPurifier_ConfigSchema::define(
    'Core', 'Encoding', 'utf-8', 'istring',
    'If for some reason you are unable to convert all webpages to UTF-8, '
    . 'you can use this directive as a stop-gap compatibility change to '
    . 'let HTML Purifier deal with non UTF-8 input. This technique has '
    . 'notable deficiencies: absolutely no characters outside of the selected '
    . 'character encoding will be preserved, not even the ones that have '
    . 'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '
    . 'that automatically resolves all entities), making it pretty useless '
    . 'for anything except the most I18N-blind applications, although '
    . '%Core.EscapeNonASCIICharacters offers fixes this trouble with '
    . 'another tradeoff. This directive '
    . 'only accepts ISO-8859-1 if iconv is not enabled.'
);

// Core.EscapeNonASCIICharacters: boolean directive, off by default.
HTMLPurifier_ConfigSchema::define(
    'Core', 'EscapeNonASCIICharacters', false, 'bool',
    'This directive overcomes a deficiency in %Core.Encoding by blindly '
    . 'converting all non-ASCII characters into decimal numeric entities before '
    . 'converting it to its native encoding. This means that even '
    . 'characters that can be expressed in the non-UTF-8 encoding will '
    . 'be entity-ized, which can be a real downer for encodings like Big5. '
    . 'It also assumes that the ASCII repetoire is available, although '
    . 'this is the case for almost all encodings. Anyway, use UTF-8! This '
    . 'directive has been available since 1.4.0.'
);

// When the iconv extension is missing, narrow Core.Encoding to the encodings
// with native PHP support and register one alias spelling for ISO-8859-1.
if (function_exists('iconv') === false) {
    HTMLPurifier_ConfigSchema::defineAllowedValues(
        'Core', 'Encoding',
        array('utf-8', 'iso-8859-1')
    );
    HTMLPurifier_ConfigSchema::defineValueAliases(
        'Core', 'Encoding',
        array('iso8859-1' => 'iso-8859-1')
    );
}

// Test.ForceNoIconv: boolean switch, off by default; see its description
// string for the intended effect on HTMLPurifier_Encoder.
HTMLPurifier_ConfigSchema::define(
    'Test', 'ForceNoIconv', false, 'bool',
    'When set to true, HTMLPurifier_Encoder will act as if iconv does not '
    . 'exist and use only pure PHP implementations.'
);

/**
 * A UTF-8 specific character encoder that handles cleaning and transforming.
* @note All functions in this class should be static. */ class HTMLPurifier_Encoder { /** * Constructor throws fatal error if you attempt to instantiate class */ private function __construct() { trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); } /** * Cleans a UTF-8 string for well-formedness and SGML validity * * It will parse according to UTF-8 and return a valid UTF8 string, with * non-SGML codepoints excluded. * * @note Just for reference, the non-SGML code points are 0 to 31 and * 127 to 159, inclusive. However, we allow code points 9, 10