/**
 * Extract every attribute declaration found in a string and copy it to a tag
 *
 * A declaration without an equal sign is treated as a boolean attribute: its value
 * is the attribute's own name, lowercased.
 *
 * @link http://www.w3.org/html/wg/drafts/html/master/single-page.html#boolean-attributes
 *
 * @param  Tag    $tag    Tag that receives the attributes
 * @param  string $elName Name of the HTML element, used to look up attribute aliases
 * @param  string $str    String containing the attribute declarations
 * @return void
 */
protected function captureAttributes(Tag $tag, $elName, $str)
{
    $regexp = '/[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?/i';
    preg_match_all($regexp, $str, $matches);

    foreach ($matches[0] as $declaration) {
        $eqPos = strpos($declaration, '=');
        if ($eqPos === false) {
            // Boolean attribute: the whole match is the name, the value mirrors it
            $rawName  = $declaration;
            $rawValue = strtolower($declaration);
        } else {
            $rawName  = substr($declaration, 0, $eqPos);
            $rawValue = substr($declaration, 1 + $eqPos);
        }

        // The regexp tolerates whitespace on both sides of the equal sign, as in
        // <b title = "foo"/>, so both halves are trimmed
        $attrName  = strtolower(trim($rawName));
        $attrValue = trim($rawValue);

        // Swap the name for its configured alias, if this element has one
        if (isset($this->config['aliases'][$elName][$attrName])) {
            $attrName = $this->config['aliases'][$elName][$attrName];
        }

        // Strip the enclosing quotes; the regexp only matches balanced pairs
        $firstChar = $attrValue[0];
        if ($firstChar === '"' || $firstChar === "'") {
            $attrValue = substr($attrValue, 1, -1);
        }

        $decodedValue = html_entity_decode($attrValue, ENT_QUOTES, 'UTF-8');
        $tag->setAttribute($attrName, $decodedValue);
    }
}
/**
 * @testdox The [MEDIA] tag transfers its priority to the tag it creates
 */
public function testTagPriority()
{
    $priority = 123;

    // Mock of the tag returned by addSelfClosingTag(); it must receive the priority
    $createdTag = $this->getMockBuilder('s9e\\TextFormatter\\Parser\\Tag')
        ->disableOriginalConstructor()
        ->setMethods(['setAttributes', 'setSortPriority'])
        ->getMock();
    $createdTag->expects($this->once())
        ->method('setSortPriority')
        ->with($priority);

    // Mock of the parser, which must be asked exactly once for a self-closing tag
    $parser = $this->getMockBuilder('s9e\\TextFormatter\\Parser')
        ->disableOriginalConstructor()
        ->setMethods(['addSelfClosingTag'])
        ->getMock();
    $parser->expects($this->once())
        ->method('addSelfClosingTag')
        ->will($this->returnValue($createdTag));

    $mediaTag = new Tag(Tag::START_TAG, 'MEDIA', 0, 0);
    $mediaTag->setAttribute('media', 'foo');
    $mediaTag->setSortPriority($priority);

    Parser::filterTag($mediaTag, $parser, ['foo.invalid' => 'foo']);
}
/**
 * Read the attribute declarations contained in a string and set them on a tag
 *
 * Attribute names are lowercased and replaced by their alias when one is configured
 * for the element. Values have their surrounding quotes removed and their HTML
 * entities decoded. A declaration with no equal sign gets its own lowercased name
 * as value.
 *
 * @param  Tag    $tag    Tag that receives the attributes
 * @param  string $elName Name of the HTML element
 * @param  string $str    String containing the attribute declarations
 * @return void
 */
protected function captureAttributes(Tag $tag, $elName, $str)
{
    \preg_match_all(
        '/[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?/i',
        $str,
        $found
    );

    foreach ($found[0] as $chunk) {
        $split = \strpos($chunk, '=');
        $isBoolean = ($split === \false);

        // Without an equal sign the name is the whole chunk and doubles as the value
        $namePart  = $isBoolean ? $chunk : \substr($chunk, 0, $split);
        $valuePart = $isBoolean ? \strtolower($chunk) : \substr($chunk, 1 + $split);

        $attrName  = \strtolower(\trim($namePart));
        $attrValue = \trim($valuePart);

        if (isset($this->config['aliases'][$elName][$attrName])) {
            $attrName = $this->config['aliases'][$elName][$attrName];
        }

        // A value starting with a quote is a quoted value; drop both delimiters
        $opener = $attrValue[0];
        if ($opener === '"' || $opener === "'") {
            $attrValue = \substr($attrValue, 1, -1);
        }

        $tag->setAttribute(
            $attrName,
            \html_entity_decode($attrValue, \ENT_QUOTES, 'UTF-8')
        );
    }
}
/**
 * Set a tag's link attribute and, when one is present, its title
 *
 * Everything up to the first space in $linkInfo is used as the URL. The remainder
 * is trimmed and stripped of its first and last characters (presumably the title's
 * delimiters -- confirm against the caller) to produce the title.
 *
 * @param  Tag    $tag      Tag that receives the attributes
 * @param  string $linkInfo URL, optionally followed by a space and a delimited title
 * @param  string $attrName Name of the attribute that stores the URL
 * @return void
 */
protected function setLinkAttributes(Tag $tag, $linkInfo, $attrName)
{
    $spacePos = \strpos($linkInfo, ' ');

    if ($spacePos === \false) {
        // No space: the whole string is the URL and there is no title
        $url   = $linkInfo;
        $title = '';
    } else {
        $url       = \substr($linkInfo, 0, $spacePos);
        $remainder = \trim(\substr($linkInfo, $spacePos));
        $title     = \substr($remainder, 1, -1);
    }

    $tag->setAttribute($attrName, $this->decode($url));

    if ($title > '') {
        $tag->setAttribute('title', $this->decode($title));
    }
}
/**
 * Shorten the "text" attribute of a LINK_TEXT tag
 *
 * When utf8_strlen() reports more than 55 for the text, it is replaced with its
 * first 39 units, the separator ' ... ', and its last 10 units, as cut by
 * utf8_substr(). Shorter text is written back unchanged.
 *
 * @param  \s9e\TextFormatter\Parser\Tag $tag LINK_TEXT tag
 * @return bool                               Always true to indicate that the tag is valid
 */
public function truncate_text(\s9e\TextFormatter\Parser\Tag $tag)
{
    $text = $tag->getAttribute('text');

    if (utf8_strlen($text) > 55) {
        $head = utf8_substr($text, 0, 39);
        $tail = utf8_substr($text, -10);
        $text = $head . ' ... ' . $tail;
    }

    $tag->setAttribute('text', $text);

    return true;
}
/**
 * Fetch a URL and use its content to fill a tag's missing attributes
 *
 * Each regexp is matched against the content returned by self::wget(). Every
 * non-numeric key in the captures is copied to the tag as an attribute, unless the
 * tag already has an attribute of that name.
 *
 * @param  string      $url      URL to scrape
 * @param  Tag         $tag      Tag to fill
 * @param  string[]    $regexps  Regexps used to extract content from the page
 * @param  string|null $cacheDir Value forwarded to self::wget()
 * @return void
 */
protected static function scrapeUrl($url, Tag $tag, array $regexps, $cacheDir)
{
    $content = self::wget($url, $cacheDir);

    foreach ($regexps as $regexp) {
        if (!\preg_match($regexp, $content, $captures)) {
            continue;
        }

        foreach ($captures as $name => $value) {
            // Numeric keys are positional captures; existing attributes are kept
            if (\is_numeric($name) || $tag->hasAttribute($name)) {
                continue;
            }

            $tag->setAttribute($name, $value);
        }
    }
}
/**
 * @testdox filterAttributes() calls the logger's setAttribute() and unsetAttribute() methods for each attribute with a filterChain
 */
public function testFilterAttributesCallsLoggerSetAttribute()
{
    $logger = $this->getMock(
        's9e\\TextFormatter\\Parser\\Logger',
        ['setAttribute', 'unsetAttribute']
    );

    // setAttribute() is expected at invocation indexes 0 and 2 of the mock, with
    // unsetAttribute() expected exactly twice overall
    $logger->expects($this->at(0))
        ->method('setAttribute')
        ->with('foo');
    $logger->expects($this->at(2))
        ->method('setAttribute')
        ->with('bar');
    $logger->expects($this->exactly(2))
        ->method('unsetAttribute');

    // Both attributes get a one-filter chain and a value equal to their own name
    $tagConfig = new TagConfig();
    $tag       = new Tag(Tag::SELF_CLOSING_TAG, 'X', 0, 0);
    foreach (['foo', 'bar'] as $attrName) {
        $tagConfig->attributes->add($attrName)->filterChain->append(function () {
        });
        $tag->setAttribute($attrName, $attrName);
    }

    Parser::filterAttributes($tag, $tagConfig->asConfig(), [], $logger);
}
/**
 * Retrieve a page and fill a tag's missing attributes from it
 *
 * The page content comes from self::wget(). Named captures of every matching
 * regexp become attributes; attributes the tag already has are left untouched.
 *
 * @param  string      $url      URL to scrape
 * @param  Tag         $tag      Tag to fill
 * @param  string[]    $regexps  Regexps used to extract content from the page
 * @param  string|null $cacheDir Path to the cache directory
 * @return void
 */
protected static function scrapeUrl($url, Tag $tag, array $regexps, $cacheDir)
{
    $page = self::wget($url, $cacheDir);

    foreach ($regexps as $pattern) {
        $matches = [];
        if (preg_match($pattern, $page, $matches)) {
            foreach ($matches as $key => $captured) {
                // Skip positional captures, which have numeric keys
                if (is_numeric($key)) {
                    continue;
                }

                // Only fill attributes that are still missing
                if (!$tag->hasAttribute($key)) {
                    $tag->setAttribute($key, $captured);
                }
            }
        }
    }
}
/**
 * Set the attributes of a URL or IMG tag
 *
 * If capture 3 is set the values come from getInlineLinkAttributes(). Otherwise
 * the label is taken from capture 2 if set, else capture 1, and the values come
 * from getReferenceLinkAttributes(). Each value is stored under the name found at
 * the same key in $attrNames.
 *
 * @param  Tag      $tag       URL or IMG tag
 * @param  array    $m         Regexp captures
 * @param  string[] $attrNames List of attribute names
 * @return void
 */
protected function setLinkAttributes(Tag $tag, array $m, array $attrNames)
{
    if (isset($m[3])) {
        $values = $this->getInlineLinkAttributes($m);
    } elseif (isset($m[2])) {
        $values = $this->getReferenceLinkAttributes($m[2][0]);
    } else {
        $values = $this->getReferenceLinkAttributes($m[1][0]);
    }

    foreach ($values as $idx => $value) {
        $tag->setAttribute($attrNames[$idx], $value);
    }
}
/**
 * @testdox removeAttribute('foo') unsets attribute 'foo'
 */
public function testRemoveAttribute()
{
    $tag = new Tag(Tag::START_TAG, 'X', 0, 0);

    // Start with two attributes so the test shows only the named one is removed
    foreach (['foo' => 'bar', 'baz' => 'quux'] as $name => $value) {
        $tag->setAttribute($name, $value);
    }

    $tag->removeAttribute('foo');

    $expected = ['baz' => 'quux'];
    $this->assertSame($expected, $tag->getAttributes());
}
/**
 * Validate and normalize the attributes of a tag against its config
 *
 * Steps, in order: generate values for attributes that define a generator, drop
 * attributes that are not defined in the config, run each remaining attribute
 * through its filterChain, then apply default values to missing attributes.
 *
 * @param  Tag    $tag            Tag being checked
 * @param  array  $tagConfig      Tag's config
 * @param  array  $registeredVars Vars passed along to generators and filters
 * @param  Logger $logger         Logger told which attribute is being filtered
 * @return bool                   False if a required attribute is missing and has
 *                                no default value, true otherwise
 */
public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
{
    // A tag with no attribute definitions cannot carry any attribute
    if (empty($tagConfig['attributes'])) {
        $tag->setAttributes(array());

        return \true;
    }

    $definitions = $tagConfig['attributes'];

    // Pass 1: generated values
    foreach ($definitions as $attrName => $attrConfig) {
        if (!isset($attrConfig['generator'])) {
            continue;
        }

        $generatorVars = array(
            'attrName'       => $attrName,
            'logger'         => $logger,
            'registeredVars' => $registeredVars
        );
        $tag->setAttribute($attrName, self::executeFilter($attrConfig['generator'], $generatorVars));
    }

    // Pass 2: remove unknown attributes, filter the others
    foreach ($tag->getAttributes() as $attrName => $attrValue) {
        if (!isset($definitions[$attrName])) {
            $tag->removeAttribute($attrName);
            continue;
        }

        if (!isset($definitions[$attrName]['filterChain'])) {
            continue;
        }

        $logger->setAttribute($attrName);

        foreach ($definitions[$attrName]['filterChain'] as $filter) {
            $filterVars = array(
                'attrName'       => $attrName,
                'attrValue'      => $attrValue,
                'logger'         => $logger,
                'registeredVars' => $registeredVars
            );
            $attrValue = self::executeFilter($filter, $filterVars);

            // A filter returning false invalidates the attribute and ends the chain
            if ($attrValue === \false) {
                $tag->removeAttribute($attrName);
                break;
            }
        }

        if ($attrValue !== \false) {
            $tag->setAttribute($attrName, $attrValue);
        }

        $logger->unsetAttribute();
    }

    // Pass 3: default values and required attributes
    foreach ($definitions as $attrName => $attrConfig) {
        if ($tag->hasAttribute($attrName)) {
            continue;
        }

        if (isset($attrConfig['defaultValue'])) {
            $tag->setAttribute($attrName, $attrConfig['defaultValue']);
            continue;
        }

        if (!empty($attrConfig['required'])) {
            return \false;
        }
    }

    return \true;
}
/**
 * Filter the attributes of given tag
 *
 * Generators run first, then every attribute present on the tag is either removed
 * (if undefined in the config or rejected by a filter) or updated with its
 * filtered value. Finally, missing attributes receive their default value.
 *
 * @private
 *
 * @param  Tag    $tag            Tag being checked
 * @param  array  $tagConfig      Tag's config
 * @param  array  $registeredVars Array of registered vars for use in attribute filters
 * @param  Logger $logger         This parser's Logger instance
 * @return bool                   Whether the whole attribute set is valid
 */
public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
{
    // No attribute is defined for this tag: wipe whatever it carries
    if (empty($tagConfig['attributes'])) {
        $tag->setAttributes([]);

        return true;
    }

    $attributesConfig = $tagConfig['attributes'];

    // Let generators produce their values before anything gets filtered
    foreach ($attributesConfig as $name => $config) {
        if (isset($config['generator'])) {
            $value = self::executeFilter(
                $config['generator'],
                [
                    'attrName'       => $name,
                    'logger'         => $logger,
                    'registeredVars' => $registeredVars
                ]
            );
            $tag->setAttribute($name, $value);
        }
    }

    // Go through the attributes currently set on the tag
    foreach ($tag->getAttributes() as $name => $value) {
        $isDefined = isset($attributesConfig[$name]);

        // Undefined attributes are discarded
        if (!$isDefined) {
            $tag->removeAttribute($name);
            continue;
        }

        // Attributes without a filterChain are kept as they are
        $config = $attributesConfig[$name];
        if (!isset($config['filterChain'])) {
            continue;
        }

        // Tell the logger which attribute the filters are working on
        $logger->setAttribute($name);

        foreach ($config['filterChain'] as $filter) {
            $value = self::executeFilter(
                $filter,
                [
                    'attrName'       => $name,
                    'attrValue'      => $value,
                    'logger'         => $logger,
                    'registeredVars' => $registeredVars
                ]
            );

            // The first filter that returns false invalidates the attribute
            if ($value === false) {
                $tag->removeAttribute($name);
                break;
            }
        }

        // Store the filtered value unless the attribute was invalidated
        if ($value !== false) {
            $tag->setAttribute($name, $value);
        }

        // The logger no longer needs to track this attribute
        $logger->unsetAttribute();
    }

    // Deal with the attributes that are defined but absent from the tag
    foreach ($attributesConfig as $name => $config) {
        if ($tag->hasAttribute($name)) {
            continue;
        }

        if (isset($config['defaultValue'])) {
            // Fall back to the default value
            $tag->setAttribute($name, $config['defaultValue']);
        } elseif (!empty($config['required'])) {
            // Missing, required and without a default value: the set is invalid
            return false;
        }
    }

    return true;
}