/**
 * Parse a start tag and return its tag name and attributes as an array.
 *
 * The result is keyed by the names configured in $this->options['tag'] and
 * $this->options['attrs']. The attrs entry is only present when the parsed
 * tag has a non-empty attribute list.
 *
 * @param string $value raw tag text handed to Fl_Html_TagToken
 * @return array tag data; attribute map is name => value ("" for valueless attributes)
 */
public function getTagData($value)
{
    $mixed = $this->getInstance("Fl_Html_TagToken", $value)->run();
    $attrs = $mixed['attrs'];
    $data = array($this->options['tag'] => $mixed['tag']);
    if (!empty($attrs)) {
        // Start from an empty map so that an attribute list containing only
        // unsupported shapes (neither 1 nor 3 parts) yields array() instead
        // of reading an undefined variable.
        $attrsData = array();
        foreach ($attrs as $aItem) {
            $count = count($aItem);
            if ($count === 1) {
                // Attribute without a value, e.g. "disabled".
                $attrsData[$aItem[0]] = "";
            } elseif ($count === 3) {
                // Three parts: index 0 is the name, index 2 is the value.
                if ($this->options['attrs_remove_quote']) {
                    $valueDetail = Fl_Html_Static::getUnquoteText($aItem[2]);
                    $attrsData[$aItem[0]] = $valueDetail['text'];
                } else {
                    $attrsData[$aItem[0]] = $aItem[2];
                }
            }
        }
        $data[$this->options['attrs']] = $attrsData;
    }
    return $data;
}
/**
 * Run every HTML token through checkIt() and forward it to addOutput().
 *
 * Start tags are rebuilt attribute by attribute: each attribute value is
 * checked in 'event', 'url' or 'html' mode depending on its name. Script
 * tokens are checked in 'js' mode unless the script tag is reported as a
 * template, in which case 'html' mode is used. All other tokens are checked
 * in 'html' mode.
 *
 * @return void
 */
public function checkHtmlTokens()
{
    foreach ($this->getTokens('html') as $item) {
        if ($item['type'] === FL_TOKEN_HTML_TAG_START) {
            $parsed = $this->getInstance("Fl_Html_TagToken", $item['value'])->run();
            $tagName = strtolower($parsed['tag']);
            $rebuilt = '<' . $parsed['tag'] . FL_SPACE;
            foreach ($parsed['attrs'] as $attrItem) {
                $parts = count($attrItem);
                $attr = strtolower($attrItem[0]);
                if ($parts == 1) {
                    // Valueless attribute: the name itself is what gets checked.
                    $probe = array_merge($item, array('value' => $attrItem[0]));
                    $rebuilt .= $this->checkIt($probe, 'html') . FL_SPACE;
                    continue;
                }
                if ($parts !== 3) {
                    continue;
                }
                // Pick the check mode from the attribute name.
                if ($attr && strpos($attr, 'on') === 0) {
                    $type = 'event';
                } elseif ($attr === 'src' || $attr === 'href' || ($tagName === 'form' && $attr === 'action')) {
                    $type = 'url';
                } else {
                    $type = 'html';
                }
                $probe = array_merge($item, array('value' => $attrItem[2]));
                $checked = $this->checkIt($probe, $type);
                $rebuilt .= $attrItem[0] . '=' . $checked . FL_SPACE;
            }
            $item['value'] = trim($rebuilt) . ">";
        } else {
            $mode = 'html';
            if ($item['type'] === FL_TOKEN_HTML_SCRIPT_TAG) {
                // Need to determine whether this script block is a front-end template.
                $detail = Fl_Html_Static::splitSpecialValue($item['value'], 'script', $this);
                $tagInfo = Fl_Html_Static::getScriptTagInfo($detail['tag_start'], $this);
                // Front-end templates are checked as HTML, everything else as JS.
                if (!$tagInfo['tpl']) {
                    $mode = 'js';
                }
            }
            $item['value'] = $this->checkIt($item, $mode);
        }
        $this->addOutput($item);
    }
}
/**
 * Beautify a list of AST nodes into an indented HTML string.
 *
 * Recurses into each node's children, passing the node type as $parentType
 * so that the content of style and script tags is run through
 * beautify_special() as 'css' / 'js'. Uses and restores $this->indent and
 * updates $this->preToken as it goes.
 *
 * @param array  $ast        list of nodes; each node is read via 'type', 'value', 'children', 'tag' and optionally 'end'
 * @param string $parentType type of the node that owns $ast, '' at the top level
 * @return string beautified markup
 */
public function beautifyAst($ast, $parentType = '')
{
    // Node types that get an explicit closing tag appended.
    $types = array(
        FL_TOKEN_HTML_TAG => 1,
        FL_TOKEN_HTML_SCRIPT_TAG => 1,
        FL_TOKEN_HTML_PRE_TAG => 1,
        FL_TOKEN_HTML_STYLE_TAG => 1,
        FL_TOKEN_HTML_TEXTAREA_TAG => 1,
    );
    $result = '';
    $first = true;
    foreach ($ast as $item) {
        if (!$first) {
            // Exactly one newline between siblings.
            $result = rtrim($result, FL_NEWLINE) . FL_NEWLINE;
        }
        $comment = $this->beautifyComment($item['value']);
        if ($comment['remove']) {
            $result = rtrim($result, FL_NEWLINE);
        }
        $result .= $comment['value'];
        if ($item['type'] === FL_TOKEN_HTML_TAG_END) {
            continue;
        }
        if ($first) {
            $first = false;
        }
        $indent = $newline = false;
        if ($item['type'] === FL_TOKEN_HTML_TAG) {
            $count = count($item['children']);
            if ($count > 1) {
                $indent = true;
                $newline = true;
            } elseif ($count === 1) {
                // A single text child stays on the same line as its tag.
                $c = $item['children'][$count - 1];
                if ($c['type'] !== FL_TOKEN_HTML_TEXT) {
                    $indent = true;
                    $newline = true;
                }
            }
        } else {
            if (count($ast) > 1) {
                $newline = true;
            }
            if ($item['type'] === FL_TOKEN_HTML_DOCTYPE || $item['type'] === FL_TOKEN_HTML_SINGLE_TAG) {
                $newline = true;
            }
            if ($item['type'] === FL_TOKEN_HTML_STYLE_TAG || $item['type'] === FL_TOKEN_HTML_SCRIPT_TAG) {
                $newline = true;
                $indent = true;
            }
        }
        if ($item['type'] !== FL_TOKEN_HTML_TEXT) {
            $result .= $this->getIndentString();
        } else {
            // Lone text nodes are not indented.
            if (count($ast) > 1) {
                $result .= $this->getIndentString();
            }
        }
        if ($parentType === FL_TOKEN_HTML_STYLE_TAG) {
            Fl::loadClass("Fl_Css_Static");
            $value = Fl_Css_Static::getStyleDetail($item['value']['value']);
            if ($value['prefix']) {
                $result .= $value['prefix'] . FL_NEWLINE;
            }
            $result .= $this->beautify_special($value['value'], 'css');
            if ($value['suffix']) {
                $result .= FL_NEWLINE . $value['suffix'] . FL_NEWLINE;
            }
        } elseif ($parentType === FL_TOKEN_HTML_SCRIPT_TAG) {
            $result .= $this->beautify_special($item['value']['value'], 'js');
        } else {
            $result .= $item['value']['value'];
        }
        if ($newline) {
            $result .= FL_NEWLINE;
        }
        if ($indent) {
            $this->indent++;
        }
        if (count($item['children'])) {
            $type = $item['type'];
            // Decided per item. The flag must not leak from a previous script
            // sibling, otherwise a later tag could lose its children and
            // decrement the indent level without having incremented it.
            $externalScript = false;
            if ($type === FL_TOKEN_HTML_SCRIPT_TAG) {
                $tagInfo = Fl_Html_Static::getScriptTagInfo($item['value']['value'], $this);
                $externalScript = !empty($tagInfo['external']);
                // It is a script tag, but its content is not necessarily JS.
                if (!$tagInfo['script'] || $tagInfo['external']) {
                    $type = '';
                }
            }
            $children = $this->beautifyAst($item['children'], $type);
            if ($externalScript && !strlen(trim($children))) {
                // External script with blank content: keep the closing tag on
                // the same line. $indent = 2 is truthy but not === true, so
                // the level is restored below without emitting indentation.
                $result = rtrim($result, FL_NEWLINE);
                $newline = false;
                $indent = 2;
            } else {
                $result .= $children;
            }
        }
        $this->preToken = $item['value'];
        if (isset($types[$item['type']])) {
            if ($newline) {
                $result = rtrim($result, FL_NEWLINE);
                $result .= FL_NEWLINE;
            }
            if ($indent) {
                $this->indent--;
                if ($indent === true) {
                    $result .= $this->getIndentString();
                }
            }
            if (!empty($item['end'])) {
                $this->preToken = $item['end'];
                $comment = $this->beautifyComment($item['end']);
                $result .= rtrim($comment['value'], FL_NEWLINE);
            }
            $result .= '</' . $item['tag'] . '>';
        }
    }
    return $result;
}
/**
 * Decide whether a text node can be removed.
 *
 * A text node is removable only when the 'removeBlockBlank' option is on,
 * the text is pure whitespace, and either the previous or the next token is
 * a block tag according to Fl_Html_Static::isBlockTag() with the configured
 * 'blockBlankList'.
 *
 * @param string $text      text node content
 * @param array  $preToken  token before the text; its 'lowerTag' entry is read when present
 * @param array  $nextToken token after the text; its 'lowerTag' entry is read when present
 * @return bool true when the text may be dropped
 */
public function textCanRemove($text, $preToken = array(), $nextToken = array())
{
    if (!$this->options['removeBlockBlank'] || !preg_match('/^\\s+$/', $text)) {
        return false;
    }
    // The defaults are empty arrays, so 'lowerTag' may be absent.
    $preTag = isset($preToken['lowerTag']) ? $preToken['lowerTag'] : '';
    $nextTag = isset($nextToken['lowerTag']) ? $nextToken['lowerTag'] : '';
    if ($preTag && Fl_Html_Static::isBlockTag($preTag, $this->options['blockBlankList'])) {
        return true;
    }
    if ($nextTag && Fl_Html_Static::isBlockTag($nextTag, $this->options['blockBlankList'])) {
        return true;
    }
    return false;
}
/**
 * Text-node boundary check used while collecting a text token.
 *
 * Returns false when the text node must end at the current position: either
 * the template left delimiter starts at the next position, or the next
 * character opens a tag. No other return statement exists in this method.
 *
 * @param string $char current character (not read by this method)
 * @return false|null false at a text boundary, otherwise no explicit value
 */
public function getTextToken($char)
{
    // Look ahead with isset() so reading past the end of the input yields ''
    // without raising an "Uninitialized string offset" diagnostic.
    $nextPos = $this->pos + 1;
    $next = isset($this->text[$nextPos]) ? $this->text[$nextPos] : '';
    $renext = isset($this->text[$nextPos + 1]) ? $this->text[$nextPos + 1] : '';
    /*
     * Return when the next token is a template token.
     */
    if ($this->hasTplToken && $this->ld === substr($this->text, $nextPos, strlen($this->ld))) {
        return false;
    }
    /*
     * The next character is "<" and the one after it is not "<".
     * Input such as <div>welefen<<</div> has to be tolerated.
     * In Chrome, <div>welefen< is parsed as <div>welefen and the trailing
     * "<" is ignored.
     */
    if ($next === Fl_Html_Static::LEFT
        && $renext !== Fl_Html_Static::LEFT
        && Fl_Html_Static::isTagFirstChar($renext)
    ) {
        return false;
    }
}
/**
 * Text-node boundary check used while collecting a text token.
 *
 * Returns false when there is no next character, when the template left
 * delimiter starts at the next position, or when the next character opens a
 * tag. No other return statement exists in this method.
 *
 * @param string $char current character (not read by this method)
 * @return false|null false at a text boundary, otherwise no explicit value
 */
public function getTextToken($char)
{
    $following = $this->getPosChar($this->pos + 1);
    $afterFollowing = $this->getPosChar($this->pos + 2);
    if ($following === false) {
        return false;
    }
    /*
     * Stop when the next token is a template-syntax token.
     * A find-style search here has a serious performance cost, so the
     * delimiter is compared directly against a substr() slice.
     */
    if ($this->ld) {
        $candidate = substr($this->text, $this->pos + 1, strlen($this->ld));
        if ($this->ld === $candidate) {
            return false;
        }
    }
    /*
     * The next character is "<" and the one after it is not "<".
     * Input such as <div>welefen<<</div> has to be tolerated.
     * In Chrome, <div>welefen< is parsed as <div>welefen and the trailing
     * "<" is ignored.
     */
    if ($following !== Fl_Html_Static::LEFT) {
        return;
    }
    if ($afterFollowing === Fl_Html_Static::LEFT) {
        return;
    }
    if (Fl_Html_Static::isTagFirstChar($afterFollowing)) {
        return false;
    }
}
/**
 * Compress a closing tag.
 *
 * Returns an empty string when optional end tags are being removed and this
 * tag is reported as optional by Fl_Html_Static::isOptionalEndTag();
 * otherwise returns the closing tag, lower-cased when 'tag_to_lower' is set.
 *
 * @param array $token end-tag token; 'lowerTag' and 'tag' are read
 * @return string closing tag markup or ''
 */
public function compressEndTag($token)
{
    $dropOptional = $this->options['remove_optional_end_tag']
        && Fl_Html_Static::isOptionalEndTag(
            $token['lowerTag'],
            $this->options['remove_optional_end_tag_list']
        );
    if ($dropOptional) {
        return '';
    }
    if ($this->options['tag_to_lower']) {
        $name = $token['lowerTag'];
    } else {
        $name = $token['tag'];
    }
    return '</' . $name . '>';
}
/**
 * Filter a start tag.
 *
 * Parses the tag (unless the token carries no 'value', in which case the
 * token itself is used as the parsed result), applies the tag whitelist,
 * then filters and rewrites its attributes according to $this->options.
 *
 * @param array $token        start-tag token, or an already parsed tag with 'tag' and 'attrs'
 * @param bool  $notCheckECss when true, skip the external-CSS handling
 * @return string|false|mixed rebuilt tag markup, false when the tag is not whitelisted,
 *                            or whatever filterExternalCss() returns for external CSS
 */
public function filterTag($token, $notCheckECss = false)
{
    $raw = $token['value'];
    if (empty($raw)) {
        $parsed = $token;
    } else {
        $parsed = $this->getInstance('Fl_Html_TagToken', $raw)->run();
    }
    $tag = strtolower($parsed['tag']);
    // External stylesheet link.
    if (!$notCheckECss && $this->isExternalCss($parsed)) {
        return $this->filterExternalCss($parsed);
    }
    if ($this->options['use_blank_tag_filter'] && !in_array($tag, $this->blankTagList)) {
        return false;
    }
    $kept = array();
    foreach ($parsed['attrs'] as $attr) {
        $name = strtolower($attr[0]);
        // Drop event handlers.
        if ($this->options['remove_tag_event'] && strpos($name, 'on') === 0) {
            continue;
        }
        // Attribute whitelist.
        if ($this->options['use_blank_tag_property_filter'] && !in_array($name, $this->blankTagPropertyList)) {
            // Per-tag special attributes may still be allowed when their value matches.
            if ($this->options['use_special_tag_filter'] && isset($this->allowSpecialTagProperty[$tag])) {
                $allowed = $this->allowSpecialTagProperty[$tag];
                if (isset($allowed[$name])) {
                    $rule = $allowed[$name];
                    $unquoted = Fl_Html_Static::getUnquoteText($attr[2]);
                    if (substr($rule, 0, 1) === '/') {
                        // A rule starting with "/" is used as a regular expression.
                        $accepted = preg_match($rule, $unquoted['text']);
                    } else {
                        $accepted = $rule === $unquoted['text'];
                    }
                    if ($accepted) {
                        $kept[] = $attr;
                    }
                }
            }
            continue;
        }
        // Repair and filter link targets (a/href) and image sources (img/src).
        $isUrlAttr = ($tag == 'a' && $this->options['filter_a_href_value'] && $name == 'href')
            || ($tag == 'img' && $this->options['filter_img_src_value'] && $name == 'src');
        if ($isUrlAttr) {
            if (count($attr) != 3 || $attr[1] != '=') {
                continue;
            }
            $unquoted = Fl_Html_Static::getUnquoteText($attr[2]);
            $url = Fl_Static::getFixedUrl($unquoted['text'], $this->url);
            if ($this->options['url_max_length']) {
                $url = substr($url, 0, $this->options['url_max_length']);
            }
            $attr[2] = $unquoted['quote'] . $url . $unquoted['quote'];
        }
        // Inline style value.
        if ($this->options['filter_tag_style_value'] && $name == 'style') {
            if (count($attr) != 3 || $attr[1] != '=') {
                continue;
            }
            $unquoted = Fl_Html_Static::getUnquoteText($attr[2]);
            // Wrap the declarations in a dummy rule so the CSS filter can run on them.
            $css = 'a{' . $unquoted['text'] . '}';
            $cssFilter = $this->getInstance("Fl_Css_Filter", $css);
            $cssFilter->url = $this->url;
            $cssFilter->getResourceContentFn = $this->getResourceContentFn;
            $css = $cssFilter->run($this->options);
            // Strip the leading "a{" and the trailing "}" again.
            $attr[2] = $unquoted['quote'] . substr($css, 2, strlen($css) - 3) . $unquoted['quote'];
        }
        $kept[] = $attr;
    }
    $pieces = array();
    foreach ($kept as $attr) {
        $pieces[] = implode("", $attr);
    }
    if (!$pieces) {
        return '<' . $tag . '>';
    }
    return '<' . $tag . ' ' . implode(" ", $pieces) . ">";
}
/**
 * Get the unquoted value of a named attribute.
 *
 * Only attribute entries with exactly three parts are considered. The
 * entry's name is lower-cased before it is compared with $name; $name itself
 * is compared as given.
 *
 * @param array  $attrs list of attribute entries
 * @param string $name  attribute name to look for
 * @return string|false 'text' of the first matching attribute, false when none matches
 */
public static function getAttrValue($attrs, $name = '')
{
    foreach ($attrs as $attr) {
        if (count($attr) !== 3) {
            continue;
        }
        if (strtolower($attr[0]) !== $name) {
            continue;
        }
        $unquoted = Fl_Html_Static::getUnquoteText($attr[2]);
        return $unquoted['text'];
    }
    return false;
}
/**
 * Build the AST node for a special tag token.
 *
 * Splits the current token's value with Fl_Html_Static::splitSpecialValue(),
 * replaces the current token's value with the 'tag_start' part, and returns
 * a node whose single child is a text node holding the 'content' part.
 *
 * @return array node with 'type', 'tag', 'value' and 'children'
 */
public function specialStatement()
{
    $rawValue = $this->currentToken['value'];
    $tag = strtolower(Fl_Html_Static::getTagName($rawValue, $this));
    $special = Fl_Html_Static::splitSpecialValue($rawValue, $tag, $this);
    $this->currentToken['value'] = $special['tag_start'];

    $nodeType = $this->currentToken['type'];
    $nodeValue = $this->getValue($this->currentToken);
    $contentToken = array_merge($this->currentToken, array("value" => $special["content"]));
    $textChild = array(
        "type" => FL_TOKEN_HTML_TEXT,
        "value" => $this->getValue($contentToken),
    );

    return array(
        "type" => $nodeType,
        "tag" => $tag,
        "value" => $nodeValue,
        "children" => array($textChild),
    );
}