Пример #1
0
 /**
  * Pick the item's link from its "link" node, falling back to "guid".
  *
  * For each candidate the query string (everything from the last "?") is
  * stripped, unless the value matches one of the entries in the keep-list
  * below. A candidate is rejected when it is empty or contains the
  * substring "rss" anywhere, including at the very start.
  *
  * NOTE(review): xpath() and inStr() are defined outside this snippet; this
  * code treats the xpath() result as a string and presumably inStr() tells
  * whether the value contains any of the listed fragments -- confirm.
  *
  * @return string|null the first acceptable candidate, or null when none fits
  */
 public function getLink()
 {
     $candidates = array('link', 'guid');
     // Sites for which the query string is kept rather than stripped.
     $keepQuery = array('headlines.yahoo.co.jp', 'youtube.com', 'groups.google.com/forum', 'facebook.com');
     foreach ($candidates as $node) {
         $url = xpath($this->content, $node);
         // A "?" at offset 0 is deliberately left alone (as before): stripping
         // there would leave an empty value.
         $queryPos = strrpos($url, '?');
         if ($queryPos && !inStr($url, $keepQuery)) {
             $url = substr($url, 0, $queryPos);
         }
         // strpos() returns 0 for a match at the start of the string, so the
         // result must be compared against false instead of being negated.
         if ($url && strpos($url, 'rss') === false) {
             return $url;
         }
     }
     return null;
 }
Пример #2
0
    /**
     * Exercises the xpath() helper: plain queries, calls without a query,
     * callback-driven edits, the "remove" shortcut and HTML-ish input.
     */
    public function testXpath()
    {
        // Patterns that are asserted more than once.
        $rootWithAB = "~^\\s*<root>\\s*<a> 1 </a>\\s*<b>2</b>\\s*</root>\\s*\$~";
        $rootWithAC = "~^\\s*<root>\\s*<a>1</a>\\s*<c>3</c>\\s*</root>\\s*\$~";

        // With a query the result is a list; without one it is a single string.
        $source = "<root> <a> 1 </a> <b>2</b> </root>";
        list($first) = xpath($source, '/*');
        $this->assertRegexp($rootWithAB, $first);
        $whole = xpath($source);
        $this->assertRegexp($rootWithAB, $whole);
        list($first) = xpath($source, '//a/text()');
        $this->assertEquals(' 1 ', $first);

        // Whitespace-only text content survives the round trip.
        $source = "<root><test> \n </test></root>";
        $whole = xpath($source);
        $this->assertRegexp("~^\\s*<root>\\s*<test> \n </test>\\s*</root>\\s*\$~", $whole);

        // class(...) inside a predicate selects by one of several class names.
        $source = "<root> <a class='cl1 cl2 cl3'> 1 </a> <b>2</b> </root>";
        list($first) = xpath($source, '//*[class(cl2)]');
        $this->assertRegexp("~^\\s*<a\\b[^>]*>.*?</a>\\s*\$~", $first);

        // A callback may delete the matched node; the edited document comes back.
        $source = "<root> <a>1</a> <b>2</b> <c>3</c> </root>";
        $edited = xpath($source, '//b', function ($node) {
            $node->parentNode->removeChild($node);
        });
        $this->assertRegexp($rootWithAC, $edited);

        // A callback may query the node it was given and rewrite its value.
        $source = "<root><a><one>1</one><two>2</two></a></root>";
        $edited = xpath($source, '//a', function ($node) {
            $pieces = xpath($node, '//text()');
            $node->nodeValue = implode(' ', $pieces);
        });
        $this->assertRegexp("~^\\s*<root>\\s*<a>1 2</a>\\s*</root>\\s*\$~", $edited);

        // Unwrap: lift the children in front of the node, then drop the node.
        $source = "<root><a><one>1</one><two>2</two></a></root>";
        $edited = xpath($source, '//a', function ($node) {
            if ($node->hasChildNodes()) {
                // Snapshot first: the live child list shrinks while nodes move.
                $children = array();
                foreach ($node->childNodes as $child) {
                    $children[] = $child;
                }
                foreach ($children as $child) {
                    $node->parentNode->insertBefore($child, $node);
                }
            }
            $node->parentNode->removeChild($node);
        });
        $this->assertRegexp("~^\\s*<root>\\s*<one>1</one>\\s*<two>2</two>\\s*</root>\\s*\$~", $edited);

        // The string "remove" is accepted in place of a callback.
        $source = "<root> <a>1</a> <b>2</b> <c>3</c> </root>";
        $edited = xpath($source, '//b', "remove");
        $this->assertRegexp($rootWithAC, $edited);

        // The nested match disappears with its removed ancestor, so the
        // callback is expected to run exactly once.
        $source = "<root><a remove='1'><b>b</b><c remove='1'></c></a></root>";
        $calls = 0;
        $edited = xpath($source, '//*[@remove="1"]', function ($node) use(&$calls) {
            $calls++;
            $node->parentNode->removeChild($node);
        });
        $this->assertRegexp("~^\\s*<root/>\\s*\$~", $edited);
        $this->assertEquals(1, $calls);

        // An unclosed <br> still yields both text nodes of the paragraph.
        $source = "<root><p>a<br>b</p></root>";
        $pieces = xpath($source, '//p/text()');
        $this->assertEquals("a", $pieces[0]);
        $this->assertEquals("b", $pieces[1]);

        // A namespaced XHTML document can be queried with plain element names.
        $source = <<<END
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>title</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
</head>
<body>
content
</body>
</html>
END;
        list($bodyText) = xpath($source, '/html/body/text()');
        $this->assertEquals("content", trim($bodyText));
    }
Пример #3
0
Файл: core.php Проект: ejz/core
/**
 * Rule-driven lexer that turns the front of $string into a markup string.
 *
 * The input is consumed from the left, one rule match or one character at a
 * time. Plain characters are collected into <string>...</string> chunks,
 * whitespace becomes <space/> and line breaks become <nl/>; tag rules emit
 * whatever replacement they define and may open a nested lexer run.
 *
 * Recognised $rules keys (all optional):
 *  - escapeChar      string that starts an escape
 *  - escapeSequence  string of characters (each maps to itself) or a
 *                    char => replacement array of valid escapes
 *  - ignoreBegin / ignoreEnd  delimiters of a region that is dropped
 *  - endDomain       string or callable(&$string) that stops this run
 *  - wrapper         array; index 0 is emitted by the parent when the domain
 *                    opens, index 1 by this run when a string endDomain hits
 *  - tags            list of rules: array('beginDomain' => ..., ...) for a
 *                    nested domain, array($needle, $replacement) for a plain
 *                    substitution, or a callable(&$string)
 *  - autoFix         truthy to repair unbalanced tags in the output
 *  - modify          array with 'scheme', 'map' and optional 'debug'
 *  - stringify       map of node name => callable
 *
 * NOTE(review): esc(), is_assoc(), xpath() and _warn() are defined outside
 * this snippet; esc() presumably escapes a character for the markup and
 * is_assoc() presumably detects associative arrays -- confirm.
 *
 * @param string $string input, passed by reference; on return it holds
 *                       whatever was left unconsumed
 * @param array  $rules  see above
 * @return array 'output' => resulting string, plus any keys merged in from
 *               associative results of rule callables / nested runs
 */
function lexer(&$string, $rules = array())
{
    $escape = false;
    $ignore = false;
    $return = array('output' => array());
    // Runs the "modify" scheme over the output. The scheme is a list of step
    // names (looked up in $map) and parenthesised sub-schemes (handled by
    // recursion); a "*" right after a step repeats it until the output stops
    // changing. $debug, when callable, is told about every named step that
    // changed something.
    $modify = function ($output, $scheme, $map, $debug) use(&$modify) {
        while ($scheme) {
            if (preg_match('~^[\\w0-9]+~', $scheme, $match) or preg_match('~^\\((((?>[^\\(\\)]+)|(?R))*)\\)~', $scheme, $match)) {
                $scheme = substr($scheme, strlen($match[0]));
                // The step may be the last thing in the scheme, leaving an
                // empty string; guard the offset read to avoid a warning.
                $while = isset($scheme[0]) && $scheme[0] === '*';
                $scheme = ltrim($scheme, '*,');
                do {
                    $old = $output;
                    if (isset($match[1])) {
                        // Parenthesised group: $match[1] is the inner scheme.
                        if (is_callable($_ = $modify)) {
                            $output = $_($output, $match[1], $map, $debug);
                        }
                    } else {
                        if (is_callable($_ = $map[$match[0]])) {
                            $output = $_($output);
                        }
                        if (is_callable($debug) and $old != $output) {
                            $debug($match[0], $old, $output);
                        }
                    }
                } while ($while and $old != $output);
            } else {
                // Nothing recognisable at the front: stop processing.
                $scheme = '';
            }
        }
        return $output;
    };
    // Converts the markup back to a string: every top-level element is handed
    // to the callable registered under its node name in $map, together with
    // its already-stringified body and some context (attributes, ancestor
    // names, names of preceding and following siblings).
    $stringify = function ($output, $map, $extra = array()) use(&$stringify) {
        $parents = isset($extra['parents']) ? $extra['parents'] : array();
        $next = isset($extra['next']) ? $extra['next'] : null;
        $prev = isset($extra['prev']) ? $extra['prev'] : null;
        $collector = array();
        xpath($output, '/*', function ($tag) use(&$collector, $stringify, $map, $parents, $prev, $next) {
            $body = array();
            // Children see this tag as their innermost parent.
            $parents[] = $tag->nodeName;
            $length = $tag->childNodes->length ?: 0;
            for ($i = 0; $i < $length; $i++) {
                if ($tag->childNodes->item($i)->nodeType === XML_TEXT_NODE) {
                    $body[] = $tag->childNodes->item($i)->nodeValue;
                } else {
                    // Preceding sibling names, nearest first.
                    for ($j = 0, $p = array(); $j < $i; $j++) {
                        $p[] = $tag->childNodes->item($j)->nodeName;
                    }
                    $p = array_reverse($p);
                    // Following sibling names, nearest first.
                    for ($j = $i + 1, $n = array(); $j < $length; $j++) {
                        $n[] = $tag->childNodes->item($j)->nodeName;
                    }
                    $body[] = $_ = call_user_func($stringify, xpath($tag->childNodes->item($i)), $map, array('parents' => $parents, 'prev' => $p, 'next' => $n));
                }
            }
            // The tag's own handler gets the ancestor list without the tag itself.
            array_pop($parents);
            $body = implode('', $body);
            $attr = array();
            if ($tag->hasAttributes()) {
                foreach ($tag->attributes as $_) {
                    $attr[$_->nodeName] = $_->nodeValue;
                }
            }
            if (isset($map[$tag->nodeName]) and is_callable($map[$tag->nodeName])) {
                $collector[] = call_user_func($map[$tag->nodeName], $body, array('attr' => $attr, 'parents' => $parents, 'prev' => $prev, 'next' => $next));
            } elseif (!isset($map[$tag->nodeName])) {
                _warn(__FUNCTION__ . ": NO {$tag->nodeName} IN MAP!");
            } elseif (!is_callable($map[$tag->nodeName])) {
                _warn(__FUNCTION__ . ": {$tag->nodeName} IN MAP IS NOT CALLABLE!");
            }
        });
        return implode('', $collector);
    };
    // Balances the tags of the output: stray closing tags are dropped, a
    // closing tag that skips over still-open tags closes those first, and
    // whatever is still open at the end is closed.
    $autoFix = function ($output) {
        $reg = '~<(?P<close>/?)(?P<tag>\\w+)\\b[^>]*>~';
        // Stack of currently open tag names, outermost first.
        $order = array();
        $output = preg_replace_callback($reg, function ($match) use(&$order) {
            // Self-closing tags never affect the stack.
            if (substr($match[0], -2) === '/>') {
                return $match[0];
            }
            $isClose = (isset($match['close']) and $match['close']);
            if (!$isClose) {
                $order[] = $match['tag'];
                return $match[0];
            }
            // Find the innermost open tag this closing tag can pair with.
            $index = false;
            for ($i = count($order) - 1; $i >= 0; $i--) {
                if ($order[$i] === $match['tag']) {
                    $index = $i;
                    break;
                }
            }
            if ($index === false) {
                // Nothing to close: drop the stray closing tag.
                return '';
            }
            // Close everything opened after it, innermost first, so that the
            // result is properly nested.
            $return = "";
            for ($i = count($order) - 1; $i > $index; $i--) {
                $return .= "</{$order[$i]}>";
            }
            $order = array_slice($order, 0, $index);
            return $return . $match[0];
        }, $output);
        foreach (array_reverse($order) as $tag) {
            $output .= "</{$tag}>";
        }
        return $output;
    };
    // Appends one literal character to the output. It is merged into the
    // trailing <string> chunk when there is one; a run of <space/> items
    // sitting between a <string> chunk and the new character is folded into
    // that chunk as plain spaces; otherwise a new chunk is started.
    $pushChar = function ($char) use(&$return) {
        $char = esc($char);
        $space = '<space/>';
        $begin = '<string>';
        $bl = mb_strlen($begin);
        $end = '</string>';
        $el = mb_strlen($end);
        $count = count($return['output']);
        if ($count > 0 and $part =& $return['output'][$count - 1] and mb_substr($part, 0, $bl) === $begin and mb_substr($part, -$el) === $end) {
            $part = mb_substr($part, 0, -$el) . $char . $end;
        } elseif ($count > 1) {
            // Walk back over trailing <space/> items; $i ends on the first
            // non-space item (or -1).
            $triggerSpace = false;
            for ($i = $count - 1; $i >= 0; $i--) {
                if ($return['output'][$i] === $space) {
                    $triggerSpace = true;
                } else {
                    break;
                }
            }
            if ($triggerSpace and $i >= 0 and mb_substr($return['output'][$i], 0, $bl) === $begin and mb_substr($return['output'][$i], -$el) === $end) {
                $spaces = str_repeat(' ', $count - $i - 1);
                $return['output'][$i] = mb_substr($return['output'][$i], 0, -$el) . $spaces . $char . $end;
                $return['output'] = array_slice($return['output'], 0, $i + 1);
            } else {
                $return['output'][] = "{$begin}{$char}{$end}";
            }
        } else {
            $return['output'][] = "{$begin}{$char}{$end}";
        }
    };
    // State of the "unknown escape" handling:
    //   0 - normal;
    //   1 - an escape char followed by a non-escapable char was put back on
    //       the input and escape detection is off, so that tag rules get a
    //       chance to match at the escape char;
    //   2 - nothing matched; escape detection is on again and the escape char
    //       will be emitted literally on the next pass.
    $cloakEscape = 0;
    // Missing rule keys simply read as null (an empty list for 'tags').
    $escapeChar = isset($rules['escapeChar']) ? $rules['escapeChar'] : null;
    $escapeSequence = isset($rules['escapeSequence']) ? $rules['escapeSequence'] : null;
    if (is_string($escapeSequence)) {
        // "abc" is shorthand for array('a' => 'a', 'b' => 'b', 'c' => 'c').
        $_ = array();
        for ($i = 0; $i < strlen($escapeSequence); $i++) {
            $_[$escapeSequence[$i]] = $escapeSequence[$i];
        }
        $escapeSequence = $_;
    }
    $ignoreBegin = isset($rules['ignoreBegin']) ? $rules['ignoreBegin'] : null;
    $ignoreEnd = isset($rules['ignoreEnd']) ? $rules['ignoreEnd'] : null;
    $endDomain = isset($rules['endDomain']) ? $rules['endDomain'] : null;
    $wrapper = isset($rules['wrapper']) ? $rules['wrapper'] : null;
    $tags = isset($rules['tags']) ? (array) $rules['tags'] : array();
    $string = str_replace("\r\n", "\n", $string);
    while ($escape or mb_strlen($string) > 0) {
        // Inside an ignored region: look for its end, otherwise drop a char.
        if ($ignoreEnd and $ignore and mb_strpos($string, $ignoreEnd) === 0) {
            $ignore = false;
            $string = mb_substr($string, mb_strlen($ignoreEnd));
            continue;
        }
        if ($ignore) {
            $string = mb_substr($string, 1);
            continue;
        }
        // Escape char at the very end of the input is emitted literally.
        if ($escape and !$string) {
            $escape = false;
            $pushChar($escapeChar);
            continue;
        }
        if ($escape) {
            $escape = false;
            $_ = mb_substr($string, 0, 1);
            $string = mb_substr($string, 1);
            if ($escapeSequence and array_key_exists($_, $escapeSequence)) {
                $pushChar($escapeSequence[$_]);
            } elseif ($cloakEscape === 0) {
                // Not a known escape: put both characters back and let the
                // tag rules look at them first.
                $cloakEscape = 1;
                $string = $escapeChar . $_ . $string;
            } else {
                // Second pass over the same spot: the escape char is literal.
                $cloakEscape = 0;
                $pushChar($escapeChar);
                $string = $_ . $string;
            }
            continue;
        }
        if ($escapeChar and mb_strpos($string, $escapeChar) === 0 and in_array($cloakEscape, array(0, 2))) {
            $escape = true;
            $string = mb_substr($string, mb_strlen($escapeChar));
            continue;
        }
        if ($ignoreBegin and mb_strpos($string, $ignoreBegin) === 0) {
            $ignore = true;
            $string = mb_substr($string, mb_strlen($ignoreBegin));
            continue;
        }
        // A callable endDomain decides by itself (and may consume input); any
        // truthy result ends this run.
        if ($endDomain and is_callable($endDomain) and $result = call_user_func_array($endDomain, array(&$string))) {
            if (is_assoc($result)) {
                $return = array_merge_recursive($return, $result);
            } elseif (is_string($result)) {
                $return['output'][] = $result;
            }
            break;
        }
        if ($endDomain and is_string($endDomain) and mb_strpos($string, $endDomain) === 0) {
            $string = mb_substr($string, mb_strlen($endDomain));
            if (isset($wrapper[1])) {
                $return['output'][] = $wrapper[1];
            }
            break;
        }
        /* SEARCH TAGS - BEGIN */
        foreach ($tags as $tag) {
            if (is_array($tag) and isset($tag['beginDomain']) and ($callable = is_callable($tag['beginDomain']) and $result = call_user_func_array($tag['beginDomain'], array(&$string)) or is_string($tag['beginDomain']) and mb_strpos($string, $tag['beginDomain']) === 0)) {
                // Nested domain: the rest of the input is lexed with the
                // tag's own rule set until its endDomain is reached.
                $cloakEscape = 0;
                if (isset($tag['tags']) and $tag['tags'] === 'inherit') {
                    $tag['tags'] = $rules['tags'];
                }
                if (!$callable) {
                    $string = mb_substr($string, mb_strlen($tag['beginDomain']));
                }
                if (!$callable and isset($tag['wrapper'][0])) {
                    $return['output'][] = $tag['wrapper'][0];
                }
                $value = lexer($string, $tag);
                if ($callable) {
                    if (is_assoc($result)) {
                        $return = array_merge_recursive($return, $result);
                    } elseif (is_string($result)) {
                        $return['output'][] = $result;
                    }
                }
                if (is_assoc($value)) {
                    $return = array_merge_recursive($return, $value);
                } else {
                    $return['output'][] = $value;
                }
                continue 2;
            } elseif (is_array($tag) and count($tag) === 2 and isset($tag[0]) and mb_strpos($string, $tag[0]) === 0) {
                // Plain substitution: needle at index 0, replacement at index 1.
                $cloakEscape = 0;
                $string = mb_substr($string, mb_strlen($tag[0]));
                $return['output'][] = $tag[1];
                continue 2;
            } elseif (is_callable($tag) and $result = call_user_func_array($tag, array(&$string))) {
                $cloakEscape = 0;
                if (is_assoc($result)) {
                    $return = array_merge_recursive($return, $result);
                } else {
                    $return['output'][] = $result;
                }
                continue 2;
            }
        }
        /* SEARCH TAGS - END */
        if (in_array($string[0], array("\n", "\r"))) {
            $string = mb_substr($string, 1);
            $return['output'][] = "<nl/>";
            continue;
        }
        if (ctype_space($string[0])) {
            $string = mb_substr($string, 1);
            $return['output'][] = "<space/>";
            continue;
        }
        // No rule wanted the put-back escape char: re-enable escape handling.
        if ($cloakEscape === 1) {
            $cloakEscape = 2;
            continue;
        }
        if (mb_strlen($string)) {
            $_ = mb_substr($string, 0, 1);
            $string = mb_substr($string, 1);
            $pushChar($_);
            continue;
        }
    }
    // Post-processing, in this order: join, autoFix, modify, stringify.
    $return['output'] = implode('', $return['output']);
    if (isset($rules['autoFix']) and $rules['autoFix']) {
        $return['output'] = $autoFix($return['output']);
    }
    if (isset($rules['modify']) and $rules['modify'] and is_assoc($rules['modify'])) {
        $return['output'] = call_user_func_array($modify, array($return['output'], $rules['modify']['scheme'], $rules['modify']['map'], isset($rules['modify']['debug']) ? $rules['modify']['debug'] : null));
    }
    if (isset($rules['stringify']) and $rules['stringify'] and is_assoc($rules['stringify'])) {
        $return['output'] = call_user_func_array($stringify, array($return['output'], $rules['stringify']));
    }
    return $return;
}
Пример #4
0
/**
 * Maps every <img> found inside a div whose id contains "viewer" through
 * elementToPageImage.
 *
 * NOTE(review): f\map, elementToPageImage and xpath() come from outside this
 * snippet; f\map called with a single argument presumably returns a
 * partially applied mapper -- confirm.
 *
 * @param \DOMDocument $doc document to search
 * @return mixed whatever the outer f\map call produces for the matches
 */
function pageImageURL(\DOMDocument $doc)
{
    $viewerDiv = "//div[contains(normalize-space(@id), 'viewer')]";
    $imageQuery = $viewerDiv . "//img";
    // Build the mapper before running the query, as the original call did.
    $toPageImages = f\map(elementToPageImage);
    $matches = xpath($doc, $imageQuery);
    return f\map($toPageImages, $matches);
}