Пример #1
0
/**
 * Polls a page for posts and their comments and stores every comment that is
 * not stored yet.
 *
 * The page is fetched with getTidy() and parsed in up to three ways, stopping
 * at the first one that yields a non-empty structure: XOXO limited to the
 * 'xoxo' and 'posts' classes, then hAtom, then XOXO with no class restriction.
 * Numeric keys of the resulting structure are treated as posts, and numeric
 * keys inside a post as its comments. A comment is saved as anonymous
 * 'Comment' content unless a Comment with the same my.url already exists.
 *
 * @param string $url Address of the page to poll.
 *
 * @return void
 */
function pollpage($url)
{
    $XMLdata = getTidy($url);
    $obj = new OutlineFromXOXO($XMLdata, array('classes' => array('xoxo', 'posts')));
    $struct = $obj->toArray();
    if (!count($struct)) {
        $obj = new OutlineFromHATOM($XMLdata);
        $struct = $obj->toArray();
    }
    if (!count($struct)) {
        $obj = new OutlineFromXOXO($XMLdata, array('classes' => array()));
        $struct = $obj->toArray();
    }
    foreach ($struct as $structid => $node) {
        // Only numerically keyed array entries are posts; anything else is metadata.
        if (!is_numeric($structid) || !is_array($node)) {
            continue;
        }
        $postTitle = isset($node['text']) ? $node['text'] : null;
        $postURL = isset($node['href']) ? $node['href'] : null;
        foreach ($node as $id => $comment) {
            // Scalar fields of the post share this array with its children;
            // skip them instead of handing a non-array to array_change_key_case().
            if (!is_numeric($id) || !is_array($comment)) {
                continue;
            }
            $comment = array_change_key_case($comment);
            $url = isset($comment['href']) ? $comment['href'] : null;
            // A fragment-only link is relative to the post it belongs to.
            if (is_string($url) && $url !== '' && $url[0] === '#') {
                $url = $postURL . $url;
            }
            // NOTE(review): filter('owner', '=') is kept exactly as found; confirm
            // the two-argument form is what the XN_Query API expects here.
            $query = XN_Query::create('Content')->filter('owner', '=')->filter('type', 'eic', 'Comment')->filter('my.url', '=', $url);
            $items = $query->execute();
            if (count($items) > 0) {
                // Already stored.
                continue;
            }
            $authorname = null;
            $authorurl = null;
            if (!empty($comment['author'])) {
                // The author field is markup: take the text and HREF of its first element.
                $tmp = array();
                $theParser = xml_parser_create();
                xml_parse_into_struct($theParser, $comment['author'], $tmp);
                xml_parser_free($theParser);
                if (isset($tmp[0]['value'])) {
                    $authorname = $tmp[0]['value'];
                }
                if (isset($tmp[0]['attributes']['HREF'])) {
                    $authorurl = $tmp[0]['attributes']['HREF'];
                }
            } else {
                $authorname = isset($comment['text#1']) ? $comment['text#1'] : null;
                $authorurl = isset($comment['href#1']) ? $comment['href#1'] : null;
            }
            // The title carries the timestamp. When dividing it by 1e9 leaves 100
            // or more, the scaled-down value is stored (presumably a nanosecond
            // stamp - confirm); otherwise the title is stored as an integer as-is.
            // Non-numeric titles are coerced explicitly so PHP 8 does not throw.
            $rawTitle = isset($comment['title']) ? $comment['title'] : 0;
            $numericTitle = is_numeric($rawTitle) ? $rawTitle + 0 : (double) $rawTitle;
            $scaled = (int) ($numericTitle / 1000000000);
            $time = $scaled < 100 ? (int) $rawTitle : $scaled;
            $body = isset($comment['body']) ? $comment['body'] : null;
            $obj = XN_Content::create('Comment')
                ->my->add('content', $body)
                ->my->add('url', $url)
                ->my->add('authorname', $authorname)
                ->my->add('authorurl', $authorurl)
                ->my->add('time', $time)
                ->my->add('posturl', $postURL)
                ->my->add('posturl2', str_replace('-', '', str_replace('/', ' ', (string) $postURL)))
                ->my->add('posttitle', $postTitle);
            $obj->saveAnonymous();
        }
    }
}
Пример #2
0
            //if we've found the right class
            $flattento = '<' . strtolower($el['tag']);
            foreach ($el['attributes'] as $att => $val) {
                $flattento .= ' ' . htmlspecialchars(strtolower($att)) . '="' . htmlspecialchars($val) . '"';
            }
            $flattento .= '>' . htmlspecialchars($el['value']);
            $flattentag = $el['tag'];
            $subflatten = 0;
            if ($isclose) {
                $flattento .= '</' . strtolower($flattentag) . '>';
                $rtrn[] = $flattento;
                $flattentag = '';
                unset($flattento);
                $flattento = false;
                $subflatten = 0;
            }
            //end if isclose
        }
        //end if theclass
    }
    //end foreach vals as el
    return $rtrn;
}
//end function extract_by_class
// Microsummary endpoint: runs only when the request carries a "_microsummary"
// parameter (its value is not inspected) and answers with plain text.
if (isset($_REQUEST['_microsummary'])) {
    header('Content-type: text/plain;');
    require_once 'xn-app://singpolymaplay/getTidy.php';
    // Fetch the page named by "url" and extract the elements whose class is "class".
    $tmp = extract_by_class(getTidy($_REQUEST['url']), $_REQUEST['class']);
    // Only the first match is printed, reduced to a single line: tags are stripped,
    // carriage returns removed, newlines turned into spaces, the ends trimmed, and
    // double spaces collapsed in one pass (longer runs of spaces are only shortened).
    // NOTE(review): $tmp[0] is read without checking that anything matched.
    echo str_replace('  ', ' ', trim(str_replace("\n", ' ', str_replace("\r", '', strip_tags($tmp[0])))));
}
//end if _microsummary
<?php

require_once 'getTidy.php';
require_once 'xn-app://xoxotools/proxy/normalize_url.php';
// Normalise the requested URL in place; the code below and doresolve() read it
// back from $_REQUEST['url'].
$_REQUEST['url'] = normalize_url(trim($_REQUEST['url']));
// Split on "/" so that, for "scheme://host/path/file", index 2 is the host and
// the last element is the file name.
$domain = explode('/', $_REQUEST['url']);
// Drop the last path segment; what remains is the directory of the page.
array_pop($domain);
$dir = implode('/', $domain) . '/';
// From here on $domain is a string (site root), no longer the segment array.
// NOTE(review): the scheme is always rewritten to "http://", and a URL with
// nothing after the host (no trailing "/") loses its host to array_pop() above,
// leaving index 2 unset - confirm normalize_url() always yields a path.
$domain = 'http://' . strtolower($domain[2]);
$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;
// "@" silences the warnings loadHTML() raises while parsing the fetched markup.
@$doc->loadHTML(getTidy($_REQUEST['url']));
/**
 * Rewrites a URL-valued attribute on every node so that it is absolute.
 *
 * - Values that contain a ":" (an explicit scheme such as "http:" or "mailto:")
 *   are left alone.
 * - Empty or missing values are replaced by the requested page URL
 *   ($_REQUEST['url']).
 * - Protocol-relative values ("//host/path") get the scheme of $domain.
 * - Root-relative values ("/path") are prefixed with $domain.
 * - Everything else is prefixed with $dir.
 *
 * NOTE(review): fragment-only ("#x") and query-only ("?x") references are still
 * resolved against $dir rather than against the page URL.
 *
 * @param mixed  $results   Iterable collection of nodes offering getAttribute()
 *                          and setAttribute(); taken by reference as before.
 * @param string $attribute Name of the attribute to rewrite, e.g. "href".
 * @param string $domain    Site root without trailing slash, e.g. "http://example.com".
 * @param string $dir       Directory of the current page, with trailing slash.
 *
 * @return void
 */
function doresolve(&$results, $attribute, $domain, $dir)
{
    $pageUrl = isset($_REQUEST['url']) ? $_REQUEST['url'] : '';
    // Scheme used for protocol-relative references; fall back to plain http.
    $scheme = parse_url($domain, PHP_URL_SCHEME);
    if (!$scheme) {
        $scheme = 'http';
    }
    foreach ($results as $node) {
        $href = (string) $node->getAttribute($attribute);
        if (preg_match('/^[^:]*:.*$/', $href)) {
            // Already carries a scheme.
            continue;
        }
        // Compare against '' explicitly: "0" is a valid relative reference.
        if ($href === '') {
            $node->setAttribute($attribute, $pageUrl);
            continue;
        }
        if (substr($href, 0, 2) === '//') {
            // Protocol-relative: only the scheme is missing, the host is given.
            $node->setAttribute($attribute, $scheme . ':' . $href);
        } elseif ($href[0] === '/') {
            $node->setAttribute($attribute, $domain . $href);
        } else {
            $node->setAttribute($attribute, $dir . $href);
        }
    }
}
<?php

// Emits the first "entry-summary" element of a remote page as a JSON object
// {"summary": ..., "url": ...}, wrapped in callback(...) when a JSONP callback
// name is supplied.
if (empty($_REQUEST['url'])) {
    die('<h1>No URL Given!</h1>');
}
// The callback is echoed into a script response, so accept only names built
// from identifier characters, dots and brackets (e.g. "cb", "app.onData", "fns[0]").
$callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : '';
if ($callback && (!is_string($callback) || !preg_match('/^[A-Za-z_$][\w$.\[\]]*$/', $callback))) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid callback');
}
header('Content-type: text/javascript;charset=utf-8');
require_once 'xn-app://singpolymaplay/getTidy.php';
require_once 'xn-app://xoxotools/extract_by_class.php';
$page = getTidy($_REQUEST['url']);
$result = extract_by_class($page, 'entry-summary');
$summary = isset($result[0]) ? (string) $result[0] : '';
// json_encode() escapes quotes, backslashes and control characters correctly;
// addslashes() does not (it emits \' and \0, which are not valid JSON).
// Prefer substituting invalid UTF-8 over failing, where the runtime allows it.
$jsonFlags = 0;
if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
    $jsonFlags = JSON_INVALID_UTF8_SUBSTITUTE;
} elseif (defined('JSON_PARTIAL_OUTPUT_ON_ERROR')) {
    $jsonFlags = JSON_PARTIAL_OUTPUT_ON_ERROR;
}
$payload = json_encode(array('summary' => $summary, 'url' => (string) $_REQUEST['url']), $jsonFlags);
if (!is_string($payload)) {
    // Encoding failed outright (e.g. undecodable bytes on an old runtime).
    $payload = '{"summary":null,"url":null}';
}
if ($callback) {
    echo $callback . '(';
}
echo $payload;
if ($callback) {
    echo ')';
}
Пример #5
0
{
    //   $curl = curl_init('http://cgi.w3.org/cgi-bin/tidy?docAddr='.urlencode($url).'&forceXML=on');
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/2.0');
    $rtrn = curl_exec($curl);
    curl_close($curl);
    //   $tidy = new tidy;
    //   $tidy->parseString($rtrn, array('output-xml' => true, 'doctype' => 'loose', 'add-xml-decl' => true),'utf8');
    //   $tidy->cleanRepair();
    //   return str_replace('&nbsp;','&#160;',$tidy);
    return str_replace('&nbsp;', '&#160;', $rtrn);
}
//end function getTidy
// Fetch the page named by the "url" request parameter.
$bloggerdata = getTidy($_REQUEST['url']);
// Round-trip the markup through DOMDocument (HTML in, XML out) so that the XML
// parser below receives well-formed input. Every step is "@"-silenced, so
// parse warnings for sloppy HTML are hidden.
@($doc = new DOMDocument());
@($doc->preserveWhiteSpace = false);
@$doc->loadHTML($bloggerdata);
@($bloggerdata = $doc->saveXML());
// Flatten the XML into $vals, a list of element records; the loop that follows
// reads each record's 'tag' and 'attributes'.
$theParser = xml_parser_create();
xml_parse_into_struct($theParser, $bloggerdata, $vals);
xml_parser_free($theParser);
// Collects the image data found while scanning $vals.
$img = array();
foreach ($vals as $el) {
    if ($el['tag'] == 'IMG' && ($el['attributes']['ALT'] == 'My Photo' || in_array('photo', explode(' ', $el['attributes']['CLASS'])))) {
        $img['photo'] = array();
        $img['photo']['url'] = $el['attributes']['SRC'];
        $img['photo']['width'] = $el['attributes']['WIDTH'];
        $img['photo']['height'] = $el['attributes']['HEIGHT'];
    }
Пример #6
0
<?php

require 'getTidy.php';
// Proxy endpoint: fetches the page named by the "url" request parameter and
// returns it labelled as XHTML.
$url = isset($_REQUEST['url']) ? (string) $_REQUEST['url'] : '';
if ($url === '') {
    die('<h1>No URL Given!</h1>');
}
// The fetched bytes are echoed straight back to the caller, so refuse explicit
// schemes other than http/https (file://, ftp://, gopher://, ...). URLs given
// without a scheme are passed through unchanged, as before.
// NOTE(review): assumes getTidy() hands the URL to cURL without filtering
// schemes itself - confirm against getTidy.php.
if (preg_match('#^\s*([a-z][a-z0-9+.\-]*)://#i', $url, $schemeMatch)
    && !in_array(strtolower($schemeMatch[1]), array('http', 'https'), true)) {
    header('HTTP/1.1 400 Bad Request');
    die('<h1>Unsupported URL scheme!</h1>');
}
header('Content-type: application/xhtml+xml');
echo getTidy($url);
<?php

// Lists the categories of a wiki as a JSON object {"<category>": 0, ...},
// wrapped in callback(...) when a JSONP callback name is supplied.
//need filter (if we ever do count)
if (empty($_REQUEST['mainpage'])) {
    die('<h2>Please specify a main page!</h2>');
}
// The callback is echoed into a script response, so accept only names built
// from identifier characters, dots and brackets.
$callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : '';
if ($callback && (!is_string($callback) || !preg_match('/^[A-Za-z_$][\w$.\[\]]*$/', $callback))) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid callback');
}
require_once 'getTidy.php';
require_once 'xn-app://xoxotools/OutlineClasses/OutlineFromXOXO.php';
// The category index is reached by swapping the main-page name in the URL.
$page = getTidy(str_replace('Main_Page', 'Special:Categories?limit=9999', $_REQUEST['mainpage']));
$xoxo = new OutlineFromXOXO($page, array('classes' => array('special')));
header('Content-type: text/javascript;charset=utf-8');
// Substitute invalid UTF-8 instead of failing, where the runtime supports it.
$jsonFlags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;
if ($callback) {
    echo $callback . '(';
}
echo '{';
//Count is zero because we don't know
// Track the separator explicitly rather than relying on the first node having key 0.
$first = true;
foreach ($xoxo->getNodes() as $item) {
    $name = str_replace("\n", ' ', str_replace("\r", '', (string) $item->getField('text')));
    // json_encode() yields a correctly escaped, quoted JSON string.
    $key = json_encode($name, $jsonFlags);
    if (!is_string($key) || $key === '' || $key[0] !== '"') {
        // The name could not be represented as a JSON string; leave it out
        // rather than emit a broken document.
        continue;
    }
    if (!$first) {
        echo ', ';
    }
    $first = false;
    echo $key . ':0';
}
//end foreach
echo '}';
if ($callback) {
    echo ')';
}
Пример #8
0
<?php

require_once 'xn-app://singpolymaplay/getTidy.php';
require_once 'xn-app://xoxotools/proxy/normalize_url.php';
require_once 'php2yubnubarray.php';
// Runs the XPath expression from the "query" request parameter against either
// a fetched page ("url") or markup supplied inline ("data"), and returns every
// matching node serialised as XML through php2yubnubarray().
$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;
if (!empty($_REQUEST['url'])) {
    $html = getTidy(normalize_url($_REQUEST['url']));
} else {
    $html = isset($_REQUEST['data']) ? $_REQUEST['data'] : '';
}
// Nothing to parse (missing input or failed fetch): keep the document empty
// instead of passing an empty value to loadHTML().
if (is_string($html) && $html !== '') {
    $doc->loadHTML($html);
}
$xpath = new DOMXPath($doc);
$expression = isset($_REQUEST['query']) ? (string) $_REQUEST['query'] : '';
// query() returns false for a malformed expression; treat that as "no matches".
$results = $expression !== '' ? $xpath->query($expression) : false;
$final = array();
if ($results !== false) {
    foreach ($results as $node) {
        try {
            // Copy the node into a document of its own so it serialises stand-alone.
            $newDom = new DOMDocument();
            $copy = $newDom->importNode($node, true);
            if (!$copy) {
                continue;
            }
            $newDom->appendChild($copy);
            $final[] = str_replace("<?xml version=\"1.0\"?>\n", '', $newDom->saveXML());
        } catch (DOMException $e) {
            // Attribute and text nodes cannot be the child of a document;
            // serialise them in place instead of aborting the request.
            $final[] = $doc->saveXML($node);
        }
    }
}
//end foreach results as node
$_REQUEST['as'] = !empty($_REQUEST['as']) ? $_REQUEST['as'] : 'xml';
$callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : null;
echo php2yubnubarray($final, $_REQUEST['as'], $callback);
Пример #9
0
   <form method="get" action="<?php 
    echo $_SERVER['PHP_SELF'];
    ?>
"><div>
      <input type="hidden" name="xn_auth" value="no" />
      URL: <input type="text" name="url" value="" />
      <input type="submit" value="Go" />
   </div></form>
   <?php 
    exit;
}
//end if ! url
require_once 'OutlineClasses/OutlineFromXOXO.php';
require_once 'OutlineClasses/OutlineFromHATOM.php';
require_once 'xn-app://singpolymaplay/getTidy.php';
// Fetch the page and, when it has a plain "<body>...</body>" section, keep only that.
$data = getTidy($_GET['url']);
// "[^\f]" matches any character except a form feed, which lets the pattern
// span newlines without the "s" modifier. Only an attribute-less <body> tag
// matches. Testing preg_match()'s result avoids reading $body[0] when
// nothing matched.
if (preg_match('/<body>[^\\f]*?<\\/body>/', $data, $body)) {
    $data = $body[0];
}
// Disabled clean-up passes, kept for reference:
//$data = preg_replace('/<(img|meta|link|hr|br)([^<>]*?)([\/]?)>/i','<$1$2 />', $data);
//$data = preg_replace('/&([^;]{10})/i','&amp;$1', $data);
//$data = str_replace('<HEAD>','<head>',$data);
//$data = str_replace('</HEAD>','</head>',$data);
// Parse the same markup three ways: XOXO limited to the 'xoxo'/'posts' classes,
// hAtom (given the page URL under the 'resolve' option), and XOXO with no
// class restriction.
$xoxo = new OutlineFromXOXO($data, array('classes' => array('xoxo', 'posts')));
$hatom = new OutlineFromHATOM($data, array('resolve' => $_GET['url']));
$raw = new OutlineFromXOXO($data, array('classes' => array()));
// Stays false until one of the outlines supplies a 'home' node.
$site = false;
if ($xoxo->getNumNodes() && in_array('home', explode(' ', $xoxo->getNode(0)->getField('rel')))) {
    $site = $xoxo->getNode(0)->toArray();
    $xoxo->unsetNode(0);
Пример #10
0
//end if ! $archives
// Build the list of archive pages to visit: each archive node contributes its
// 'href#1' field when that is set, otherwise its 'href' field, otherwise nothing.
$urls = array();
foreach ($archives->getNodes() as $node) {
    if ($node->getField('href#1')) {
        array_push($urls, $node->getField('href#1'));
    } elseif ($node->getField('href')) {
        array_push($urls, $node->getField('href'));
    }
}
// Start with empty site information and an empty combined structure.
$site = array();
$struct = array();
foreach ($urls as $url) {
    $data = getTidy($url);
    $xoxo = new OutlineFromXOXO($data, array('classes' => array('xoxo', 'posts')));
    $hatom = new OutlineFromHATOM($data, array('resolve' => $_GET['url']));
    $raw = new OutlineFromXOXO($data, array('classes' => array()));
    if (in_array('home', explode(' ', $xoxo->getNode(0)->getField('rel')))) {
        $site = $xoxo->getNode(0)->toArray();
        $xoxo->unsetNode(0);
        $xoxo->reindexNodes();
    }
    if (in_array('home', explode(' ', $hatom->getNode(0)->getField('rel')))) {
        $site = $hatom->getNode(0)->toArray();
        $hatom->unsetNode(0);
        $hatom->reindexNodes();
    }
    if (in_array('home', explode(' ', $raw->getNode(0)->getField('rel')))) {
        $site = $raw->getNode(0)->toArray();
Пример #11
0
            if ($isclose) {
                $flattento .= '</' . strtolower($flattentag) . '>';
                $rtrn[] = $flattento;
                $flattentag = '';
                unset($flattento);
                $flattento = false;
                $subflatten = 0;
            }
            //end if isclose
        }
        //end if theclass
    }
    //end foreach vals as el
    return $rtrn;
}
//end function extract_by_id
// JSON endpoint: runs only when the request carries a "_json" parameter (its
// value is not inspected). Prints the element with the requested id as JSON,
// wrapped in callback(...) when a JSONP callback name is supplied.
if (isset($_REQUEST['_json'])) {
    // The callback is echoed into a script response, so accept only names built
    // from identifier characters, dots and brackets.
    if (!empty($_REQUEST['callback'])
        && (!is_string($_REQUEST['callback']) || !preg_match('/^[A-Za-z_$][\w$.\[\]]*$/', $_REQUEST['callback']))) {
        header('HTTP/1.1 400 Bad Request');
        die('Invalid callback');
    }
    header('Content-type: text/javascript');
    require_once 'OutlineClasses/Outline.php';
    require_once 'xn-app://singpolymaplay/getTidy.php';
    if (!empty($_REQUEST['callback'])) {
        echo $_REQUEST['callback'] . '(';
    }
    $tmp = extract_by_id(getTidy($_REQUEST['url']), $_REQUEST['id']);
    // Only the first match is used; when nothing matched, null is passed on
    // without reading a missing offset.
    $tmp = new Outline(isset($tmp[0]) ? $tmp[0] : null);
    echo $tmp->toJSON();
    if (!empty($_REQUEST['callback'])) {
        echo ')';
    }
}
//end if _json
/**
 * Scrapes a Blogger profile page into an array.
 *
 * The page is fetched with getTidy(), parsed with the XML parser and scanned
 * element by element. Keys that may appear in the result:
 *  - 'name':    trimmed text of an H1 element
 *  - 'aboutme': re-serialised markup collected after an "About Me" H2 until
 *               the matching P element closes
 *  - 'contact': the same for a "Contact" H2 and the matching UL element
 *  - 'photo':   'url', 'width' and 'height' of the element whose ALT is "My Photo"
 *  - 'blogs':   links found between the element with id "blogs" and the next
 *               closing TABLE; "http:" links start a blog entry ('url', 'name'),
 *               other links are appended to the 'members' of the latest entry
 *  - 'error':   'number' and 'message' when XML parsing failed
 *
 * @param string $url Address of the profile page.
 *
 * @return array The collected profile data; empty when nothing was recognised.
 */
function getBloggerProfile($url)
{
    $bloggerdata = getTidy($url);
    $error = false;
    $vals = array();
    $theParser = xml_parser_create();
    if (!xml_parse_into_struct($theParser, $bloggerdata, $vals)) {
        $errorcode = xml_get_error_code($theParser);
        // Code 27 is deliberately not reported (presumably an undeclared-entity
        // condition such as &nbsp; - confirm against the parser in use).
        if ($errorcode != XML_ERROR_NONE && $errorcode != 27) {
            $error = array('number' => $errorcode, 'message' => xml_error_string($errorcode));
        }
    }
    xml_parser_free($theParser);
    $flattento = false;  // result key being collected, or a falsy value when idle
    $flattentag = '';    // tag whose closing ends the collection
    $flattendat = '';    // markup collected so far
    $subflatten = -1;    // nesting depth of $flattentag; -1 until it first opens
    $doblogs = false;    // true while inside the "blogs" section
    $bloggerdata = array();
    foreach ($vals as $el) {
        $isopen = $el['type'] == 'open' || $el['type'] == 'complete';
        $isclose = $el['type'] == 'close' || $el['type'] == 'complete';
        // Not every record has attributes or a value (closing tags have neither).
        $attrs = isset($el['attributes']) ? $el['attributes'] : array();
        $rawValue = isset($el['value']) ? $el['value'] : null;
        $value = (string) $rawValue;
        if ($flattento) {
            if ($isopen && $flattentag == $el['tag']) {
                $subflatten++;
            }
            if ($isclose && $flattentag == $el['tag']) {
                if ($subflatten) {
                    $subflatten--;
                } else {
                    // Outermost target tag closed: store what was collected.
                    if ($flattento == 'aboutme') {
                        $bloggerdata['aboutme'] = $flattendat . '</p>';
                    }
                    if ($flattento == 'contact') {
                        $bloggerdata['contact'] = $flattendat . '</ul>';
                    }
                    $flattendat = '';
                    $flattentag = '';
                    $subflatten = -1;
                    $flattento = '';
                    continue;
                }
            }
            $emptytag = false;
            if ($isopen) {
                $flattendat .= '<' . strtolower($el['tag']);
                foreach ($attrs as $id => $val) {
                    $flattendat .= ' ' . strtolower($id) . '="' . htmlspecialchars($val) . '"';
                }
                // Compare against '' so that text such as "0" is not mistaken
                // for an empty element and dropped.
                $emptytag = $el['type'] == 'complete' && $value === '';
                $flattendat .= $emptytag ? ' />' : '>';
                if ($value !== '') {
                    $flattendat .= htmlspecialchars($value);
                }
            }
            if ($el['type'] == 'cdata') {
                $flattendat .= htmlspecialchars($value);
            }
            if ($isclose && !$emptytag) {
                $flattendat .= '</' . strtolower($el['tag']) . '>';
            }
            continue;
        }
        if (isset($attrs['ID']) && $attrs['ID'] == 'blogs') {
            $doblogs = true;
        }
        if ($doblogs && $el['tag'] == 'A') {
            $href = isset($attrs['HREF']) ? $attrs['HREF'] : '';
            if (substr($href, 0, 5) == 'http:') {
                $bloggerdata['blogs'][] = array('url' => $href, 'name' => $rawValue);
            } else {
                // A member link belongs to the most recent blog entry; when there
                // is none yet, start an entry that holds only members.
                $tmp = !empty($bloggerdata['blogs']) ? array_pop($bloggerdata['blogs']) : array();
                if (empty($tmp['members'])) {
                    $tmp['members'] = array();
                }
                $tmp['members'][] = array('url' => 'http://www.blogger.com' . $href, 'name' => $rawValue);
                $bloggerdata['blogs'][] = $tmp;
            }
        }
        if ($doblogs && $isclose && $el['tag'] == 'TABLE') {
            $doblogs = false;
        }
        // NOTE(review): this fires for every H1 record, so an H1 that has child
        // elements ends with the value-less closing record, i.e. an empty name.
        if ($el['tag'] == 'H1') {
            $bloggerdata['name'] = trim($value);
        }
        if ($el['tag'] == 'H2' && $value == 'About Me') {
            $flattento = 'aboutme';
            $flattentag = 'P';
        }
        if ($el['tag'] == 'H2' && $value == 'Contact') {
            $flattento = 'contact';
            $flattentag = 'UL';
        }
        if (isset($attrs['ALT']) && $attrs['ALT'] == 'My Photo') {
            $bloggerdata['photo'] = array();
            $bloggerdata['photo']['url'] = isset($attrs['SRC']) ? $attrs['SRC'] : null;
            $bloggerdata['photo']['width'] = isset($attrs['WIDTH']) ? $attrs['WIDTH'] : null;
            $bloggerdata['photo']['height'] = isset($attrs['HEIGHT']) ? $attrs['HEIGHT'] : null;
        }
    }
    if ($error) {
        $bloggerdata['error'] = $error;
    }
    return $bloggerdata;
}