/**
 * Poll a page for comments and store every comment not seen before.
 *
 * The page is fetched with getTidy() and parsed into an outline, trying in
 * order: XOXO restricted to the "xoxo"/"posts" classes, hAtom, and finally
 * XOXO with no class restriction. Each numerically-keyed top-level node is
 * treated as a post, and each numerically-keyed child of a post as a comment.
 * A comment whose URL already exists as a stored "Comment" content object is
 * skipped; every other comment is saved anonymously.
 *
 * @param string $url Address of the page to poll.
 * @return void
 */
function pollpage($url)
{
    $XMLdata = getTidy($url);

    $obj = new OutlineFromXOXO($XMLdata, array('classes' => array('xoxo', 'posts')));
    $struct = $obj->toArray();
    if (empty($struct)) {
        $obj = new OutlineFromHATOM($XMLdata);
        $struct = $obj->toArray();
    }
    if (empty($struct)) {
        $obj = new OutlineFromXOXO($XMLdata, array('classes' => array()));
        $struct = $obj->toArray();
    }
    if (!is_array($struct)) {
        return;
    }

    foreach ($struct as $structid => $node) {
        // Non-numeric keys are fields of the outline itself, not posts.
        if (!is_numeric($structid) || !is_array($node)) {
            continue;
        }
        $postTitle = isset($node['text']) ? $node['text'] : null;
        $postURL = isset($node['href']) ? $node['href'] : null;

        foreach ($node as $id => $comment) {
            // Non-numeric keys are the post's own fields, not comments.
            if (!is_numeric($id) || !is_array($comment)) {
                continue;
            }
            $comment = array_change_key_case($comment);

            // Kept separate from the $url parameter so the polled address is not overwritten.
            $commentURL = isset($comment['href']) ? $comment['href'] : null;
            if (is_string($commentURL) && $commentURL !== '' && $commentURL[0] == '#') {
                // Fragment-only permalink: make it absolute against the post.
                $commentURL = $postURL . $commentURL;
            }

            // Skip comments that were already stored on an earlier poll.
            // NOTE(review): filter('owner', '=') is passed no value; presumably it
            // restricts to content owned by this application - confirm against XN_Query.
            $query = XN_Query::create('Content')
                ->filter('owner', '=')
                ->filter('type', 'eic', 'Comment')
                ->filter('my.url', '=', $commentURL);
            $items = $query->execute();
            if (count($items) > 0) {
                continue;
            }

            $authorname = null;
            $authorurl = null;
            if (!empty($comment['author'])) {
                // The author field holds markup; the first parsed element supplies
                // the name (its text) and the URL (its HREF attribute).
                $tmp = array();
                $theParser = xml_parser_create();
                xml_parse_into_struct($theParser, $comment['author'], $tmp);
                xml_parser_free($theParser);
                if (isset($tmp[0]['value'])) {
                    $authorname = $tmp[0]['value'];
                }
                if (isset($tmp[0]['attributes']['HREF'])) {
                    $authorurl = $tmp[0]['attributes']['HREF'];
                }
            } else {
                if (isset($comment['text#1'])) {
                    $authorname = $comment['text#1'];
                }
                if (isset($comment['href#1'])) {
                    $authorurl = $comment['href#1'];
                }
            } //end if-else comment[author]

            // The title carries a timestamp. When dividing by 10^9 still leaves a
            // value of 100 or more, the scaled value is used; otherwise the title is
            // used as-is. Presumably this distinguishes nanoseconds from seconds -
            // confirm against the feeds being polled. The float cast keeps a
            // non-numeric title from raising a TypeError on PHP 8.
            $title = isset($comment['title']) ? $comment['title'] : 0;
            $scaled = (int) ((float) $title / 1000000000);
            $time = $scaled < 100 ? (int) $title : $scaled;

            $content = XN_Content::create('Comment')
                ->my->add('content', isset($comment['body']) ? $comment['body'] : null)
                ->my->add('url', $commentURL)
                ->my->add('authorname', $authorname)
                ->my->add('authorurl', $authorurl)
                ->my->add('time', $time)
                ->my->add('posturl', $postURL)
                ->my->add('posturl2', str_replace('-', '', str_replace('/', ' ', (string) $postURL)))
                ->my->add('posttitle', $postTitle);
            $content->saveAnonymous();
        } //end foreach node
    } //end foreach struct
}
//if we've found the right class $flattento = '<' . strtolower($el['tag']); foreach ($el['attributes'] as $att => $val) { $flattento .= ' ' . htmlspecialchars(strtolower($att)) . '="' . htmlspecialchars($val) . '"'; } $flattento .= '>' . htmlspecialchars($el['value']); $flattentag = $el['tag']; $subflatten = 0; if ($isclose) { $flattento .= '</' . strtolower($flattentag) . '>'; $rtrn[] = $flattento; $flattentag = ''; unset($flattento); $flattento = false; $subflatten = 0; } //end if isclose } //end if theclass } //end foreach vals as el return $rtrn; } //end function extract_by_class if (isset($_REQUEST['_microsummary'])) { header('Content-type: text/plain;'); require_once 'xn-app://singpolymaplay/getTidy.php'; $tmp = extract_by_class(getTidy($_REQUEST['url']), $_REQUEST['class']); echo str_replace(' ', ' ', trim(str_replace("\n", ' ', str_replace("\r", '', strip_tags($tmp[0]))))); } //end if _microsummary
<?php
// Loads the requested page into $doc and derives the two base URLs used to
// make relative references absolute:
//   $dir    - the page's directory, with a trailing slash
//   $domain - scheme plus lower-cased host, without a trailing slash
require_once 'getTidy.php';
require_once 'xn-app://xoxotools/proxy/normalize_url.php';

$_REQUEST['url'] = normalize_url(trim(isset($_REQUEST['url']) ? $_REQUEST['url'] : ''));

// Split only the part before any query string or fragment, so that a "/"
// inside the query cannot be mistaken for a path separator.
$resolve_base = preg_replace('/[?#].*$/s', '', $_REQUEST['url']);
$domain = explode('/', $resolve_base);
// "scheme://host" splits into exactly three pieces; only drop the last piece
// when there is a path segment after the host, otherwise the host is lost.
if (count($domain) > 3) {
    array_pop($domain);
}
$dir = implode('/', $domain) . '/';
// Keep the page's own scheme (e.g. https); fall back to http when none is present.
$resolve_scheme = preg_match('/^[a-z][a-z0-9+.\-]*:$/i', $domain[0]) ? strtolower($domain[0]) : 'http:';
$domain = $resolve_scheme . '//' . strtolower(isset($domain[2]) ? $domain[2] : '');

$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;
// Real-world HTML is rarely valid; parser warnings are deliberately suppressed.
@$doc->loadHTML(getTidy($_REQUEST['url']));

/**
 * Rewrite one attribute on every node of a result set to an absolute URL.
 *
 * @param mixed  $results   Iterable of nodes offering getAttribute()/setAttribute().
 * @param string $attribute Name of the attribute to rewrite (e.g. "href", "src").
 * @param string $domain    Scheme and host without trailing slash, used for root-relative values.
 * @param string $dir       Directory URL with trailing slash, used for other relative values.
 * @return void
 */
function doresolve(&$results, $attribute, $domain, $dir)
{
    foreach ($results as $node) {
        $href = $node->getAttribute($attribute);

        // Already absolute: starts with a URI scheme. Only a leading scheme
        // counts, so relative values that merely contain ":" are still resolved.
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href)) {
            continue;
        }
        // Empty reference points at the page itself.
        if ($href === '' || $href === null) {
            $node->setAttribute($attribute, $_REQUEST['url']);
            continue;
        }
        // Protocol-relative ("//host/path"): only the scheme is missing.
        if (substr($href, 0, 2) == '//') {
            $colon = strpos($domain, ':');
            $scheme = $colon === false ? 'http:' : substr($domain, 0, $colon + 1);
            $node->setAttribute($attribute, $scheme . $href);
            continue;
        }
        // Fragment-only: refers to the page itself, not to its directory.
        if ($href[0] == '#') {
            $node->setAttribute($attribute, preg_replace('/#.*$/s', '', $_REQUEST['url']) . $href);
            continue;
        }
        if ($href[0] == '/') {
            $node->setAttribute($attribute, $domain . $href);
        } else {
            $node->setAttribute($attribute, $dir . $href);
        }
    } //end foreach results
}
<?php
// Returns the first "entry-summary" element of a page as JSON (or JSONP when
// a "callback" request parameter is given):
//   {"summary": "<markup>", "url": "<requested url>"}
if (!isset($_REQUEST['url']) || !$_REQUEST['url']) {
    die('<h1>No URL Given!</h1>');
}

// The callback is echoed verbatim into executable script, so only accept
// something shaped like a JavaScript reference (identifier, dots, brackets).
$callback = isset($_REQUEST['callback']) ? (string) $_REQUEST['callback'] : '';
if ($callback !== '' && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$.\[\]]*$/', $callback)) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid callback');
}

header('Content-type: text/javascript;charset=utf-8');
require_once 'xn-app://singpolymaplay/getTidy.php';
require_once 'xn-app://xoxotools/extract_by_class.php';

$page = getTidy($_REQUEST['url']);
$result = extract_by_class($page, 'entry-summary');
// No matching element yields an empty summary.
$summary = (is_array($result) && isset($result[0])) ? (string) $result[0] : '';

// json_encode escapes quotes, backslashes, newlines and control characters
// correctly; hand-escaping with addslashes() produced invalid JSON (e.g. \').
$payload = json_encode(array('summary' => $summary, 'url' => (string) $_REQUEST['url']));
if ($payload === false) {
    // Encoding fails on text that is not valid UTF-8; still emit well-formed JSON.
    $payload = '{"summary":"","url":""}';
}

if ($callback !== '') {
    echo $callback . '(';
}
echo $payload;
if ($callback !== '') {
    echo ')';
}
{ // $curl = curl_init('http://cgi.w3.org/cgi-bin/tidy?docAddr='.urlencode($url).'&forceXML=on'); $curl = curl_init($url); curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/2.0'); $rtrn = curl_exec($curl); curl_close($curl); // $tidy = new tidy; // $tidy->parseString($rtrn, array('output-xml' => true, 'doctype' => 'loose', 'add-xml-decl' => true),'utf8'); // $tidy->cleanRepair(); // return str_replace(' ',' ',$tidy); return str_replace(' ', ' ', $rtrn); } //end function getTidy $bloggerdata = getTidy($_REQUEST['url']); @($doc = new DOMDocument()); @($doc->preserveWhiteSpace = false); @$doc->loadHTML($bloggerdata); @($bloggerdata = $doc->saveXML()); $theParser = xml_parser_create(); xml_parse_into_struct($theParser, $bloggerdata, $vals); xml_parser_free($theParser); $img = array(); foreach ($vals as $el) { if ($el['tag'] == 'IMG' && ($el['attributes']['ALT'] == 'My Photo' || in_array('photo', explode(' ', $el['attributes']['CLASS'])))) { $img['photo'] = array(); $img['photo']['url'] = $el['attributes']['SRC']; $img['photo']['width'] = $el['attributes']['WIDTH']; $img['photo']['height'] = $el['attributes']['HEIGHT']; }
<?php
// Fetches the page named by the "url" request parameter through getTidy()
// and echoes the result, labelled as XHTML.
// NOTE(review): the URL is passed to getTidy() without any restriction, so
// this script will fetch whatever address it is given - confirm that is intended.
require 'getTidy.php';

// Without a URL there is nothing to fetch; fail with a visible message
// instead of emitting an empty XHTML response.
if (!isset($_REQUEST['url']) || !$_REQUEST['url']) {
    die('<h1>No URL Given!</h1>');
}

header('Content-type: application/xhtml+xml');
echo getTidy($_REQUEST['url']);
<?php
// Lists the categories of a wiki as a JSON object (or JSONP when a
// "callback" request parameter is given), keyed by category name:
//   {"Category A": 0, "Category B": 0}
// The category page address is derived from the "mainpage" request parameter
// by replacing "Main_Page" with "Special:Categories?limit=9999".
//need filter (if we ever do count)
if (!isset($_REQUEST['mainpage']) || !$_REQUEST['mainpage']) {
    die('<h2>Please specify a main page!</h2>');
}

// The callback is echoed verbatim into executable script, so only accept
// something shaped like a JavaScript reference (identifier, dots, brackets).
$callback = isset($_REQUEST['callback']) ? (string) $_REQUEST['callback'] : '';
if ($callback !== '' && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$.\[\]]*$/', $callback)) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid callback');
}

require_once 'getTidy.php';
require_once 'xn-app://xoxotools/OutlineClasses/OutlineFromXOXO.php';

$page = getTidy(str_replace('Main_Page', 'Special:Categories?limit=9999', $_REQUEST['mainpage']));
$xoxo = new OutlineFromXOXO($page, array('classes' => array('special')));

header('Content-type: text/javascript;charset=utf-8');

//Count is zero because we don't know
$counts = array();
foreach ($xoxo->getNodes() as $item) {
    // Flatten line breaks so each category name is a single line.
    $name = str_replace("\n", ' ', str_replace("\r", '', (string) $item->getField('text')));
    $counts[$name] = 0;
} //end foreach

// JSON_FORCE_OBJECT keeps the output an object even when it is empty or when
// the category names happen to be consecutive numbers. json_encode also takes
// care of separators and escaping, which the hand-built string got wrong
// (addslashes emits \' and the comma logic assumed node keys start at 0).
$json = json_encode($counts, JSON_FORCE_OBJECT);
if ($json === false) {
    // Encoding fails on text that is not valid UTF-8; still emit well-formed JSON.
    $json = '{}';
}

if ($callback !== '') {
    echo $callback . '(';
}
echo $json;
if ($callback !== '') {
    echo ')';
}
<?php
// Runs the XPath expression in the "query" request parameter against a
// document and returns each match serialized as XML. The document comes from
// the "url" parameter (fetched through getTidy) or, when no URL is given,
// from the raw markup in the "data" parameter. Output formatting is delegated
// to php2yubnubarray() using the "as" (default "xml") and "callback" parameters.
require_once 'xn-app://singpolymaplay/getTidy.php';
require_once 'xn-app://xoxotools/proxy/normalize_url.php';
require_once 'php2yubnubarray.php';

$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;

if (!empty($_REQUEST['url'])) {
    $html = getTidy(normalize_url($_REQUEST['url']));
} else {
    $html = isset($_REQUEST['data']) ? $_REQUEST['data'] : '';
}

// Real-world HTML is rarely valid: collect parser complaints internally
// instead of letting them leak into the response, then restore the setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
if (is_string($html) && $html !== '') {
    // loadHTML rejects an empty string, so only parse when there is markup.
    $doc->loadHTML($html);
}
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$xpath = new DOMXPath($doc);
$results = isset($_REQUEST['query']) ? $xpath->query($_REQUEST['query']) : false;

$final = array();
// query() returns false for a malformed expression; treat that as "no matches".
if ($results) {
    foreach ($results as $node) {
        if ($node instanceof DOMElement) {
            // Copy the element into its own document and drop the XML declaration.
            $newDom = new DOMDocument();
            $newDom->appendChild($newDom->importNode($node, true));
            $final[] = str_replace("<?xml version=\"1.0\"?>\n", '', $newDom->saveXML());
        } else {
            // Text and attribute nodes cannot be the root of a document
            // (appendChild would throw), so serialize them in place instead.
            $final[] = $doc->saveXML($node);
        }
    } //end foreach results as node
}

$_REQUEST['as'] = !empty($_REQUEST['as']) ? $_REQUEST['as'] : 'xml';
$callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : null;
echo php2yubnubarray($final, $_REQUEST['as'], $callback);
<form method="get" action="<?php echo $_SERVER['PHP_SELF']; ?> "><div> <input type="hidden" name="xn_auth" value="no" /> URL: <input type="text" name="url" value="" /> <input type="submit" value="Go" /> </div></form> <?php exit; } //end if ! url require_once 'OutlineClasses/OutlineFromXOXO.php'; require_once 'OutlineClasses/OutlineFromHATOM.php'; require_once 'xn-app://singpolymaplay/getTidy.php'; $data = getTidy($_GET['url']); preg_match('/<body>[^\\f]*?<\\/body>/', $data, $body); if ($body[0]) { $data = $body[0]; } //$data = preg_replace('/<(img|meta|link|hr|br)([^<>]*?)([\/]?)>/i','<$1$2 />', $data); //$data = preg_replace('/&([^;]{10})/i','&$1', $data); //$data = str_replace('<HEAD>','<head>',$data); //$data = str_replace('</HEAD>','</head>',$data); $xoxo = new OutlineFromXOXO($data, array('classes' => array('xoxo', 'posts'))); $hatom = new OutlineFromHATOM($data, array('resolve' => $_GET['url'])); $raw = new OutlineFromXOXO($data, array('classes' => array())); $site = false; if ($xoxo->getNumNodes() && in_array('home', explode(' ', $xoxo->getNode(0)->getField('rel')))) { $site = $xoxo->getNode(0)->toArray(); $xoxo->unsetNode(0);
//end if ! $archives $urls = array(); foreach ($archives->getNodes() as $node) { if ($node->getField('href#1')) { $urls[] = $node->getField('href#1'); } else { if ($node->getField('href')) { $urls[] = $node->getField('href'); } } } //end foreach as node $site = array(); $struct = array(); foreach ($urls as $url) { $data = getTidy($url); $xoxo = new OutlineFromXOXO($data, array('classes' => array('xoxo', 'posts'))); $hatom = new OutlineFromHATOM($data, array('resolve' => $_GET['url'])); $raw = new OutlineFromXOXO($data, array('classes' => array())); if (in_array('home', explode(' ', $xoxo->getNode(0)->getField('rel')))) { $site = $xoxo->getNode(0)->toArray(); $xoxo->unsetNode(0); $xoxo->reindexNodes(); } if (in_array('home', explode(' ', $hatom->getNode(0)->getField('rel')))) { $site = $hatom->getNode(0)->toArray(); $hatom->unsetNode(0); $hatom->reindexNodes(); } if (in_array('home', explode(' ', $raw->getNode(0)->getField('rel')))) { $site = $raw->getNode(0)->toArray();
if ($isclose) { $flattento .= '</' . strtolower($flattentag) . '>'; $rtrn[] = $flattento; $flattentag = ''; unset($flattento); $flattento = false; $subflatten = 0; } //end if isclose } //end if theclass } //end foreach vals as el return $rtrn; } //end function extract_by_id if (isset($_REQUEST['_json'])) { header('Content-type: text/javascript'); require_once 'OutlineClasses/Outline.php'; require_once 'xn-app://singpolymaplay/getTidy.php'; if ($_REQUEST['callback']) { echo $_REQUEST['callback'] . '('; } $tmp = extract_by_id(getTidy($_REQUEST['url']), $_REQUEST['id']); $tmp = new Outline($tmp[0]); echo $tmp->toJSON(); if ($_REQUEST['callback']) { echo ')'; } } //end if _json
/**
 * Scrape a Blogger profile page into an associative array.
 *
 * The page is fetched with getTidy() and walked as a flat list of XML parser
 * events. Keys that may appear in the result:
 *   - name:    trimmed text of an H1 element
 *   - aboutme: markup of the first P element after an H2 reading "About Me"
 *   - contact: markup of the first UL element after an H2 reading "Contact"
 *   - photo:   array(url, width, height) of the element whose ALT is "My Photo"
 *   - blogs:   list of array(url, name[, members]) collected from links between
 *              the element with id "blogs" and the next closing TABLE; absolute
 *              links are blogs, other links are members of the preceding blog
 *   - error:   array(number, message) when the XML parser reported a failure
 *
 * @param string $url Address of the profile page.
 * @return array Profile data as described above.
 */
function getBloggerProfile($url)
{
    $page = (string) getTidy($url);
    $error = null;
    $vals = array();

    $theParser = xml_parser_create();
    if (!xml_parse_into_struct($theParser, $page, $vals)) {
        $errorcode = xml_get_error_code($theParser);
        // NOTE(review): code 27 is deliberately ignored; presumably it is the
        // parser's undeclared-entity code (HTML entities such as &nbsp;) - confirm.
        if ($errorcode != XML_ERROR_NONE && $errorcode != 27) {
            $error = array('number' => $errorcode, 'message' => xml_error_string($errorcode));
        }
    } //end if ! parse
    xml_parser_free($theParser);
    if (!is_array($vals)) {
        $vals = array();
    }

    $flattento = false; // result key being captured ('aboutme'/'contact'), or false
    $flattentag = '';   // tag whose close ends the capture
    $flattendat = '';   // markup captured so far
    $subflatten = -1;   // nesting depth of $flattentag inside the capture (-1: not yet opened)
    $doblogs = false;   // inside the blogs table?
    $bloggerdata = array();

    foreach ($vals as $el) {
        $tag = isset($el['tag']) ? $el['tag'] : '';
        $type = isset($el['type']) ? $el['type'] : '';
        $attributes = (isset($el['attributes']) && is_array($el['attributes'])) ? $el['attributes'] : array();
        $value = isset($el['value']) ? $el['value'] : null;
        // Compared against '' so that a text value of "0" is not lost.
        $hasvalue = $value !== null && $value !== '';

        $isopen = $type == 'open' || $type == 'complete'; //for readability
        $isclose = $type == 'close' || $type == 'complete';

        if ($flattento) { //if flattening tags
            $istarget = $flattentag == $tag;
            $emptytag = false; //assume not an empty tag

            if ($isopen) { //if opening tag
                if ($istarget) {
                    $subflatten++;
                }
                // The outermost target element gets its end tag appended when
                // the capture finishes, so it must never be written self-closed.
                $outermost = $istarget && $subflatten == 0;
                $flattendat .= '<' . strtolower($tag); //add open tag
                foreach ($attributes as $id => $val) { //loop through and add
                    $flattendat .= ' ' . strtolower($id) . '="' . htmlspecialchars($val) . '"';
                } //end foreach
                $emptytag = $type == 'complete' && !$hasvalue && !$outermost; //is emptytag?
                $flattendat .= $emptytag ? ' />' : '>'; //end tag
                if ($hasvalue) {
                    $flattendat .= htmlspecialchars($value); //add contents, if any
                }
            } //end if isopen

            if ($type == 'cdata') { //if cdata
                $flattendat .= htmlspecialchars((string) $value); //add data
            } //end if cdata

            if ($isclose) { //if closing tag
                if ($istarget) {
                    if ($subflatten) {
                        $subflatten--;
                    } else {
                        // Outermost target closed: store the capture and reset.
                        // The open tag is written before this check, so a target
                        // that holds only text (a "complete" event) keeps its content.
                        $bloggerdata[$flattento] = $flattendat . '</' . strtolower($flattentag) . '>';
                        $flattendat = '';
                        $flattentag = '';
                        $subflatten = -1;
                        $flattento = '';
                        continue;
                    } //end if-else subflatten
                } //end if istarget
                if (!$emptytag) {
                    $flattendat .= '</' . strtolower($tag) . '>'; //if not emptytag, write out end tag
                }
            } //end if isclose
            continue;
        } //end if flattento

        if (isset($attributes['ID']) && $attributes['ID'] == 'blogs') {
            $doblogs = true;
        }
        // Only opening events carry the link's attributes; the closing event
        // of an anchor with child elements must not be counted as a second link.
        if ($doblogs && $tag == 'A' && $isopen) {
            $href = isset($attributes['HREF']) ? $attributes['HREF'] : '';
            if (preg_match('/^https?:/i', $href)) {
                // Absolute link (http or https): a blog.
                $bloggerdata['blogs'][] = array('url' => $href, 'name' => $value);
            } else {
                // Site-relative link: a member of the most recent blog.
                $tmp = empty($bloggerdata['blogs']) ? array() : array_pop($bloggerdata['blogs']);
                if (empty($tmp['members'])) {
                    $tmp['members'] = array();
                }
                $tmp['members'][] = array('url' => 'http://www.blogger.com' . $href, 'name' => $value);
                $bloggerdata['blogs'][] = $tmp;
            } //end if-else http:
        } //end if doblogs
        if ($doblogs && $isclose && $tag == 'TABLE') {
            $doblogs = false;
        }

        if ($tag == 'H1') {
            $bloggerdata['name'] = trim((string) $value);
        }
        // Section headings switch on capture of the next P / UL element.
        if ($tag == 'H2' && $value == 'About Me') {
            $flattento = 'aboutme';
            $flattentag = 'P';
        }
        if ($tag == 'H2' && $value == 'Contact') {
            $flattento = 'contact';
            $flattentag = 'UL';
        }
        if (isset($attributes['ALT']) && $attributes['ALT'] == 'My Photo') {
            $bloggerdata['photo'] = array();
            $bloggerdata['photo']['url'] = isset($attributes['SRC']) ? $attributes['SRC'] : null;
            $bloggerdata['photo']['width'] = isset($attributes['WIDTH']) ? $attributes['WIDTH'] : null;
            $bloggerdata['photo']['height'] = isset($attributes['HEIGHT']) ? $attributes['HEIGHT'] : null;
        }
    } //end foreach

    if ($error) {
        $bloggerdata['error'] = $error;
    }
    return $bloggerdata;
}