/**
 * Runs every test case stored next to a suite's JSON descriptor.
 *
 * Each sub-directory of the directory holding $path is treated as one test
 * case made of three fixture files: test.json, input.html and output.json.
 * The HTML is parsed and the resulting 'items' are compared (strictly) with
 * the 'items' of the expected output. The counters tests_total, tests_passed
 * and tests_failed on this object are updated as the suite runs.
 *
 * A test case whose fixtures are unreadable or whose expected output has no
 * 'items' entry is reported and counted as failed instead of being parsed.
 *
 * @param string $path: path to the suite's JSON file
 * @access public
 * @return bool FALSE when the suite file cannot be read or decoded, TRUE otherwise
 */
public function runSuite($path)
{
    # the suite descriptor has to be readable and decode to an object
    $suite_json = is_readable($path) ? file_get_contents($path) : FALSE;
    $suite = ($suite_json === FALSE) ? NULL : json_decode($suite_json);

    if (!is_object($suite)) {
        echo sprintf('Unable to load test suite "%s".', $path), "\n";

        return FALSE;
    }

    $suite_name = isset($suite->name) ? $suite->name : basename($path);
    echo sprintf('Running %s.', $suite_name), "\n";

    # loop: each file in the test suite
    foreach (new \DirectoryIterator(dirname($path)) as $file) {
        # only real sub-directories are test cases
        if (!$file->isDir() || $file->isDot()) {
            continue;
        }

        $this->tests_total++;
        $path_of_test = $file->getPathname() . '/';

        # read the three fixture files, remembering any that are unavailable
        $fixtures = array();
        $unreadable = array();

        foreach (array('test.json', 'input.html', 'output.json') as $fixture) {
            $fixture_path = $path_of_test . $fixture;
            $contents = is_readable($fixture_path) ? file_get_contents($fixture_path) : FALSE;

            if ($contents === FALSE) {
                $unreadable[] = $fixture;
            } else {
                $fixtures[$fixture] = $contents;
            }
        }

        if (count($unreadable) > 0) {
            echo sprintf('"%s" failed: unreadable %s.', $file->getFilename(), implode(', ', $unreadable)), "\n\n";
            $this->tests_failed++;
            continue;
        }

        # fall back to the directory name when test.json carries no usable name
        $test = json_decode($fixtures['test.json']);
        $test_name = (is_object($test) && isset($test->name)) ? $test->name : $file->getFilename();

        # an expectation without an 'items' entry cannot be compared
        $expected_output = json_decode($fixtures['output.json'], TRUE);

        if (!is_array($expected_output) || !array_key_exists('items', $expected_output)) {
            echo sprintf('"%s" failed: output.json has no "items".', $test_name), "\n\n";
            $this->tests_failed++;
            continue;
        }

        $parser = new Parser($fixtures['input.html'], '', TRUE);
        $output = $parser->parse(TRUE);

        # if: test passed
        if ($output['items'] === $expected_output['items']) {
            $this->tests_passed++;
        } else {
            echo sprintf('"%s" failed.', $test_name), "\n\n";
            echo sprintf('Parsed: %s', print_r($output, TRUE)), "\n";
            echo sprintf('Expected: %s', print_r($expected_output, TRUE)), "\n";
            $this->tests_failed++;
        } # end if
    } # end loop

    return TRUE;
}
/**
 * A p-* property placed on an empty <br> or <hr> element must parse to ''.
 *
 * The input holds two h-cards: the first carries p-name on a <br>, the second
 * on an <hr>. Both cards are checked; previously the second assertion looked
 * at the first card again, so the <hr> case was never verified.
 *
 * @group parseP
 */
public function testParsePReturnsEmptyStringForBrHr()
{
    $input = '<div class="h-card"><br class="p-name"/></div><div class="h-card"><hr class="p-name"/></div>';
    $parser = new Parser($input);
    $output = $parser->parse();

    // items[0] is the <br> card
    $this->assertArrayHasKey('name', $output['items'][0]['properties']);
    $this->assertEquals('', $output['items'][0]['properties']['name'][0]);

    // items[1] is the <hr> card
    $this->assertArrayHasKey('name', $output['items'][1]['properties']);
    $this->assertEquals('', $output['items'][1]['properties']['name'][0]);
}
/**
 * A dt-* property whose datetime is carried by a nested value-title element
 * must take its value from that element's title attribute.
 *
 * @see https://github.com/indieweb/php-mf2/issues/27
 */
public function testParsesValueTitleDatetimes()
{
    $html = <<<EOT
<div class="h-entry">
 <h1 class="p-name">test</h1>
 <span class="dt-published"><span class="value-title" title="2012-02-16T16:14:47+00:00"> </span>16.02.2012</span>
</div>
EOT;

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $entryProperties = $parsed['items'][0]['properties'];

    $this->assertEquals('2012-02-16T16:14:47+00:00', $entryProperties['published'][0]);
}
/**
 * Convenience wrapper around Parser::mfNamesFromClass() for callers that
 * hold a DOM element rather than a raw class string.
 *
 * The element's `class` attribute is read and handed to
 * Parser::mfNamesFromClass() together with the prefix.
 *
 * @param DOMElement $e The element whose class attribute is inspected
 * @param string $prefix The prefix to look for
 * @return mixed Whatever Parser::mfNamesFromClass() returns
 */
static function mfNamesFromElement(\DOMElement $e, $prefix = 'h-')
{
    return Parser::mfNamesFromClass($e->getAttribute('class'), $prefix);
}
/**
 * Guesses which blogging software produced a response by inspecting its
 * <meta name="generator"> elements.
 *
 * Each generator element is checked, in document order, for the
 * case-insensitive substrings 'wordpress', 'mediawiki' and 'idno' (in that
 * order); the first hit wins.
 *
 * @param mixed $response Object exposing getBody() and getEffectiveUrl()
 * @return string|null 'wordpress', 'mediawiki', 'idno', or null when nothing matched
 */
function detectBloggingSoftware($response)
{
    $knownGenerators = array('wordpress', 'mediawiki', 'idno');

    $document = new MfParser($response->getBody(1), $response->getEffectiveUrl());

    foreach ($document->query('//meta[@name="generator"]') as $metaEl) {
        $generator = $metaEl->getAttribute('content');

        foreach ($knownGenerators as $software) {
            if (stristr($generator, $software) !== false) {
                return $software;
            }
        }
    }

    return null;
}
/**
 * A dt-start built from a date value and a separate "9pm" time value must
 * be combined into a single 24-hour datetime.
 *
 * @group parseDT
 * @group valueClass
 */
public function testYYYY_MM_DD__HHpm()
{
    $html = '<div class="h-event"><span class="dt-start"><span class="value">2012-10-07</span> at <span class="value">9pm</span></span></div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $eventProperties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $eventProperties);
    $this->assertEquals('2012-10-07T21:00', $eventProperties['start'][0]);
}
/**
 * A nested microformat must be recognised as a property value even when
 * its p-* and h-* class names are preceded by an unrelated class name.
 */
public function testParsesNestedMicroformatsWithClassnamesInAnyOrder()
{
    $html = <<<EOT
<div class="h-entry">
\t<div class="note- p-in-reply-to h-entry">Name</div>
</div>
EOT;

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $replies = $parsed['items'][0]['properties']['in-reply-to'];

    $this->assertCount(1, $replies);
    $this->assertEquals('Name', $replies[0]['properties']['name'][0]);
}
/**
 * u-* properties on <source> elements must take their value from the src
 * attribute, one value per element and in document order.
 *
 * @group parseU
 */
public function testParseUHandlesSource()
{
    $html = '<div class="h-entry"><video><source class="u-video" src="http://example.com/video.mp4" type="video/mp4"><source class="u-video" src="http://example.com/video.ogg" type="video/ogg"></video></div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $entryProperties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('video', $entryProperties);
    $this->assertEquals('http://example.com/video.mp4', $entryProperties['video'][0]);
    $this->assertEquals('http://example.com/video.ogg', $entryProperties['video'][1]);
}
/**
 * An implied photo URL must be resolved against a relative <base href>,
 * which is itself resolved against the URL given to the parser.
 *
 * @group baseUrl
 */
public function testResolvesRelativeBaseRelativeUrlsInImpliedMicroformats()
{
    $html = '<base href="things/"/><a class="h-card"><img src="image.png" /></a>';

    $mfParser = new Parser($html, 'http://example.com/');
    $parsed = $mfParser->parse();

    $photos = $parsed['items'][0]['properties']['photo'];

    $this->assertEquals('http://example.com/things/image.png', $photos[0]);
}
/**
 * Runs the parent parser and post-processes the items it found.
 *
 * Both arguments are forwarded unchanged to parent::parse(). The 'items'
 * entry of the parent's result is then replaced by the return value of
 * $this->_refineResults(); every other entry is returned as the parent
 * produced it.
 *
 * @param bool $convertClassic Forwarded to parent::parse(). Defaults to true
 * @param \DOMElement $context Optional: forwarded to parent::parse() as the element to parse from
 * @return array The parent's result array with its 'items' entry refined
 */
public function parse($convertClassic = true, \DOMElement $context = null)
{
    $parsed = parent::parse($convertClassic, $context);

    $refinedItems = $this->_refineResults($parsed['items']);
    $parsed['items'] = $refinedItems;

    return $parsed;
}
/**
 * Leading and trailing spaces inside an href must be stripped from the
 * parsed u-url value.
 *
 * @group parseU
 */
public function testParseUWithSpaces()
{
    $html = '<div class="h-card"><a class="u-url" href=" http://example.com ">Awesome example website</a></div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $cardProperties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('url', $cardProperties);
    $this->assertEquals('http://example.com', $cardProperties['url'][0]);
}
/**
 * A dt-start containing only a date value (no time part) must yield the
 * bare date.
 *
 * @group parseDT
 * @group valueClass
 */
public function testDTStartDateOnly()
{
    $html = '<div class="h-event"> <span class="dt-start"><span class="value">2014-06-07</span> </span> </div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $eventProperties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $eventProperties);
    $this->assertEquals('2014-06-07', $eventProperties['start'][0]);
}
/**
 * Builds the context array for a fetched document.
 *
 * The parser is chosen from the host of $url: the Twitter shim for
 * twitter.com / www.twitter.com, the Facebook shim for facebook.com /
 * www.facebook.com, and the plain microformats2 parser for anything else.
 * Host names are case-insensitive, so the host is lower-cased before it is
 * compared (strictly) against the known names.
 *
 * Currently $headers is just returned as-is, so is usually an array. This should probably have some abstraction for
 * case insensitivity but that might be an issue if we can’t guarantee that $headers is an array, or that each item is a
 * scalar or array — e.g. how are multiple values for a header handled?
 *
 * @param $html    Body of the response; passed to the parser and returned as 'content'
 * @param $url     URL the document was fetched from
 * @param $headers Response headers, returned untouched
 * @param $topic   Returned untouched under 'topic'
 * @return array With keys topic, url, mf2, content, domdocument, parser and headers
 */
function contextFromResponse($html, $url, $headers, $topic)
{
    // TODO: per-host handling of microformats shim parsing should be handled by Mf2\Shim automatically.
    // parse_url() yields null/false when there is no usable host; the cast turns that into ''.
    $host = strtolower((string) parse_url($url, PHP_URL_HOST));

    if (in_array($host, ['www.twitter.com', 'twitter.com'], true)) {
        $parser = new Mf2\Shim\Twitter($html, $url);
    } elseif (in_array($host, ['www.facebook.com', 'facebook.com'], true)) {
        $parser = new Mf2\Shim\Facebook($html, $url);
    } else {
        $parser = new Mf2\Parser($html, $url);
    }

    $mf2 = $parser->parse();

    return [
        'topic' => $topic,
        'url' => $url,
        'mf2' => $mf2,
        'content' => $html,
        'domdocument' => $parser->doc,
        'parser' => $parser,
        'headers' => $headers,
    ];
}
/**
 * Text from sibling elements separated only by whitespace must not be
 * glued together in the entry's name.
 */
public function testWhitespaceBetweenElements()
{
    $html = <<<EOT
<div class="h-entry">
\t<data class="p-rsvp" value="yes">I'm attending</data>
\t<a class="u-in-reply-to" href="https://snarfed.org/2014-06-16_homebrew-website-club-at-quip">Homebrew Website Club at Quip</a>
\t<div class="p-content">Thanks for hosting!</div>
</div>
EOT;

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $entry = $parsed['items'][0];

    $this->assertContains('h-entry', $entry['type']);
    $this->assertNotContains('attendingHomebrew', $entry['properties']['name'][0]);
}
/**
 * An h-card used as a u-category value must be parsed as a nested
 * microformat with its own name and url properties.
 */
public function testParseHcardInCategory()
{
    $html = <<<EOT
<span class="h-entry">
\t<a class="p-author h-card" href="http://a.example.com/">Alice</a> tagged
\t<a href="http://b.example.com/" class="u-category h-card">Bob Smith</a> in
\t<a class="u-tag-of u-in-reply-to" href="http://s.example.com/permalink47">
\t\t<img src="http://s.example.com/photo47.png" alt="a photo of Bob and Cole" />
\t</a>
</span>
EOT;

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $entry = $parsed['items'][0];

    $this->assertContains('h-entry', $entry['type']);
    $this->assertArrayHasKey('category', $entry['properties']);

    // The first category is expected to be the nested h-card for Bob
    $taggedCard = $entry['properties']['category'][0];

    $this->assertContains('h-card', $taggedCard['type']);
    $this->assertArrayHasKey('name', $taggedCard['properties']);
    $this->assertEquals('Bob Smith', $taggedCard['properties']['name'][0]);
    $this->assertArrayHasKey('url', $taggedCard['properties']);
    $this->assertEquals('http://b.example.com/', $taggedCard['properties']['url'][0]);
}
/**
 * Handles one pingback: fetches the source page, extracts its microformats,
 * stores a mention for the matching storytlr user and, if enabled, emails
 * that user.
 *
 * The user is found from the "<source>-<item>.html" suffix of $target when
 * both ids resolve to an existing source and item; otherwise
 * $this->lookupUser($target) is used. Author and title data come from the
 * first h-entry (falling back to the first top-level h-card for the author),
 * and the timestamp falls back to the current time.
 *
 * Failures to fetch the source or to store the mention are logged and end
 * processing silently; a failure to send the email is only logged.
 *
 * @param string $source URL of the page containing the mention
 * @param string $target URL on this site that was mentioned
 * @return void
 * @throws Exception When no storytlr user can be associated with $target
 */
private function process($source, $target)
{
    $this->_logger->log("Processing pingback from {$source} for {$target}", Zend_Log::INFO);

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $source);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Storytlr/1.0');
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, TRUE);
    // NOTE(review): $source is presumably supplied by the remote pingback
    // request, so the fetch (and any redirect it follows) is restricted to
    // web protocols to keep it away from file://, gopher:// and the like.
    curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $response = curl_exec($curl);
    $http_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // curl_exec() returns false on transport errors; never feed that to the parser
    if ($response === false || $http_code != 200) {
        $this->_logger->log("Failed to get content for {$source}", Zend_Log::DEBUG);
        return;
    }

    // Parse the source page for microformats content
    $parser = new Parser($response);
    $output = $parser->parse();
    $this->_logger->log("Parsed output: " . var_export($output, true), Zend_Log::DEBUG);

    // Extract relevant data
    $hcards = array();
    $hentries = array();
    $this->processItems($output["items"], $hcards, $hentries);
    $this->_logger->log("Extracted hcards: " . var_export($hcards, true), Zend_Log::DEBUG);
    $this->_logger->log("Extracted hentries: " . var_export($hentries, true), Zend_Log::DEBUG);

    // Lookup if existing entry; $matches stays empty when the target has no "<source>-<item>.html" suffix
    $matches = array();
    preg_match('/(?P<source>\\d+)\\-(?P<item>\\d+)\\.html$/', $target, $matches);
    $this->_logger->log("Matches: " . var_export($matches, true), Zend_Log::DEBUG);
    $source_id = isset($matches["source"]) ? $matches["source"] : null;
    $item_id = isset($matches["item"]) ? $matches["item"] : null;

    // Get the source and the user owning it
    $data = new Data();
    $sources = new Sources();
    $users = new Users();
    $user = null;

    // Does it relate to an item ?
    if ($source_id && $item_id) {
        $s = $sources->getSource($source_id);
        $i = $data->getItem($source_id, $item_id);

        if ($s && $i) {
            $user = $users->getUser($s['user_id']);
        }
    }

    // Otherwise, can we relate to a user ?
    if (!$user) {
        $user = $this->lookupUser($target);
    }

    // No user ? We have to giveup
    if (!$user) {
        throw new Exception('Failed to find corresponding storytlr user.');
    }

    $hentry = null;
    $hcard = null;
    $timestamp = null;

    // Is this a h-entry ? If so, prefer its first author as the h-card
    if (count($hentries) > 0) {
        $hentry = $hentries[0];

        if (isset($hentry["author"]) && is_array($hentry["author"]) && count($hentry["author"]) > 0) {
            $hcard = $hentry["author"][0];
        }
    }

    // If no hcard yet (maybe there was no h-entry), we grab the top-level one
    if (!$hcard && count($hcards) > 0) {
        $hcard = $hcards[0];
    }

    // Find the published date
    if ($hentry && !empty($hentry["published"])) {
        $timestamp = strtotime($hentry["published"]);
    }

    // If no timestamp (missing or unparseable), we fallback to now
    if (!$timestamp) {
        $timestamp = time();
    }

    // Missing pieces are passed on as null rather than read from unset offsets
    $title = isset($hentry["title"]) ? $hentry["title"] : null;
    $author_name = isset($hcard["name"]) ? $hcard["name"] : null;
    $author_url = isset($hcard["url"]) ? $hcard["url"] : null;
    $author_avatar = isset($hcard["avatar"]) ? $hcard["avatar"] : null;

    // Add the mention to the database
    try {
        $mentions = new Mentions();
        $mentions->addMention($source_id, $item_id, $user->id, $source, $title, $author_name, $author_url, "", $author_avatar, $timestamp);
    } catch (Exception $e) {
        $this->_logger->log("Exception when storing mention: " . $e->getMessage(), Zend_Log::ERR);
        return;
    }

    // Send an email alert to owner
    try {
        $on_comment = $this->_properties->getProperty('on_comment');

        if ($on_comment) {
            Stuffpress_Emails::sendMentionEmail($user->email, $user->username, $author_name, $author_url, $title, $source, $target);
        }
    } catch (Exception $e) {
        $this->_logger->log("Sending mention notification exception: " . $e->getMessage(), Zend_Log::ERR);
    }
}
/**
 * Parses the "more detailed person" h-card example and compares the whole
 * result, as JSON, with the expected structure.
 *
 * From http://microformats.org/wiki/microformats-2
 */
public function testMoreDetailedPerson()
{
    $html = '<div class="h-card"> <img class="u-photo" alt="photo of Mitchell" src="https://webfwd.org/content/about-experts/300.mitchellbaker/mentor_mbaker.jpg"/> <a class="p-name u-url" href="http://blog.lizardwrangler.com/" >Mitchell Baker</a> (<a class="u-url" href="https://twitter.com/MitchellBaker" >@MitchellBaker</a>) <span class="p-org">Mozilla Foundation</span> <p class="p-note"> Mitchell is responsible for setting the direction and scope of the Mozilla Foundation and its activities. </p> <span class="p-category">Strategy</span> <span class="p-category">Leadership</span> </div>';

    $expectedJson = '{ "rels": {}, "items": [{ "type": ["h-card"], "properties": { "photo": ["https://webfwd.org/content/about-experts/300.mitchellbaker/mentor_mbaker.jpg"], "name": ["Mitchell Baker"], "url": [ "http://blog.lizardwrangler.com/", "https://twitter.com/MitchellBaker" ], "org": ["Mozilla Foundation"], "note": ["Mitchell is responsible for setting the direction and scope of the Mozilla Foundation and its activities."], "category": [ "Strategy", "Leadership" ] } }] }';

    $mfParser = new Parser($html);
    $actualJson = json_encode($mfParser->parse());

    $this->assertJsonStringEqualsJsonString($actualJson, $expectedJson);
}
/**
 * A WordPress-style article marked up with the classic `hentry` class
 * must be reported as an h-entry.
 */
public function testParsesFBerrimanClassicHEntry()
{
    $html = <<<EOT
<article id="post-976" class="post-976 post type-post status-publish format-standard hentry category-speaking category-web-dev tag-conferences tag-front-trends tag-fronttrends tag-speaking tag-txjs">
\t<header class="entry-header">
\t\t<h1 class="entry-title">
\t\t\t<a href="http://fberriman.com/2013/05/14/april-recap-txjs-front-trends/" rel="bookmark">April recap – TXJS & Front-Trends</a>
\t\t</h1>
\t\t
\t\t<div class="entry-meta">
\t\t\t<span class="date">
\t\t\t\t<a href="http://fberriman.com/2013/05/14/april-recap-txjs-front-trends/" title="Permalink to April recap – TXJS & Front-Trends" rel="bookmark">
\t\t\t\t\t<time class="entry-date" datetime="2013-05-14T11:54:06+00:00">May 14, 2013</time>
\t\t\t\t</a>
\t\t\t</span>
\t\t\t<span class="categories-links">
\t\t\t\t<a href="http://fberriman.com/category/speaking/" title="View all posts in Speaking" rel="category tag">Speaking</a>,
\t\t\t\t<a href="http://fberriman.com/category/web-dev/" title="View all posts in Web Dev" rel="category tag">Web Dev</a>
\t\t\t</span>
\t\t\t<span class="tags-links">
\t\t\t\t<a href="http://fberriman.com/tag/conferences/" rel="tag">conferences</a>,
\t\t\t\t<a href="http://fberriman.com/tag/front-trends/" rel="tag">front-trends</a>,
\t\t\t\t<a href="http://fberriman.com/tag/fronttrends/" rel="tag">fronttrends</a>,
\t\t\t\t<a href="http://fberriman.com/tag/speaking/" rel="tag">Speaking</a>,
\t\t\t\t<a href="http://fberriman.com/tag/txjs/" rel="tag">txjs</a>
\t\t\t</span>
\t\t\t<span class="author vcard"><a class="url fn n" href="http://fberriman.com/author/admin/" title="View all posts by Frances" rel="author">Frances</a></span>\t\t\t\t\t</div>
\t</header>
\t\t<div class="entry-content">
\t\t<p>April was pretty decent. I got to attend two very good conferences <strong>and</strong> I got to speak at them.</p>
\t\t\t</div>
\t
\t<footer class="entry-meta">
\t\t<div class="comments-link">
\t\t\t<a href="http://fberriman.com/2013/05/14/april-recap-txjs-front-trends/#respond" title="Comment on April recap – TXJS & Front-Trends"><span class="leave-reply">Leave a comment</span></a>
\t\t</div>
\t</footer><!-- .entry-meta -->
</article><!-- #post -->
EOT;

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();

    $firstItem = $parsed['items'][0];

    $this->assertContains('h-entry', $firstItem['type']);
}
/**
 * Parse Microformats2
 *
 * One-call helper: builds a Parser for the given input and URL and returns
 * the result of its parse() method.
 *
 * Example usage:
 *
 *     use Mf2;
 *     $output = Mf2\parse('<span class="h-card">Barnaby Walters</span>');
 *     echo json_encode($output, JSON_PRETTY_PRINT);
 *
 * Produces:
 *
 *     {
 *       "items": [
 *         {
 *           "type": ["h-card"],
 *           "properties": {
 *             "name": ["Barnaby Walters"]
 *           }
 *         }
 *       ],
 *       "rels": {}
 *     }
 *
 * @param string|DOMDocument $input The HTML string or DOMDocument object to parse
 * @param string $url The URL the input document was found at, for relative URL resolution
 * @param bool $convertClassic Passed to Parser::parse(); whether or not to convert classic microformats
 * @return array Canonical MF2 array structure
 */
function parse($input, $url = null, $convertClassic = true)
{
    $mfParser = new Parser($input, $url);
    $parsed = $mfParser->parse($convertClassic);

    return $parsed;
}
/**
 * rel-me links
 *
 * Given the HTML and URL of a page, returns all the rel-me links found on
 * that page, with duplicates removed (keys of the remaining entries are
 * preserved, as array_unique() does).
 *
 * @param string $html The page's HTML
 * @param string $url  The URL the page was found at
 * @return array The distinct rel="me" URLs; empty when the page has none
 */
function relMeLinks($html, $url)
{
    $parser = new Mf2\Parser($html, $url);
    $mf = $parser->parse();

    // Check for the key explicitly instead of silencing a missing-index error with "@"
    if (empty($mf['rels']['me']) || !is_array($mf['rels']['me'])) {
        return array();
    }

    return array_unique($mf['rels']['me']);
}
/**
 * An h-card nested inside another h-card without a property class must be
 * reported under the parent's "children".
 *
 * From http://microformats.org/wiki/microformats2#combining_microformats
 */
public function testHCardChildHCard()
{
    $html = '<div class="h-card"> <a class="p-name u-url" href="http://blog.lizardwrangler.com/"> Mitchell Baker</a> (<a class="h-card h-org" href="http://mozilla.org/"> Mozilla Foundation</a>) </div>';

    $expectedJson = '{ "rels": {}, "items": [{ "type": ["h-card"], "properties": { "name": ["Mitchell Baker"], "url": ["http://blog.lizardwrangler.com/"] }, "children": [{ "type": ["h-card","h-org"], "properties": { "name": ["Mozilla Foundation"], "url": ["http://mozilla.org/"] } }] }] }';

    $mfParser = new Parser($html);
    $actualJson = json_encode($mfParser->parse());

    $this->assertJsonStringEqualsJsonString($actualJson, $expectedJson);
}
/**
 * generate the comment data from the microformatted content
 *
 * Parses $html (using the comment's author URL as base), picks the
 * representative entry for $target and copies its content, date, author and
 * type information into $commentdata.
 *
 * Content is taken from the first available of summary, content and name.
 * The date is taken from the first of published / updated that is present
 * and parseable. The author comes from the entry's own author h-card,
 * falling back to the page's representative author.
 *
 * @param array  $commentdata the comment data; 'comment_author_url' is read as the source URL
 * @param string $target      the target url
 * @param string $html        the source page's html
 *
 * @return array the enriched comment data, or an empty array when no usable entry was found
 */
public static function generate_commentdata($commentdata, $target, $html)
{
    // add source
    $source = $commentdata['comment_author_url'];

    // parse source html
    $parser = new Parser($html, $source);
    $mf_array = $parser->parse(true);

    // get all 'relevant' entries
    $entries = self::get_entries($mf_array);

    if (empty($entries)) {
        return array();
    }

    // get the entry of interest
    $entry = self::get_representative_entry($entries, $target);

    if (empty($entry)) {
        return array();
    }

    // the entry properties
    $properties = $entry['properties'];

    // try to find some content
    // @link http://indiewebcamp.com/comments-presentation
    if (self::check_mf_attr('summary', $properties)) {
        $commentdata['comment_content'] = wp_slash($properties['summary'][0]);
    } elseif (self::check_mf_attr('content', $properties)) {
        $content = $properties['content'][0];

        // NOTE(review): content is presumably an array with an 'html' key for
        // e-content but a plain string for p-content; handle both shapes.
        if (is_array($content) && isset($content['html'])) {
            $commentdata['comment_content'] = wp_filter_kses($content['html']);
        } elseif (is_string($content)) {
            $commentdata['comment_content'] = wp_slash($content);
        }
    } elseif (self::check_mf_attr('name', $properties)) {
        $commentdata['comment_content'] = wp_slash($properties['name'][0]);
    }

    // the key is always set afterwards, even when nothing usable was found
    $commentdata['comment_content'] = isset($commentdata['comment_content'])
        ? trim($commentdata['comment_content'])
        : '';

    // set the right date: first of published/updated that strtotime() understands
    foreach (array('published', 'updated') as $date_attr) {
        if (!self::check_mf_attr($date_attr, $properties)) {
            continue;
        }

        $time = strtotime($properties[$date_attr][0]);

        // an unparseable date must not be stored as the Unix epoch
        if (false !== $time) {
            $commentdata['comment_date'] = get_date_from_gmt(date('Y-m-d H:i:s', $time), 'Y-m-d H:i:s');
            break;
        }
    }

    // check if the entry has an author of its own
    if (isset($properties['author']) && isset($properties['author'][0]['properties'])) {
        $author = $properties['author'][0]['properties'];
    } else {
        $author = self::get_representative_author($mf_array, $source);
    }

    // if author is present use the information for the comment
    if ($author) {
        if (self::check_mf_attr('name', $author)) {
            $commentdata['comment_author'] = wp_slash($author['name'][0]);
        }

        if (self::check_mf_attr('email', $author)) {
            $commentdata['comment_author_email'] = wp_slash($author['email'][0]);
        }

        if (self::check_mf_attr('url', $author)) {
            $commentdata['_author_url'] = esc_url_raw($author['url'][0]);
        }

        if (self::check_mf_attr('photo', $author)) {
            $commentdata['_avatar'] = esc_url_raw($author['photo'][0]);
        }
    }

    // set canonical url (u-url)
    if (self::check_mf_attr('url', $properties)) {
        $commentdata['_canonical'] = esc_url_raw($properties['url'][0]);
    } else {
        $commentdata['_canonical'] = esc_url_raw($source);
    }

    // check rsvp property
    if (self::check_mf_attr('rsvp', $properties)) {
        $commentdata['_type'] = wp_slash('rsvp:' . $properties['rsvp'][0]);
    } else {
        // get post type
        $commentdata['_type'] = wp_slash(self::get_entry_type($target, $entry, $mf_array));
    }

    return $commentdata;
}
/**
 * Four h-cards without explicit properties (a span, a link, an image and a
 * link wrapping an image) must each get their implied name, url and photo.
 * The whole result is compared as JSON.
 */
public function testMultipleImpliedHCards()
{
    $html = '<span class="h-card">Frances Berriman</span> <a class="h-card" href="http://benward.me">Ben Ward</a> <img class="h-card" alt="Sally Ride" src="http://upload.wikimedia.org/wikipedia/commons/a/a4/Ride-s.jpg"/> <a class="h-card" href="http://tantek.com"> <img alt="Tantek Çelik" src="http://ttk.me/logo.jpg"/> </a>';

    $expectedJson = '{ "rels": {}, "items": [{ "type": ["h-card"], "properties": { "name": ["Frances Berriman"] } }, { "type": ["h-card"], "properties": { "name": ["Ben Ward"], "url": ["http://benward.me"] } }, { "type": ["h-card"], "properties": { "name": ["Sally Ride"], "photo": ["http://upload.wikimedia.org/wikipedia/commons/a/a4/Ride-s.jpg"] } }, { "type": ["h-card"], "properties": { "name": ["Tantek Çelik"], "url": ["http://tantek.com"], "photo": ["http://ttk.me/logo.jpg"] } }] }';

    $mfParser = new Parser($html, '', true);
    $actualJson = json_encode($mfParser->parse());

    $this->assertJsonStringEqualsJsonString($actualJson, $expectedJson);
}
<?php namespace IndieWeb; use mf2\Parser as Mf2Parser; use BarnabyWalters\Mf2; ob_start(); require __DIR__ . '/bootstrap.php'; ob_end_clean(); if (PHP_SAPI === 'cli') { $html = file_get_contents(__DIR__ . '/rel-me-test.html'); $parser = new Mf2Parser($html); $mf = $parser->parse(); $testSuites = Mf2\findMicroformatsByType($mf, 'h-x-test-suite'); if (count($testSuites) === 0) { die("Found no test suites in rel-me-test.html"); } $testSuite = $testSuites[0]; echo "\n" . Mf2\getProp($testSuite, 'name') . "\n"; echo "============================\n"; foreach ($testSuite['properties']['x-test-case'] as $testCase) { echo "\n"; $params = $testCase['properties']['x-parameter']; $meUrl = array_shift($params); if (count($params) === 0) { $redirects = mockFollowOneRedirect(array(null)); } else { $redirects = mockFollowOneRedirect($params); } list($expectedUrl, $expectedSecure) = $testCase['properties']['x-expected-result']; $expectedSecure = $expectedSecure === 'true';