/**
 * An e-* property must expose both the embedded markup ("html") and its
 * plain-text rendering ("value").
 */
public function testParseE()
{
    $markup = '<div class="h-entry"><div class="e-content">Here is a load of <strong>embedded markup</strong></div></div>';

    $parsed = Mf2\parse($markup);
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('content', $properties);

    $content = $properties['content'][0];
    $this->assertEquals('Here is a load of <strong>embedded markup</strong>', $content['html']);
    $this->assertEquals('Here is a load of embedded markup', $content['value']);
}
/**
 * Parse the microformats out of an HTML document.
 *
 * What we really want to parse are the microformats, but this is a starter
 * method to deal with the original HTML. Any failure raised by the parser is
 * converted into a ParserException so callers deal with one exception type.
 *
 * @param string $html   The HTML
 * @param string $domain The domain the HTML is from
 *
 * @return array The parsed microformats
 *
 * @throws ParserException When php-mf2 fails to parse the HTML
 */
public function getMicroformats($html, $domain)
{
    try {
        return \Mf2\parse($html, $domain);
    } catch (\Exception $e) {
        // Fully-qualified \Exception so the catch also works inside a namespace;
        // the original failure is chained as "previous" so its details are not lost.
        throw new ParserException('php-mf2 failed to parse the HTML', 0, $e);
    }
}
/**
 * Parse microformats out of a fetched response.
 *
 * Responses whose effective URL has the host "twitter.com" or "facebook.com"
 * are handed to the matching Mf2\Shim parser; everything else goes through the
 * regular Mf2\parse().
 *
 * @param Guzzle\Http\Message\Response $resp The response to parse
 *
 * @return mixed Whatever the selected parser returns
 */
function mfForResponse(Guzzle\Http\Message\Response $resp)
{
    $markup = $resp->getBody(true);
    $effectiveUrl = $resp->getEffectiveUrl();

    switch (parse_url($effectiveUrl, PHP_URL_HOST)) {
        case 'twitter.com':
            return Mf2\Shim\parseTwitter($markup, $effectiveUrl);

        case 'facebook.com':
            return Mf2\Shim\parseFacebook($markup, $effectiveUrl);

        default:
            return Mf2\parse($markup, $effectiveUrl);
    }
}
/**
 * Refresh a user's name and photo from the representative h-card found on
 * their home page.
 *
 * Progress (and the h-card that was found) is echoed to standard output.
 *
 * @param mixed $id Primary key of the row in the "users" table
 *
 * @return void
 */
public static function fetch($id)
{
    // Fetch the user's home page and look for profile information there
    $user = ORM::for_table('users')->where_id_is($id)->find_one();

    // find_one() yields false when no row matches; stop here rather than
    // dereferencing a non-object below.
    if (!$user) {
        echo "No user found with id " . $id . "\n";
        return;
    }

    echo "Looking for representative h-card for " . $user->url . "\n";

    $data = HTTP::get($user->url);
    $parsed = Mf2\parse($data['body'], $user->url);
    $representative = Mf2\HCard\representative($parsed, $user->url);

    if (!$representative) {
        echo "Couldn't find one\n";
        return;
    }

    echo "Found it!\n";
    print_r($representative);

    // !empty() also skips a property that is present but has no values, which
    // would otherwise trigger an undefined-offset read of [0].
    if (!empty($representative['properties']['name'])) {
        $user->name = $representative['properties']['name'][0];
    }

    if (!empty($representative['properties']['photo'])) {
        $user->photo = $representative['properties']['photo'][0];
    }

    $user->save();
}
/**
 * Smoke test: a real-world snarfed.org article must parse without error into
 * a result that carries the top-level "items" collection.
 */
public function testParsesSnarfedOrgArticleCorrectly()
{
    $input = file_get_contents(__DIR__ . '/snarfed.org.html');

    $result = Mf2\parse($input, 'http://snarfed.org/2013-10-23_oauth-dropins');

    // Assert on the result instead of dumping it with print_r(), so the test
    // actually verifies something and does not pollute the test output.
    $this->assertArrayHasKey('items', $result);
}
/**
 * A u-url on an <a> element takes its value from the href attribute even when
 * a nested element carries the "value" class.
 *
 * @see https://github.com/indieweb/php-mf2/issues/33
 */
public function testParsesHrefBeforeValueClass()
{
    $markup = '<span class="h-card"><a class="u-url" href="http://example.com/right"><span class="value">WRONG</span></a></span>';

    $parsed = Mf2\parse($markup);
    $urls = $parsed['items'][0]['properties']['url'];

    $this->assertEquals('http://example.com/right', $urls[0]);
}
/**
 * Turn a parsed h-entry into a flat "cleansed" array and collect the posts it
 * refers to.
 *
 * Starts from comments\parse($hEntry) and then adds: plain-text and display
 * content, a UTC publication timestamp, photo/logo markup, the URLs of
 * in-reply-to / like-of / repost-of targets, fallback authorship data and
 * location data.
 *
 * @param array                            $hEntry               The h-entry microformat structure
 * @param array                            $mf                   Parsed microformats of the page the h-entry came from (used for authorship and rels)
 * @param string                           $url                  URL of the page the h-entry was found on
 * @param bool                             $resolveRelationships Whether to fetch each referenced URL and process the first h-entry found there
 * @param Guzzle\Http\ClientInterface|null $client               HTTP client; a new Guzzle\Http\Client is created when null
 * @param callable|null                    $purifier             Applied to HTML values before they are stored; identity when null
 *
 * @return array Two-element list: [cleansed entry, list of cleansed referenced posts]
 */
function processHEntry($hEntry, $mf, $url, $resolveRelationships = true, Guzzle\Http\ClientInterface $client = null, $purifier = null)
{
    if ($client === null) {
        $client = new Guzzle\Http\Client();
    }

    if ($purifier === null) {
        // No purifier supplied: pass HTML through untouched.
        $purifier = function ($value) {
            return $value;
        };
    }

    // Use comment-presentation algorithm to clean up.
    $cleansed = comments\parse($hEntry);

    $referencedPosts = [];
    // Used internally to keep track of what referenced posts have been processed already.
    $referencedPostUrls = [];

    $cleansed['content'] = M\getPlaintext($hEntry, 'content', $cleansed['text']);
    $cleansed['display_content'] = $purifier(M\getHtml($hEntry, 'content'));

    // Handle all datetime cases, as per http://indiewebcamp.com/h-entry#How_to_consume_h-entry
    $utc = new DateTimeZone('UTC');
    $now = new DateTime('now', $utc);

    // DateTime's constructor happily accepts an empty/false value and yields
    // "now", so a missing datetime has to be detected explicitly rather than
    // by waiting for an exception.
    $utcPublished = false;
    if (!empty($cleansed['published'])) {
        try {
            $utcPublished = new DateTime($cleansed['published']);
            $utcPublished->setTimezone($utc);
        } catch (Exception $e) {
            // Unparseable datetime: treated the same as a missing one below.
            $utcPublished = false;
        }
    }

    if ($utcPublished === false or $utcPublished > $now) {
        // If there is no usable datetime (absent, unparseable or in the future),
        // our best guess has to be "now". Additional heuristics could be used in
        // the bizarre case of a feed where an item without datetime sits between
        // two items with datetimes, but until that happens it is not worth
        // coding for.
        $cleansed['published'] = gmdate('c');
        $utcPublished = $now;
    }
    // Otherwise "published" is given and parses correctly. Currently it is not
    // trivial to figure out if a given datetime is floating or not, so the
    // timezone given is assumed to be correct for the moment. When this can be
    // determined, follow http://indiewebcamp.com/datetime#implying_timezone_from_webmentions

    // Store a string representation of published to be indexed+queried upon.
    $cleansed['published_utc'] = $utcPublished->format(DateTime::W3C);

    if (M\hasProp($hEntry, 'photo')) {
        $cleansed['photo'] = $purifier(M\getHtml($hEntry, 'photo'));
    }

    if (M\hasProp($hEntry, 'logo')) {
        $cleansed['logo'] = $purifier(M\getHtml($hEntry, 'logo'));
    }

    // For every post this post has a relation to (in-reply-to, repost-of, like-of etc.),
    // fetch and resolve that URL, index it as its own post (if it has not been
    // processed already) and store only a reference to it here.
    $references = ['in-reply-to' => [], 'like-of' => [], 'repost-of' => []];

    foreach (array_keys($references) as $relation) {
        $refUrls = [];

        // These will be feed pages not permalink pages so cannot check rels,
        // only microformats properties.
        if (M\hasProp($hEntry, $relation)) {
            foreach ($hEntry['properties'][$relation] as $value) {
                if (is_string($value)) {
                    $refUrls[] = $value;
                } elseif (is_array($value) and isset($value['html'])) {
                    // e-* properties unlikely to be URLs but try all the same.
                    $refUrls[] = $value['value'];
                } elseif (M\isMicroformat($value)) {
                    if (M\hasProp($value, 'url')) {
                        $refUrls[] = M\getProp($value, 'url');
                    } elseif (M\hasProp($value, 'uid')) {
                        $refUrls[] = M\getProp($value, 'uid');
                    }
                }
                // Anything else means the microformats parsing spec has changed;
                // do nothing as we do not know how to interpret it.
            }
        }

        if ($resolveRelationships) {
            foreach ($refUrls as $refUrl) {
                try {
                    $resp = $client->get($refUrl)->send();
                    $refResolvedUrl = $resp->getEffectiveUrl();
                    $refMf = Mf2\parse($resp->getBody(true), $refResolvedUrl);
                    $refHEntries = M\findMicroformatsByType($refMf, 'h-entry');
                    $relatedUrl = $refResolvedUrl;

                    if (count($refHEntries) > 0) {
                        $refHEntry = $refHEntries[0];
                        $refSearchUrl = M\hasProp($refHEntry, 'url') ? M\getProp($refHEntry, 'url') : $refResolvedUrl;

                        if (!in_array($refSearchUrl, $referencedPostUrls)) {
                            // Referenced posts are processed without resolving their
                            // own relationships, which bounds the recursion to one level.
                            list($refCleansed) = processHEntry($refHEntry, $refMf, $refResolvedUrl, false, $client, $purifier);
                            $referencedPosts[] = $refCleansed;
                            $referencedPostUrls[] = $refSearchUrl;
                            $relatedUrl = $refSearchUrl;
                        }
                    }

                    $references[$relation][] = $relatedUrl;
                } catch (Guzzle\Common\Exception\GuzzleException $e) {
                    // Could not fetch the target: keep the URL exactly as it was given.
                    $references[$relation][] = $refUrl;
                }
            }
        } else {
            // If we are not resolving relationships, the most accurate data we
            // have is the data given already.
            $references[$relation] = $refUrls;
        }

        // Now we have the best possible list of URLs, attach it to $cleansed.
        $cleansed[$relation] = array_unique($references[$relation]);
    }

    if (!M\hasProp($hEntry, 'author') or !M\isMicroformat($hEntry['properties']['author'][0])) {
        // No authorship data given, we need to find the author!
        // TODO: proper /authorship implementation.
        // TODO: wrap proper /authorship implementation in layer which does purification, simplification, fallback.
        $potentialAuthor = M\getAuthor($hEntry, $mf, $url);

        if (M\isMicroformat($potentialAuthor)) {
            $cleansed['author'] = flattenHCard($potentialAuthor, $url);
        } elseif (!empty($mf['rels']['author'])) {
            // TODO: look in elasticsearch index for a person with the first rel-author URL then fall back to fetching.
            // Fetch the first author URL and look for a representative h-card there.
            $relAuthorUrl = $mf['rels']['author'][0];
            $relAuthorMf = Mf2\fetch($relAuthorUrl);
            $relAuthorHCards = M\findMicroformatsByType($relAuthorMf, 'h-card');
            $relMes = !empty($relAuthorMf['rels']['me']) ? $relAuthorMf['rels']['me'] : [];

            // NOTE: there is no early exit, so when several h-cards qualify the
            // last one wins (unchanged from the previous behaviour).
            foreach ($relAuthorHCards as $raHCard) {
                $raUrl = M\getProp($raHCard, 'url');

                if ($raUrl === M\getProp($raHCard, 'uid') and $raUrl === $relAuthorUrl) {
                    // url == uid == author page. The previous code compared the
                    // boolean result of url === url against the author URL, which
                    // could never be true.
                    $cleansed['author'] = flattenHCard($raHCard, $relAuthorUrl);
                } elseif (M\hasProp($raHCard, 'url') and count(array_intersect($raHCard['properties']['url'], $relMes)) > 0) {
                    // One of the h-card's URLs is also a rel-me link on the author page.
                    $cleansed['author'] = flattenHCard($raHCard, $relAuthorUrl);
                }
            }
        }

        // If after all that there is still no authorship data, fake some.
        if ($cleansed['author']['name'] === false) {
            $cleansed['author'] = flattenHCard(['properties' => []], $url);

            try {
                $response = $client->head("{$cleansed['author']['url']}/favicon.ico")->send();

                if (strpos($response->getHeader('content-type'), 'image') !== false) {
                    // This appears to be a valid image!
                    $cleansed['author']['photo'] = $response->getEffectiveUrl();
                }
            } catch (Guzzle\Common\Exception\GuzzleException $e) {
                // No photo fallback could be found.
            }
        }
    }

    // TODO: this will be M\getLocation when it is ported to the other library.
    if (($location = getLocation($hEntry)) !== null) {
        $cleansed['location'] = $location;

        // TODO: do additional reverse lookups of address details if none are provided.
        if (!empty($location['latitude']) and !empty($location['longitude'])) {
            // If this is a valid point, add a point with mashed names for elasticsearch to index.
            $cleansed['location_point'] = ['lat' => $location['latitude'], 'lon' => $location['longitude']];
        }
    }

    // TODO: figure out what other properties need storing/indexing, and whether
    // anything else needs mashing for elasticsearch to index more easily.
    return [$cleansed, $referencedPosts];
}
/**
 * Value-class datetime parsing must accept a time component whose hour is
 * written with a single digit.
 *
 * @see https://github.com/indieweb/php-mf2/issues/38
 */
public function testValueClassDtMatchesSingleDigitTimeComponent()
{
    $markup = '<div class="h-entry"><span class="dt-published"><time class="value">6:01</time>, <time class="value">2013-02-01</time></span></div>';

    $parsed = Mf2\parse($markup);
    $published = $parsed['items'][0]['properties']['published'];

    $this->assertEquals('2013-02-01T6:01', $published[0]);
}
/**
 * A microformat nested under a p-* property gets its "value" from the first
 * p-name inside it.
 */
public function testMicroformatsNestedUnderPPropertyClassnamesDeriveValueFromFirstPName()
{
    $input = '<div class="h-entry"><div class="p-author h-card">This post was written by <span class="p-name">Zoe</span>.</div></div>';

    $mf = Mf2\parse($input);

    // Expected value first, actual second, so a failure message reads the right way round.
    $this->assertEquals('Zoe', $mf['items'][0]['properties']['author'][0]['value']);
}
/**
 * A <br/> between two child properties becomes a newline in the text of the
 * enclosing p-* property and in the implied name, while the child properties
 * keep their own text.
 *
 * @see https://github.com/indieweb/php-mf2/issues/69
 */
public function testBrWhitespaceIssue69()
{
    $markup = '<div class="h-card"><p class="p-adr"><span class="p-street-address">Street Name 9</span><br/><span class="p-locality">12345 NY, USA</span></p></div>';
    $joinedAddress = "Street Name 9\n12345 NY, USA";

    $parsed = Mf2\parse($markup);

    $this->assertEquals($joinedAddress, $parsed['items'][0]['properties']['adr'][0]);
    $this->assertEquals('Street Name 9', $parsed['items'][0]['properties']['street-address'][0]);
    $this->assertEquals('12345 NY, USA', $parsed['items'][0]['properties']['locality'][0]);
    $this->assertEquals($joinedAddress, $parsed['items'][0]['properties']['name'][0]);
}
/**
 * Collect the "self" and "hub" links advertised by a response.
 *
 * Links are read from the Link header first. For responses whose content type
 * contains "html", rel=hub values found in the body are appended, and the
 * first rel=self found in the body is used only when the header gave none.
 *
 * @param Guzzle\Http\Message\Response $resp The response to inspect
 *
 * @return array ['self' => string|null, 'hub' => string[]]
 */
function pushLinksForResponse(Guzzle\Http\Message\Response $resp)
{
    $topic = null;
    $hubUrls = [];

    // True when the space-separated rel attribute of $link contains $token.
    $hasRel = function ($link, $token) {
        return strpos(" {$link['rel']} ", " {$token} ") !== false;
    };

    $header = $resp->getHeader('link');
    if ($header instanceof Guzzle\Http\Message\Header\Link) {
        foreach ($header->getLinks() as $entry) {
            if ($hasRel($entry, 'self')) {
                $topic = $entry['url'];
            }

            if ($hasRel($entry, 'hub')) {
                $hubUrls[] = $entry['url'];
            }
        }
    }

    // Only HTML bodies can carry rel links worth parsing.
    if (strpos($resp->getContentType(), 'html') === false) {
        return ['self' => $topic, 'hub' => $hubUrls];
    }

    $parsed = Mf2\parse($resp->getBody(true), $resp->getEffectiveUrl());

    if (!empty($parsed['rels']['hub'])) {
        $hubUrls = array_merge($hubUrls, $parsed['rels']['hub']);
    }

    if ($topic === null and !empty($parsed['rels']['self'])) {
        $topic = $parsed['rels']['self'][0];
    }

    return ['self' => $topic, 'hub' => $hubUrls];
}
/**
 * Checks that an <img> nested inside an e-content element contributes its alt
 * text to the property's plain-text "value".
 *
 * The markup is parsed with a base URL of
 * http://waterpigs.co.uk/articles/five-legged-elephant.
 *
 * @see https://github.com/indieweb/php-mf2/issues/53
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
 */
public function testConvertsNestedImgElementToAltOrSrc() { $input = <<<EOT <div class="h-entry"> \t<p class="e-content">It is a strange thing to see a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p> </div> EOT; $result = Mf2\parse($input, 'http://waterpigs.co.uk/articles/five-legged-elephant'); $this->assertEquals('It is a strange thing to see a five legged elephant', $result['items'][0]['properties']['content'][0]['value']); }
/**
 * Parses a product-catalogue XML fragment in which four <product> elements
 * carry the classic "hproduct" class, and expects exactly four top-level
 * items in the result.
 *
 * The fixture is held in a nowdoc, so nothing inside it (for example the
 * "$std$" / "$thb$" fragments of the image URLs) is interpolated.
 */
public function testParsesHProduct() { $input = <<<'EOT' <xml id="skufilterbrowse" class="slide"> <productcatalog><labels><label key="skuset.deliverypolicyurl">Delivery policy content URL</label><label key="price.save">Save</label><label key="skuset.seemoredetails">See more details</label><label key="price.additionaloffers">Additional Offers</label><label key="price.freeitem">Includes Free Item*</label><label key="price.instsaving">Instant Savings</label><label key="skuset.eddieseedetails">See details </label><label key="price.rebateurl">RebateURL</label><label key="skuset.freedelivery">FREE SHIPPING, plus 5% back for Rewards Members</label><label key="price.printableCoupons">Click here for Printable Coupon</label><label key="price.value">Value</label><label key="skuset.eddieshipdetails">Estimated to arrive no later than </label><label key="price.qty">Qty.</label><label key="price.chooseyouritems">Choose your Items</label><label key="price.true">true</label><label key="skuset.clearancemessage"><strong>CLEARANCE ITEM:</strong> </label><label key="price.ERF">Eco Fee</label><label key="skuset.viewlargerimage">View larger</label><label key="common.next">Next</label><label key="price.addtocart">Add to Cart</label><label key="common.reviews">reviews</label><label key="common.previous">Previous</label><label key="skuset.instockonline">In Stock Online</label><label key="price.priceafterrebate">Price <strong>after</strong> rebate</label><label key="btn.bopis">PICK UP TODAY</label><label key="common.share">SHARE</label><label key="skuset.freedeliverytostore">FREE Shipping to store</label><label key="erf.popup.label">Environmental fee notice:</label><label key="price.details">Details</label><label key="common.stars">stars</label><label key="skuset.learnmore1">Learn more.</label><label key="common.share.twitter">Share on Twitter</label><label key="price.instorespecial">Available In Store Only</label><label key="price.priceincart">See Price in Cart</label><label 
key="skuset.giftcards">Orders containing this item are not eligible for Gift cards or certain other methods of payment.</label><label key="skuset.mysoftwaredownloads">"My Software Downloads"</label><label key="price.bmsmerf">Buy More Save More prices do not include eco fee.</label><label key="price.now">Now</label><label key="price.collapse">Collapse</label><label key="classpage.getstarted">Get Started</label><label key="skuset.printpage">Print this page</label><label key="price.learnmore">Learn More</label><label key="common.item">Item</label><label key="common.icon.path">/sbdpas/img/ico/</label><label key="skuset.selectcomponent">Select another component below</label><label key="skuset.freeshippingentireorder">Item qualifies entire order for free delivery</label><label key="skuset.eddieincart">in cart. </label><label key="common.selected">selected</label><label key="price.priceaftersavings">Price <strong>after</strong> savings</label><label key="skuset.inktoner">Ink and toner</label><label key="classpage.comingsoon">Coming Soon</label><label key="price.before">Before</label><label key="price.was">Was</label><label key="skuset.viewfulldetails">View Full Details</label><label key="skuset.expdelivery">Expected Delivery:</label><label key="common.share.pinterest">Share on Pinterest</label><label key="skuset.deliveryonly">Online Only</label><label key="common.share.email">Email it</label><label key="price.seedetails">See Details</label><label key="skuset.expdelpopup">/sbd/content/help-center/shipping-and-delivery.html</label><label key="skuset.softwaredownload">Software Download</label><label key="erf.popup.url">/sbd/content/help/environmental_fee_popup.html</label><label key="price.finalprice">Final Price</label><label key="common.share.facebook">Share on Facebook</label><label key="skuset.eddiesaveproduct">on this product! 
</label><label key="skuset.suppliedandshippedby">Supplied and Shipped by</label><label key="common.addtofavorites">Add to Favorites</label><label key="skuset.esdnotepart1">Note: Shortly after purchase you will be able to access your Software Downloads in the </label><label key="skuset.esdnotepart2">section of your staples.com® account. It's easy and secure!</label><label key="skuset.software1">/sbd/cre/marketing/technology-research-centers/software/software-downloads.html#z_faq</label><label key="price.instantcoupon">Instant Coupon</label><label key="skuset.eddieshipdetails1">to </label><label key="price.pricebefore">Price <strong>before</strong></label><label key="skuset.eddieoffer">Offer valid for 20 minutes. </label><label key="price.price">Price</label><label key="common.model">Model</label><label key="skuset.supplierhover">We have partnered with this trusted supplier to offer you a wider assortment of products and brands for all of your business needs, with the same great level of service you can expect from Staples.com.</label><label key="skuset.forceshiptostore">Item can be shipped only to a retail store location. 
</label><label key="price.rebate">Rebate</label><label key="price.bmsm">Buy More Save More</label><label key="common.print">print</label><label key="skuset.viewvideo">View video</label><label key="skuset.instoreavailability">Check in Store Availability</label><label key="price.aslowas">As low as</label><label key="price.reg">Reg</label><label key="price.free">FREE</label><label key="erf.message">Provincial recycling or deposit fees may be applicable upon checkout.</label><label key="skuset.eddiesave">Save an extra </label><label key="skuset.deferredfinancemessage">Special Financing Available </label><label key="price.promotions">Promotions</label><label key="price.availableinstore">Available In-Store Only</label><label key="skuset.outofstock">Currently Out of Stock.</label><label key="price.totalsavings">Total Savings</label><label key="price.beforePresentation">Before continuing, please select an item</label></labels> <product bss="ON" envfeeflag="0" comingsoonflag="0" price="18.35" name="Swingline® 747® Classic Desktop Staplers" bopispilot="true" mss="ON" zipcode="01701" comp="0" presnvalue="Select an Item" snum="SS264184" leadtimedescription="1 - 30 Business Days" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Staplers" prdtypeid="0" presntype="D" review="4.5" class="hproduct" type="skuset" skubopswitch="ON" id="609548" bopisenableflag="0"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Spring-loaded inner channel prevents jams</desc><desc typeid="2">Available in black, burgundy and beige <br /> <li>Staples up to 20 sheets</li></desc><desc typeid="38">Select an Item</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Spring-loaded inner channel prevents jams</desc><desc typeid="39">Available in black, burgundy and beige <br /> <li>Staples up to 20 sheets</li></desc><desc class="fn">Swingline® 747® Classic Desktop Staplers</desc></descs><price is="18.35" 
uom="Each"></price><span class="price">18.35</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0021414_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0021414_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0021414</pic></imgs><producturl class="url">/Swingline-747-Classic-Desktop-Staplers/product_SS264184</producturl><review rating="4.5" count="99" css="45"></review><delivery shiptostore="true" free="false" instore="2" forceshiptostore="false"></delivery></product> <product bss="ON" envfeeflag="0" comingsoonflag="0" price="18.35" name="Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black" bopispilot="true" mss="ON" zipcode="01701" comp="1" snum="264184" leadtimedescription="1 Business Day" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black" prdtypeid="0" review="4.5" class="hproduct" mnum="S7074771G" type="sku" skubopswitch="ON" id="609315" bopisenableflag="1"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Full strip</desc><desc typeid="2">Staples up to 20 sheets</desc><desc typeid="19">Each</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Full strip</desc><desc typeid="39">Staples up to 20 sheets</desc><desc class="fn">Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black</desc></descs><price is="18.35" uom="Each"></price><span class="price">18.35</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0021412_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0021412_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0021412</pic><pic id="120"></pic></imgs><producturl 
class="url">/Swingline-747-Classic-Desktop-Full-Strip-Stapler-20-Sheet-Capacity-Black/product_264184</producturl><review rating="4.5" count="99" css="45"></review><delivery shiptostore="true" free="false" instore="1" forceshiptostore="false"></delivery></product> <product bss="ON" envfeeflag="0" comingsoonflag="0" price="19.59" name="Swingline® 747® Classic Desktop Stapler, Burgundy" bopispilot="true" mss="ON" zipcode="01701" comp="1" snum="413732" leadtimedescription="1 Business Day" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Stapler, Burgundy" prdtypeid="0" review="3.5" class="hproduct" mnum="74718/74782" type="sku" skubopswitch="ON" id="1460639" bopisenableflag="0"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Spring-loaded inner channel prevents jams</desc><desc typeid="2">Burgundy <br /> <li>Staples up to 20 sheets</li></desc><desc typeid="19">Each</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Spring-loaded inner channel prevents jams</desc><desc typeid="39">Burgundy <br /> <li>Staples up to 20 sheets</li></desc><desc class="fn">Swingline® 747® Classic Desktop Stapler, Burgundy</desc></descs><price is="19.59" uom="Each"></price><span class="price">19.59</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/m000240695_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/m000240695_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/m000240695</pic></imgs><producturl class="url">/Swingline-747-Classic-Desktop-Stapler-Burgundy/product_413732</producturl><review rating="3.5" count="7" css="35"></review><delivery shiptostore="true" free="false" instore="2" forceshiptostore="false"></delivery></product> <product bss="ON" envfeeflag="0" comingsoonflag="0" price="39.49" name="Swingline® 747® Rio Red Stapler, 20 Sheet Capacity" 
bopispilot="true" mss="ON" zipcode="01701" comp="1" brandname="Swingline" snum="562485" leadtimedescription="1 - 4 Business Days" expandedpromo="0" alttext="Swingline® 747® Rio Red Stapler, 20 Sheet Capacity" prdtypeid="0" review="4.5" class="hproduct" mnum="74736" type="sku" skubopswitch="ON" id="1093798" bopisenableflag="0"><descs><desc typeid="2">20 sheet capacity with Swingline S.F.® 4® Staples</desc><desc typeid="2">Durable metal construction</desc><desc typeid="2">Stapler opens for tacking ability</desc><desc typeid="19">Each</desc><desc typeid="39">20 sheet capacity with Swingline S.F.® 4® Staples</desc><desc typeid="39">Durable metal construction</desc><desc typeid="39">Stapler opens for tacking ability</desc><desc class="fn">Swingline® 747® Rio Red Stapler, 20 Sheet Capacity</desc></descs><price is="39.49" uom="Each"></price><span class="price">39.49</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0446269_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0446269_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0446269</pic></imgs><producturl class="url">/Swingline-747-Rio-Red-Stapler-20-Sheet-Capacity/product_562485</producturl><review rating="4.5" count="76" css="45"></review><delivery shiptostore="true" free="true" instore="2" forceshiptostore="false"></delivery></product> </productcatalog> </xml> EOT; $result = Mf2\parse($input, 'http://www.staples.com/Swingline-747-Rio-Red-Stapler-20-Sheet-Capacity/product_562485'); $this->assertCount(4, $result['items']); }
/**
 * Determine the author of a parsed page, following the steps of the IndieWeb
 * authorship algorithm that the inline comments describe.
 *
 * Order of precedence:
 *  1. the h-entry's own "author" property;
 *  2. otherwise an h-card that is a direct child of the h-feed;
 *  3. if the author is a URL, or (for permalinks) the page has a rel-author
 *     link, that author page is fetched and searched for a matching h-card.
 *
 * Only the last top-level h-entry / h-feed on the page is considered.
 * TODO: work with multiple h-entries.
 *
 * @param array $mf        Parsed microformats for the page ("items" and "rels" keys are read)
 * @param bool  $permalink Whether the page is a permalink; rel-author links are only consulted when true
 *
 * @return mixed The result of normalise() for the chosen author, or false when no author could be determined
 *
 * @throws AuthorshipParserException When the page has neither an h-entry nor an h-feed,
 *                                   or when the author page cannot be fetched
 */
public function findAuthor($mf, $permalink = true)
{
    // Reset all per-run state.
    $this->permalink = $permalink;
    $this->mf = $mf;
    $this->hEntry = false;
    $this->hFeed = false;
    $this->author = null;
    $this->authorPage = null;
    $this->authorInfo = null;

    $items = isset($this->mf['items']) ? $this->mf['items'] : [];
    foreach ($items as $item) {
        $types = isset($item['type']) ? $item['type'] : [];
        foreach ($types as $type) {
            if ($type == 'h-entry') {
                $this->hEntry = $item;
            } elseif ($type == 'h-feed') {
                $this->hFeed = $item;
            }
        }
    }

    if ($this->hEntry === false && $this->hFeed === false) {
        // Neither an h-entry nor an h-feed in the top-level items array.
        throw new AuthorshipParserException('No h-entry found');
    }

    // If the h-entry has an author property, use that.
    if ($this->hEntry !== false && isset($this->hEntry['properties']['author'])) {
        $this->author = $this->hEntry['properties']['author'];
    }

    // Otherwise look at the h-feed: an h-card among its children is used as the
    // author. This only runs when the h-entry did not supply an author, so the
    // entry's own author is no longer overwritten by the feed's.
    if ($this->author === null && $this->hFeed !== false && !empty($this->hFeed['children'])) {
        foreach ($this->hFeed['children'] as $child) {
            if (isset($child['type'][0]) && $child['type'][0] == 'h-card') {
                $this->author = $child;
            }
        }
    }

    if ($this->author !== null) {
        // An author *property* is a list of values, whereas a feed-level h-card
        // is a single microformat (it has a "type" key). For a list, the first
        // value decides what kind of author information we have.
        $candidate = $this->author;
        if (is_array($candidate) && !isset($candidate['type']) && isset($candidate[0])) {
            $candidate = $candidate[0];
        }

        if (is_string($candidate) && filter_var($candidate, FILTER_VALIDATE_URL)) {
            // The author is a URL: let that be the author page.
            $this->authorPage = $candidate;
        } else {
            // The author is an h-card or a plain name: use it as is, exit.
            return $this->normalise($this->author);
        }
    }

    // If there is no author page and the h-entry is a permalink, look for a
    // rel-author link and let that be the author page.
    if ($this->authorPage === null && $this->permalink == true && isset($this->mf['rels']['author'])) {
        $relAuthor = $this->mf['rels']['author'];
        if (is_array($relAuthor)) {
            // Need to deal with multiple rel-author links better; take the first for now.
            $this->authorPage = isset($relAuthor[0]) ? $relAuthor[0] : null;
        } else {
            $this->authorPage = $relAuthor;
        }
    }

    if ($this->authorPage !== null) {
        // Grab the microformats from the author page.
        try {
            // NOTE(review): $this->parser is not used in this method; the
            // assignment is kept in case other members rely on it.
            $this->parser = new Parser();
            $this->response = $this->guzzle->get($this->authorPage);
            $this->html = (string) $this->response->getBody();
        } catch (\GuzzleHttp\Exception\BadResponseException $e) {
            // Chain the HTTP failure so the underlying cause stays available.
            throw new AuthorshipParserException('Unable to get the Content from the authors page', 0, $e);
        }

        $this->authorMf2 = \Mf2\parse($this->html, $this->authorPage);

        // Collect the top-level h-cards of the author page once.
        $hCards = [];
        $authorItems = isset($this->authorMf2['items']) ? $this->authorMf2['items'] : [];
        foreach ($authorItems as $item) {
            if (isset($item['type']) && in_array('h-card', (array) $item['type'], true)) {
                $hCards[] = $item;
            }
        }

        // If the page has 1+ h-card where url == uid == author-page, use the
        // first such h-card, exit. (Previously this compared against the
        // undefined variables $uid and $authorPage and so never matched.)
        foreach ($hCards as $hCard) {
            $urls = isset($hCard['properties']['url']) ? $hCard['properties']['url'] : [];
            $uids = isset($hCard['properties']['uid']) ? $hCard['properties']['uid'] : [];

            if (in_array($this->authorPage, $urls, true) && in_array($this->authorPage, $uids, true)) {
                return $this->normalise($hCard);
            }
        }

        // Else if the page has 1+ h-card with a url property which matches a
        // rel-me link on the page, use the first such h-card, exit.
        $relMeLinks = isset($this->authorMf2['rels']['me']) ? $this->authorMf2['rels']['me'] : [];
        if (!empty($relMeLinks)) {
            foreach ($hCards as $hCard) {
                $urls = isset($hCard['properties']['url']) ? $hCard['properties']['url'] : [];
                foreach ($urls as $url) {
                    if (in_array($url, $relMeLinks, true)) {
                        return $this->normalise($hCard);
                    }
                }
            }
        }

        // Else if the page has 1+ h-card with url == author-page, use the first
        // such h-card, exit.
        foreach ($hCards as $hCard) {
            $urls = isset($hCard['properties']['url']) ? $hCard['properties']['url'] : [];
            if (in_array($this->authorPage, $urls, true)) {
                return $this->normalise($hCard);
            }
        }
    }

    // If we have got this far, we have not been able to determine author info.
    return false;
}
/**
 * The implied name of an h-card on an <abbr> comes from the title attribute,
 * not from the element's text.
 *
 * @see https://github.com/indieweb/php-mf2/issues/6
 */
public function testParsesImpliedNameFromAbbrTitle()
{
    $markup = '<abbr class="h-card" title="Barnaby Walters">BJW</abbr>';

    $parsed = Mf2\parse($markup);
    $names = $parsed['items'][0]['properties']['name'];

    $this->assertEquals('Barnaby Walters', $names[0]);
}
/**
 * Checks how an <img> nested inside a p-* property contributes to the
 * property's text: with an alt attribute the alt text is used (p-name);
 * without one the src is used, resolved against the base URL passed to the
 * parser (p-summary).
 *
 * @see https://github.com/indieweb/php-mf2/issues/53
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
 */
public function testConvertsNestedImgElementToAltOrSrc() { $input = <<<EOT <div class="h-entry"> \t<p class="p-name">The day I saw a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p> \t<p class="p-summary">Blah blah <img src="/photos/five-legged-elephant.jpg" /></p> </div> EOT; $result = Mf2\parse($input, 'http://waterpigs.co.uk/articles/five-legged-elephant'); $this->assertEquals('The day I saw a five legged elephant', $result['items'][0]['properties']['name'][0]); $this->assertEquals('Blah blah http://waterpigs.co.uk/photos/five-legged-elephant.jpg', $result['items'][0]['properties']['summary'][0]); }
/**
 * A u-* property on an <input> element takes its value from the value
 * attribute.
 */
public function testParsesInputValue()
{
    $markup = '<span class="h-card"><input class="u-url" value="http://example.com" /></span>';

    $parsed = Mf2\parse($markup);
    $urls = $parsed['items'][0]['properties']['url'];

    $this->assertEquals('http://example.com', $urls[0]);
}