Пример #1
0
 /**
  * An e-* property must expose both its embedded markup (under "html") and
  * the plain-text rendering of that markup (under "value").
  */
 public function testParseE()
 {
     $markup = '<div class="h-entry"><div class="e-content">Here is a load of <strong>embedded markup</strong></div></div>';
     $parsed = Mf2\parse($markup);
     $properties = $parsed['items'][0]['properties'];

     $this->assertArrayHasKey('content', $properties);

     $content = $properties['content'][0];
     $this->assertEquals('Here is a load of <strong>embedded markup</strong>', $content['html']);
     $this->assertEquals('Here is a load of embedded markup', $content['value']);
 }
Пример #2
0
 /**
  * Parse the microformats out of a raw HTML document.
  *
  * What we really want to parse are the microformats, but here's a starter
  * method to deal with the original HTML.
  *
  * @param  string $html   The HTML
  * @param  string $domain The domain the HTML is from (handed to php-mf2 as its second argument)
  *
  * @return array The parsed microformats
  *
  * @throws ParserException When php-mf2 throws while parsing
  */
 public function getMicroformats($html, $domain)
 {
     try {
         return \Mf2\parse($html, $domain);
     } catch (\Exception $e) {
         // Fully-qualified \Exception: an unqualified name would resolve relative to the
         // current namespace (if any) and silently never match.
         // The original exception is chained as "previous" so the root cause stays visible.
         // NOTE(review): assumes ParserException keeps the standard \Exception constructor
         // signature (message, code, previous) - confirm.
         throw new ParserException('php-mf2 failed to parse the HTML', 0, $e);
     }
 }
Пример #3
0
/**
 * Parse microformats out of a fetched response, dispatching on the host of the
 * response's effective URL: twitter.com and facebook.com go through their
 * dedicated shim parsers, every other host through the plain mf2 parser.
 *
 * @param Guzzle\Http\Message\Response $resp The fetched response
 * @return mixed Whatever the selected parser returns
 */
function mfForResponse(Guzzle\Http\Message\Response $resp)
{
    $html = $resp->getBody(true);
    $effectiveUrl = $resp->getEffectiveUrl();

    switch (parse_url($effectiveUrl, PHP_URL_HOST)) {
        case 'twitter.com':
            return Mf2\Shim\parseTwitter($html, $effectiveUrl);
        case 'facebook.com':
            return Mf2\Shim\parseFacebook($html, $effectiveUrl);
        default:
            return Mf2\parse($html, $effectiveUrl);
    }
}
Пример #4
0
 /**
  * Fetch a user's home page, look for a representative h-card there and, when
  * one is found, copy its first name and photo values onto the user record
  * and save it.
  *
  * Progress is reported on standard output with echo/print_r.
  *
  * @param mixed $id Value matched against the id of the `users` table
  */
 public static function fetch($id)
 {
     $user = ORM::for_table('users')->where_id_is($id)->find_one();
     if (!$user) {
         // NOTE(review): find_one() is expected to yield a falsy value when no row
         // matches; bail out instead of dereferencing it below.
         echo "No user found with id " . $id . "\n";
         return;
     }

     // Fetch the user's home page and look for profile information there
     echo "Looking for representative h-card for " . $user->url . "\n";
     $data = HTTP::get($user->url);
     $parsed = Mf2\parse($data['body'], $user->url);
     $representative = Mf2\HCard\representative($parsed, $user->url);

     if (!$representative) {
         echo "Couldn't find one\n";
         return;
     }

     echo "Found it!\n";
     print_r($representative);

     // !empty() also covers a property that is present but holds an empty list,
     // where reading index 0 would be an undefined offset.
     $properties = $representative['properties'];
     if (!empty($properties['name'])) {
         $user->name = $properties['name'][0];
     }
     if (!empty($properties['photo'])) {
         $user->photo = $properties['photo'][0];
     }
     $user->save();
 }
Пример #5
0
 /**
  * Smoke test: the saved snarfed.org article fixture must parse into a
  * canonical result structure.
  *
  * The test previously only dumped the result with print_r() and asserted
  * nothing, so it could never fail on bad output.
  */
 public function testParsesSnarfedOrgArticleCorrectly()
 {
     $input = file_get_contents(__DIR__ . '/snarfed.org.html');
     $this->assertTrue(is_string($input), 'Fixture snarfed.org.html could not be read');

     $result = Mf2\parse($input, 'http://snarfed.org/2013-10-23_oauth-dropins');

     $this->assertTrue(is_array($result));
     $this->assertArrayHasKey('items', $result);
 }
Пример #6
0
 /**
  * A u-url on an <a> element takes its value from the href attribute even
  * when a nested element carries class="value".
  *
  * @see https://github.com/indieweb/php-mf2/issues/33
  */
 public function testParsesHrefBeforeValueClass()
 {
     $markup = '<span class="h-card"><a class="u-url" href="http://example.com/right"><span class="value">WRONG</span></a></span>';
     $parsed = Mf2\parse($markup);
     $urls = $parsed['items'][0]['properties']['url'];
     $this->assertEquals('http://example.com/right', $urls[0]);
 }
Пример #7
0
/**
 * Flatten a parsed h-entry into a cleansed array ready for indexing, and collect cleansed copies of the posts
 * it references.
 *
 * Steps, in order: run the comment-presentation cleanser, attach plaintext/HTML content, normalise the published
 * datetime, copy photo/logo, resolve in-reply-to / like-of / repost-of references, determine the author, and
 * attach location data.
 *
 * @param array $hEntry The h-entry microformat structure to process
 * @param array $mf The parse result $hEntry came from; its rels.author is consulted for authorship
 * @param string $url URL associated with the h-entry, used for author lookup and as the fallback author base
 * @param bool $resolveRelationships Whether referenced posts are fetched and processed (non-recursively)
 * @param Guzzle\Http\ClientInterface|null $client HTTP client; a new Guzzle\Http\Client is created when null
 * @param callable|null $purifier Applied to HTML values before they are stored; identity function when null
 * @return array Two-element list: the cleansed entry, then the list of cleansed referenced posts
 */
function processHEntry($hEntry, $mf, $url, $resolveRelationships = true, Guzzle\Http\ClientInterface $client = null, $purifier = null)
{
    if ($client === null) {
        $client = new Guzzle\Http\Client();
    }
    if ($purifier === null) {
        $purifier = function ($value) {
            return $value;
        };
    }
    // Use comment-presentation algorithm to clean up.
    $cleansed = comments\parse($hEntry);
    $referencedPosts = [];
    // Used internally to keep track of what referenced posts have been processed already.
    $referencedPostUrls = [];
    $cleansed['content'] = M\getPlaintext($hEntry, 'content', $cleansed['text']);
    $cleansed['display_content'] = $purifier(M\getHtml($hEntry, 'content'));

    // Handle all datetime cases, as per http://indiewebcamp.com/h-entry#How_to_consume_h-entry
    $utc = new DateTimeZone('UTC');
    try {
        $published = new DateTime($cleansed['published']);
        $utcPublished = clone $published;
        $utcPublished->setTimezone($utc);
    } catch (Exception $e) {
        $published = $utcPublished = false;
    }
    $now = new DateTime('now', $utc);
    // DateTime() accepts an empty/false value and silently yields "now", so a successful parse does not prove a
    // datetime was given: the raw value has to be checked as well. The previous condition joined these two
    // checks with "and", which practically never held, leaving an empty "published" in place and an unparseable
    // one with $utcPublished === false.
    if ($published === false or empty($cleansed['published']) or $utcPublished > $now) {
        // No usable datetime (missing, unparseable, or in the future): our best guess has to be "now".
        // Additional heuristics could be used in the bizarre case of having a feed where an item without datetime
        // is published in between two items with datetimes, allowing us to guess the published datetime is
        // between the two, but until that actually happens it's not worth coding for.
        $cleansed['published'] = gmdate('c');
        $utcPublished = $now;
    }
    // Otherwise "published" is given and parses correctly.
    // Currently it's not trivial to figure out if a given datetime is floating or not, so assume that the timezone
    // given is correct for the moment. When this can be determined, follow
    // http://indiewebcamp.com/datetime#implying_timezone_from_webmentions

    // Store a string representation of published to be indexed+queried upon.
    $cleansed['published_utc'] = $utcPublished->format(DateTime::W3C);

    foreach (['photo', 'logo'] as $imageProp) {
        if (M\hasProp($hEntry, $imageProp)) {
            $cleansed[$imageProp] = $purifier(M\getHtml($hEntry, $imageProp));
        }
    }

    // For every post this post has a relation (in-reply-to, repost-of, like-of etc.), fetch and resolve that URL,
    // index it as its own post (if it doesn't already exist) and store only a reference to it here.
    $references = ['in-reply-to' => [], 'like-of' => [], 'repost-of' => []];
    foreach (array_keys($references) as $relation) {
        $refUrls = [];
        // These will be feed pages not permalink pages so cannot check rels, only microformats properties.
        if (M\hasProp($hEntry, $relation)) {
            foreach ($hEntry['properties'][$relation] as $value) {
                if (is_string($value)) {
                    $refUrls[] = $value;
                } elseif (is_array($value) and isset($value['html'])) {
                    // e-* properties unlikely to be URLs but try all the same.
                    $refUrls[] = $value['value'];
                } elseif (M\isMicroformat($value)) {
                    if (M\hasProp($value, 'url')) {
                        $refUrls[] = M\getProp($value, 'url');
                    } elseif (M\hasProp($value, 'uid')) {
                        $refUrls[] = M\getProp($value, 'uid');
                    }
                }
                // Anything else means the microformats parsing spec has changed. Currently do nothing as we don't
                // know how to interpret it.
            }
        }

        if ($resolveRelationships) {
            foreach ($refUrls as $refUrl) {
                try {
                    $resp = $client->get($refUrl)->send();
                    $refResolvedUrl = $resp->getEffectiveUrl();
                    $refMf = Mf2\parse($resp->getBody(true), $refResolvedUrl);
                    $refHEntries = M\findMicroformatsByType($refMf, 'h-entry');
                    $relatedUrl = $refResolvedUrl;
                    if (count($refHEntries) > 0) {
                        $refHEntry = $refHEntries[0];
                        $refSearchUrl = M\hasProp($refHEntry, 'url') ? M\getProp($refHEntry, 'url') : $refResolvedUrl;
                        if (!in_array($refSearchUrl, $referencedPostUrls)) {
                            // Relationships of the referenced post are deliberately not resolved (no recursion).
                            list($refCleansed) = processHEntry($refHEntry, $refMf, $refResolvedUrl, false, $client, $purifier);
                            $referencedPosts[] = $refCleansed;
                            $referencedPostUrls[] = $refSearchUrl;
                            $relatedUrl = $refSearchUrl;
                        }
                    }
                    $references[$relation][] = $relatedUrl;
                } catch (Guzzle\Common\Exception\GuzzleException $e) {
                    // Fetching failed: keep the unresolved URL as the reference.
                    $references[$relation][] = $refUrl;
                }
            }
        } else {
            // If we're not resolving relationships, the most accurate data we have is the data given already.
            $references[$relation] = $refUrls;
        }

        // Now we have the best possible list of URLs, attach it to $cleansed.
        $cleansed[$relation] = array_unique($references[$relation]);
    }

    if (!M\hasProp($hEntry, 'author') or !M\isMicroformat($hEntry['properties']['author'][0])) {
        // No authorship data given, we need to find the author!
        // TODO: proper /authorship implementation.
        // TODO: wrap proper /authorship implementation in layer which does purification, simplification, fallback.
        $potentialAuthor = M\getAuthor($hEntry, $mf, $url);
        if (M\isMicroformat($potentialAuthor)) {
            $cleansed['author'] = flattenHCard($potentialAuthor, $url);
        } elseif (!empty($mf['rels']['author'])) {
            // TODO: look in elasticsearch index for a person with the first rel-author URL then fall back to fetching.
            // Fetch the first author URL and look for a representative h-card there.
            $relAuthorUrl = $mf['rels']['author'][0];
            $relAuthorMf = Mf2\fetch($relAuthorUrl);
            $relAuthorHCards = M\findMicroformatsByType($relAuthorMf, 'h-card');
            // Loop-invariant, and checked explicitly rather than hiding a missing key behind "@".
            $relMes = !empty($relAuthorMf['rels']['me']) ? $relAuthorMf['rels']['me'] : [];
            foreach ($relAuthorHCards as $raHCard) {
                $raUrl = M\getProp($raHCard, 'url');
                // url == uid == author page. The previous expression compared the boolean result of
                // (url === url) against the author URL, which could never be true.
                if ($raUrl === M\getProp($raHCard, 'uid') and $raUrl === $relAuthorUrl) {
                    $cleansed['author'] = flattenHCard($raHCard, $relAuthorUrl);
                } elseif (M\hasProp($raHCard, 'url') and count(array_intersect($raHCard['properties']['url'], $relMes)) > 0) {
                    // One of the h-card's URLs matches a rel=me link on the author page.
                    $cleansed['author'] = flattenHCard($raHCard, $relAuthorUrl);
                }
            }
        }

        // If after all that there's still no authorship data, fake some.
        if ($cleansed['author']['name'] === false) {
            $cleansed['author'] = flattenHCard(['properties' => []], $url);
            try {
                $response = $client->head("{$cleansed['author']['url']}/favicon.ico")->send();
                // Cast so a missing content-type header is searched as an empty string.
                if (strpos((string) $response->getHeader('content-type'), 'image') !== false) {
                    // This appears to be a valid image!
                    $cleansed['author']['photo'] = $response->getEffectiveUrl();
                }
            } catch (Guzzle\Common\Exception\GuzzleException $e) {
                // No photo fallback could be found.
            }
        }
    }

    // TODO: this will be M\getLocation when it's ported to the other library.
    if (($location = getLocation($hEntry)) !== null) {
        $cleansed['location'] = $location;
        // TODO: do additional reverse lookups of address details if none are provided.
        if (!empty($location['latitude']) and !empty($location['longitude'])) {
            // If this is a valid point, add a point with mashed names for elasticsearch to index.
            $cleansed['location_point'] = ['lat' => $location['latitude'], 'lon' => $location['longitude']];
        }
    }

    // TODO: figure out what other properties need storing/indexing, and whether anything else needs mashing for
    // elasticsearch to index more easily.
    return [$cleansed, $referencedPosts];
}
 /**
  * A value-class dt-* property joins its date and time parts even when the
  * hour component is a single digit.
  *
  * @see https://github.com/indieweb/php-mf2/issues/38
  */
 public function testValueClassDtMatchesSingleDigitTimeComponent()
 {
     $markup = '<div class="h-entry"><span class="dt-published"><time class="value">6:01</time>, <time class="value">2013-02-01</time></span></div>';
     $parsed = Mf2\parse($markup);
     $published = $parsed['items'][0]['properties']['published'];
     $this->assertEquals('2013-02-01T6:01', $published[0]);
 }
 /**
  * A microformat nested under a p-* property derives its "value" from the
  * nested microformat's first p-name.
  */
 public function testMicroformatsNestedUnderPPropertyClassnamesDeriveValueFromFirstPName()
 {
     $input = '<div class="h-entry"><div class="p-author h-card">This post was written by <span class="p-name">Zoe</span>.</div></div>';
     $mf = Mf2\parse($input);
     // Expected value goes first so PHPUnit's failure message labels expected/actual correctly.
     $this->assertEquals('Zoe', $mf['items'][0]['properties']['author'][0]['value']);
 }
Пример #10
0
 /**
  * A <br/> between two nested properties becomes a newline in the plain-text
  * value of the enclosing property and of the implied name, while the nested
  * properties themselves keep their own text.
  *
  * @see https://github.com/indieweb/php-mf2/issues/69
  */
 public function testBrWhitespaceIssue69()
 {
     $markup = '<div class="h-card"><p class="p-adr"><span class="p-street-address">Street Name 9</span><br/><span class="p-locality">12345 NY, USA</span></p></div>';
     $street = 'Street Name 9';
     $locality = '12345 NY, USA';
     $fullAddress = $street . "\n" . $locality;

     $parsed = Mf2\parse($markup);

     $this->assertEquals($fullAddress, $parsed['items'][0]['properties']['adr'][0]);
     $this->assertEquals($street, $parsed['items'][0]['properties']['street-address'][0]);
     $this->assertEquals($locality, $parsed['items'][0]['properties']['locality'][0]);
     $this->assertEquals($fullAddress, $parsed['items'][0]['properties']['name'][0]);
 }
Пример #11
0
/**
 * Collect the "self" and "hub" links advertised by a response.
 *
 * The HTTP Link header is read first. When the response's content type mentions "html", the body is parsed as
 * well: its rel=hub URLs are appended to the hubs from the header, and its first rel=self URL is used only when
 * the header supplied none.
 *
 * @param Guzzle\Http\Message\Response $resp The fetched response
 * @return array ['self' => string|null, 'hub' => list of hub URLs]
 */
function pushLinksForResponse(Guzzle\Http\Message\Response $resp)
{
    $found = ['self' => null, 'hub' => []];

    // True when the space-separated rel list contains the given token as a whole word.
    $relContains = function ($relList, $token) {
        return strpos(" {$relList} ", " {$token} ") !== false;
    };

    $linkHeader = $resp->getHeader('link');
    if ($linkHeader instanceof Guzzle\Http\Message\Header\Link) {
        foreach ($linkHeader->getLinks() as $link) {
            if ($relContains($link['rel'], 'self')) {
                $found['self'] = $link['url'];
            }
            if ($relContains($link['rel'], 'hub')) {
                $found['hub'][] = $link['url'];
            }
        }
    }

    // Nothing more to look at unless the body is HTML.
    if (strpos($resp->getContentType(), 'html') === false) {
        return $found;
    }

    $mf = Mf2\parse($resp->getBody(true), $resp->getEffectiveUrl());
    if (!empty($mf['rels']['hub'])) {
        $found['hub'] = array_merge($found['hub'], $mf['rels']['hub']);
    }
    if ($found['self'] === null and !empty($mf['rels']['self'])) {
        $found['self'] = $mf['rels']['self'][0];
    }

    return $found;
}
Пример #12
0
    /**
     * The plain-text value of an e-* property replaces a nested <img> element
     * with that image's alt text.
     *
     * @see https://github.com/indieweb/php-mf2/issues/53
     * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
     */
    public function testConvertsNestedImgElementToAltOrSrc()
    {
        $markup = <<<EOT
<div class="h-entry">
\t<p class="e-content">It is a strange thing to see a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p>
</div>
EOT;
        $parsed = Mf2\parse($markup, 'http://waterpigs.co.uk/articles/five-legged-elephant');
        $content = $parsed['items'][0]['properties']['content'][0];
        $this->assertEquals('It is a strange thing to see a five legged elephant', $content['value']);
    }
Пример #13
0
    /**
     * Parses a captured XML product-catalogue fixture and checks how many
     * top-level items come back.
     *
     * The fixture holds four <product> elements carrying class="hproduct"
     * (one "skuset" and three "sku" entries); the test expects exactly four
     * items in the result. The fixture is a nowdoc (<<<'EOT'), so sequences
     * such as "$std$" inside it are taken literally rather than interpolated.
     */
    public function testParsesHProduct()
    {
        $input = <<<'EOT'
<xml id="skufilterbrowse" class="slide">
<productcatalog><labels><label key="skuset.deliverypolicyurl">Delivery policy content URL</label><label key="price.save">Save</label><label key="skuset.seemoredetails">See more details</label><label key="price.additionaloffers">Additional Offers</label><label key="price.freeitem">Includes Free Item*</label><label key="price.instsaving">Instant Savings</label><label key="skuset.eddieseedetails">See details </label><label key="price.rebateurl">RebateURL</label><label key="skuset.freedelivery">FREE SHIPPING, plus 5% back for Rewards Members</label><label key="price.printableCoupons">Click here for Printable Coupon</label><label key="price.value">Value</label><label key="skuset.eddieshipdetails">Estimated to arrive no later than </label><label key="price.qty">Qty.</label><label key="price.chooseyouritems">Choose your Items</label><label key="price.true">true</label><label key="skuset.clearancemessage">&lt;strong&gt;CLEARANCE ITEM:&lt;/strong&gt; </label><label key="price.ERF">Eco Fee</label><label key="skuset.viewlargerimage">View larger</label><label key="common.next">Next</label><label key="price.addtocart">Add to Cart</label><label key="common.reviews">reviews</label><label key="common.previous">Previous</label><label key="skuset.instockonline">In Stock Online</label><label key="price.priceafterrebate">Price &lt;strong&gt;after&lt;/strong&gt; rebate</label><label key="btn.bopis">PICK UP TODAY</label><label key="common.share">SHARE</label><label key="skuset.freedeliverytostore">FREE Shipping to store</label><label key="erf.popup.label">Environmental fee notice:</label><label key="price.details">Details</label><label key="common.stars">stars</label><label key="skuset.learnmore1">Learn more.</label><label key="common.share.twitter">Share on Twitter</label><label key="price.instorespecial">Available In Store Only</label><label key="price.priceincart">See Price in Cart</label><label key="skuset.giftcards">Orders containing this item are not eligible for Gift cards or 
certain other methods of payment.</label><label key="skuset.mysoftwaredownloads">"My Software Downloads"</label><label key="price.bmsmerf">Buy More Save More prices do not include eco fee.</label><label key="price.now">Now</label><label key="price.collapse">Collapse</label><label key="classpage.getstarted">Get Started</label><label key="skuset.printpage">Print this page</label><label key="price.learnmore">Learn More</label><label key="common.item">Item</label><label key="common.icon.path">/sbdpas/img/ico/</label><label key="skuset.selectcomponent">Select another component below</label><label key="skuset.freeshippingentireorder">Item qualifies entire order for free delivery</label><label key="skuset.eddieincart">in cart. </label><label key="common.selected">selected</label><label key="price.priceaftersavings">Price &lt;strong&gt;after&lt;/strong&gt; savings</label><label key="skuset.inktoner">Ink and toner</label><label key="classpage.comingsoon">Coming Soon</label><label key="price.before">Before</label><label key="price.was">Was</label><label key="skuset.viewfulldetails">View Full Details</label><label key="skuset.expdelivery">Expected Delivery:</label><label key="common.share.pinterest">Share on Pinterest</label><label key="skuset.deliveryonly">Online Only</label><label key="common.share.email">Email it</label><label key="price.seedetails">See Details</label><label key="skuset.expdelpopup">/sbd/content/help-center/shipping-and-delivery.html</label><label key="skuset.softwaredownload">Software Download</label><label key="erf.popup.url">/sbd/content/help/environmental_fee_popup.html</label><label key="price.finalprice">Final Price</label><label key="common.share.facebook">Share on Facebook</label><label key="skuset.eddiesaveproduct">on this product! 
</label><label key="skuset.suppliedandshippedby">Supplied and Shipped by</label><label key="common.addtofavorites">Add to Favorites</label><label key="skuset.esdnotepart1">Note: Shortly after purchase you will be able to access your Software Downloads in the </label><label key="skuset.esdnotepart2">section of your staples.com&#174; account. It's easy and secure!</label><label key="skuset.software1">/sbd/cre/marketing/technology-research-centers/software/software-downloads.html#z_faq</label><label key="price.instantcoupon">Instant Coupon</label><label key="skuset.eddieshipdetails1">to </label><label key="price.pricebefore">Price &lt;strong&gt;before&lt;/strong&gt;</label><label key="skuset.eddieoffer">Offer valid for 20 minutes. </label><label key="price.price">Price</label><label key="common.model">Model</label><label key="skuset.supplierhover">We have partnered with this trusted supplier to offer you a wider assortment of products and brands for all of your business needs, with the same great level of service you can expect from Staples.com.</label><label key="skuset.forceshiptostore">Item can be shipped only to a retail store location. 
</label><label key="price.rebate">Rebate</label><label key="price.bmsm">Buy More Save More</label><label key="common.print">print</label><label key="skuset.viewvideo">View video</label><label key="skuset.instoreavailability">Check in Store Availability</label><label key="price.aslowas">As low as</label><label key="price.reg">Reg</label><label key="price.free">FREE</label><label key="erf.message">Provincial recycling or deposit fees may be applicable upon checkout.</label><label key="skuset.eddiesave">Save an extra </label><label key="skuset.deferredfinancemessage">Special Financing Available </label><label key="price.promotions">Promotions</label><label key="price.availableinstore">Available In-Store Only</label><label key="skuset.outofstock">Currently Out of Stock.</label><label key="price.totalsavings">Total Savings</label><label key="price.beforePresentation">Before continuing, please select an item</label></labels>

<product bss="ON" envfeeflag="0" comingsoonflag="0" price="18.35" name="Swingline® 747® Classic Desktop Staplers" bopispilot="true" mss="ON" zipcode="01701" comp="0" presnvalue="Select an Item" snum="SS264184" leadtimedescription="1 - 30 Business Days" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Staplers" prdtypeid="0" presntype="D" review="4.5" class="hproduct" type="skuset" skubopswitch="ON" id="609548" bopisenableflag="0"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Spring-loaded inner channel prevents jams</desc><desc typeid="2">Available in black, burgundy and beige &lt;br /&gt; &lt;li&gt;Staples up to 20 sheets&lt;/li&gt;</desc><desc typeid="38">Select an Item</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Spring-loaded inner channel prevents jams</desc><desc typeid="39">Available in black, burgundy and beige &lt;br /&gt; &lt;li&gt;Staples up to 20 sheets&lt;/li&gt;</desc><desc class="fn">Swingline® 747® Classic Desktop Staplers</desc></descs><price is="18.35" uom="Each"></price><span class="price">18.35</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0021414_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0021414_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0021414</pic></imgs><producturl class="url">/Swingline-747-Classic-Desktop-Staplers/product_SS264184</producturl><review rating="4.5" count="99" css="45"></review><delivery shiptostore="true" free="false" instore="2" forceshiptostore="false"></delivery></product>

<product bss="ON" envfeeflag="0" comingsoonflag="0" price="18.35" name="Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black" bopispilot="true" mss="ON" zipcode="01701" comp="1" snum="264184" leadtimedescription="1 Business Day" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black" prdtypeid="0" review="4.5" class="hproduct" mnum="S7074771G" type="sku" skubopswitch="ON" id="609315" bopisenableflag="1"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Full strip</desc><desc typeid="2">Staples up to 20 sheets</desc><desc typeid="19">Each</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Full strip</desc><desc typeid="39">Staples up to 20 sheets</desc><desc class="fn">Swingline® 747® Classic Desktop Full Strip Stapler, 20 Sheet Capacity, Black</desc></descs><price is="18.35" uom="Each"></price><span class="price">18.35</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0021412_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0021412_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0021412</pic><pic id="120"></pic></imgs><producturl class="url">/Swingline-747-Classic-Desktop-Full-Strip-Stapler-20-Sheet-Capacity-Black/product_264184</producturl><review rating="4.5" count="99" css="45"></review><delivery shiptostore="true" free="false" instore="1" forceshiptostore="false"></delivery></product>

<product bss="ON" envfeeflag="0" comingsoonflag="0" price="19.59" name="Swingline® 747® Classic Desktop Stapler, Burgundy" bopispilot="true" mss="ON" zipcode="01701" comp="1" snum="413732" leadtimedescription="1 Business Day" expandedpromo="0" alttext="Swingline® 747® Classic Desktop Stapler, Burgundy" prdtypeid="0" review="3.5" class="hproduct" mnum="74718/74782" type="sku" skubopswitch="ON" id="1460639" bopisenableflag="0"><descs><desc typeid="2">All-steel construction with non-skid rubber base</desc><desc typeid="2">Spring-loaded inner channel prevents jams</desc><desc typeid="2">Burgundy &lt;br /&gt; &lt;li&gt;Staples up to 20 sheets&lt;/li&gt;</desc><desc typeid="19">Each</desc><desc typeid="39">All-steel construction with non-skid rubber base</desc><desc typeid="39">Spring-loaded inner channel prevents jams</desc><desc typeid="39">Burgundy &lt;br /&gt; &lt;li&gt;Staples up to 20 sheets&lt;/li&gt;</desc><desc class="fn">Swingline® 747® Classic Desktop Stapler, Burgundy</desc></descs><price is="19.59" uom="Each"></price><span class="price">19.59</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/m000240695_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/m000240695_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/m000240695</pic></imgs><producturl class="url">/Swingline-747-Classic-Desktop-Stapler-Burgundy/product_413732</producturl><review rating="3.5" count="7" css="35"></review><delivery shiptostore="true" free="false" instore="2" forceshiptostore="false"></delivery></product>

<product bss="ON" envfeeflag="0" comingsoonflag="0" price="39.49" name="Swingline® 747® Rio Red Stapler, 20 Sheet Capacity" bopispilot="true" mss="ON" zipcode="01701" comp="1" brandname="Swingline" snum="562485" leadtimedescription="1 - 4 Business Days" expandedpromo="0" alttext="Swingline® 747® Rio Red Stapler, 20 Sheet Capacity" prdtypeid="0" review="4.5" class="hproduct" mnum="74736" type="sku" skubopswitch="ON" id="1093798" bopisenableflag="0"><descs><desc typeid="2">20 sheet capacity with Swingline S.F.® 4® Staples</desc><desc typeid="2">Durable metal construction</desc><desc typeid="2">Stapler opens for tacking ability</desc><desc typeid="19">Each</desc><desc typeid="39">20 sheet capacity with Swingline S.F.® 4® Staples</desc><desc typeid="39">Durable metal construction</desc><desc typeid="39">Stapler opens for tacking ability</desc><desc class="fn">Swingline® 747® Rio Red Stapler, 20 Sheet Capacity</desc></descs><price is="39.49" uom="Each"></price><span class="price">39.49</span><span class="currency">USD</span><imgs><pic class="photo" id="1">http://www.staples-3p.com/s7/is/image/Staples/s0446269_sc7?$std$</pic><pic altimg="Y" id="2">http://www.staples-3p.com/s7/is/image/Staples/s0446269_sc7?$thb$</pic><pic id="6">http://www.staples-3p.com/s7/is/image/Staples/s0446269</pic></imgs><producturl class="url">/Swingline-747-Rio-Red-Stapler-20-Sheet-Capacity/product_562485</producturl><review rating="4.5" count="76" css="45"></review><delivery shiptostore="true" free="true" instore="2" forceshiptostore="false"></delivery></product>
</productcatalog>
</xml>
EOT;
        $result = Mf2\parse($input, 'http://www.staples.com/Swingline-747-Rio-Red-Stapler-20-Sheet-Capacity/product_562485');
        // One item is expected per class="hproduct" element in the fixture.
        $this->assertCount(4, $result['items']);
    }
Пример #14
0
 /**
  * Determine the author of a parsed page.
  *
  * Order of preference:
  *  1. the `author` property of the top-level h-entry;
  *  2. otherwise an h-card found among the children of the top-level h-feed;
  *  3. otherwise, for permalinks, the first rel-author URL is fetched as the
  *     author-page and searched for an h-card whose url and uid both equal
  *     the author-page, then one whose url matches a rel-me link on that
  *     page, then one whose url equals the author-page.
  *
  * When several top-level h-entry / h-feed items exist, the last of each wins.
  *
  * @param array $mf        Parsed microformats; `items` is required, `rels` is consulted for rel-author
  * @param bool  $permalink Whether the page is a permalink; rel-author is only consulted when true
  *
  * @return mixed The result of normalise() for the chosen author, or false when none could be determined
  *
  * @throws AuthorshipParserException When neither an h-entry nor an h-feed is present at the top level,
  *                                   or when requesting the author-page yields a bad response
  */
 public function findAuthor($mf, $permalink = true)
 {
     //instantiate vars
     $this->permalink = $permalink;
     $this->mf = $mf;
     $this->hEntry = false;
     $this->hFeed = false;
     $this->author = null;
     $this->authorPage = null;
     $this->authorInfo = null;
     //check for h-entry's and h-feed's among the top-level items
     //TODO: work with multiple h-entry's
     foreach ($this->mf['items'] as $topLevelItem) {
         foreach ($topLevelItem['type'] as $type) {
             if ($type == 'h-entry') {
                 $this->hEntry = $topLevelItem;
             } elseif ($type == 'h-feed') {
                 $this->hFeed = $topLevelItem;
             }
         }
     }
     if ($this->hEntry === false && $this->hFeed === false) {
         //we have neither an h-entry nor an h-feed in the parent items array
         throw new AuthorshipParserException('No h-entry found');
     }
     //if h-entry has an author property use that
     if ($this->hEntry !== false && array_key_exists('author', $this->hEntry['properties'])) {
         $this->author = $this->hEntry['properties']['author'];
     }
     //otherwise look at the h-feed: an h-card among its children is used.
     //Previously this ran unconditionally and overrode the h-entry's own author,
     //and it assumed the h-feed always has a `children` key.
     if ($this->author === null && $this->hFeed !== false && !empty($this->hFeed['children'])) {
         foreach ($this->hFeed['children'] as $child) {
             if ($child['type'][0] == 'h-card') {
                 //we have a h-card on the page, use it
                 $this->author = $child;
             }
         }
     }
     //if an author was found
     if ($this->author !== null) {
         //a plain URL string becomes the author-page; anything else (an h-card,
         //a list of property values) is normalised and returned
         //NOTE(review): both assignments above store arrays, so the URL branch is
         //not reached from this method as written; the earlier h-card check via
         //array_search() led to the same normalise() call and was folded in here.
         //Confirm whether a single URL value inside the property list should be
         //unwrapped and treated as the author-page.
         if (is_string($this->author) && filter_var($this->author, FILTER_VALIDATE_URL)) {
             $this->authorPage = $this->author;
         } else {
             return $this->normalise($this->author);
         }
     }
     //if no author-page and h-entry is a permalink then look for rel-author link
     //and let that be author-page
     if ($this->authorPage === null && $this->permalink == true && !empty($this->mf['rels']['author'])) {
         $relAuthor = $this->mf['rels']['author'];
         //need to deal with this better: only the first rel-author is used
         $this->authorPage = is_array($relAuthor) ? $relAuthor[0] : $relAuthor;
     }
     //without an author-page there is nothing left to try
     if ($this->authorPage === null) {
         return false;
     }
     //grab mf2 from author-page
     try {
         $this->parser = new Parser();
         $this->response = $this->guzzle->get($this->authorPage);
         $this->html = (string) $this->response->getBody();
     } catch (\GuzzleHttp\Exception\BadResponseException $e) {
         throw new AuthorshipParserException('Unable to get the Content from the authors page');
     }
     $this->authorMf2 = \Mf2\parse($this->html, $this->authorPage);

     //values of a property as a list, empty when the property is absent
     $valuesOf = function ($item, $property) {
         return isset($item['properties'][$property]) ? $item['properties'][$property] : [];
     };
     $hCards = [];
     foreach ($this->authorMf2['items'] as $item) {
         if (in_array('h-card', $item['type'], true)) {
             $hCards[] = $item;
         }
     }

     //if page has 1+ h-card where url == uid == author-page then use first
     //such h-card, exit.
     //This step used to be guarded by a search of the h-entry that never matched
     //(and failed outright when there was no h-entry) and compared against
     //undefined $uid / $authorPage variables.
     foreach ($hCards as $hCard) {
         if (in_array($this->authorPage, $valuesOf($hCard, 'url'), true)
             && in_array($this->authorPage, $valuesOf($hCard, 'uid'), true)) {
             return $this->normalise($hCard);
         }
     }
     //else if page has 1+ h-card with url property which matches a rel-me
     //link on the page, use first such h-card, exit
     $relMeLinks = !empty($this->authorMf2['rels']['me']) ? $this->authorMf2['rels']['me'] : [];
     foreach ($hCards as $hCard) {
         foreach ($valuesOf($hCard, 'url') as $url) {
             if (in_array($url, $relMeLinks)) {
                 return $this->normalise($hCard);
             }
         }
     }
     //if the author-page has 1+ h-card with url == author-page, use first
     //such h-card, exit
     foreach ($hCards as $hCard) {
         if (in_array($this->authorPage, $valuesOf($hCard, 'url'))) {
             return $this->normalise($hCard);
         }
     }
     //if we have got this far, we haven't been able to determine author info
     //to return, so return false
     return false;
 }
Пример #15
0
 /** @see https://github.com/indieweb/php-mf2/issues/6 */
 public function testParsesImpliedNameFromAbbrTitle()
 {
     $input = '<abbr class="h-card" title="Barnaby Walters">BJW</abbr>';
     $result = Mf2\parse($input);
     $this->assertEquals('Barnaby Walters', $result['items'][0]['properties']['name'][0]);
 }
Пример #16
0
    /**
     * In a p-* property a nested <img> contributes its alt text when it has
     * one, and otherwise its src resolved against the base URL.
     *
     * @see https://github.com/indieweb/php-mf2/issues/53
     * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
     */
    public function testConvertsNestedImgElementToAltOrSrc()
    {
        $markup = <<<EOT
<div class="h-entry">
\t<p class="p-name">The day I saw a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p>
\t<p class="p-summary">Blah blah <img src="/photos/five-legged-elephant.jpg" /></p>
</div>
EOT;
        $parsed = Mf2\parse($markup, 'http://waterpigs.co.uk/articles/five-legged-elephant');
        $properties = $parsed['items'][0]['properties'];
        $this->assertEquals('The day I saw a five legged elephant', $properties['name'][0]);
        $this->assertEquals('Blah blah http://waterpigs.co.uk/photos/five-legged-elephant.jpg', $properties['summary'][0]);
    }
Пример #17
0
 /**
  * A u-* property on an <input> element takes its value from the value
  * attribute.
  */
 public function testParsesInputValue()
 {
     $markup = '<span class="h-card"><input class="u-url" value="http://example.com" /></span>';
     $parsed = Mf2\parse($markup);
     $urls = $parsed['items'][0]['properties']['url'];
     $this->assertEquals('http://example.com', $urls[0]);
 }