PHP BarnabyWalters\Mf2 getHtml Examples

Programming Language: PHP

Namespace/Package Name: BarnabyWalters\Mf2

Method/Function: getHtml

Examples at hotexamples.com: 2

PHP BarnabyWalters\Mf2 getHtml - 2 examples found. These are the top-rated real-world PHP examples of BarnabyWalters\Mf2\getHtml extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: app.php Project: bnvk/shrewdness

/**
 * Cleans a single h-entry into a flat array suitable for indexing, resolving related posts and authorship.
 *
 * The entry is first run through comments\parse(), then augmented with plaintext/HTML content, a UTC
 * published timestamp, photo/logo, relation URLs (in-reply-to, like-of, repost-of), author and location.
 *
 * @param array $hEntry The h-entry microformat structure to process.
 * @param array $mf The full parsed microformats document containing $hEntry; its 'rels' are used for the
 *                  rel-author fallback.
 * @param string $url URL of the document; passed to author discovery and h-card flattening.
 * @param bool $resolveRelationships When true, every related URL is fetched and, if it contains an h-entry,
 *                                   that entry is processed (non-recursively) as a referenced post.
 * @param Guzzle\Http\ClientInterface|null $client HTTP client; a new Guzzle\Http\Client is created when null.
 * @param callable|null $purifier Applied to HTML values before they are stored; identity function when null.
 * @return array Two elements: the cleansed post array, and the list of cleansed referenced posts.
 */
function processHEntry($hEntry, $mf, $url, $resolveRelationships = true, Guzzle\Http\ClientInterface $client = null, $purifier = null)
{
    if ($client === null) {
        $client = new Guzzle\Http\Client();
    }
    if ($purifier === null) {
        $purifier = function ($value) {
            return $value;
        };
    }
    // Use comment-presentation algorithm to clean up.
    $cleansed = comments\parse($hEntry);
    $referencedPosts = [];
    // Used internally to keep track of what referenced posts have been processed already.
    $referencedPostUrls = [];
    $indexedContent = M\getPlaintext($hEntry, 'content', $cleansed['text']);
    $displayContent = $purifier(M\getHtml($hEntry, 'content'));
    $cleansed['content'] = $indexedContent;
    $cleansed['display_content'] = $displayContent;
    // Handle all datetime cases, as per http://indiewebcamp.com/h-entry#How_to_consume_h-entry
    $utc = new DateTimeZone('UTC');
    try {
        $published = new DateTime($cleansed['published']);
        $utcPublished = clone $published;
        $utcPublished->setTimezone($utc);
    } catch (Exception $e) {
        // The given value could not be parsed as a datetime.
        $published = $utcPublished = false;
    }
    $inTheFuture = $utcPublished instanceof DateTime && $utcPublished > new DateTime('now', $utc);
    // DateTime() accepts a falsy constructor param (meaning "now") without throwing, so a missing value has to
    // be detected separately from a parse failure. Any of: unparseable, missing, or future-dated means we
    // cannot trust the given datetime.
    if ($published === false or empty($cleansed['published']) or $inTheFuture) {
        // If there’s absolutely no usable datetime, our best guess has to be “now”.
        // Additional heuristics could be used in the bizarre case of having a feed where an item without datetime is
        // published in between two items with datetimes, allowing us to guess the published datetime is between the two,
        // but until that actually happens it’s not worth coding for.
        $cleansed['published'] = gmdate('c');
        $utcPublished = new DateTime('now', $utc);
    }
    // Otherwise “published” is given and parses correctly, into $published.
    // Currently it’s not trivial to figure out if a given datetime is floating or not, so assume that the timezone
    // given here is correct for the moment. When this can be determined, follow http://indiewebcamp.com/datetime#implying_timezone_from_webmentions

    // Store a string representation of published to be indexed+queried upon.
    $cleansed['published_utc'] = $utcPublished->format(DateTime::W3C);
    if (M\hasProp($hEntry, 'photo')) {
        $cleansed['photo'] = $purifier(M\getHtml($hEntry, 'photo'));
    }
    if (M\hasProp($hEntry, 'logo')) {
        $cleansed['logo'] = $purifier(M\getHtml($hEntry, 'logo'));
    }
    // For every post this post has a relation (in-reply-to, repost-of, like-of etc.), fetch and resolve that URL,
    // index it as its own post (if it doesn’t already exist) and store only a reference to it here.
    $references = ['in-reply-to' => [], 'like-of' => [], 'repost-of' => []];
    foreach (array_keys($references) as $relation) {
        $refUrls = [];
        // These will be feed pages not permalink pages so cannot check rels, only microformats properties.
        if (M\hasProp($hEntry, $relation)) {
            foreach ($hEntry['properties'][$relation] as $value) {
                if (is_string($value)) {
                    $refUrls[] = $value;
                } elseif (is_array($value) and isset($value['html'])) {
                    // e-* properties unlikely to be URLs but try all the same.
                    $refUrls[] = $value['value'];
                } elseif (M\isMicroformat($value)) {
                    if (M\hasProp($value, 'url')) {
                        $refUrls[] = M\getProp($value, 'url');
                    } elseif (M\hasProp($value, 'uid')) {
                        $refUrls[] = M\getProp($value, 'uid');
                    }
                }
                // Anything else means the microformats parsing spec has changed; ignore it as we don’t know
                // how to interpret it.
            }
        }
        if ($resolveRelationships) {
            foreach ($refUrls as $refUrl) {
                try {
                    $resp = $client->get($refUrl)->send();
                    $refResolvedUrl = $resp->getEffectiveUrl();
                    $refMf = Mf2\parse($resp->getBody(1), $refResolvedUrl);
                    $refHEntries = M\findMicroformatsByType($refMf, 'h-entry');
                    $relatedUrl = $refResolvedUrl;
                    if (count($refHEntries) > 0) {
                        $refHEntry = $refHEntries[0];
                        $refSearchUrl = M\hasProp($refHEntry, 'url') ? M\getProp($refHEntry, 'url') : $refResolvedUrl;
                        if (!in_array($refSearchUrl, $referencedPostUrls, true)) {
                            // Do not resolve relationships of the referenced post, to avoid unbounded crawling.
                            list($refCleansed) = processHEntry($refHEntry, $refMf, $refResolvedUrl, false, $client, $purifier);
                            $referencedPosts[] = $refCleansed;
                            $referencedPostUrls[] = $refSearchUrl;
                            $relatedUrl = $refSearchUrl;
                        }
                    }
                    $references[$relation][] = $relatedUrl;
                } catch (Guzzle\Common\Exception\GuzzleException $e) {
                    // Fetching failed; fall back to the URL as given.
                    $references[$relation][] = $refUrl;
                }
            }
        } else {
            // If we’re not resolving relationships, the most accurate data we have is the data given already.
            $references[$relation] = $refUrls;
        }
        // Now we have the best possible list of URLs, attach it to $cleansed.
        $cleansed[$relation] = array_unique($references[$relation]);
    }
    if (!M\hasProp($hEntry, 'author') or !M\isMicroformat($hEntry['properties']['author'][0])) {
        // No authorship data given, we need to find the author!
        // TODO: proper /authorship implementation.
        // TODO: wrap proper /authorship implementation in layer which does purification, simplification, fallback.
        $potentialAuthor = M\getAuthor($hEntry, $mf, $url);
        if (M\isMicroformat($potentialAuthor)) {
            $cleansed['author'] = flattenHCard($potentialAuthor, $url);
        } elseif (!empty($mf['rels']['author'])) {
            // TODO: look in elasticsearch index for a person with the first rel-author URL then fall back to fetching.
            // Fetch the first author URL and look for a representative h-card there.
            $relAuthorUrl = $mf['rels']['author'][0];
            $relAuthorMf = Mf2\fetch($relAuthorUrl);
            // Only inspect the result if the fetch actually produced a parsed document.
            if (is_array($relAuthorMf)) {
                $relAuthorHCards = M\findMicroformatsByType($relAuthorMf, 'h-card');
                $relMes = !empty($relAuthorMf['rels']['me']) ? $relAuthorMf['rels']['me'] : [];
                foreach ($relAuthorHCards as $raHCard) {
                    // An h-card is taken as the author when its url is the rel-author URL itself, or when
                    // any of its urls is also one of the page’s rel-me URLs.
                    $urlIsAuthorPage = M\getProp($raHCard, 'url') === $relAuthorUrl;
                    $urlIsRelMe = M\hasProp($raHCard, 'url')
                        && count(array_intersect($raHCard['properties']['url'], $relMes)) > 0;
                    if ($urlIsAuthorPage or $urlIsRelMe) {
                        $cleansed['author'] = flattenHCard($raHCard, $relAuthorUrl);
                    }
                }
            }
        }
        // If after all that there’s still no authorship data, fake some.
        if (!isset($cleansed['author']['name']) or $cleansed['author']['name'] === false) {
            $cleansed['author'] = flattenHCard(['properties' => []], $url);
            try {
                $response = $client->head("{$cleansed['author']['url']}/favicon.ico")->send();
                // Cast so that a missing content-type header is treated as an empty string.
                if (strpos((string) $response->getHeader('content-type'), 'image') !== false) {
                    // This appears to be a valid image!
                    $cleansed['author']['photo'] = $response->getEffectiveUrl();
                }
            } catch (Guzzle\Common\Exception\GuzzleException $e) {
                // No photo fallback could be found.
            }
        }
    }
    // TODO: this will be M\getLocation when it’s ported to the other library.
    if (($location = getLocation($hEntry)) !== null) {
        $cleansed['location'] = $location;
        // TODO: do additional reverse lookups of address details if none are provided.
        if (!empty($location['latitude']) and !empty($location['longitude'])) {
            // If this is a valid point, add a point with mashed names for elasticsearch to index.
            $cleansed['location_point'] = ['lat' => $location['latitude'], 'lon' => $location['longitude']];
        }
    }
    // TODO: figure out what other properties need storing/indexing, and whether anything else needs mashing for
    // elasticsearch to index more easily.
    return [$cleansed, $referencedPosts];
}

Example #2

Show file

File: FeedTask.php Project: diplix/Monocle

 /**
  * Refreshes one feed: fetches it, (re)subscribes to its PuSH hub if one is advertised, then fetches every
  * entry permalink and stores the entry, its tags, its syndication URLs and its channel memberships.
  *
  * Progress is echoed to stdout. Exceptions thrown while processing are caught and printed; the feed's
  * refresh_in_progress flag is always cleared before returning, except when the job is re-queued because
  * another refresh is already running.
  *
  * @param mixed $feed_id Identifier of the feed row, as accepted by db\get_feed().
  * @return void
  */
 public static function refresh_feed($feed_id)
 {
     $feed = db\get_feed($feed_id);
     echo "Refreshing feed " . $feed->feed_url . " ({$feed_id})\n\n";
     // Check if this feed is already being refreshed, and re-queue the job for later to give the first job
     // a chance to finish.
     // NOTE(review): an earlier comment said "30 seconds" but the delay argument passed is 5 — confirm the
     // intended value and unit against DeferredTask::queue().
     if ($feed->refresh_in_progress) {
         echo "This feed is already being processed, re-queuing for later\n";
         DeferredTask::queue('FeedTask', 'refresh_feed', $feed_id, 5);
         return;
     }
     // mark that this feed is currently being refreshed
     $feed->refresh_started = date('Y-m-d H:i:s');
     $feed->refresh_in_progress = 1;
     $feed->save();
     // only deal with mf2 feeds for now
     try {
         $response = request\get_url($feed->feed_url, true);
         $header_rels = IndieWeb\http_rels($response['headers']);
         $html = $response['body'];
         $mf2 = feeds\parse_mf2($html, $feed->feed_url);
         // Prefer a hub advertised in the HTTP headers over one in the HTML.
         $hub_url = false;
         if (k($header_rels, 'hub')) {
             $hub_url = $header_rels['hub'][0];
             $hub_url_source = 'http';
         } elseif (k($mf2, 'rels') && k($mf2['rels'], 'hub')) {
             $hub_url = $mf2['rels']['hub'][0];
             $hub_url_source = 'html';
         }
         // check for PuSH info and subscribe to the hub if found
         if ($hub_url) {
             if (k($header_rels, 'self')) {
                 $self_url = $header_rels['self'][0];
                 $self_url_source = 'http';
             } elseif (k($mf2, 'rels') && k($mf2['rels'], 'self')) {
                 $self_url = $mf2['rels']['self'][0];
                 $self_url_source = 'html';
             } else {
                 $self_url = $feed->feed_url;
                 $self_url_source = 'default';
             }
             // Keep track of what the hub URL was last time we saw it
             $last_hub_url = $feed->push_hub_url;
             // Store the new hub and topic
             $feed->push_hub_url = $hub_url;
             $feed->push_topic_url = $self_url;
             // re-subscribe if we are not subscribed, if the hub has changed,
             // or if the expiration date is coming up soon (within 300 seconds)
             if ($feed->push_subscribed == 0 || $hub_url != $last_hub_url || $feed->push_expiration && strtotime($feed->push_expiration) - 300 < time()) {
                 echo "Attempting to subscribe to the hub!\n";
                 echo "Hub: " . $feed->push_hub_url . " (found in {$hub_url_source})\n";
                 echo "Topic: " . $feed->push_topic_url . " (found in {$self_url_source})\n";
                 // This will cause the hub to make a GET request to the callback URL, which we then have to verify
                 $response = request\post($feed->push_hub_url, ['hub.mode' => 'subscribe', 'hub.topic' => $feed->push_topic_url, 'hub.callback' => 'http://' . Config::$hostname . '/push/feed/' . $feed->hash]);
                 echo "Hub responded:\n";
                 echo $response['status'] . "\n";
                 echo $response['body'] . "\n";
             }
             $feed->save();
         }
         // check if there are any h-entry posts
         $info = feeds\find_feed_info($mf2);
         if ($info) {
             foreach ($info['entries'] as $i => $feed_entry) {
                 echo "\nProcessing entry {$i}\n";
                 // Find the canonical URL for the entry and fetch the page
                 $entry_url = Mf2\getPlaintext($feed_entry, 'url');
                 if ($entry_url) {
                     echo $entry_url . "\n";
                     // Parse the entry for all required info and store in the "entries" table
                     $entry_html = request\get_url($entry_url);
                     if ($entry_html) {
                         $entry_mf2 = feeds\parse_mf2($entry_html, $entry_url);
                         $entries = Mf2\findMicroformatsByType($entry_mf2['items'], 'h-entry');
                         // The permalink page may contain no h-entry at all; treat that as "not a microformat".
                         $entry_mf2 = isset($entries[0]) ? $entries[0] : null;
                         if (!Mf2\isMicroformat($entry_mf2)) {
                             echo "Does not appear to be a microformat\n";
                             continue;
                         }
                         if (!in_array('h-entry', $entry_mf2['type'])) {
                             print_r($entry_mf2);
                             continue;
                         }
                         // Update the existing row for this feed+URL, or create a new one.
                         if (!($entry = ORM::for_table('entries')->where('feed_id', $feed->id)->where('url', $entry_url)->find_one())) {
                             $entry = ORM::for_table('entries')->create();
                             $entry->feed_id = $feed->id;
                             $entry->url = $entry_url;
                         }
                         // Decide whether to store the name, summary and content depending on whether they are unique
                         $name = Mf2\getPlaintext($entry_mf2, 'name');
                         $summary = Mf2\getPlaintext($entry_mf2, 'summary');
                         $content = Mf2\getHtml($entry_mf2, 'content');
                         $content_text = Mf2\getPlaintext($entry_mf2, 'content');
                         // Store the name if it's different from the summary and the content
                         if (!feeds\content_is_equal($name, $summary) && !feeds\content_is_equal($name, $content_text)) {
                             $entry->name = $name;
                             echo "Entry has a name: {$name}\n";
                         } else {
                             $entry->name = '';
                         }
                         // Store the summary if it's different from the content
                         if ($summary && !feeds\content_is_equal($summary, $content_text)) {
                             $entry->summary = $summary;
                             echo "Entry has a summary\n";
                         } else {
                             $entry->summary = '';
                         }
                         $entry->content = $content;
                         $date_string = Mf2\getPlaintext($entry_mf2, 'published');
                         if ($date_string) {
                             try {
                                 // The constructor throws on unparseable input, so $date is always usable here.
                                 $date = new DateTime($date_string);
                                 // Record the original offset before normalising to UTC.
                                 $entry->timezone_offset = $date->format('Z');
                                 $date->setTimeZone(new DateTimeZone('UTC'));
                                 $entry->date_published = $date->format('Y-m-d H:i:s');
                                 echo "Published: {$entry->date_published}\n";
                             } catch (Exception $date_error) {
                                 echo "Error parsing date: {$date_string}\n";
                             }
                         }
                         // Set the date published to now if none was found in the entry
                         if (!$entry->date_published) {
                             $entry->date_published = date('Y-m-d H:i:s');
                         }
                         if (Mf2\getPlaintext($entry_mf2, 'like-of')) {
                             $entry->like_of_url = Mf2\getPlaintext($entry_mf2, 'like-of');
                         }
                         if (Mf2\getPlaintext($entry_mf2, 'repost-of')) {
                             $entry->repost_of_url = Mf2\getPlaintext($entry_mf2, 'repost-of');
                         }
                         // TODO: move this to a helper
                         // finds the URL for a property if the property is a plain string or a nested h-cite
                         if (Mf2\getPlaintext($entry_mf2, 'in-reply-to')) {
                             if (Mf2\isMicroformat($entry_mf2['properties']['in-reply-to'][0])) {
                                 $entry->in_reply_to_url = $entry_mf2['properties']['in-reply-to'][0]['properties']['url'][0];
                             } else {
                                 $entry->in_reply_to_url = Mf2\getPlaintext($entry_mf2, 'in-reply-to');
                             }
                         }
                         if (Mf2\getPlaintext($entry_mf2, 'photo')) {
                             $entry->photo_url = Mf2\getPlaintext($entry_mf2, 'photo');
                         }
                         if (Mf2\getPlaintext($entry_mf2, 'video')) {
                             $entry->video_url = Mf2\getPlaintext($entry_mf2, 'video');
                         }
                         if (Mf2\getPlaintext($entry_mf2, 'audio')) {
                             $entry->audio_url = Mf2\getPlaintext($entry_mf2, 'audio');
                         }
                         // Prefer the entry's own author, falling back to the feed-level author.
                         $author_mf2 = false;
                         if (Mf2\hasProp($entry_mf2, 'author')) {
                             $author_mf2 = $entry_mf2['properties']['author'][0];
                         } elseif (Mf2\hasProp($info, 'author')) {
                             $author_mf2 = $info['properties']['author'][0];
                         }
                         if ($author_mf2) {
                             $entry->author_name = Mf2\getPlaintext($author_mf2, 'name');
                             $entry->author_url = Mf2\getPlaintext($author_mf2, 'url');
                             $entry->author_photo = Mf2\getPlaintext($author_mf2, 'photo');
                         } else {
                             echo "NO AUTHOR WAS FOUND!!\n";
                         }
                         if (Mf2\hasProp($entry_mf2, 'like')) {
                             $entry->num_likes = count($entry_mf2['properties']['like']);
                         }
                         if (Mf2\hasProp($entry_mf2, 'repost')) {
                             $entry->num_reposts = count($entry_mf2['properties']['repost']);
                         }
                         if (Mf2\hasProp($entry_mf2, 'comment')) {
                             $entry->num_comments = count($entry_mf2['properties']['comment']);
                         }
                         if (Mf2\hasProp($entry_mf2, 'rsvp')) {
                             $entry->num_rsvps = count($entry_mf2['properties']['rsvp']);
                         }
                         $entry->date_retrieved = date('Y-m-d H:i:s');
                         $entry->date_updated = date('Y-m-d H:i:s');
                         $entry->save();
                         // Add or update all tags for this entry
                         if (Mf2\hasProp($entry_mf2, 'category')) {
                             $entry_tags = array_unique(array_map(function ($c) {
                                 return strtolower(trim($c, '#'));
                             }, $entry_mf2['properties']['category']));
                             foreach ($entry_tags as $tag) {
                                 if (!ORM::for_table('entry_tags')->where('entry_id', $entry->id)->where('tag', $tag)->find_one()) {
                                     $et = ORM::for_table('entry_tags')->create();
                                     $et->entry_id = $entry->id;
                                     $et->tag = $tag;
                                     $et->save();
                                 }
                             }
                         } else {
                             $entry_tags = array();
                         }
                         // TODO: Remove tags that are no longer found in the entry
                         // Add syndication URLs
                         if (Mf2\hasProp($entry_mf2, 'syndication')) {
                             $syndications = array_unique($entry_mf2['properties']['syndication']);
                             foreach ($syndications as $syn) {
                                 if (!ORM::for_table('entry_syndications')->where('entry_id', $entry->id)->where('syndication_url', $syn)->find_one()) {
                                     $es = ORM::for_table('entry_syndications')->create();
                                     $es->entry_id = $entry->id;
                                     $es->syndication_url = $syn;
                                     $es->save();
                                 }
                             }
                         }
                         // TODO: Remove urls that are no longer found in the entry
                         // Run through all the channels that have this feed and add the entry to each channel
                         $sources = ORM::for_table('channel_sources')->where('feed_id', $feed_id)->find_many();
                         foreach ($sources as $source) {
                             $add = false;
                             if ($source->filter) {
                                 // The filter is a comma-separated list of terms; the entry is added when any
                                 // term appears as a whole word in its text or matches one of its tags.
                                 $tags = explode(',', $source->filter);
                                 foreach ($tags as $tag) {
                                     // Escape the term so it is matched literally and cannot break the pattern.
                                     if (preg_match('/\\b' . preg_quote($tag, '/') . '\\b/', $entry->content . "\n" . $entry->name . "\n" . $entry->summary)) {
                                         $add = true;
                                     }
                                     if (in_array(strtolower($tag), $entry_tags)) {
                                         $add = true;
                                     }
                                 }
                             } else {
                                 // No filter: every entry of the feed goes into the channel.
                                 $add = true;
                             }
                             if ($add) {
                                 $ce = ORM::for_table('channel_entries')->where('channel_id', $source->channel_id)->where('entry_id', $entry->id)->find_one();
                                 if (!$ce) {
                                     $ce = ORM::for_table('channel_entries')->create();
                                     $ce->channel_id = $source->channel_id;
                                     $ce->entry_id = $entry->id;
                                 }
                                 $ce->entry_published = $entry->date_published;
                                 $ce->date_created = date('Y-m-d H:i:s');
                                 $ce->save();
                                 echo "Adding to channel\n";
                             }
                         }
                     } else {
                         // Bad response returned, might be 410 deleted
                         // TODO: Figure out if it's a deleted post or just temporary error
                     }
                 } else {
                     echo "No URL was found for this entry\n";
                 }
             }
         }
         $feed->last_retrieved = date('Y-m-d H:i:s');
     } catch (Exception $e) {
         echo "Error processing feed!\n";
         echo $e->getMessage() . "\n";
         echo $e->getTraceAsString() . "\n";
     }
     // Mark complete. This runs after both the success path and the caught-exception path.
     $feed->refresh_in_progress = 0;
     $feed->save();
 }