/**
 * Parse a document for microformats2 data and flatten its first h-entry.
 *
 * The content is parsed with the Twitter shim when the host extracted from
 * $url is 'twitter.com', and with the generic Mf2 parser otherwise. Every
 * property of the first h-entry is reduced to plaintext; published, updated,
 * name, content and summary are then overwritten by their dedicated
 * mf2_cleaner getters, and the author (when found) becomes a nested array
 * of plaintext properties.
 *
 * @param string $content Markup handed to the Mf2 parser (assumed to be HTML; confirm against callers).
 * @param string $url     URL of the document; selects the parser and is passed through to it.
 *
 * @return array Property name => value map with all falsy values removed;
 *               empty when no usable h-entry is found.
 */
private static function mf2parse($content, $url)
 {
     $data = array();
     $host = extract_domain_name($url);
     switch ($host) {
         case 'twitter.com':
             $parsed = Mf2\Shim\parseTwitter($content, $url);
             break;
         default:
             $parsed = Mf2\parse($content, $url);
     }

     // Guard clauses: anything short of a valid first h-entry yields no data.
     if (!mf2_cleaner::isMicroformatCollection($parsed)) {
         return $data;
     }
     $entries = mf2_cleaner::findMicroformatsByType($parsed, 'h-entry');
     if (!$entries) {
         return $data;
     }
     $entry = $entries[0];
     if (!mf2_cleaner::isMicroformat($entry)) {
         return $data;
     }

     // Generic pass: every property present on the entry, as plaintext.
     foreach ($entry['properties'] as $key => $value) {
         $data[$key] = mf2_cleaner::getPlaintext($entry, $key);
     }

     // Dedicated getters take precedence over the generic plaintext values.
     $data['published'] = mf2_cleaner::getPublished($entry);
     $data['updated'] = mf2_cleaner::getUpdated($entry);
     $data['name'] = mf2_cleaner::getPlaintext($entry, 'name');
     $data['content'] = mf2_cleaner::getHtml($entry, 'content');
     $data['summary'] = mf2_cleaner::getHtml($entry, 'summary');

     // Strip URLs out of the name. Only run the regex when a name was actually
     // found: a missing name would otherwise hand a non-string subject to
     // preg_replace()/trim(), which is deprecated for null on PHP 8.1+.
     if (is_string($data['name'])) {
         $data['name'] = trim(preg_replace('/https?:\\/\\/([^ ]+|$)/', '', $data['name']));
     }

     $author = mf2_cleaner::getAuthor($entry);
     if ($author) {
         $data['author'] = array();
         // Validate the author the same way the entry is validated before
         // touching ['properties']. A non-microformat author (e.g. a plain
         // string) leaves the array empty, so the final array_filter() drops
         // the key instead of raising warnings or a TypeError.
         if (mf2_cleaner::isMicroformat($author)) {
             foreach ($author['properties'] as $key => $value) {
                 $data['author'][$key] = mf2_cleaner::getPlaintext($author, $key);
             }
         }
         $data['author'] = array_filter($data['author']);
     }

     // Drop every falsy value (null, '', empty arrays, ...) from the result.
     return array_filter($data);
 }
示例#2
0
 /**
  * Parse Twitter markup with the Mf2 Twitter shim and purify the content HTML
  * of the first parsed item.
  *
  * When the first item carries non-empty content HTML, that HTML is run
  * through HTMLPurifier restricted to a[href], img[src], p and br, with
  * URI.Base set to $url and URI.MakeAbsolute enabled (presumably so relative
  * links resolve against the page URL). Otherwise the shim output is returned
  * untouched.
  *
  * @param string $html Markup to parse.
  * @param string $url  URL of the document; passed to the shim and used as the purifier's URI base.
  *
  * @return mixed The structure returned by \Mf2\Shim\parseTwitter(), with the
  *               first item's content HTML purified when present.
  */
 private static function parseTwitter($html, $url)
 {
     $parsed = \Mf2\Shim\parseTwitter($html, $url);

     // Nothing to sanitise: hand the shim output back unchanged.
     if (empty($parsed['items'][0]['properties']['content'][0]['html'])) {
         return $parsed;
     }

     $rawHtml = $parsed['items'][0]['properties']['content'][0]['html'];

     // Purifier directives, applied in the listed order.
     $directives = array(
         'URI.Base' => $url,
         'URI.MakeAbsolute' => true,
         'HTML.Allowed' => 'a[href],img[src],p,br',
     );
     $purifierConfig = \HTMLPurifier_Config::createDefault();
     foreach ($directives as $directive => $setting) {
         $purifierConfig->set($directive, $setting);
     }

     $sanitizer = new \HTMLPurifier($purifierConfig);
     $parsed['items'][0]['properties']['content'][0]['html'] = $sanitizer->purify($rawHtml);

     return $parsed;
 }