/**
 * Parse a document for microformats and flatten its first h-entry into a
 * simple associative array.
 *
 * Every property of the entry is first copied as plaintext; `published`,
 * `updated`, `name`, `content` and `summary` are then overwritten with the
 * values from their dedicated mf2_cleaner accessors, and `author` (when one
 * is found) is replaced by a nested array of the author's plaintext
 * properties.
 *
 * Values PHP considers empty (null, '', '0', 0, false, array()) are dropped
 * by array_filter() before returning, so callers must check for key
 * existence.
 *
 * @param mixed  $content Markup handed to the microformats parser unchanged
 *                        (presumably an HTML string; verify against callers).
 * @param string $url     URL of the document; selects the parser by host and
 *                        is passed to the parser as well.
 *
 * @return array Flattened entry data, or an empty array when no usable
 *               h-entry is found.
 */
private static function mf2parse($content, $url) {
    $data = array();
    $host = extract_domain_name($url);

    switch ($host) {
        case 'twitter.com':
            // Use this class's own wrapper rather than the raw shim so the
            // tweet HTML goes through the same HTMLPurifier pass as every
            // other caller of parseTwitter().
            $parsed = self::parseTwitter($content, $url);
            break;
        default:
            $parsed = Mf2\parse($content, $url);
    }

    if (!mf2_cleaner::isMicroformatCollection($parsed)) {
        return array();
    }

    $entries = mf2_cleaner::findMicroformatsByType($parsed, 'h-entry');
    // Only the first h-entry is considered; bail out if index 0 is missing
    // or empty instead of reading an undefined offset.
    if (empty($entries[0]) || !mf2_cleaner::isMicroformat($entries[0])) {
        return array();
    }
    $entry = $entries[0];

    if (isset($entry['properties']) && is_array($entry['properties'])) {
        foreach ($entry['properties'] as $key => $value) {
            $data[$key] = mf2_cleaner::getPlaintext($entry, $key);
        }
    }

    $data['published'] = mf2_cleaner::getPublished($entry);
    $data['updated']   = mf2_cleaner::getUpdated($entry);
    $data['content']   = mf2_cleaner::getHtml($entry, 'content');
    $data['summary']   = mf2_cleaner::getHtml($entry, 'summary');

    // Strip URLs out of the name. Only scalar names are processed: passing
    // null to preg_replace()/trim() is deprecated since PHP 8.1, and an
    // array would make trim() fail.
    $name = mf2_cleaner::getPlaintext($entry, 'name');
    if (is_scalar($name)) {
        $name     = (string) $name;
        $stripped = preg_replace('/https?:\\/\\/([^ ]+|$)/', '', $name);
        // preg_replace() returns null on a PCRE error; keep the original
        // name in that case rather than discarding it.
        $name = trim(null === $stripped ? $name : $stripped);
    }
    $data['name'] = $name;

    $author = mf2_cleaner::getAuthor($entry);
    if ($author) {
        // A found author always replaces the plaintext 'author' copied by
        // the property loop above.
        $data['author'] = array();
        // Guard the offset access: if the author is not a structured
        // microformat (e.g. a bare string), reading ['properties'] warns on
        // PHP 7 and throws a TypeError on PHP 8.
        if (is_array($author) && isset($author['properties']) && is_array($author['properties'])) {
            foreach ($author['properties'] as $key => $value) {
                $data['author'][$key] = mf2_cleaner::getPlaintext($author, $key);
            }
            $data['author'] = array_filter($data['author']);
        }
    }

    return array_filter($data);
}
/**
 * Run the Mf2 Twitter shim over a page and purify the first item's content
 * HTML.
 *
 * When the first parsed item has a non-empty `content[0]['html']`, that
 * markup is passed through HTMLPurifier configured with `URI.Base` set to
 * $url, `URI.MakeAbsolute` enabled, and `HTML.Allowed` limited to
 * `a[href]`, `img[src]`, `p` and `br`; the purified markup is written back
 * in place. Otherwise the shim's result is returned untouched.
 *
 * @param mixed  $html Markup forwarded to \Mf2\Shim\parseTwitter().
 * @param string $url  URL forwarded to the shim and used as the purifier's
 *                     URI base.
 *
 * @return mixed The shim's parse result, with the first item's content HTML
 *               purified when present.
 */
private static function parseTwitter($html, $url) {
    $parsed = \Mf2\Shim\parseTwitter($html, $url);

    // Nothing to purify: hand back the shim output as-is.
    if (empty($parsed['items'][0]['properties']['content'][0]['html'])) {
        return $parsed;
    }

    $directives = array(
        'URI.Base'         => $url,
        'URI.MakeAbsolute' => true,
        'HTML.Allowed'     => 'a[href],img[src],p,br',
    );

    $settings = \HTMLPurifier_Config::createDefault();
    foreach ($directives as $directive => $setting) {
        $settings->set($directive, $setting);
    }

    $sanitizer = new \HTMLPurifier($settings);
    $rawMarkup = $parsed['items'][0]['properties']['content'][0]['html'];
    $parsed['items'][0]['properties']['content'][0]['html'] = $sanitizer->purify($rawMarkup);

    return $parsed;
}