/** * @brief Process atom feed and update anything/everything we might need to update. * * @param array $xml * The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * @param $importer * The contact_record (joined to user_record) of the local user who owns this * relationship. It is this person's stuff that is going to be updated. * @param $contact * The person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * * With $pass = 1, we only pull parent items out of the stream. * * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, $pass = 0) { require_once 'library/simplepie/simplepie.inc'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $sys_expire = intval(get_config('system', 'default_expire_days')); $chn_expire = intval($importer['channel_expire_days']); $expire_days = $sys_expire; if ($chn_expire != 0 && $chn_expire < $sys_expire) { $expire_days = $chn_expire; } // logger('expire_days: ' . $expire_days); $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $mid = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id'])); if ($r) { $item = $r[0]; if (!intval($item['item_deleted'])) { logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG); drop_item($item['id'], false); } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG); $items = $feed->get_items(); foreach ($items as $item) { $is_reply = false; $item_id = base64url_encode($item->get_id()); logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']); } if ($is_reply) { if ($pass == 1) { continue; } // Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if ($contact['xchan_network'] === 'rss') { $datarray['public_policy'] = 'specific'; $datarray['comment_policy'] = 'none'; } if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $parent_mid; $datarray['aid'] = $importer['channel_account_id']; $datarray['uid'] = $importer['channel_id']; logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if ($contact['xchan_network'] === 'rss') { $datarray['public_policy'] = 'specific'; $datarray['comment_policy'] = 'none'; } if (is_array($contact)) { if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } } if (!x($author, 'author_name') || !x($author, 'author_link')) { logger('consume_feed: no author information! ' . print_r($author, true)); continue; } $datarray['author_xchan'] = ''; if (activity_match($datarray['verb'], ACTIVITY_FOLLOW) && $datarray['obj_type'] === ACTIVITY_OBJ_PERSON) { $cb = array('item' => $datarray, 'channel' => $importer, 'xchan' => null, 'author' => $author, 'caught' => false); call_hooks('follow_from_feed', $cb); if ($cb['caught']) { if ($cb['return_code']) { http_status_exit($cb['return_code']); } continue; } } if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; if (array_key_exists('created', $datarray) && $datarray['created'] != NULL_DATE && $expire_days) { $t1 = $datarray['created']; $t2 = datetime_convert('UTC', 'UTC', 'now - ' . $expire_days . 'days'); if ($t1 < $t2) { logger('feed content older than expiration. Ignoring.', LOGGER_DEBUG, LOG_INFO); continue; } } $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $item_id; $datarray['uid'] = $importer['channel_id']; $datarray['aid'] = $importer['channel_account_id']; if (!link_compare($author['owner_link'], $contact['xchan_url'])) { logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $author['owner_name'] = $contact['name']; $author['owner_link'] = $contact['url']; $author['owner_avatar'] = $contact['thumb']; } if (!post_is_importable($datarray, $contact)) { continue; } logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG); logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } } } }
/** * @brief Process atom feed and update anything/everything we might need to update. * * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or * might not) try and subscribe to it. * $datedir sorts in reverse order * * @param array $xml * The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * @param $importer * The contact_record (joined to user_record) of the local user who owns this * relationship. It is this person's stuff that is going to be updated. * @param $contact * The person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * * With $pass = 1, we only pull parent items out of the stream. * * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, $pass = 0) { require_once 'library/simplepie/simplepie.inc'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $mid = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id'])); if ($r) { $item = $r[0]; if (!($item['item_restrict'] & ITEM_DELETED)) { logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG); drop_item($item['id'], false); } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG); $items = $feed->get_items(); foreach ($items as $item) { $is_reply = false; $item_id = base64url_encode($item->get_id()); logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']); } if ($is_reply) { if ($pass == 1) { continue; } // Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $parent_mid; $datarray['uid'] = $importer['channel_id']; logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if (is_array($contact)) { if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } } if (!x($author, 'author_name') || !x($author, 'author_link')) { logger('consume_feed: no author information! ' . print_r($author, true)); continue; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $item_id; $datarray['uid'] = $importer['channel_id']; if (!link_compare($author['owner_link'], $contact['xchan_url'])) { logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $author['owner_name'] = $contact['name']; $author['owner_link'] = $contact['url']; $author['owner_avatar'] = $contact['thumb']; } logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG); logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } } } }
function salmon_post(&$a) { $sys_disabled = true; if (!get_config('system', 'disable_discover_tab')) { $sys_disabled = get_config('system', 'disable_diaspora_discover_tab'); } $sys = $sys_disabled ? null : get_sys_channel(); if (App::$data['salmon_test']) { $xml = file_get_contents('test.xml'); App::$argv[1] = 'gnusoc'; } else { $xml = file_get_contents('php://input'); } logger('mod-salmon: new salmon ' . $xml, LOGGER_DATA); $nick = argc() > 1 ? trim(argv(1)) : ''; // $mentions = ((App::$argc > 2 && App::$argv[2] === 'mention') ? true : false); $importer = channelx_by_nick($nick); if (!$importer) { http_status_exit(500); } // @fixme check that this channel has the GNU-Social protocol enabled // parse the xml $dom = simplexml_load_string($xml, 'SimpleXMLElement', 0, NAMESPACE_SALMON_ME); // figure out where in the DOM tree our data is hiding if ($dom->provenance->data) { $base = $dom->provenance; } elseif ($dom->env->data) { $base = $dom->env; } elseif ($dom->data) { $base = $dom; } if (!$base) { logger('mod-salmon: unable to locate salmon data in xml '); http_status_exit(400); } logger('data: ' . $xml, LOGGER_DATA); // Stash the signature away for now. We have to find their key or it won't be good for anything. logger('sig: ' . $base->sig); $signature = base64url_decode($base->sig); logger('sig: ' . $base->sig . ' decoded length: ' . strlen($signature)); // unpack the data // strip whitespace so our data element will return to one big base64 blob $data = str_replace(array(" ", "\t", "\r", "\n"), array("", "", "", ""), $base->data); // stash away some other stuff for later $type = $base->data[0]->attributes()->type[0]; $keyhash = $base->sig[0]->attributes()->keyhash[0]; $encoding = $base->encoding; $alg = $base->alg; // Salmon magic signatures have evolved and there is no way of knowing ahead of time which // flavour we have. We'll try and verify it regardless. $stnet_signed_data = $data; $signed_data = $data . '.' . base64url_encode($type, false) . '.' . base64url_encode($encoding, false) . '.' . base64url_encode($alg, false); $compliant_format = str_replace('=', '', $signed_data); // decode the data $data = base64url_decode($data); logger('decoded: ' . $data, LOGGER_DATA); // GNU-Social doesn't send a legal Atom feed over salmon, only an Atom entry. Unfortunately // our parser is a bit strict about compliance so we'll insert just enough of a feed // tag to trick it into believing it's a compliant feed. if (!strstr($data, '<feed')) { $data = str_replace('<entry ', '<feed xmlns="http://www.w3.org/2005/Atom"><entry ', $data); $data .= '</feed>'; } $datarray = process_salmon_feed($data, $importer); $author_link = $datarray['author']['author_link']; $item = $datarray['item']; if (!$author_link) { logger('mod-salmon: Could not retrieve author URI.'); http_status_exit(400); } $r = q("select xchan_pubkey from xchan where xchan_guid = '%s' limit 1", dbesc($author_link)); if ($r) { $pubkey = $r[0]['xchan_pubkey']; } else { // Once we have the author URI, go to the web and try to find their public key logger('mod-salmon: Fetching key for ' . $author_link); $pubkey = get_salmon_key($author_link, $keyhash); if (!$pubkey) { logger('mod-salmon: Could not retrieve author key.'); http_status_exit(400); } logger('mod-salmon: key details: ' . print_r($pubkey, true), LOGGER_DEBUG); } $pubkey = rtrim($pubkey); // We should have everything we need now. Let's see if it verifies. $verify = rsa_verify($signed_data, $signature, $pubkey); if (!$verify) { logger('mod-salmon: message did not verify using protocol. Trying padding hack.'); $verify = rsa_verify($compliant_format, $signature, $pubkey); } if (!$verify) { logger('mod-salmon: message did not verify using padding. Trying old statusnet hack.'); $verify = rsa_verify($stnet_signed_data, $signature, $pubkey); } if (!$verify) { logger('mod-salmon: Message did not verify. Discarding.'); http_status_exit(400); } logger('mod-salmon: Message verified.'); /* lookup the author */ if (!$datarray['author']['author_link']) { logger('unable to probe - no author identifier'); http_status_exit(400); } $r = q("select * from xchan where xchan_guid = '%s' limit 1", dbesc($datarray['author']['author_link'])); if (!$r) { if (discover_by_webbie($datarray['author']['author_link'])) { $r = q("select * from xchan where xchan_guid = '%s' limit 1", dbesc($datarray['author']['author_link'])); if (!$r) { logger('discovery failed'); http_status_exit(400); } } } $xchan = $r[0]; /* * * If we reached this point, the message is good. Now let's figure out if the author is allowed to send us stuff. * */ // First check for and process follow activity if (activity_match($item['verb'], ACTIVITY_FOLLOW) && $item['obj_type'] === ACTIVITY_OBJ_PERSON) { $cb = array('item' => $item, 'channel' => $importer, 'xchan' => $xchan, 'author' => $datarray['author'], 'caught' => false); call_hooks('follow_from_feed', $cb); if ($cb['caught']) { http_status_exit(200); } } $m = parse_url($xchan['xchan_url']); if ($m) { $host = $m['scheme'] . '://' . $m['host']; q("update site set site_dead = 0, site_update = '%s' where site_type = %d and site_url = '%s'", dbesc(datetime_convert()), intval(SITE_TYPE_NOTZOT), dbesc($url)); if (!check_siteallowed($host)) { logger('blacklisted site: ' . $host); http_status_exit(403, 'permission denied.'); } } $importer_arr = array($importer); if (!$sys_disabled) { $sys['system'] = true; $importer_arr[] = $sys; } unset($datarray['author']); // we will only set and return the status code for operations // on an importer channel and not for the sys channel $status = 200; foreach ($importer_arr as $importer) { if (!$importer['system']) { $allowed = get_pconfig($importer['channel_id'], 'system', 'gnusoc_allowed'); if (!intval($allowed)) { logger('mod-salmon: disallowed for channel ' . $importer['channel_name']); $status = 202; continue; } } // Otherwise check general permissions if (!perm_is_allowed($importer['channel_id'], $xchan['xchan_hash'], 'send_stream') && !$importer['system']) { // check for and process ostatus autofriend // ... fixme // otherwise logger('mod-salmon: Ignoring this author.'); $status = 202; continue; } $parent_item = null; if ($item['parent_mid']) { $r = q("select * from item where mid = '%s' and uid = %d limit 1", dbesc($item['parent_mid']), intval($importer['channel_id'])); if (!$r) { logger('mod-salmon: parent item not found.'); if (!$importer['system']) { $status = 202; } continue; } $parent_item = $r[0]; } if (!$item['author_xchan']) { $item['author_xchan'] = $xchan['xchan_hash']; } $item['owner_xchan'] = $parent_item ? $parent_item['owner_xchan'] : $xchan['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item['mid']), intval($importer['channel_id'])); // Update content if 'updated' changes // currently a no-op @fixme if ($r) { if (x($item, 'edited') !== false && datetime_convert('UTC', 'UTC', $item['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $item['edited']) > $r[0]['edited']) { update_feed_item($importer['channel_id'], $item); } } if (!$importer['system']) { $status = 200; } continue; } if (!$item['parent_mid']) { $item['parent_mid'] = $item['mid']; } $item['aid'] = $importer['channel_account_id']; $item['uid'] = $importer['channel_id']; logger('consume_feed: ' . print_r($item, true), LOGGER_DATA); $xx = item_store($item); $r = $xx['item_id']; if (!$importer['system']) { $status = 200; } continue; } http_status_exit($status); }