/** * @brief Process atom feed and update anything/everything we might need to update. * * @param array $xml * The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * @param $importer * The contact_record (joined to user_record) of the local user who owns this * relationship. It is this person's stuff that is going to be updated. * @param $contact * The person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * * With $pass = 1, we only pull parent items out of the stream. * * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, $pass = 0) { require_once 'library/simplepie/simplepie.inc'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $sys_expire = intval(get_config('system', 'default_expire_days')); $chn_expire = intval($importer['channel_expire_days']); $expire_days = $sys_expire; if ($chn_expire != 0 && $chn_expire < $sys_expire) { $expire_days = $chn_expire; } // logger('expire_days: ' . $expire_days); $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $mid = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id'])); if ($r) { $item = $r[0]; if (!intval($item['item_deleted'])) { logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG); drop_item($item['id'], false); } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG); $items = $feed->get_items(); foreach ($items as $item) { $is_reply = false; $item_id = base64url_encode($item->get_id()); logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']); } if ($is_reply) { if ($pass == 1) { continue; } // Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if ($contact['xchan_network'] === 'rss') { $datarray['public_policy'] = 'specific'; $datarray['comment_policy'] = 'none'; } if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $parent_mid; $datarray['aid'] = $importer['channel_account_id']; $datarray['uid'] = $importer['channel_id']; logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if ($contact['xchan_network'] === 'rss') { $datarray['public_policy'] = 'specific'; $datarray['comment_policy'] = 'none'; } if (is_array($contact)) { if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } } if (!x($author, 'author_name') || !x($author, 'author_link')) { logger('consume_feed: no author information! ' . print_r($author, true)); continue; } $datarray['author_xchan'] = ''; if (activity_match($datarray['verb'], ACTIVITY_FOLLOW) && $datarray['obj_type'] === ACTIVITY_OBJ_PERSON) { $cb = array('item' => $datarray, 'channel' => $importer, 'xchan' => null, 'author' => $author, 'caught' => false); call_hooks('follow_from_feed', $cb); if ($cb['caught']) { if ($cb['return_code']) { http_status_exit($cb['return_code']); } continue; } } if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; if (array_key_exists('created', $datarray) && $datarray['created'] != NULL_DATE && $expire_days) { $t1 = $datarray['created']; $t2 = datetime_convert('UTC', 'UTC', 'now - ' . $expire_days . 'days'); if ($t1 < $t2) { logger('feed content older than expiration. Ignoring.', LOGGER_DEBUG, LOG_INFO); continue; } } $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $item_id; $datarray['uid'] = $importer['channel_id']; $datarray['aid'] = $importer['channel_account_id']; if (!link_compare($author['owner_link'], $contact['xchan_url'])) { logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $author['owner_name'] = $contact['name']; $author['owner_link'] = $contact['url']; $author['owner_avatar'] = $contact['thumb']; } if (!post_is_importable($datarray, $contact)) { continue; } logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG); logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } } } }
/** * @brief Process atom feed and update anything/everything we might need to update. * * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or * might not) try and subscribe to it. * $datedir sorts in reverse order * * @param array $xml * The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * @param $importer * The contact_record (joined to user_record) of the local user who owns this * relationship. It is this person's stuff that is going to be updated. * @param $contact * The person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * * With $pass = 1, we only pull parent items out of the stream. * * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, $pass = 0) { require_once 'library/simplepie/simplepie.inc'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $mid = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id'])); if ($r) { $item = $r[0]; if (!($item['item_restrict'] & ITEM_DELETED)) { logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG); drop_item($item['id'], false); } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG); $items = $feed->get_items(); foreach ($items as $item) { $is_reply = false; $item_id = base64url_encode($item->get_id()); logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']); } if ($is_reply) { if ($pass == 1) { continue; } // Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $parent_mid; $datarray['uid'] = $importer['channel_id']; logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = base64url_encode($item->get_id()); $author = array(); $datarray = get_atom_elements($feed, $item, $author); if (is_array($contact)) { if (!x($author, 'author_name') || $author['author_is_feed']) { $author['author_name'] = $contact['xchan_name']; } if (!x($author, 'author_link') || $author['author_is_feed']) { $author['author_link'] = $contact['xchan_url']; } if (!x($author, 'author_photo') || $author['author_is_feed']) { $author['author_photo'] = $contact['xchan_photo_m']; } } if (!x($author, 'author_name') || !x($author, 'author_link')) { logger('consume_feed: no author information! ' . print_r($author, true)); continue; } $datarray['author_xchan'] = ''; if ($author['author_link'] != $contact['xchan_url']) { $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo']))); if ($x) { $datarray['author_xchan'] = $x; } } if (!$datarray['author_xchan']) { $datarray['author_xchan'] = $contact['xchan_hash']; } $datarray['owner_xchan'] = $contact['xchan_hash']; $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id'])); // Update content if 'updated' changes if ($r) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } update_feed_item($importer['channel_id'], $datarray); } continue; } $datarray['parent_mid'] = $item_id; $datarray['uid'] = $importer['channel_id']; if (!link_compare($author['owner_link'], $contact['xchan_url'])) { logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $author['owner_name'] = $contact['name']; $author['owner_link'] = $contact['url']; $author['owner_avatar'] = $contact['thumb']; } logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG); logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA); $xx = item_store($datarray); $r = $xx['item_id']; continue; } } } }
function import_author_xchan($x) { $arr = array('xchan' => $x, 'xchan_hash' => ''); call_hooks('import_author_xchan', $arr); if ($arr['xchan_hash']) { return $arr['xchan_hash']; } if (!array_key_exists('network', $x) || $x['network'] === 'zot') { $y = import_author_zot($x); } if (!$y) { $y = import_author_diaspora($x); } if ($x['network'] === 'rss') { $y = import_author_rss($x); } if ($x['network'] === 'unknown') { $y = import_author_unknown($x); } return $y ? $y : false; }