/**
 * @brief Process atom feed and update anything/everything we might need to update.
 *
 * @param array $xml
 *   The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds.
 * @param $importer
 *   The contact_record (joined to user_record) of the local user who owns this
 *   relationship. It is this person's stuff that is going to be updated.
 * @param $contact
 *   The person who is sending us stuff. If not set, we MAY be processing a "follow" activity
 *   from an external network and MAY create an appropriate contact record. Otherwise, we MUST
 *   have a contact record.
 * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been
 *   imported prior to its children being seen in the stream unless we are certain
 *   of how the feed is arranged/ordered.
 *  * With $pass = 1, we only pull parent items out of the stream.
 *  * With $pass = 2, we only pull children (comments/likes).
 *
 * So running this twice, first with pass 1 and then with pass 2 will do the right
 * thing regardless of feed ordering. This won't be adequate in a fully-threaded
 * model where comments can have sub-threads. That would require some massive sorting
 * to get all the feed items into a mostly linear ordering, and might still require
 * recursion.
 */
function consume_feed($xml, $importer, &$contact, $pass = 0)
{
    require_once 'library/simplepie/simplepie.inc';
    if (!strlen($xml)) {
        logger('consume_feed: empty input');
        return;
    }
    $sys_expire = intval(get_config('system', 'default_expire_days'));
    $chn_expire = intval($importer['channel_expire_days']);
    $expire_days = $sys_expire;
    if ($chn_expire != 0 && $chn_expire < $sys_expire) {
        $expire_days = $chn_expire;
    }
    // logger('expire_days: ' . $expire_days);
    $feed = new SimplePie();
    $feed->set_raw_data($xml);
    $feed->init();
    if ($feed->error()) {
        logger('consume_feed: Error parsing XML: ' . $feed->error());
    }
    $permalink = $feed->get_permalink();
    // Check at the feed level for updated contact name and/or photo
    // process any deleted entries
    $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry');
    if (is_array($del_entries) && count($del_entries) && $pass != 2) {
        foreach ($del_entries as $dentry) {
            $deleted = false;
            if (isset($dentry['attribs']['']['ref'])) {
                $mid = $dentry['attribs']['']['ref'];
                $deleted = true;
                if (isset($dentry['attribs']['']['when'])) {
                    $when = $dentry['attribs']['']['when'];
                    $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s');
                } else {
                    $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s');
                }
            }
            if ($deleted && is_array($contact)) {
                $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id']));
                if ($r) {
                    $item = $r[0];
                    if (!intval($item['item_deleted'])) {
                        logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG);
                        drop_item($item['id'], false);
                    }
                }
            }
        }
    }
    // Now process the feed
    if ($feed->get_item_quantity()) {
        logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG);
        $items = $feed->get_items();
        foreach ($items as $item) {
            $is_reply = false;
            $item_id = base64url_encode($item->get_id());
            logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG);
            $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to');
            if (isset($rawthread[0]['attribs']['']['ref'])) {
                $is_reply = true;
                $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']);
            }
            if ($is_reply) {
                if ($pass == 1) {
                    continue;
                }
                // Have we seen it? If not, import it.
                $item_id = base64url_encode($item->get_id());
                $author = array();
                $datarray = get_atom_elements($feed, $item, $author);
                if ($contact['xchan_network'] === 'rss') {
                    $datarray['public_policy'] = 'specific';
                    $datarray['comment_policy'] = 'none';
                }
                if (!x($author, 'author_name') || $author['author_is_feed']) {
                    $author['author_name'] = $contact['xchan_name'];
                }
                if (!x($author, 'author_link') || $author['author_is_feed']) {
                    $author['author_link'] = $contact['xchan_url'];
                }
                if (!x($author, 'author_photo') || $author['author_is_feed']) {
                    $author['author_photo'] = $contact['xchan_photo_m'];
                }
                $datarray['author_xchan'] = '';
                if ($author['author_link'] != $contact['xchan_url']) {
                    $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo'])));
                    if ($x) {
                        $datarray['author_xchan'] = $x;
                    }
                }
                if (!$datarray['author_xchan']) {
                    $datarray['author_xchan'] = $contact['xchan_hash'];
                }
                $datarray['owner_xchan'] = $contact['xchan_hash'];
                $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id']));
                // Update content if 'updated' changes
                if ($r) {
                    if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) {
                        // do not accept (ignore) an earlier edit than one we currently have.
                        if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) {
                            continue;
                        }
                        update_feed_item($importer['channel_id'], $datarray);
                    }
                    continue;
                }
                $datarray['parent_mid'] = $parent_mid;
                $datarray['aid'] = $importer['channel_account_id'];
                $datarray['uid'] = $importer['channel_id'];
                logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA);
                $xx = item_store($datarray);
                $r = $xx['item_id'];
                continue;
            } else {
                // Head post of a conversation. Have we seen it? If not, import it.
                $item_id = base64url_encode($item->get_id());
                $author = array();
                $datarray = get_atom_elements($feed, $item, $author);
                if ($contact['xchan_network'] === 'rss') {
                    $datarray['public_policy'] = 'specific';
                    $datarray['comment_policy'] = 'none';
                }
                if (is_array($contact)) {
                    if (!x($author, 'author_name') || $author['author_is_feed']) {
                        $author['author_name'] = $contact['xchan_name'];
                    }
                    if (!x($author, 'author_link') || $author['author_is_feed']) {
                        $author['author_link'] = $contact['xchan_url'];
                    }
                    if (!x($author, 'author_photo') || $author['author_is_feed']) {
                        $author['author_photo'] = $contact['xchan_photo_m'];
                    }
                }
                if (!x($author, 'author_name') || !x($author, 'author_link')) {
                    logger('consume_feed: no author information! ' . print_r($author, true));
                    continue;
                }
                $datarray['author_xchan'] = '';
                if (activity_match($datarray['verb'], ACTIVITY_FOLLOW) && $datarray['obj_type'] === ACTIVITY_OBJ_PERSON) {
                    $cb = array('item' => $datarray, 'channel' => $importer, 'xchan' => null, 'author' => $author, 'caught' => false);
                    call_hooks('follow_from_feed', $cb);
                    if ($cb['caught']) {
                        if ($cb['return_code']) {
                            http_status_exit($cb['return_code']);
                        }
                        continue;
                    }
                }
                if ($author['author_link'] != $contact['xchan_url']) {
                    $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo'])));
                    if ($x) {
                        $datarray['author_xchan'] = $x;
                    }
                }
                if (!$datarray['author_xchan']) {
                    $datarray['author_xchan'] = $contact['xchan_hash'];
                }
                $datarray['owner_xchan'] = $contact['xchan_hash'];
                if (array_key_exists('created', $datarray) && $datarray['created'] != NULL_DATE && $expire_days) {
                    $t1 = $datarray['created'];
                    $t2 = datetime_convert('UTC', 'UTC', 'now - ' . $expire_days . 'days');
                    if ($t1 < $t2) {
                        logger('feed content older than expiration. Ignoring.', LOGGER_DEBUG, LOG_INFO);
                        continue;
                    }
                }
                $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id']));
                // Update content if 'updated' changes
                if ($r) {
                    if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) {
                        // do not accept (ignore) an earlier edit than one we currently have.
                        if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) {
                            continue;
                        }
                        update_feed_item($importer['channel_id'], $datarray);
                    }
                    continue;
                }
                $datarray['parent_mid'] = $item_id;
                $datarray['uid'] = $importer['channel_id'];
                $datarray['aid'] = $importer['channel_account_id'];
                if (!link_compare($author['owner_link'], $contact['xchan_url'])) {
                    logger('consume_feed: Correcting item owner.', LOGGER_DEBUG);
                    $author['owner_name'] = $contact['name'];
                    $author['owner_link'] = $contact['url'];
                    $author['owner_avatar'] = $contact['thumb'];
                }
                if (!post_is_importable($datarray, $contact)) {
                    continue;
                }
                logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG);
                logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA);
                $xx = item_store($datarray);
                $r = $xx['item_id'];
                continue;
            }
        }
    }
}
Beispiel #2
0
/**
 * @brief Process atom feed and update anything/everything we might need to update.
 *
 * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or
 *        might not) try and subscribe to it.
 * $datedir sorts in reverse order
 *
 * @param array $xml
 *   The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds.
 * @param $importer
 *   The contact_record (joined to user_record) of the local user who owns this
 *   relationship. It is this person's stuff that is going to be updated.
 * @param $contact
 *   The person who is sending us stuff. If not set, we MAY be processing a "follow" activity
 *   from an external network and MAY create an appropriate contact record. Otherwise, we MUST
 *   have a contact record.
 * @param int $pass by default ($pass = 0) we cannot guarantee that a parent item has been
 *   imported prior to its children being seen in the stream unless we are certain
 *   of how the feed is arranged/ordered.
 *  * With $pass = 1, we only pull parent items out of the stream.
 *  * With $pass = 2, we only pull children (comments/likes).
 *
 * So running this twice, first with pass 1 and then with pass 2 will do the right
 * thing regardless of feed ordering. This won't be adequate in a fully-threaded
 * model where comments can have sub-threads. That would require some massive sorting
 * to get all the feed items into a mostly linear ordering, and might still require
 * recursion.
 */
function consume_feed($xml, $importer, &$contact, $pass = 0)
{
    require_once 'library/simplepie/simplepie.inc';
    if (!strlen($xml)) {
        logger('consume_feed: empty input');
        return;
    }
    $feed = new SimplePie();
    $feed->set_raw_data($xml);
    $feed->init();
    if ($feed->error()) {
        logger('consume_feed: Error parsing XML: ' . $feed->error());
    }
    $permalink = $feed->get_permalink();
    // Check at the feed level for updated contact name and/or photo
    // process any deleted entries
    $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry');
    if (is_array($del_entries) && count($del_entries) && $pass != 2) {
        foreach ($del_entries as $dentry) {
            $deleted = false;
            if (isset($dentry['attribs']['']['ref'])) {
                $mid = $dentry['attribs']['']['ref'];
                $deleted = true;
                if (isset($dentry['attribs']['']['when'])) {
                    $when = $dentry['attribs']['']['when'];
                    $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s');
                } else {
                    $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s');
                }
            }
            if ($deleted && is_array($contact)) {
                $r = q("SELECT * from item where mid = '%s' and author_xchan = '%s' and uid = %d limit 1", dbesc(base64url_encode($mid)), dbesc($contact['xchan_hash']), intval($importer['channel_id']));
                if ($r) {
                    $item = $r[0];
                    if (!($item['item_restrict'] & ITEM_DELETED)) {
                        logger('consume_feed: deleting item ' . $item['id'] . ' mid=' . base64url_decode($item['mid']), LOGGER_DEBUG);
                        drop_item($item['id'], false);
                    }
                }
            }
        }
    }
    // Now process the feed
    if ($feed->get_item_quantity()) {
        logger('consume_feed: feed item count = ' . $feed->get_item_quantity(), LOGGER_DEBUG);
        $items = $feed->get_items();
        foreach ($items as $item) {
            $is_reply = false;
            $item_id = base64url_encode($item->get_id());
            logger('consume_feed: processing ' . $item_id, LOGGER_DEBUG);
            $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to');
            if (isset($rawthread[0]['attribs']['']['ref'])) {
                $is_reply = true;
                $parent_mid = base64url_encode($rawthread[0]['attribs']['']['ref']);
            }
            if ($is_reply) {
                if ($pass == 1) {
                    continue;
                }
                // Have we seen it? If not, import it.
                $item_id = base64url_encode($item->get_id());
                $author = array();
                $datarray = get_atom_elements($feed, $item, $author);
                if (!x($author, 'author_name') || $author['author_is_feed']) {
                    $author['author_name'] = $contact['xchan_name'];
                }
                if (!x($author, 'author_link') || $author['author_is_feed']) {
                    $author['author_link'] = $contact['xchan_url'];
                }
                if (!x($author, 'author_photo') || $author['author_is_feed']) {
                    $author['author_photo'] = $contact['xchan_photo_m'];
                }
                $datarray['author_xchan'] = '';
                if ($author['author_link'] != $contact['xchan_url']) {
                    $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo'])));
                    if ($x) {
                        $datarray['author_xchan'] = $x;
                    }
                }
                if (!$datarray['author_xchan']) {
                    $datarray['author_xchan'] = $contact['xchan_hash'];
                }
                $datarray['owner_xchan'] = $contact['xchan_hash'];
                $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id']));
                // Update content if 'updated' changes
                if ($r) {
                    if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) {
                        // do not accept (ignore) an earlier edit than one we currently have.
                        if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) {
                            continue;
                        }
                        update_feed_item($importer['channel_id'], $datarray);
                    }
                    continue;
                }
                $datarray['parent_mid'] = $parent_mid;
                $datarray['uid'] = $importer['channel_id'];
                logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA);
                $xx = item_store($datarray);
                $r = $xx['item_id'];
                continue;
            } else {
                // Head post of a conversation. Have we seen it? If not, import it.
                $item_id = base64url_encode($item->get_id());
                $author = array();
                $datarray = get_atom_elements($feed, $item, $author);
                if (is_array($contact)) {
                    if (!x($author, 'author_name') || $author['author_is_feed']) {
                        $author['author_name'] = $contact['xchan_name'];
                    }
                    if (!x($author, 'author_link') || $author['author_is_feed']) {
                        $author['author_link'] = $contact['xchan_url'];
                    }
                    if (!x($author, 'author_photo') || $author['author_is_feed']) {
                        $author['author_photo'] = $contact['xchan_photo_m'];
                    }
                }
                if (!x($author, 'author_name') || !x($author, 'author_link')) {
                    logger('consume_feed: no author information! ' . print_r($author, true));
                    continue;
                }
                $datarray['author_xchan'] = '';
                if ($author['author_link'] != $contact['xchan_url']) {
                    $x = import_author_unknown(array('name' => $author['author_name'], 'url' => $author['author_link'], 'photo' => array('src' => $author['author_photo'])));
                    if ($x) {
                        $datarray['author_xchan'] = $x;
                    }
                }
                if (!$datarray['author_xchan']) {
                    $datarray['author_xchan'] = $contact['xchan_hash'];
                }
                $datarray['owner_xchan'] = $contact['xchan_hash'];
                $r = q("SELECT edited FROM item WHERE mid = '%s' AND uid = %d LIMIT 1", dbesc($item_id), intval($importer['channel_id']));
                // Update content if 'updated' changes
                if ($r) {
                    if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) {
                        // do not accept (ignore) an earlier edit than one we currently have.
                        if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) {
                            continue;
                        }
                        update_feed_item($importer['channel_id'], $datarray);
                    }
                    continue;
                }
                $datarray['parent_mid'] = $item_id;
                $datarray['uid'] = $importer['channel_id'];
                if (!link_compare($author['owner_link'], $contact['xchan_url'])) {
                    logger('consume_feed: Correcting item owner.', LOGGER_DEBUG);
                    $author['owner_name'] = $contact['name'];
                    $author['owner_link'] = $contact['url'];
                    $author['owner_avatar'] = $contact['thumb'];
                }
                logger('consume_feed: author ' . print_r($author, true), LOGGER_DEBUG);
                logger('consume_feed: ' . print_r($datarray, true), LOGGER_DATA);
                $xx = item_store($datarray);
                $r = $xx['item_id'];
                continue;
            }
        }
    }
}
Beispiel #3
0
function import_author_xchan($x)
{
    $arr = array('xchan' => $x, 'xchan_hash' => '');
    call_hooks('import_author_xchan', $arr);
    if ($arr['xchan_hash']) {
        return $arr['xchan_hash'];
    }
    if (!array_key_exists('network', $x) || $x['network'] === 'zot') {
        $y = import_author_zot($x);
    }
    if (!$y) {
        $y = import_author_diaspora($x);
    }
    if ($x['network'] === 'rss') {
        $y = import_author_rss($x);
    }
    if ($x['network'] === 'unknown') {
        $y = import_author_unknown($x);
    }
    return $y ? $y : false;
}