/** * * consume_feed - process atom feed and update anything/everything we might need to update * * $xml = the (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * * $importer = the contact_record (joined to user_record) of the local user who owns this relationship. * It is this person's stuff that is going to be updated. * $contact = the person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or * might not) try and subscribe to it. * $datedir sorts in reverse order * $pass - by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * With $pass = 1, we only pull parent items out of the stream. * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, &$hub, $datedir = 0, $pass = 0) { if ($contact['network'] === NETWORK_OSTATUS) { if ($pass < 2) { // Test - remove before flight //$tempfile = tempnam(get_temppath(), "ostatus2"); //file_put_contents($tempfile, $xml); logger("Consume OStatus messages ", LOGGER_DEBUG); ostatus_import($xml, $importer, $contact, $hub); } return; } if ($contact['network'] === NETWORK_FEED) { if ($pass < 2) { logger("Consume feeds", LOGGER_DEBUG); feed_import($xml, $importer, $contact, $hub); } return; } require_once 'library/simplepie/simplepie.inc'; require_once 'include/contact_selectors.php'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $feed = new SimplePie(); $feed->set_raw_data($xml); if ($datedir) { $feed->enable_order_by_date(true); } else { $feed->enable_order_by_date(false); } $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo $name_updated = ''; $new_name = ''; $photo_timestamp = ''; $photo_url = ''; $birthday = ''; $contact_updated = ''; $hubs = $feed->get_links('hub'); logger('consume_feed: hubs: ' . print_r($hubs, true), LOGGER_DATA); if (count($hubs)) { $hub = implode(',', $hubs); } $rawtags = $feed->get_feed_tags(NAMESPACE_DFRN, 'owner'); if (!$rawtags) { $rawtags = $feed->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author'); } if ($rawtags) { $elems = $rawtags[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]; if ($elems['name'][0]['attribs'][NAMESPACE_DFRN]['updated']) { $name_updated = $elems['name'][0]['attribs'][NAMESPACE_DFRN]['updated']; $new_name = $elems['name'][0]['data']; // Manually checking for changed contact names if ($new_name != $contact['name'] and $new_name != "" and $name_updated <= $contact['name-date']) { $name_updated = date("c"); $photo_timestamp = date("c"); } } if (x($elems, 'link') && $elems['link'][0]['attribs']['']['rel'] === 'photo' && $elems['link'][0]['attribs'][NAMESPACE_DFRN]['updated']) { if ($photo_timestamp == "") { $photo_timestamp = datetime_convert('UTC', 'UTC', $elems['link'][0]['attribs'][NAMESPACE_DFRN]['updated']); } $photo_url = $elems['link'][0]['attribs']['']['href']; } if (x($rawtags[0]['child'], NAMESPACE_DFRN) && x($rawtags[0]['child'][NAMESPACE_DFRN], 'birthday')) { $birthday = datetime_convert('UTC', 'UTC', $rawtags[0]['child'][NAMESPACE_DFRN]['birthday'][0]['data']); } } if (is_array($contact) && $photo_timestamp && strlen($photo_url) && $photo_timestamp > $contact['avatar-date']) { logger('consume_feed: Updating photo for ' . $contact['name'] . ' from ' . $photo_url . ' uid: ' . $contact['uid']); $contact_updated = $photo_timestamp; require_once "include/Photo.php"; $photos = import_profile_photo($photo_url, $contact['uid'], $contact['id']); q("UPDATE `contact` SET `avatar-date` = '%s', `photo` = '%s', `thumb` = '%s', `micro` = '%s'\n\t\t\tWHERE `uid` = %d AND `id` = %d AND NOT `self`", dbesc(datetime_convert()), dbesc($photos[0]), dbesc($photos[1]), dbesc($photos[2]), intval($contact['uid']), intval($contact['id'])); } if (is_array($contact) && $name_updated && strlen($new_name) && $name_updated > $contact['name-date']) { if ($name_updated > $contact_updated) { $contact_updated = $name_updated; } $r = q("SELECT * FROM `contact` WHERE `uid` = %d AND `id` = %d LIMIT 1", intval($contact['uid']), intval($contact['id'])); $x = q("UPDATE `contact` SET `name` = '%s', `name-date` = '%s' WHERE `uid` = %d AND `id` = %d AND `name` != '%s' AND NOT `self`", dbesc(notags(trim($new_name))), dbesc(datetime_convert()), intval($contact['uid']), intval($contact['id']), dbesc(notags(trim($new_name)))); // do our best to update the name on content items if (count($r) and notags(trim($new_name)) != $r[0]['name']) { q("UPDATE `item` SET `author-name` = '%s' WHERE `author-name` = '%s' AND `author-link` = '%s' AND `uid` = %d AND `author-name` != '%s'", dbesc(notags(trim($new_name))), dbesc($r[0]['name']), dbesc($r[0]['url']), intval($contact['uid']), dbesc(notags(trim($new_name)))); } } if ($contact_updated and $new_name and $photo_url) { poco_check($contact['url'], $new_name, NETWORK_DFRN, $photo_url, "", "", "", "", "", $contact_updated, 2, $contact['id'], $contact['uid']); } if (strlen($birthday)) { if (substr($birthday, 0, 4) != $contact['bdyear']) { logger('consume_feed: updating birthday: ' . $birthday); /** * * Add new birthday event for this person * * $bdtext is just a readable placeholder in case the event is shared * with others. We will replace it during presentation to our $importer * to contain a sparkle link and perhaps a photo. * */ $bdtext = sprintf(t('%s\'s birthday'), $contact['name']); $bdtext2 = sprintf(t('Happy Birthday %s'), ' [url=' . $contact['url'] . ']' . $contact['name'] . '[/url]'); $r = q("INSERT INTO `event` (`uid`,`cid`,`created`,`edited`,`start`,`finish`,`summary`,`desc`,`type`)\n\t\t\t\tVALUES ( %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s' ) ", intval($contact['uid']), intval($contact['id']), dbesc(datetime_convert()), dbesc(datetime_convert()), dbesc(datetime_convert('UTC', 'UTC', $birthday)), dbesc(datetime_convert('UTC', 'UTC', $birthday . ' + 1 day ')), dbesc($bdtext), dbesc($bdtext2), dbesc('birthday')); // update bdyear q("UPDATE `contact` SET `bdyear` = '%s' WHERE `uid` = %d AND `id` = %d", dbesc(substr($birthday, 0, 4)), intval($contact['uid']), intval($contact['id'])); // This function is called twice without reloading the contact // Make sure we only create one event. This is why &$contact // is a reference var in this function $contact['bdyear'] = substr($birthday, 0, 4); } } $community_page = 0; $rawtags = $feed->get_feed_tags(NAMESPACE_DFRN, 'community'); if ($rawtags) { $community_page = intval($rawtags[0]['data']); } if (is_array($contact) && intval($contact['forum']) != $community_page) { q("update contact set forum = %d where id = %d", intval($community_page), intval($contact['id'])); $contact['forum'] = (string) $community_page; } // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $uri = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT `item`.*, `contact`.`self` FROM `item` INNER JOIN `contact` on `item`.`contact-id` = `contact`.`id`\n\t\t\t\t\tWHERE `uri` = '%s' AND `item`.`uid` = %d AND `contact-id` = %d AND NOT `item`.`file` LIKE '%%[%%' LIMIT 1", dbesc($uri), intval($importer['uid']), intval($contact['id'])); if (count($r)) { $item = $r[0]; if (!$item['deleted']) { logger('consume_feed: deleting item ' . $item['id'] . ' uri=' . $item['uri'], LOGGER_DEBUG); } if ($item['object-type'] === ACTIVITY_OBJ_EVENT) { logger("Deleting event " . $item['event-id'], LOGGER_DEBUG); event_delete($item['event-id']); } if ($item['verb'] === ACTIVITY_TAG && $item['object-type'] === ACTIVITY_OBJ_TAGTERM) { $xo = parse_xml_string($item['object'], false); $xt = parse_xml_string($item['target'], false); if ($xt->type === ACTIVITY_OBJ_NOTE) { $i = q("select * from `item` where uri = '%s' and uid = %d limit 1", dbesc($xt->id), intval($importer['importer_uid'])); if (count($i)) { // For tags, the owner cannot remove the tag on the author's copy of the post. $owner_remove = $item['contact-id'] == $i[0]['contact-id'] ? true : false; $author_remove = $item['origin'] && $item['self'] ? true : false; $author_copy = $item['origin'] ? true : false; if ($owner_remove && $author_copy) { continue; } if ($author_remove || $owner_remove) { $tags = explode(',', $i[0]['tag']); $newtags = array(); if (count($tags)) { foreach ($tags as $tag) { if (trim($tag) !== trim($xo->body)) { $newtags[] = trim($tag); } } } q("update item set tag = '%s' where id = %d", dbesc(implode(',', $newtags)), intval($i[0]['id'])); create_tags_from_item($i[0]['id']); } } } } if ($item['uri'] == $item['parent-uri']) { $r = q("UPDATE `item` SET `deleted` = 1, `edited` = '%s', `changed` = '%s',\n\t\t\t\t\t\t\t`body` = '', `title` = ''\n\t\t\t\t\t\t\tWHERE `parent-uri` = '%s' AND `uid` = %d", dbesc($when), dbesc(datetime_convert()), dbesc($item['uri']), intval($importer['uid'])); create_tags_from_itemuri($item['uri'], $importer['uid']); create_files_from_itemuri($item['uri'], $importer['uid']); update_thread_uri($item['uri'], $importer['uid']); } else { $r = q("UPDATE `item` SET `deleted` = 1, `edited` = '%s', `changed` = '%s',\n\t\t\t\t\t\t\t`body` = '', `title` = ''\n\t\t\t\t\t\t\tWHERE `uri` = '%s' AND `uid` = %d", dbesc($when), dbesc(datetime_convert()), dbesc($uri), intval($importer['uid'])); create_tags_from_itemuri($uri, $importer['uid']); create_files_from_itemuri($uri, $importer['uid']); if ($item['last-child']) { // ensure that last-child is set in case the comment that had it just got wiped. q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d ", dbesc(datetime_convert()), dbesc($item['parent-uri']), intval($item['uid'])); // who is the last child now? $r = q("SELECT `id` FROM `item` WHERE `parent-uri` = '%s' AND `type` != 'activity' AND `deleted` = 0 AND `moderated` = 0 AND `uid` = %d\n\t\t\t\t\t\t\t\tORDER BY `created` DESC LIMIT 1", dbesc($item['parent-uri']), intval($importer['uid'])); if (count($r)) { q("UPDATE `item` SET `last-child` = 1 WHERE `id` = %d", intval($r[0]['id'])); } } } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity()); // in inverse date order if ($datedir) { $items = array_reverse($feed->get_items()); } else { $items = $feed->get_items(); } foreach ($items as $item) { $is_reply = false; $item_id = $item->get_id(); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_uri = $rawthread[0]['attribs']['']['ref']; } if ($is_reply && is_array($contact)) { if ($pass == 1) { continue; } // not allowed to post if ($contact['rel'] == CONTACT_IS_FOLLOWER) { continue; } // Have we seen it? If not, import it. $item_id = $item->get_id(); $datarray = get_atom_elements($feed, $item, $contact); if (!x($datarray, 'author-name') && $contact['network'] != NETWORK_DFRN) { $datarray['author-name'] = $contact['name']; } if (!x($datarray, 'author-link') && $contact['network'] != NETWORK_DFRN) { $datarray['author-link'] = $contact['url']; } if (!x($datarray, 'author-avatar') && $contact['network'] != NETWORK_DFRN) { $datarray['author-avatar'] = $contact['thumb']; } if (!x($datarray, 'author-name') || !x($datarray, 'author-link')) { logger('consume_feed: no author information! ' . print_r($datarray, true)); continue; } $force_parent = false; if ($contact['network'] === NETWORK_OSTATUS || stristr($contact['url'], 'twitter.com')) { if ($contact['network'] === NETWORK_OSTATUS) { $force_parent = true; } if (strlen($datarray['title'])) { unset($datarray['title']); } $r = q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d", dbesc(datetime_convert()), dbesc($parent_uri), intval($importer['uid'])); $datarray['last-child'] = 1; update_thread_uri($parent_uri, $importer['uid']); } $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); // Update content if 'updated' changes if (count($r)) { if (edited_timestamp_is_newer($r[0], $datarray)) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } $r = q("UPDATE `item` SET `title` = '%s', `body` = '%s', `tag` = '%s', `edited` = '%s', `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d", dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['tag']), dbesc(datetime_convert('UTC', 'UTC', $datarray['edited'])), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); create_tags_from_itemuri($item_id, $importer['uid']); update_thread_uri($item_id, $importer['uid']); } // update last-child if it changes $allow = $item->get_item_tags(NAMESPACE_DFRN, 'comment-allow'); if ($allow && $allow[0]['data'] != $r[0]['last-child']) { $r = q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d", dbesc(datetime_convert()), dbesc($parent_uri), intval($importer['uid'])); $r = q("UPDATE `item` SET `last-child` = %d , `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d", intval($allow[0]['data']), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); update_thread_uri($item_id, $importer['uid']); } continue; } if ($contact['network'] === NETWORK_FEED || !strlen($contact['notify'])) { // one way feed - no remote comment ability $datarray['last-child'] = 0; } $datarray['parent-uri'] = $parent_uri; $datarray['uid'] = $importer['uid']; $datarray['contact-id'] = $contact['id']; if ($datarray['verb'] === ACTIVITY_LIKE || $datarray['verb'] === ACTIVITY_DISLIKE || $datarray['verb'] === ACTIVITY_ATTEND || $datarray['verb'] === ACTIVITY_ATTENDNO || $datarray['verb'] === ACTIVITY_ATTENDMAYBE) { $datarray['type'] = 'activity'; $datarray['gravity'] = GRAVITY_LIKE; // only one like or dislike per person // splitted into two queries for performance issues $r = q("select id from item where uid = %d and `contact-id` = %d and verb ='%s' and deleted = 0 and (`parent-uri` = '%s') limit 1", intval($datarray['uid']), intval($datarray['contact-id']), dbesc($datarray['verb']), dbesc($parent_uri)); if ($r && count($r)) { continue; } $r = q("select id from item where uid = %d and `contact-id` = %d and verb ='%s' and deleted = 0 and (`thr-parent` = '%s') limit 1", intval($datarray['uid']), intval($datarray['contact-id']), dbesc($datarray['verb']), dbesc($parent_uri)); if ($r && count($r)) { continue; } } if ($datarray['verb'] === ACTIVITY_TAG && $datarray['object-type'] === ACTIVITY_OBJ_TAGTERM) { $xo = parse_xml_string($datarray['object'], false); $xt = parse_xml_string($datarray['target'], false); if ($xt->type == ACTIVITY_OBJ_NOTE) { $r = q("select * from item where `uri` = '%s' AND `uid` = %d limit 1", dbesc($xt->id), intval($importer['importer_uid'])); if (!count($r)) { continue; } // extract tag, if not duplicate, add to parent item if ($xo->id && $xo->content) { $newtag = '#[url=' . $xo->id . ']' . $xo->content . '[/url]'; if (!stristr($r[0]['tag'], $newtag)) { q("UPDATE item SET tag = '%s' WHERE id = %d", dbesc($r[0]['tag'] . (strlen($r[0]['tag']) ? ',' : '') . $newtag), intval($r[0]['id'])); create_tags_from_item($r[0]['id']); } } } } $r = item_store($datarray, $force_parent); continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = $item->get_id(); $datarray = get_atom_elements($feed, $item, $contact); if (is_array($contact)) { if (!x($datarray, 'author-name') && $contact['network'] != NETWORK_DFRN) { $datarray['author-name'] = $contact['name']; } if (!x($datarray, 'author-link') && $contact['network'] != NETWORK_DFRN) { $datarray['author-link'] = $contact['url']; } if (!x($datarray, 'author-avatar') && $contact['network'] != NETWORK_DFRN) { $datarray['author-avatar'] = $contact['thumb']; } } if (!x($datarray, 'author-name') || !x($datarray, 'author-link')) { logger('consume_feed: no author information! ' . print_r($datarray, true)); continue; } // special handling for events if (x($datarray, 'object-type') && $datarray['object-type'] === ACTIVITY_OBJ_EVENT) { $ev = bbtoevent($datarray['body']); if ((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) { $ev['uid'] = $importer['uid']; $ev['uri'] = $item_id; $ev['edited'] = $datarray['edited']; $ev['private'] = $datarray['private']; $ev['guid'] = $datarray['guid']; if (is_array($contact)) { $ev['cid'] = $contact['id']; } $r = q("SELECT * FROM `event` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); if (count($r)) { $ev['id'] = $r[0]['id']; } $xyz = event_store($ev); continue; } } if ($contact['network'] === NETWORK_OSTATUS || stristr($contact['url'], 'twitter.com')) { if (strlen($datarray['title'])) { unset($datarray['title']); } $datarray['last-child'] = 1; } $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); // Update content if 'updated' changes if (count($r)) { if (edited_timestamp_is_newer($r[0], $datarray)) { // do not accept (ignore) an earlier edit than one we currently have. if (datetime_convert('UTC', 'UTC', $datarray['edited']) < $r[0]['edited']) { continue; } $r = q("UPDATE `item` SET `title` = '%s', `body` = '%s', `tag` = '%s', `edited` = '%s', `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d", dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['tag']), dbesc(datetime_convert('UTC', 'UTC', $datarray['edited'])), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); create_tags_from_itemuri($item_id, $importer['uid']); update_thread_uri($item_id, $importer['uid']); } // update last-child if it changes $allow = $item->get_item_tags(NAMESPACE_DFRN, 'comment-allow'); if ($allow && $allow[0]['data'] != $r[0]['last-child']) { $r = q("UPDATE `item` SET `last-child` = %d , `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d", intval($allow[0]['data']), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); update_thread_uri($item_id, $importer['uid']); } continue; } if (activity_match($datarray['verb'], ACTIVITY_FOLLOW)) { logger('consume-feed: New follower'); new_follower($importer, $contact, $datarray, $item); return; } if (activity_match($datarray['verb'], ACTIVITY_UNFOLLOW)) { lose_follower($importer, $contact, $datarray, $item); return; } if (activity_match($datarray['verb'], ACTIVITY_REQ_FRIEND)) { logger('consume-feed: New friend request'); new_follower($importer, $contact, $datarray, $item, true); return; } if (activity_match($datarray['verb'], ACTIVITY_UNFRIEND)) { lose_sharer($importer, $contact, $datarray, $item); return; } if (!is_array($contact)) { return; } if ($contact['network'] === NETWORK_FEED || !strlen($contact['notify'])) { // one way feed - no remote comment ability $datarray['last-child'] = 0; } if ($contact['network'] === NETWORK_FEED) { $datarray['private'] = 2; } $datarray['parent-uri'] = $item_id; $datarray['uid'] = $importer['uid']; $datarray['contact-id'] = $contact['id']; if (!link_compare($datarray['owner-link'], $contact['url'])) { // The item owner info is not our contact. It's OK and is to be expected if this is a tgroup delivery, // but otherwise there's a possible data mixup on the sender's system. // the tgroup delivery code called from item_store will correct it if it's a forum, // but we're going to unconditionally correct it here so that the post will always be owned by our contact. logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $datarray['owner-name'] = $contact['name']; $datarray['owner-link'] = $contact['url']; $datarray['owner-avatar'] = $contact['thumb']; } // We've allowed "followers" to reach this point so we can decide if they are // posting an @-tag delivery, which followers are allowed to do for certain // page types. Now that we've parsed the post, let's check if it is legit. Otherwise ignore it. if ($contact['rel'] == CONTACT_IS_FOLLOWER && !tgroup_check($importer['uid'], $datarray)) { continue; } // This is my contact on another system, but it's really me. // Turn this into a wall post. $notify = item_is_remote_self($contact, $datarray); $r = item_store($datarray, false, $notify); logger('Stored - Contact ' . $contact['url'] . ' Notify ' . $notify . ' return ' . $r . ' Item ' . print_r($datarray, true), LOGGER_DEBUG); continue; } } } }
/** * * consume_feed - process atom feed and update anything/everything we might need to update * * $xml = the (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * * $importer = the contact_record (joined to user_record) of the local user who owns this relationship. * It is this person's stuff that is going to be updated. * $contact = the person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or * might not) try and subscribe to it. * $datedir sorts in reverse order * $pass - by default ($pass = 0) we cannot guarantee that a parent item has been * imported prior to its children being seen in the stream unless we are certain * of how the feed is arranged/ordered. * With $pass = 1, we only pull parent items out of the stream. * With $pass = 2, we only pull children (comments/likes). * * So running this twice, first with pass 1 and then with pass 2 will do the right * thing regardless of feed ordering. This won't be adequate in a fully-threaded * model where comments can have sub-threads. That would require some massive sorting * to get all the feed items into a mostly linear ordering, and might still require * recursion. */ function consume_feed($xml, $importer, &$contact, &$hub, $datedir = 0, $pass = 0) { require_once 'library/simplepie/simplepie.inc'; if (!strlen($xml)) { logger('consume_feed: empty input'); return; } $feed = new SimplePie(); $feed->set_raw_data($xml); if ($datedir) { $feed->enable_order_by_date(true); } else { $feed->enable_order_by_date(false); } $feed->init(); if ($feed->error()) { logger('consume_feed: Error parsing XML: ' . $feed->error()); } $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo $name_updated = ''; $new_name = ''; $photo_timestamp = ''; $photo_url = ''; $birthday = ''; $hubs = $feed->get_links('hub'); if (count($hubs)) { $hub = implode(',', $hubs); } $rawtags = $feed->get_feed_tags(NAMESPACE_DFRN, 'owner'); if (!$rawtags) { $rawtags = $feed->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author'); } if ($rawtags) { $elems = $rawtags[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]; if ($elems['name'][0]['attribs'][NAMESPACE_DFRN]['updated']) { $name_updated = $elems['name'][0]['attribs'][NAMESPACE_DFRN]['updated']; $new_name = $elems['name'][0]['data']; } if (x($elems, 'link') && $elems['link'][0]['attribs']['']['rel'] === 'photo' && $elems['link'][0]['attribs'][NAMESPACE_DFRN]['updated']) { $photo_timestamp = datetime_convert('UTC', 'UTC', $elems['link'][0]['attribs'][NAMESPACE_DFRN]['updated']); $photo_url = $elems['link'][0]['attribs']['']['href']; } if (x($rawtags[0]['child'], NAMESPACE_DFRN) && x($rawtags[0]['child'][NAMESPACE_DFRN], 'birthday')) { $birthday = datetime_convert('UTC', 'UTC', $rawtags[0]['child'][NAMESPACE_DFRN]['birthday'][0]['data']); } } if (is_array($contact) && $photo_timestamp && strlen($photo_url) && $photo_timestamp > $contact['avatar-date']) { logger('consume_feed: Updating photo for ' . $contact['name']); require_once "Photo.php"; $photo_failure = false; $have_photo = false; $r = q("SELECT `resource-id` FROM `photo` WHERE `contact-id` = %d AND `uid` = %d LIMIT 1", intval($contact['id']), intval($contact['uid'])); if (count($r)) { $resource_id = $r[0]['resource-id']; $have_photo = true; } else { $resource_id = photo_new_resource(); } $img_str = fetch_url($photo_url, true); $img = new Photo($img_str); if ($img->is_valid()) { if ($have_photo) { q("DELETE FROM `photo` WHERE `resource-id` = '%s' AND `contact-id` = %d AND `uid` = %d", dbesc($resource_id), intval($contact['id']), intval($contact['uid'])); } $img->scaleImageSquare(175); $hash = $resource_id; $r = $img->store($contact['uid'], $contact['id'], $hash, basename($photo_url), 'Contact Photos', 4); $img->scaleImage(80); $r = $img->store($contact['uid'], $contact['id'], $hash, basename($photo_url), 'Contact Photos', 5); $img->scaleImage(48); $r = $img->store($contact['uid'], $contact['id'], $hash, basename($photo_url), 'Contact Photos', 6); $a = get_app(); q("UPDATE `contact` SET `avatar-date` = '%s', `photo` = '%s', `thumb` = '%s', `micro` = '%s' \n\t\t\t\tWHERE `uid` = %d AND `id` = %d LIMIT 1", dbesc(datetime_convert()), dbesc($a->get_baseurl() . '/photo/' . $hash . '-4.jpg'), dbesc($a->get_baseurl() . '/photo/' . $hash . '-5.jpg'), dbesc($a->get_baseurl() . '/photo/' . $hash . '-6.jpg'), intval($contact['uid']), intval($contact['id'])); } } if (is_array($contact) && $name_updated && strlen($new_name) && $name_updated > $contact['name-date']) { $r = q("select * from contact where uid = %d and id = %d limit 1", intval($contact['uid']), intval($contact['id'])); $x = q("UPDATE `contact` SET `name` = '%s', `name-date` = '%s' WHERE `uid` = %d AND `id` = %d LIMIT 1", dbesc(notags(trim($new_name))), dbesc(datetime_convert()), intval($contact['uid']), intval($contact['id'])); // do our best to update the name on content items if (count($r)) { q("update item set `author-name` = '%s' where `author-name` = '%s' and `author-link` = '%s' and uid = %d", dbesc(notags(trim($new_name))), dbesc($r[0]['name']), dbesc($r[0]['url']), intval($contact['uid'])); } } if (strlen($birthday)) { if (substr($birthday, 0, 4) != $contact['bdyear']) { logger('consume_feed: updating birthday: ' . $birthday); /** * * Add new birthday event for this person * * $bdtext is just a readable placeholder in case the event is shared * with others. We will replace it during presentation to our $importer * to contain a sparkle link and perhaps a photo. * */ $bdtext = t('Birthday:') . ' [url=' . $contact['url'] . ']' . $contact['name'] . '[/url]'; $r = q("INSERT INTO `event` (`uid`,`cid`,`created`,`edited`,`start`,`finish`,`desc`,`type`)\n\t\t\t\tVALUES ( %d, %d, '%s', '%s', '%s', '%s', '%s', '%s' ) ", intval($contact['uid']), intval($contact['id']), dbesc(datetime_convert()), dbesc(datetime_convert()), dbesc(datetime_convert('UTC', 'UTC', $birthday)), dbesc(datetime_convert('UTC', 'UTC', $birthday . ' + 1 day ')), dbesc($bdtext), dbesc('birthday')); // update bdyear q("UPDATE `contact` SET `bdyear` = '%s' WHERE `uid` = %d AND `id` = %d LIMIT 1", dbesc(substr($birthday, 0, 4)), intval($contact['uid']), intval($contact['id'])); // This function is called twice without reloading the contact // Make sure we only create one event. This is why &$contact // is a reference var in this function $contact['bdyear'] = substr($birthday, 0, 4); } } $community_page = 0; $rawtags = $feed->get_feed_tags(NAMESPACE_DFRN, 'community'); if ($rawtags) { $community_page = intval($rawtags[0]['data']); } if (is_array($contact) && intval($contact['forum']) != $community_page) { q("update contact set forum = %d where id = %d limit 1", intval($community_page), intval($contact['id'])); $contact['forum'] = (string) $community_page; } // process any deleted entries $del_entries = $feed->get_feed_tags(NAMESPACE_TOMB, 'deleted-entry'); if (is_array($del_entries) && count($del_entries) && $pass != 2) { foreach ($del_entries as $dentry) { $deleted = false; if (isset($dentry['attribs']['']['ref'])) { $uri = $dentry['attribs']['']['ref']; $deleted = true; if (isset($dentry['attribs']['']['when'])) { $when = $dentry['attribs']['']['when']; $when = datetime_convert('UTC', 'UTC', $when, 'Y-m-d H:i:s'); } else { $when = datetime_convert('UTC', 'UTC', 'now', 'Y-m-d H:i:s'); } } if ($deleted && is_array($contact)) { $r = q("SELECT `item`.*, `contact`.`self` FROM `item` left join `contact` on `item`.`contact-id` = `contact`.`id` \n\t\t\t\t\tWHERE `uri` = '%s' AND `item`.`uid` = %d AND `contact-id` = %d AND NOT `item`.`file` LIKE '%%[%%' LIMIT 1", dbesc($uri), intval($importer['uid']), intval($contact['id'])); if (count($r)) { $item = $r[0]; if (!$item['deleted']) { logger('consume_feed: deleting item ' . $item['id'] . ' uri=' . $item['uri'], LOGGER_DEBUG); } if ($item['verb'] === ACTIVITY_TAG && $item['object-type'] === ACTIVITY_OBJ_TAGTERM) { $xo = parse_xml_string($item['object'], false); $xt = parse_xml_string($item['target'], false); if ($xt->type === ACTIVITY_OBJ_NOTE) { $i = q("select * from `item` where uri = '%s' and uid = %d limit 1", dbesc($xt->id), intval($importer['importer_uid'])); if (count($i)) { // For tags, the owner cannot remove the tag on the author's copy of the post. $owner_remove = $item['contact-id'] == $i[0]['contact-id'] ? true : false; $author_remove = $item['origin'] && $item['self'] ? true : false; $author_copy = $item['origin'] ? true : false; if ($owner_remove && $author_copy) { continue; } if ($author_remove || $owner_remove) { $tags = explode(',', $i[0]['tag']); $newtags = array(); if (count($tags)) { foreach ($tags as $tag) { if (trim($tag) !== trim($xo->body)) { $newtags[] = trim($tag); } } } q("update item set tag = '%s' where id = %d limit 1", dbesc(implode(',', $newtags)), intval($i[0]['id'])); } } } } if ($item['uri'] == $item['parent-uri']) { $r = q("UPDATE `item` SET `deleted` = 1, `edited` = '%s', `changed` = '%s',\n\t\t\t\t\t\t\t`body` = '', `title` = ''\n\t\t\t\t\t\t\tWHERE `parent-uri` = '%s' AND `uid` = %d", dbesc($when), dbesc(datetime_convert()), dbesc($item['uri']), intval($importer['uid'])); } else { $r = q("UPDATE `item` SET `deleted` = 1, `edited` = '%s', `changed` = '%s',\n\t\t\t\t\t\t\t`body` = '', `title` = '' \n\t\t\t\t\t\t\tWHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($when), dbesc(datetime_convert()), dbesc($uri), intval($importer['uid'])); if ($item['last-child']) { // ensure that last-child is set in case the comment that had it just got wiped. q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d ", dbesc(datetime_convert()), dbesc($item['parent-uri']), intval($item['uid'])); // who is the last child now? $r = q("SELECT `id` FROM `item` WHERE `parent-uri` = '%s' AND `type` != 'activity' AND `deleted` = 0 AND `moderated` = 0 AND `uid` = %d \n\t\t\t\t\t\t\t\tORDER BY `created` DESC LIMIT 1", dbesc($item['parent-uri']), intval($importer['uid'])); if (count($r)) { q("UPDATE `item` SET `last-child` = 1 WHERE `id` = %d LIMIT 1", intval($r[0]['id'])); } } } } } } } // Now process the feed if ($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity()); // in inverse date order if ($datedir) { $items = array_reverse($feed->get_items()); } else { $items = $feed->get_items(); } foreach ($items as $item) { $is_reply = false; $item_id = $item->get_id(); $rawthread = $item->get_item_tags(NAMESPACE_THREAD, 'in-reply-to'); if (isset($rawthread[0]['attribs']['']['ref'])) { $is_reply = true; $parent_uri = $rawthread[0]['attribs']['']['ref']; } if ($is_reply && is_array($contact)) { if ($pass == 1) { continue; } // Have we seen it? If not, import it. $item_id = $item->get_id(); $datarray = get_atom_elements($feed, $item); if (!x($datarray, 'author-name') && $contact['network'] != NETWORK_DFRN) { $datarray['author-name'] = $contact['name']; } if (!x($datarray, 'author-link') && $contact['network'] != NETWORK_DFRN) { $datarray['author-link'] = $contact['url']; } if (!x($datarray, 'author-avatar') && $contact['network'] != NETWORK_DFRN) { $datarray['author-avatar'] = $contact['thumb']; } if (!x($datarray, 'author-name') || !x($datarray, 'author-link')) { logger('consume_feed: no author information! ' . print_r($datarray, true)); continue; } $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); // Update content if 'updated' changes if (count($r)) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { $r = q("UPDATE `item` SET `title` = '%s', `body` = '%s', `tag` = '%s', `edited` = '%s' WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['tag']), dbesc(datetime_convert('UTC', 'UTC', $datarray['edited'])), dbesc($item_id), intval($importer['uid'])); } // update last-child if it changes $allow = $item->get_item_tags(NAMESPACE_DFRN, 'comment-allow'); if ($allow && $allow[0]['data'] != $r[0]['last-child']) { $r = q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d", dbesc(datetime_convert()), dbesc($parent_uri), intval($importer['uid'])); $r = q("UPDATE `item` SET `last-child` = %d , `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", intval($allow[0]['data']), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); } continue; } $force_parent = false; if ($contact['network'] === NETWORK_OSTATUS || stristr($contact['url'], 'twitter.com')) { if ($contact['network'] === NETWORK_OSTATUS) { $force_parent = true; } if (strlen($datarray['title'])) { unset($datarray['title']); } $r = q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent-uri` = '%s' AND `uid` = %d", dbesc(datetime_convert()), dbesc($parent_uri), intval($importer['uid'])); $datarray['last-child'] = 1; } if ($contact['network'] === NETWORK_FEED || !strlen($contact['notify'])) { // one way feed - no remote comment ability $datarray['last-child'] = 0; } $datarray['parent-uri'] = $parent_uri; $datarray['uid'] = $importer['uid']; $datarray['contact-id'] = $contact['id']; if (activity_match($datarray['verb'], ACTIVITY_LIKE) || activity_match($datarray['verb'], ACTIVITY_DISLIKE)) { $datarray['type'] = 'activity'; $datarray['gravity'] = GRAVITY_LIKE; // only one like or dislike per person $r = q("select id from item where uid = %d and `contact-id` = %d and verb ='%s' and deleted = 0 limit 1", intval($datarray['uid']), intval($datarray['contact-id']), dbesc($datarray['verb'])); if ($r && count($r)) { continue; } } if ($datarray['verb'] === ACTIVITY_TAG && $datarray['object-type'] === ACTIVITY_OBJ_TAGTERM) { $xo = parse_xml_string($datarray['object'], false); $xt = parse_xml_string($datarray['target'], false); if ($xt->type == ACTIVITY_OBJ_NOTE) { $r = q("select * from item where `uri` = '%s' AND `uid` = %d limit 1", dbesc($xt->id), intval($importer['importer_uid'])); if (!count($r)) { continue; } // extract tag, if not duplicate, add to parent item if ($xo->id && $xo->content) { $newtag = '#[url=' . $xo->id . ']' . $xo->content . '[/url]'; if (!stristr($r[0]['tag'], $newtag)) { q("UPDATE item SET tag = '%s' WHERE id = %d LIMIT 1", dbesc($r[0]['tag'] . (strlen($r[0]['tag']) ? ',' : '') . $newtag), intval($r[0]['id'])); } } } } $r = item_store($datarray, $force_parent); continue; } else { // Head post of a conversation. Have we seen it? If not, import it. $item_id = $item->get_id(); $datarray = get_atom_elements($feed, $item); if (is_array($contact)) { if (!x($datarray, 'author-name') && $contact['network'] != NETWORK_DFRN) { $datarray['author-name'] = $contact['name']; } if (!x($datarray, 'author-link') && $contact['network'] != NETWORK_DFRN) { $datarray['author-link'] = $contact['url']; } if (!x($datarray, 'author-avatar') && $contact['network'] != NETWORK_DFRN) { $datarray['author-avatar'] = $contact['thumb']; } } if (!x($datarray, 'author-name') || !x($datarray, 'author-link')) { logger('consume_feed: no author information! ' . print_r($datarray, true)); continue; } // special handling for events if (x($datarray, 'object-type') && $datarray['object-type'] === ACTIVITY_OBJ_EVENT) { $ev = bbtoevent($datarray['body']); if (x($ev, 'desc') && x($ev, 'start')) { $ev['uid'] = $importer['uid']; $ev['uri'] = $item_id; $ev['edited'] = $datarray['edited']; $ev['private'] = $datarray['private']; if (is_array($contact)) { $ev['cid'] = $contact['id']; } $r = q("SELECT * FROM `event` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); if (count($r)) { $ev['id'] = $r[0]['id']; } $xyz = event_store($ev); continue; } } $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($item_id), intval($importer['uid'])); // Update content if 'updated' changes if (count($r)) { if (x($datarray, 'edited') !== false && datetime_convert('UTC', 'UTC', $datarray['edited']) !== $r[0]['edited']) { $r = q("UPDATE `item` SET `title` = '%s', `body` = '%s', `tag` = '%s', `edited` = '%s' WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['tag']), dbesc(datetime_convert('UTC', 'UTC', $datarray['edited'])), dbesc($item_id), intval($importer['uid'])); } // update last-child if it changes $allow = $item->get_item_tags(NAMESPACE_DFRN, 'comment-allow'); if ($allow && $allow[0]['data'] != $r[0]['last-child']) { $r = q("UPDATE `item` SET `last-child` = %d , `changed` = '%s' WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", intval($allow[0]['data']), dbesc(datetime_convert()), dbesc($item_id), intval($importer['uid'])); } continue; } if (activity_match($datarray['verb'], ACTIVITY_FOLLOW)) { logger('consume-feed: New follower'); new_follower($importer, $contact, $datarray, $item); return; } if (activity_match($datarray['verb'], ACTIVITY_UNFOLLOW)) { lose_follower($importer, $contact, $datarray, $item); return; } if (activity_match($datarray['verb'], ACTIVITY_REQ_FRIEND)) { logger('consume-feed: New friend request'); new_follower($importer, $contact, $datarray, $item, true); return; } if (activity_match($datarray['verb'], ACTIVITY_UNFRIEND)) { lose_sharer($importer, $contact, $datarray, $item); return; } if (!is_array($contact)) { return; } if ($contact['network'] === NETWORK_OSTATUS || stristr($contact['url'], 'twitter.com')) { if (strlen($datarray['title'])) { unset($datarray['title']); } $datarray['last-child'] = 1; } if ($contact['network'] === NETWORK_FEED || !strlen($contact['notify'])) { // one way feed - no remote comment ability $datarray['last-child'] = 0; } // This is my contact on another system, but it's really me. // Turn this into a wall post. if ($contact['remote_self']) { $datarray['wall'] = 1; } $datarray['parent-uri'] = $item_id; $datarray['uid'] = $importer['uid']; $datarray['contact-id'] = $contact['id']; if (!link_compare($datarray['owner-link'], $contact['url'])) { // The item owner info is not our contact. It's OK and is to be expected if this is a tgroup delivery, // but otherwise there's a possible data mixup on the sender's system. // the tgroup delivery code called from item_store will correct it if it's a forum, // but we're going to unconditionally correct it here so that the post will always be owned by our contact. logger('consume_feed: Correcting item owner.', LOGGER_DEBUG); $datarray['owner-name'] = $contact['name']; $datarray['owner-link'] = $contact['url']; $datarray['owner-avatar'] = $contact['thumb']; } $r = item_store($datarray); continue; } } } }
function feed_meta($xml) { require_once 'library/simplepie/simplepie.inc'; $ret = array(); if (!strlen($xml)) { logger('empty input'); return $ret; } $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); if ($feed->error()) { logger('Error parsing XML: ' . $feed->error()); return $ret; } $ret['hubs'] = $feed->get_links('hub'); // logger('consume_feed: hubs: ' . print_r($hubs,true), LOGGER_DATA); $author = array(); $found_author = $feed->get_author(); if ($found_author) { $author['author_name'] = unxmlify($found_author->get_name()); $author['author_link'] = unxmlify($found_author->get_link()); $rawauthor = $feed->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author'); logger('rawauthor: ' . print_r($rawauthor, true)); if ($rawauthor) { if ($rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']) { $base = $rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']; foreach ($base as $link) { if (!x($author, 'author_photo') || !$author['author_photo']) { if ($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') { $author['author_photo'] = unxmlify($link['attribs']['']['href']); break; } } } } if ($rawauthor[0]['child'][NAMESPACE_POCO]['displayName'][0]['data']) { $author['full_name'] = unxmlify($rawauthor[0]['child'][NAMESPACE_POCO]['displayName'][0]['data']); } if ($rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']) { $author['author_uri'] = unxmlify($rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']); } } } if (substr($author['author_link'], -1, 1) == '/') { $author['author_link'] = substr($author['author_link'], 0, -1); } $ret['author'] = $author; return $ret; }
function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false, $override_url = false) { require_once "lib/simplepie/simplepie.inc"; require_once "lib/magpierss/rss_fetch.inc"; require_once 'lib/magpierss/rss_utils.inc'; $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']; if (!$_REQUEST["daemon"] && !$ignore_daemon) { return false; } if ($debug_enabled) { _debug("update_rss_feed: start"); } if (!$ignore_daemon) { if (DB_TYPE == "pgsql") { $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')"; } else { $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))"; } $result = db_query($link, "SELECT id,update_interval,auth_login,\n\t\t\t\tauth_pass,cache_images,update_method,last_updated\n\t\t\t\tFROM ttrss_feeds WHERE id = '{$feed}' AND {$updstart_thresh_qpart}"); } else { $result = db_query($link, "SELECT id,update_interval,auth_login,\n\t\t\t\tfeed_url,auth_pass,cache_images,update_method,last_updated,\n\t\t\t\tmark_unread_on_update, owner_uid, update_on_checksum_change,\n\t\t\t\tpubsub_state\n\t\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'"); } if (db_num_rows($result) == 0) { if ($debug_enabled) { _debug("update_rss_feed: feed {$feed} NOT FOUND/SKIPPED"); } return false; } $update_method = db_fetch_result($result, 0, "update_method"); $last_updated = db_fetch_result($result, 0, "last_updated"); $owner_uid = db_fetch_result($result, 0, "owner_uid"); $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update")); $update_on_checksum_change = sql_bool_to_bool(db_fetch_result($result, 0, "update_on_checksum_change")); $pubsub_state = db_fetch_result($result, 0, "pubsub_state"); db_query($link, "UPDATE ttrss_feeds SET last_update_started = NOW()\n\t\t\tWHERE id = '{$feed}'"); $auth_login = db_fetch_result($result, 0, "auth_login"); $auth_pass = db_fetch_result($result, 0, "auth_pass"); if ($update_method == 0) { $update_method = DEFAULT_UPDATE_METHOD + 1; } // 1 - Magpie // 2 - SimplePie // 3 - Twitter OAuth if ($update_method == 2) { $use_simplepie = true; } else { $use_simplepie = false; } if ($debug_enabled) { _debug("update method: {$update_method} (feed setting: {$update_method}) (use simplepie: {$use_simplepie})\n"); } if ($update_method == 1) { $auth_login = urlencode($auth_login); $auth_pass = urlencode($auth_pass); } $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); $fetch_url = db_fetch_result($result, 0, "feed_url"); $feed = db_escape_string($feed); if ($auth_login && $auth_pass) { $url_parts = array(); preg_match("/(^[^:]*):\\/\\/(.*)/", $fetch_url, $url_parts); if ($url_parts[1] && $url_parts[2]) { $fetch_url = $url_parts[1] . "://{$auth_login}:{$auth_pass}@" . $url_parts[2]; } } if ($override_url) { $fetch_url = $override_url; } if ($debug_enabled) { _debug("update_rss_feed: fetching [{$fetch_url}]..."); } // Ignore cache if new feed or manual update. $cache_age = is_null($last_updated) || $last_updated == '1970-01-01 00:00:00' ? -1 : get_feed_update_interval($link, $feed) * 60; if ($update_method == 3) { $rss = fetch_twitter_rss($link, $fetch_url, $owner_uid); } else { if ($update_method == 1) { define('MAGPIE_CACHE_AGE', $cache_age); define('MAGPIE_CACHE_ON', !$no_cache); define('MAGPIE_FETCH_TIME_OUT', 60); define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie"); $rss = @fetch_rss($fetch_url); } else { $simplepie_cache_dir = CACHE_DIR . "/simplepie"; if (!is_dir($simplepie_cache_dir)) { mkdir($simplepie_cache_dir); } $rss = new SimplePie(); $rss->set_useragent(SELF_USER_AGENT); # $rss->set_timeout(10); $rss->set_feed_url($fetch_url); $rss->set_output_encoding('UTF-8'); //$rss->force_feed(true); if ($debug_enabled) { _debug("feed update interval (sec): " . get_feed_update_interval($link, $feed) * 60); } $rss->enable_cache(!$no_cache); if (!$no_cache) { $rss->set_cache_location($simplepie_cache_dir); $rss->set_cache_duration($cache_age); } $rss->init(); } } // print_r($rss); if ($debug_enabled) { _debug("update_rss_feed: fetch done, parsing..."); } $feed = db_escape_string($feed); if ($update_method == 2) { $fetch_ok = !$rss->error(); } else { $fetch_ok = !!$rss; } if ($fetch_ok) { if ($debug_enabled) { _debug("update_rss_feed: processing feed data..."); } // db_query($link, "BEGIN"); if (DB_TYPE == "pgsql") { $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'"; } else { $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)"; } $result = db_query($link, "SELECT title,icon_url,site_url,owner_uid,\n\t\t\t\t(favicon_last_checked IS NULL OR {$favicon_interval_qpart}) AS\n\t\t\t\t\t\tfavicon_needs_check\n\t\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'"); $registered_title = db_fetch_result($result, 0, "title"); $orig_icon_url = db_fetch_result($result, 0, "icon_url"); $orig_site_url = db_fetch_result($result, 0, "site_url"); $favicon_needs_check = sql_bool_to_bool(db_fetch_result($result, 0, "favicon_needs_check")); $owner_uid = db_fetch_result($result, 0, "owner_uid"); if ($use_simplepie) { $site_url = db_escape_string(trim($rss->get_link())); } else { $site_url = db_escape_string(trim($rss->channel["link"])); } // weird, weird Magpie if (!$use_simplepie) { if (!$site_url) { $site_url = db_escape_string($rss->channel["link_"]); } } $site_url = rewrite_relative_url($fetch_url, $site_url); $site_url = substr($site_url, 0, 250); if ($debug_enabled) { _debug("update_rss_feed: checking favicon..."); } if ($favicon_needs_check) { check_feed_favicon($site_url, $feed, $link); db_query($link, "UPDATE ttrss_feeds SET favicon_last_checked = NOW()\n\t\t\t\t\tWHERE id = '{$feed}'"); } if (!$registered_title || $registered_title == "[Unknown]") { if ($use_simplepie) { $feed_title = db_escape_string($rss->get_title()); } else { $feed_title = db_escape_string($rss->channel["title"]); } if ($debug_enabled) { _debug("update_rss_feed: registering title: {$feed_title}"); } db_query($link, "UPDATE ttrss_feeds SET\n\t\t\t\t\ttitle = '{$feed_title}' WHERE id = '{$feed}'"); } if ($site_url && $orig_site_url != $site_url) { db_query($link, "UPDATE ttrss_feeds SET\n\t\t\t\t\tsite_url = '{$site_url}' WHERE id = '{$feed}'"); } // print "I: " . $rss->channel["image"]["url"]; if (!$use_simplepie) { $icon_url = db_escape_string(trim($rss->image["url"])); } else { $icon_url = db_escape_string(trim($rss->get_image_url())); } $icon_url = rewrite_relative_url($fetch_url, $icon_url); $icon_url = substr($icon_url, 0, 250); if ($icon_url && $orig_icon_url != $icon_url) { db_query($link, "UPDATE ttrss_feeds SET icon_url = '{$icon_url}' WHERE id = '{$feed}'"); } if ($debug_enabled) { _debug("update_rss_feed: loading filters..."); } $filters = load_filters($link, $feed, $owner_uid); // if ($debug_enabled) { // print_r($filters); // } if ($use_simplepie) { $iterator = $rss->get_items(); } else { $iterator = $rss->items; if (!$iterator || !is_array($iterator)) { $iterator = $rss->entries; } if (!$iterator || !is_array($iterator)) { $iterator = $rss; } } if (!is_array($iterator)) { /* db_query($link, "UPDATE ttrss_feeds SET last_error = 'Parse error: can\'t find any articles.' WHERE id = '$feed'"); */ // clear any errors and mark feed as updated if fetched okay // even if it's blank if ($debug_enabled) { _debug("update_rss_feed: entry iterator is not an array, no articles?"); } db_query($link, "UPDATE ttrss_feeds\n\t\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'"); return; // no articles } if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) { if ($debug_enabled) { _debug("update_rss_feed: checking for PUSH hub..."); } $feed_hub_url = false; if ($use_simplepie) { $links = $rss->get_links('hub'); if ($links && is_array($links)) { foreach ($links as $l) { $feed_hub_url = $l; break; } } } else { $atom = $rss->channel['atom']; if ($atom) { if ($atom['link@rel'] == 'hub') { $feed_hub_url = $atom['link@href']; } if (!$feed_hub_url && $atom['link#'] > 1) { for ($i = 2; $i <= $atom['link#']; $i++) { if ($atom["link#{$i}@rel"] == 'hub') { $feed_hub_url = $atom["link#{$i}@href"]; break; } } } } else { $feed_hub_url = $rss->channel['link_hub']; } } if ($debug_enabled) { _debug("update_rss_feed: feed hub url: {$feed_hub_url}"); } if ($feed_hub_url && function_exists('curl_init') && !ini_get("open_basedir")) { require_once 'lib/pubsubhubbub/subscriber.php'; $callback_url = get_self_url_prefix() . "/public.php?op=pubsub&id={$feed}"; $s = new Subscriber($feed_hub_url, $callback_url); $rc = $s->subscribe($fetch_url); if ($debug_enabled) { _debug("update_rss_feed: feed hub url found, subscribe request sent."); } db_query($link, "UPDATE ttrss_feeds SET pubsub_state = 1\n\t\t\t\t\t\tWHERE id = '{$feed}'"); } } if ($debug_enabled) { _debug("update_rss_feed: processing articles..."); } foreach ($iterator as $item) { if ($_REQUEST['xdebug'] == 2) { print_r($item); } if ($use_simplepie) { $entry_guid = $item->get_id(); if (!$entry_guid) { $entry_guid = $item->get_link(); } if (!$entry_guid) { $entry_guid = make_guid_from_title($item->get_title()); } } else { $entry_guid = $item["id"]; if (!$entry_guid) { $entry_guid = $item["guid"]; } if (!$entry_guid) { $entry_guid = $item["about"]; } if (!$entry_guid) { $entry_guid = $item["link"]; } if (!$entry_guid) { $entry_guid = make_guid_from_title($item["title"]); } } if ($debug_enabled) { _debug("update_rss_feed: guid {$entry_guid}"); } if (!$entry_guid) { continue; } $entry_timestamp = ""; if ($use_simplepie) { $entry_timestamp = strtotime($item->get_date()); } else { $rss_2_date = $item['pubdate']; $rss_1_date = $item['dc']['date']; $atom_date = $item['issued']; if (!$atom_date) { $atom_date = $item['updated']; } if ($atom_date != "") { $entry_timestamp = parse_w3cdtf($atom_date); } if ($rss_1_date != "") { $entry_timestamp = parse_w3cdtf($rss_1_date); } if ($rss_2_date != "") { $entry_timestamp = strtotime($rss_2_date); } } if ($entry_timestamp == "" || $entry_timestamp == -1 || !$entry_timestamp) { $entry_timestamp = time(); $no_orig_date = 'true'; } else { $no_orig_date = 'false'; } $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); if ($debug_enabled) { _debug("update_rss_feed: date {$entry_timestamp} [{$entry_timestamp_fmt}]"); } if ($use_simplepie) { $entry_title = $item->get_title(); } else { $entry_title = trim(strip_tags($item["title"])); } if ($use_simplepie) { $entry_link = $item->get_link(); } else { // strange Magpie workaround $entry_link = $item["link_"]; if (!$entry_link) { $entry_link = $item["link"]; } } $entry_link = rewrite_relative_url($site_url, $entry_link); if ($debug_enabled) { _debug("update_rss_feed: title {$entry_title}"); _debug("update_rss_feed: link {$entry_link}"); } if (!$entry_title) { $entry_title = date("Y-m-d H:i:s", $entry_timestamp); } $entry_link = strip_tags($entry_link); if ($use_simplepie) { $entry_content = $item->get_content(); if (!$entry_content) { $entry_content = $item->get_description(); } } else { $entry_content = $item["content:escaped"]; if (!$entry_content) { $entry_content = $item["content:encoded"]; } if (!$entry_content && is_array($entry_content)) { $entry_content = $item["content"]["encoded"]; } if (!$entry_content) { $entry_content = $item["content"]; } if (is_array($entry_content)) { $entry_content = $entry_content[0]; } // Magpie bugs are getting ridiculous if (trim($entry_content) == "Array") { $entry_content = false; } if (!$entry_content) { $entry_content = $item["atom_content"]; } if (!$entry_content) { $entry_content = $item["summary"]; } if (!$entry_content || strlen($entry_content) < strlen($item["description"])) { $entry_content = $item["description"]; } // WTF if (is_array($entry_content)) { $entry_content = $entry_content["encoded"]; if (!$entry_content) { $entry_content = $entry_content["escaped"]; } } } if ($cache_images && is_writable(CACHE_DIR . '/images')) { $entry_content = cache_images($entry_content, $site_url, $debug_enabled); } if ($_REQUEST["xdebug"] == 2) { print "update_rss_feed: content: "; print $entry_content; print "\n"; } $entry_content_unescaped = $entry_content; if ($use_simplepie) { $entry_comments = strip_tags($item->data["comments"]); if ($item->get_author()) { $entry_author_item = $item->get_author(); $entry_author = $entry_author_item->get_name(); if (!$entry_author) { $entry_author = $entry_author_item->get_email(); } $entry_author = db_escape_string($entry_author); } } else { $entry_comments = strip_tags($item["comments"]); $entry_author = db_escape_string(strip_tags($item['dc']['creator'])); if ($item['author']) { if (is_array($item['author'])) { if (!$entry_author) { $entry_author = db_escape_string(strip_tags($item['author']['name'])); } if (!$entry_author) { $entry_author = db_escape_string(strip_tags($item['author']['email'])); } } if (!$entry_author) { $entry_author = db_escape_string(strip_tags($item['author'])); } } } if (preg_match('/^[\\t\\n\\r ]*$/', $entry_author)) { $entry_author = ''; } $entry_guid = db_escape_string(strip_tags($entry_guid)); $entry_guid = mb_substr($entry_guid, 0, 250); $result = db_query($link, "SELECT id FROM\tttrss_entries\n\t\t\t\t\tWHERE guid = '{$entry_guid}'"); $entry_content = db_escape_string($entry_content, false); $content_hash = "SHA1:" . sha1(strip_tags($entry_content)); $entry_title = db_escape_string($entry_title); $entry_link = db_escape_string($entry_link); $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250); $entry_author = mb_substr($entry_author, 0, 250); if ($use_simplepie) { $num_comments = 0; #FIXME# } else { $num_comments = db_escape_string($item["slash"]["comments"]); } if (!$num_comments) { $num_comments = 0; } if ($debug_enabled) { _debug("update_rss_feed: looking for tags [1]..."); } // parse <category> entries into tags $additional_tags = array(); if ($use_simplepie) { $additional_tags_src = $item->get_categories(); if (is_array($additional_tags_src)) { foreach ($additional_tags_src as $tobj) { array_push($additional_tags, $tobj->get_term()); } } if ($debug_enabled) { _debug("update_rss_feed: category tags:"); print_r($additional_tags); } } else { $t_ctr = $item['category#']; if ($t_ctr == 0) { $additional_tags = array(); } else { if ($t_ctr > 0) { $additional_tags = array($item['category']); if ($item['category@term']) { array_push($additional_tags, $item['category@term']); } for ($i = 0; $i <= $t_ctr; $i++) { if ($item["category#{$i}"]) { array_push($additional_tags, $item["category#{$i}"]); } if ($item["category#{$i}@term"]) { array_push($additional_tags, $item["category#{$i}@term"]); } } } } // parse <dc:subject> elements $t_ctr = $item['dc']['subject#']; if ($t_ctr > 0) { array_push($additional_tags, $item['dc']['subject']); for ($i = 0; $i <= $t_ctr; $i++) { if ($item['dc']["subject#{$i}"]) { array_push($additional_tags, $item['dc']["subject#{$i}"]); } } } } if ($debug_enabled) { _debug("update_rss_feed: looking for tags [2]..."); } /* taaaags */ // <a href="..." rel="tag">Xorg</a>, // $entry_tags = null; preg_match_all("/<a.*?rel=['\"]tag['\"].*?\\>([^<]+)<\\/a>/i", $entry_content_unescaped, $entry_tags); $entry_tags = $entry_tags[1]; $entry_tags = array_merge($entry_tags, $additional_tags); $entry_tags = array_unique($entry_tags); for ($i = 0; $i < count($entry_tags); $i++) { $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8'); } if ($debug_enabled) { //_debug("update_rss_feed: unfiltered tags found:"); //print_r($entry_tags); } # sanitize content $entry_content = sanitize_article_content($entry_content); $entry_title = sanitize_article_content($entry_title); if ($debug_enabled) { _debug("update_rss_feed: done collecting data [TITLE:{$entry_title}]"); } db_query($link, "BEGIN"); if (db_num_rows($result) == 0) { if ($debug_enabled) { _debug("update_rss_feed: base guid not found"); } // base post entry does not exist, create it $result = db_query($link, "INSERT INTO ttrss_entries\n\t\t\t\t\t\t\t(title,\n\t\t\t\t\t\t\tguid,\n\t\t\t\t\t\t\tlink,\n\t\t\t\t\t\t\tupdated,\n\t\t\t\t\t\t\tcontent,\n\t\t\t\t\t\t\tcontent_hash,\n\t\t\t\t\t\t\tno_orig_date,\n\t\t\t\t\t\t\tdate_updated,\n\t\t\t\t\t\t\tdate_entered,\n\t\t\t\t\t\t\tcomments,\n\t\t\t\t\t\t\tnum_comments,\n\t\t\t\t\t\t\tauthor)\n\t\t\t\t\t\tVALUES\n\t\t\t\t\t\t\t('{$entry_title}',\n\t\t\t\t\t\t\t'{$entry_guid}',\n\t\t\t\t\t\t\t'{$entry_link}',\n\t\t\t\t\t\t\t'{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\t'{$entry_content}',\n\t\t\t\t\t\t\t'{$content_hash}',\n\t\t\t\t\t\t\t{$no_orig_date},\n\t\t\t\t\t\t\tNOW(),\n\t\t\t\t\t\t\tNOW(),\n\t\t\t\t\t\t\t'{$entry_comments}',\n\t\t\t\t\t\t\t'{$num_comments}',\n\t\t\t\t\t\t\t'{$entry_author}')"); } else { // we keep encountering the entry in feeds, so we need to // update date_updated column so that we don't get horrible // dupes when the entry gets purged and reinserted again e.g. // in the case of SLOW SLOW OMG SLOW updating feeds $base_entry_id = db_fetch_result($result, 0, "id"); db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()\n\t\t\t\t\t\tWHERE id = '{$base_entry_id}'"); } // now it should exist, if not - bad luck then $result = db_query($link, "SELECT\n\t\t\t\t\t\tid,content_hash,no_orig_date,title,\n\t\t\t\t\t\t" . SUBSTRING_FOR_DATE . "(date_updated,1,19) as date_updated,\n\t\t\t\t\t\t" . SUBSTRING_FOR_DATE . "(updated,1,19) as updated,\n\t\t\t\t\t\tnum_comments\n\t\t\t\t\tFROM\n\t\t\t\t\t\tttrss_entries\n\t\t\t\t\tWHERE guid = '{$entry_guid}'"); $entry_ref_id = 0; $entry_int_id = 0; if (db_num_rows($result) == 1) { if ($debug_enabled) { _debug("update_rss_feed: base guid found, checking for user record"); } // this will be used below in update handler $orig_content_hash = db_fetch_result($result, 0, "content_hash"); $orig_title = db_fetch_result($result, 0, "title"); $orig_num_comments = db_fetch_result($result, 0, "num_comments"); $orig_date_updated = strtotime(db_fetch_result($result, 0, "date_updated")); $ref_id = db_fetch_result($result, 0, "id"); $entry_ref_id = $ref_id; // check for user post link to main table // do we allow duplicate posts with same GUID in different feeds? if (get_pref($link, "ALLOW_DUPLICATE_POSTS", $owner_uid, false)) { $dupcheck_qpart = "AND (feed_id = '{$feed}' OR feed_id IS NULL)"; } else { $dupcheck_qpart = ""; } /* Collect article tags here so we could filter by them: */ $article_filters = get_article_filters($filters, $entry_title, $entry_content, $entry_link, $entry_timestamp, $entry_author, $entry_tags); if ($debug_enabled) { _debug("update_rss_feed: article filters: "); if (count($article_filters) != 0) { print_r($article_filters); } } if (find_article_filter($article_filters, "filter")) { db_query($link, "COMMIT"); // close transaction in progress continue; } $score = calculate_article_score($article_filters); if ($debug_enabled) { _debug("update_rss_feed: initial score: {$score}"); } $query = "SELECT ref_id, int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}'\n\t\t\t\t\t\t\t{$dupcheck_qpart}"; // if ($_REQUEST["xdebug"]) print "$query\n"; $result = db_query($link, $query); // okay it doesn't exist - create user entry if (db_num_rows($result) == 0) { if ($debug_enabled) { _debug("update_rss_feed: user record not found, creating..."); } if ($score >= -500 && !find_article_filter($article_filters, 'catchup')) { $unread = 'true'; $last_read_qpart = 'NULL'; } else { $unread = 'false'; $last_read_qpart = 'NOW()'; } if (find_article_filter($article_filters, 'mark') || $score > 1000) { $marked = 'true'; } else { $marked = 'false'; } if (find_article_filter($article_filters, 'publish')) { $published = 'true'; } else { $published = 'false'; } // N-grams if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { $result = db_query($link, "SELECT COUNT(*) AS similar FROM\n\t\t\t\t\t\t\t\t\tttrss_entries,ttrss_user_entries\n\t\t\t\t\t\t\t\tWHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'\n\t\t\t\t\t\t\t\t\tAND similarity(title, '{$entry_title}') >= " . _NGRAM_TITLE_DUPLICATE_THRESHOLD . "\n\t\t\t\t\t\t\t\t\tAND owner_uid = {$owner_uid}"); $ngram_similar = db_fetch_result($result, 0, "similar"); if ($debug_enabled) { _debug("update_rss_feed: N-gram similar results: {$ngram_similar}"); } if ($ngram_similar > 0) { $unread = 'false'; } } $result = db_query($link, "INSERT INTO ttrss_user_entries\n\t\t\t\t\t\t\t\t(ref_id, owner_uid, feed_id, unread, last_read, marked,\n\t\t\t\t\t\t\t\t\tpublished, score, tag_cache, label_cache, uuid)\n\t\t\t\t\t\t\tVALUES ('{$ref_id}', '{$owner_uid}', '{$feed}', {$unread},\n\t\t\t\t\t\t\t\t{$last_read_qpart}, {$marked}, {$published}, '{$score}', '', '', '')"); if (PUBSUBHUBBUB_HUB && $published == 'true') { $rss_link = get_self_url_prefix() . "/public.php?op=rss&id=-2&key=" . get_feed_access_key($link, -2, false, $owner_uid); $p = new Publisher(PUBSUBHUBBUB_HUB); $pubsub_result = $p->publish_update($rss_link); } $result = db_query($link, "SELECT int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}' AND\n\t\t\t\t\t\t\t\tfeed_id = '{$feed}' LIMIT 1"); if (db_num_rows($result) == 1) { $entry_int_id = db_fetch_result($result, 0, "int_id"); } } else { if ($debug_enabled) { _debug("update_rss_feed: user record FOUND"); } $entry_ref_id = db_fetch_result($result, 0, "ref_id"); $entry_int_id = db_fetch_result($result, 0, "int_id"); } if ($debug_enabled) { _debug("update_rss_feed: RID: {$entry_ref_id}, IID: {$entry_int_id}"); } $post_needs_update = false; $update_insignificant = false; if ($orig_num_comments != $num_comments) { $post_needs_update = true; $update_insignificant = true; } if ($content_hash != $orig_content_hash) { $post_needs_update = true; $update_insignificant = false; } if (db_escape_string($orig_title) != $entry_title) { $post_needs_update = true; $update_insignificant = false; } // if post needs update, update it and mark all user entries // linking to this post as updated if ($post_needs_update) { if (defined('DAEMON_EXTENDED_DEBUG')) { _debug("update_rss_feed: post {$entry_guid} needs update..."); } // print "<!-- post $orig_title needs update : $post_needs_update -->"; db_query($link, "UPDATE ttrss_entries\n\t\t\t\t\t\t\tSET title = '{$entry_title}', content = '{$entry_content}',\n\t\t\t\t\t\t\t\tcontent_hash = '{$content_hash}',\n\t\t\t\t\t\t\t\tupdated = '{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\t\tnum_comments = '{$num_comments}'\n\t\t\t\t\t\t\tWHERE id = '{$ref_id}'"); if (!$update_insignificant) { if ($mark_unread_on_update) { db_query($link, "UPDATE ttrss_user_entries\n\t\t\t\t\t\t\t\t\tSET last_read = null, unread = true WHERE ref_id = '{$ref_id}'"); } else { if ($update_on_checksum_change) { db_query($link, "UPDATE ttrss_user_entries\n\t\t\t\t\t\t\t\t\tSET last_read = null WHERE ref_id = '{$ref_id}'\n\t\t\t\t\t\t\t\t\t\tAND unread = false"); } } } } } db_query($link, "COMMIT"); if ($debug_enabled) { _debug("update_rss_feed: assigning labels..."); } assign_article_to_labels($link, $entry_ref_id, $article_filters, $owner_uid); if ($debug_enabled) { _debug("update_rss_feed: looking for enclosures..."); } // enclosures $enclosures = array(); if ($use_simplepie) { $encs = $item->get_enclosures(); if (is_array($encs)) { foreach ($encs as $e) { $e_item = array($e->link, $e->type, $e->length); array_push($enclosures, $e_item); } } } else { // <enclosure> $e_ctr = $item['enclosure#']; if ($e_ctr > 0) { $e_item = array($item['enclosure@url'], $item['enclosure@type'], $item['enclosure@length']); array_push($enclosures, $e_item); for ($i = 0; $i <= $e_ctr; $i++) { if ($item["enclosure#{$i}@url"]) { $e_item = array($item["enclosure#{$i}@url"], $item["enclosure#{$i}@type"], $item["enclosure#{$i}@length"]); array_push($enclosures, $e_item); } } } // <media:content> // can there be many of those? yes -fox $m_ctr = $item['media']['content#']; if ($m_ctr > 0) { $e_item = array($item['media']['content@url'], $item['media']['content@medium'], $item['media']['content@length']); array_push($enclosures, $e_item); for ($i = 0; $i <= $m_ctr; $i++) { if ($item["media"]["content#{$i}@url"]) { $e_item = array($item["media"]["content#{$i}@url"], $item["media"]["content#{$i}@medium"], $item["media"]["content#{$i}@length"]); array_push($enclosures, $e_item); } } } } if ($debug_enabled) { _debug("update_rss_feed: article enclosures:"); print_r($enclosures); } db_query($link, "BEGIN"); foreach ($enclosures as $enc) { $enc_url = db_escape_string($enc[0]); $enc_type = db_escape_string($enc[1]); $enc_dur = db_escape_string($enc[2]); $result = db_query($link, "SELECT id FROM ttrss_enclosures\n\t\t\t\t\t\tWHERE content_url = '{$enc_url}' AND post_id = '{$entry_ref_id}'"); if (db_num_rows($result) == 0) { db_query($link, "INSERT INTO ttrss_enclosures\n\t\t\t\t\t\t\t(content_url, content_type, title, duration, post_id) VALUES\n\t\t\t\t\t\t\t('{$enc_url}', '{$enc_type}', '', '{$enc_dur}', '{$entry_ref_id}')"); } } db_query($link, "COMMIT"); // check for manual tags (we have to do it here since they're loaded from filters) foreach ($article_filters as $f) { if ($f["type"] == "tag") { $manual_tags = trim_array(explode(",", $f["param"])); foreach ($manual_tags as $tag) { if (tag_is_valid($tag)) { array_push($entry_tags, $tag); } } } } // Skip boring tags $boring_tags = trim_array(explode(",", mb_strtolower(get_pref($link, 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8'))); $filtered_tags = array(); $tags_to_cache = array(); if ($entry_tags && is_array($entry_tags)) { foreach ($entry_tags as $tag) { if (array_search($tag, $boring_tags) === false) { array_push($filtered_tags, $tag); } } } $filtered_tags = array_unique($filtered_tags); if ($debug_enabled) { _debug("update_rss_feed: filtered article tags:"); print_r($filtered_tags); } // Save article tags in the database if (count($filtered_tags) > 0) { db_query($link, "BEGIN"); foreach ($filtered_tags as $tag) { $tag = sanitize_tag($tag); $tag = db_escape_string($tag); if (!tag_is_valid($tag)) { continue; } $result = db_query($link, "SELECT id FROM ttrss_tags\n\t\t\t\t\t\t\tWHERE tag_name = '{$tag}' AND post_int_id = '{$entry_int_id}' AND\n\t\t\t\t\t\t\towner_uid = '{$owner_uid}' LIMIT 1"); if ($result && db_num_rows($result) == 0) { db_query($link, "INSERT INTO ttrss_tags\n\t\t\t\t\t\t\t\t\t(owner_uid,tag_name,post_int_id)\n\t\t\t\t\t\t\t\t\tVALUES ('{$owner_uid}','{$tag}', '{$entry_int_id}')"); } array_push($tags_to_cache, $tag); } /* update the cache */ $tags_to_cache = array_unique($tags_to_cache); $tags_str = db_escape_string(join(",", $tags_to_cache)); db_query($link, "UPDATE ttrss_user_entries\n\t\t\t\t\t\tSET tag_cache = '{$tags_str}' WHERE ref_id = '{$entry_ref_id}'\n\t\t\t\t\t\tAND owner_uid = {$owner_uid}"); db_query($link, "COMMIT"); } if ($debug_enabled) { _debug("update_rss_feed: article processed"); } } if (!$last_updated) { if ($debug_enabled) { _debug("update_rss_feed: new feed, catching it up..."); } catchup_feed($link, $feed, false, $owner_uid); } if ($debug_enabled) { _debug("purging feed..."); } purge_feed($link, $feed, 0, $debug_enabled); db_query($link, "UPDATE ttrss_feeds\n\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'"); // db_query($link, "COMMIT"); } else { if ($use_simplepie) { $error_msg = mb_substr($rss->error(), 0, 250); } else { $error_msg = mb_substr(magpie_error(), 0, 250); } if ($debug_enabled) { _debug("update_rss_feed: error fetching feed: {$error_msg}"); } $error_msg = db_escape_string($error_msg); db_query($link, "UPDATE ttrss_feeds SET last_error = '{$error_msg}',\n\t\t\t\t\tlast_updated = NOW() WHERE id = '{$feed}'"); } if ($use_simplepie) { unset($rss); } if ($debug_enabled) { _debug("update_rss_feed: done"); } }