function langfilter_prepare_body(&$a, &$b) { if (get_pconfig(local_user(), 'langfilter', 'disable')) { return; } if (local_user()) { $langs = get_pconfig(local_user(), 'langfilter', 'languages'); } if ($langs) { $arr = explode(',', $langs); } else { return; } $found = false; $l = new Text_LanguageDetect(); $l->_name_mode = 2; // two letter codes $l->_threshold = 600; // make it a bit harder to be confident with a lng // IOW make it more possible that lng is correct $lng = $l->detectSimple($b['html']); if ($lng == null) { return; } if (!in_array($lng, $arr)) { $found = true; } if ($lng == null) { $found = false; } if ($found) { $rnd = random_string(8); $b['html'] = '<div id="langfilter-wrap-' . $rnd . '" class="fakelink" onclick=openClose(\'langfilter-' . $rnd . '\'); >' . sprintf(t('unspoken language %s - Click to open/close'), $lng) . '</div><div id="langfilter-' . $rnd . '" style="display: none; " >' . $b['html'] . '</div>'; } }
} if (($detect_language == 3 || !$language && $detect_language == 2) && $text_sample) { try { if ($use_cld) { // Use PHP-CLD extension $php_cld = 'CLD\\detect'; // in quotes to prevent PHP 5.2 parse error $res = $php_cld($text_sample); if (is_array($res) && count($res) > 0) { $language = $res[0]['code']; } } else { //die('what'); // Use PEAR's Text_LanguageDetect if (!isset($l)) { $l = new Text_LanguageDetect(); $l->setNameMode(2); // return ISO 639-1 codes (e.g. "en") } $l_result = $l->detect($text_sample, 1); if (count($l_result) > 0) { $language = key($l_result); } } } catch (Exception $e) { //die('error: '.$e); // do nothing } } if ($language && strlen($language) < 7) { $newitem->addElement('dc:language', $language);
/** * @brief Takes a string and tries to identify the language. * * It uses the pear library Text_LanguageDetect and it can identify 52 human languages. * It returns the identified languges and a confidence score for each. * * Strings need to have a min length config['system']['language_detect_min_length'] * and you can influence the confidence that must be met before a result will get * returned through config['system']['language_detect_min_confidence']. * * @see http://pear.php.net/package/Text_LanguageDetect * @param string $s A string to examine * @return Language code in 2-letter ISO 639-1 (en, de, fr) format */ function detect_language($s) { require_once 'Text/LanguageDetect.php'; $min_length = get_config('system', 'language_detect_min_length'); if ($min_length === false) { $min_length = LANGUAGE_DETECT_MIN_LENGTH; } $min_confidence = get_config('system', 'language_detect_min_confidence'); if ($min_confidence === false) { $min_confidence = LANGUAGE_DETECT_MIN_CONFIDENCE; } // embedded apps have long base64 strings which will trip up the detector. $naked_body = preg_replace('/\\[app\\](.*?)\\[\\/app\\]/', '', $s); // strip off bbcode $naked_body = preg_replace('/\\[(.+?)\\]/', '', $naked_body); if (mb_strlen($naked_body) < intval($min_length)) { logger('string length less than ' . intval($min_length), LOGGER_DATA); return ''; } $l = new Text_LanguageDetect(); try { // return 2-letter ISO 639-1 (en) language code $l->setNameMode(2); $lng = $l->detectConfidence($naked_body); logger('detect language: ' . print_r($lng, true) . $naked_body, LOGGER_DATA); } catch (Text_LanguageDetect_Exception $e) { logger('detect language exception: ' . $e->getMessage(), LOGGER_DATA); } if (!$lng || !x($lng, 'language')) { return ''; } if ($lng['confidence'] < (double) $min_confidence) { logger('detect language: confidence less than ' . (double) $min_confidence, LOGGER_DATA); return ''; } return $lng['language']; }
} if (($detect_language == 3 || !$language && $detect_language == 2) && $text_sample) { try { if ($use_cld) { // Use PHP-CLD extension $php_cld = 'CLD\\detect'; // in quotes to prevent PHP 5.2 parse error $res = $php_cld($text_sample); if (is_array($res) && count($res) > 0) { $language = $res[0]['code']; } } else { //die('what'); // Use PEAR's Text_LanguageDetect if (!isset($l)) { $l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat'); } $l_result = $l->detect($text_sample, 1); if (count($l_result) > 0) { $language = $language_codes[key($l_result)]; } } } catch (Exception $e) { //die('error: '.$e); // do nothing } } if ($language && strlen($language) < 7) { $newitem->addElement('dc:language', $language); } }
<?php /** * Demonstrates how to use ISO language codes. * * The "name mode" changes the way languages are accepted and returned. */ require_once 'Text/LanguageDetect.php'; $l = new Text_LanguageDetect(); //will output the ISO 639-1 two-letter language code // "de" $l->setNameMode(2); echo $l->detectSimple('Das ist ein kleiner Text') . "\n"; //will output the ISO 639-2 three-letter language code // "deu" $l->setNameMode(3); echo $l->detectSimple('Das ist ein kleiner Text') . "\n";
function item_add_language_opt(&$arr) { if (version_compare(PHP_VERSION, '5.3.0', '<')) { return; } // LanguageDetect.php not available ? if (x($arr, 'postopts')) { if (strstr($arr['postopts'], 'lang=')) { // do not override // TODO: add parameter to request overriding return; } $postopts = $arr['postopts']; } else { $postopts = ""; } require_once 'library/langdet/Text/LanguageDetect.php'; $naked_body = preg_replace('/\\[(.+?)\\]/', '', $arr['body']); $l = new Text_LanguageDetect(); //$lng = $l->detectConfidence($naked_body); //$arr['postopts'] = (($lng['language']) ? 'lang=' . $lng['language'] . ';' . $lng['confidence'] : ''); $lng = $l->detect($naked_body, 3); if (sizeof($lng) > 0) { if ($postopts != "") { $postopts .= '&'; } // arbitrary separator, to be reviewed $postopts .= 'lang='; $sep = ""; foreach ($lng as $language => $score) { $postopts .= $sep . $language . ";" . $score; $sep = ':'; } $arr['postopts'] = $postopts; } }
function item_post(&$a) { if (!local_user() && !remote_user() && !x($_REQUEST, 'commenter')) { return; } require_once 'include/security.php'; $uid = local_user(); if (x($_REQUEST, 'dropitems')) { $arr_drop = explode(',', $_REQUEST['dropitems']); drop_items($arr_drop); $json = array('success' => 1); echo json_encode($json); killme(); } call_hooks('post_local_start', $_REQUEST); // logger('postinput ' . file_get_contents('php://input')); logger('postvars ' . print_r($_REQUEST, true), LOGGER_DATA); $api_source = x($_REQUEST, 'api_source') && $_REQUEST['api_source'] ? true : false; $message_id = x($_REQUEST, 'message_id') && $api_source ? strip_tags($_REQUEST['message_id']) : ''; $return_path = x($_REQUEST, 'return') ? $_REQUEST['return'] : ''; $preview = x($_REQUEST, 'preview') ? intval($_REQUEST['preview']) : 0; // Check for doubly-submitted posts, and reject duplicates // Note that we have to ignore previews, otherwise nothing will post // after it's been previewed if (!$preview && x($_REQUEST['post_id_random'])) { if (x($_SESSION['post-random']) && $_SESSION['post-random'] == $_REQUEST['post_id_random']) { logger("item post: duplicate post", LOGGER_DEBUG); item_post_return($a->get_baseurl(), $api_source, $return_path); } else { $_SESSION['post-random'] = $_REQUEST['post_id_random']; } } /** * Is this a reply to something? */ $parent = x($_REQUEST, 'parent') ? intval($_REQUEST['parent']) : 0; $parent_uri = x($_REQUEST, 'parent_uri') ? trim($_REQUEST['parent_uri']) : ''; $parent_item = null; $parent_contact = null; $thr_parent = ''; $parid = 0; $r = false; $objecttype = null; if ($parent || $parent_uri) { $objecttype = ACTIVITY_OBJ_COMMENT; if (!x($_REQUEST, 'type')) { $_REQUEST['type'] = 'net-comment'; } if ($parent) { $r = q("SELECT * FROM `item` WHERE `id` = %d LIMIT 1", intval($parent)); } elseif ($parent_uri && local_user()) { // This is coming from an API source, and we are logged in $r = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($parent_uri), intval(local_user())); } // if this isn't the real parent of the conversation, find it if ($r !== false && count($r)) { $parid = $r[0]['parent']; $parent_uri = $r[0]['uri']; if ($r[0]['id'] != $r[0]['parent']) { $r = q("SELECT * FROM `item` WHERE `id` = `parent` AND `parent` = %d LIMIT 1", intval($parid)); } } if ($r === false || !count($r)) { notice(t('Unable to locate original post.') . EOL); if (x($_REQUEST, 'return')) { goaway($a->get_baseurl() . "/" . $return_path); } killme(); } $parent_item = $r[0]; $parent = $r[0]['id']; // multi-level threading - preserve the info but re-parent to our single level threading //if(($parid) && ($parid != $parent)) $thr_parent = $parent_uri; if ($parent_item['contact-id'] && $uid) { $r = q("SELECT * FROM `contact` WHERE `id` = %d AND `uid` = %d LIMIT 1", intval($parent_item['contact-id']), intval($uid)); if (count($r)) { $parent_contact = $r[0]; // If the contact id doesn't fit with the contact, then set the contact to null $thrparent = q("SELECT `author-link`, `network` FROM `item` WHERE `uri` = '%s' LIMIT 1", dbesc($thr_parent)); if (count($thrparent) and $thrparent[0]["network"] === NETWORK_OSTATUS and normalise_link($parent_contact["url"]) != normalise_link($thrparent[0]["author-link"])) { $parent_contact = null; require_once "include/Scrape.php"; $probed_contact = probe_url($thrparent[0]["author-link"]); if ($probed_contact["network"] != NETWORK_FEED) { $parent_contact = $probed_contact; $parent_contact["nurl"] = normalise_link($probed_contact["url"]); $parent_contact["thumb"] = $probed_contact["photo"]; $parent_contact["micro"] = $probed_contact["photo"]; } logger('parent contact: ' . print_r($parent_contact, true), LOGGER_DEBUG); } else { logger('no contact found: ' . print_r($thrparent, true), LOGGER_DEBUG); } } } } if ($parent) { logger('mod_item: item_post parent=' . $parent); } $profile_uid = x($_REQUEST, 'profile_uid') ? intval($_REQUEST['profile_uid']) : 0; $post_id = x($_REQUEST, 'post_id') ? intval($_REQUEST['post_id']) : 0; $app = x($_REQUEST, 'source') ? strip_tags($_REQUEST['source']) : ''; $extid = x($_REQUEST, 'extid') ? strip_tags($_REQUEST['extid']) : ''; $allow_moderated = false; // here is where we are going to check for permission to post a moderated comment. // First check that the parent exists and it is a wall item. if (x($_REQUEST, 'commenter') && (!$parent || !$parent_item['wall'])) { notice(t('Permission denied.') . EOL); if (x($_REQUEST, 'return')) { goaway($a->get_baseurl() . "/" . $return_path); } killme(); } // Now check that it is a page_type of PAGE_BLOG, and that valid personal details // have been provided, and run any anti-spam plugins // TODO if (!can_write_wall($a, $profile_uid) && !$allow_moderated) { notice(t('Permission denied.') . EOL); if (x($_REQUEST, 'return')) { goaway($a->get_baseurl() . "/" . $return_path); } killme(); } // is this an edited post? $orig_post = null; if ($post_id) { $i = q("SELECT * FROM `item` WHERE `uid` = %d AND `id` = %d LIMIT 1", intval($profile_uid), intval($post_id)); if (!count($i)) { killme(); } $orig_post = $i[0]; } $user = null; $r = q("SELECT * FROM `user` WHERE `uid` = %d LIMIT 1", intval($profile_uid)); if (count($r)) { $user = $r[0]; } if ($orig_post) { $str_group_allow = $orig_post['allow_gid']; $str_contact_allow = $orig_post['allow_cid']; $str_group_deny = $orig_post['deny_gid']; $str_contact_deny = $orig_post['deny_cid']; $location = $orig_post['location']; $coord = $orig_post['coord']; $verb = $orig_post['verb']; $objecttype = $orig_post['object-type']; $emailcc = $orig_post['emailcc']; $app = $orig_post['app']; $categories = $orig_post['file']; $title = notags(trim($_REQUEST['title'])); $body = escape_tags(trim($_REQUEST['body'])); $private = $orig_post['private']; $pubmail_enable = $orig_post['pubmail']; $network = $orig_post['network']; $guid = $orig_post['guid']; $extid = $orig_post['extid']; } else { // if coming from the API and no privacy settings are set, // use the user default permissions - as they won't have // been supplied via a form. if ($api_source && !array_key_exists('contact_allow', $_REQUEST) && !array_key_exists('group_allow', $_REQUEST) && !array_key_exists('contact_deny', $_REQUEST) && !array_key_exists('group_deny', $_REQUEST)) { $str_group_allow = $user['allow_gid']; $str_contact_allow = $user['allow_cid']; $str_group_deny = $user['deny_gid']; $str_contact_deny = $user['deny_cid']; } else { // use the posted permissions $str_group_allow = perms2str($_REQUEST['group_allow']); $str_contact_allow = perms2str($_REQUEST['contact_allow']); $str_group_deny = perms2str($_REQUEST['group_deny']); $str_contact_deny = perms2str($_REQUEST['contact_deny']); } $title = notags(trim($_REQUEST['title'])); $location = notags(trim($_REQUEST['location'])); $coord = notags(trim($_REQUEST['coord'])); $verb = notags(trim($_REQUEST['verb'])); $emailcc = notags(trim($_REQUEST['emailcc'])); $body = escape_tags(trim($_REQUEST['body'])); $network = notags(trim($_REQUEST['network'])); $guid = get_guid(32); $naked_body = preg_replace('/\\[(.+?)\\]/', '', $body); if (version_compare(PHP_VERSION, '5.3.0', '>=')) { $l = new Text_LanguageDetect(); //$lng = $l->detectConfidence($naked_body); //$postopts = (($lng['language']) ? 'lang=' . $lng['language'] . ';' . $lng['confidence'] : ''); $lng = $l->detect($naked_body, 3); if (sizeof($lng) > 0) { $postopts = ""; foreach ($lng as $language => $score) { if ($postopts == "") { $postopts = "lang="; } else { $postopts .= ":"; } $postopts .= $language . ";" . $score; } } logger('mod_item: detect language' . print_r($lng, true) . $naked_body, LOGGER_DATA); } else { $postopts = ''; } $private = strlen($str_group_allow) || strlen($str_contact_allow) || strlen($str_group_deny) || strlen($str_contact_deny) ? 1 : 0; if ($user['hidewall']) { $private = 2; } // If this is a comment, set the permissions from the parent. if ($parent_item) { $private = 0; // for non native networks use the network of the original post as network of the item if ($parent_item['network'] != NETWORK_DIASPORA and $parent_item['network'] != NETWORK_OSTATUS and $network == "") { $network = $parent_item['network']; } if ($parent_item['private'] || strlen($parent_item['allow_cid']) || strlen($parent_item['allow_gid']) || strlen($parent_item['deny_cid']) || strlen($parent_item['deny_gid'])) { $private = $parent_item['private'] ? $parent_item['private'] : 1; } $str_contact_allow = $parent_item['allow_cid']; $str_group_allow = $parent_item['allow_gid']; $str_contact_deny = $parent_item['deny_cid']; $str_group_deny = $parent_item['deny_gid']; } $pubmail_enable = x($_REQUEST, 'pubmail_enable') && intval($_REQUEST['pubmail_enable']) && !$private ? 1 : 0; // if using the API, we won't see pubmail_enable - figure out if it should be set if ($api_source && $profile_uid && $profile_uid == local_user() && !$private) { $mail_disabled = function_exists('imap_open') && !get_config('system', 'imap_disabled') ? 0 : 1; if (!$mail_disabled) { $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", intval(local_user())); if (count($r) && intval($r[0]['pubmail'])) { $pubmail_enabled = true; } } } if (!strlen($body)) { if ($preview) { killme(); } info(t('Empty post discarded.') . EOL); if (x($_REQUEST, 'return')) { goaway($a->get_baseurl() . "/" . $return_path); } killme(); } } if (strlen($categories)) { // get the "fileas" tags for this post $filedas = file_tag_file_to_list($categories, 'file'); } // save old and new categories, so we can determine what needs to be deleted from pconfig $categories_old = $categories; $categories = file_tag_list_to_file(trim($_REQUEST['category']), 'category'); $categories_new = $categories; if (strlen($filedas)) { // append the fileas stuff to the new categories list $categories .= file_tag_list_to_file($filedas, 'file'); } // Work around doubled linefeeds in Tinymce 3.5b2 // First figure out if it's a status post that would've been // created using tinymce. Otherwise leave it alone. /* $plaintext = (local_user() ? intval(get_pconfig(local_user(),'system','plaintext')) || !feature_enabled($profile_uid,'richtext') : 0); if((! $parent) && (! $api_source) && (! $plaintext)) { $body = fix_mce_lf($body); }*/ $plaintext = local_user() ? !feature_enabled($profile_uid, 'richtext') : 0; if (!$parent && !$api_source && !$plaintext) { $body = fix_mce_lf($body); } // get contact info for poster $author = null; $self = false; $contact_id = 0; if (local_user() && local_user() == $profile_uid) { $self = true; $r = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1 LIMIT 1", intval($_SESSION['uid'])); } elseif (remote_user()) { if (is_array($_SESSION['remote'])) { foreach ($_SESSION['remote'] as $v) { if ($v['uid'] == $profile_uid) { $contact_id = $v['cid']; break; } } } if ($contact_id) { $r = q("SELECT * FROM `contact` WHERE `id` = %d LIMIT 1", intval($contact_id)); } } if (count($r)) { $author = $r[0]; $contact_id = $author['id']; } // get contact info for owner if ($profile_uid == local_user()) { $contact_record = $author; } else { $r = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1 LIMIT 1", intval($profile_uid)); if (count($r)) { $contact_record = $r[0]; } } $post_type = notags(trim($_REQUEST['type'])); if ($post_type === 'net-comment') { if ($parent_item !== null) { if ($parent_item['wall'] == 1) { $post_type = 'wall-comment'; } else { $post_type = 'remote-comment'; } } } /** * * When a photo was uploaded into the message using the (profile wall) ajax * uploader, The permissions are initially set to disallow anybody but the * owner from seeing it. This is because the permissions may not yet have been * set for the post. If it's private, the photo permissions should be set * appropriately. But we didn't know the final permissions on the post until * now. So now we'll look for links of uploaded messages that are in the * post and set them to the same permissions as the post itself. * */ $match = null; if (!$preview && preg_match_all("/\\[img([\\=0-9x]*?)\\](.*?)\\[\\/img\\]/", $body, $match)) { $images = $match[2]; if (count($images)) { $objecttype = ACTIVITY_OBJ_IMAGE; foreach ($images as $image) { if (!stristr($image, $a->get_baseurl() . '/photo/')) { continue; } $image_uri = substr($image, strrpos($image, '/') + 1); $image_uri = substr($image_uri, 0, strpos($image_uri, '-')); if (!strlen($image_uri)) { continue; } $srch = '<' . intval($contact_id) . '>'; $r = q("SELECT `id` FROM `photo` WHERE `allow_cid` = '%s' AND `allow_gid` = '' AND `deny_cid` = '' AND `deny_gid` = ''\n\t\t\t\t\tAND `resource-id` = '%s' AND `uid` = %d LIMIT 1", dbesc($srch), dbesc($image_uri), intval($profile_uid)); if (!count($r)) { continue; } $r = q("UPDATE `photo` SET `allow_cid` = '%s', `allow_gid` = '%s', `deny_cid` = '%s', `deny_gid` = '%s'\n\t\t\t\t\tWHERE `resource-id` = '%s' AND `uid` = %d AND `album` = '%s' ", dbesc($str_contact_allow), dbesc($str_group_allow), dbesc($str_contact_deny), dbesc($str_group_deny), dbesc($image_uri), intval($profile_uid), dbesc(t('Wall Photos'))); } } } /** * Next link in any attachment references we find in the post. */ $match = false; if (!$preview && preg_match_all("/\\[attachment\\](.*?)\\[\\/attachment\\]/", $body, $match)) { $attaches = $match[1]; if (count($attaches)) { foreach ($attaches as $attach) { $r = q("SELECT * FROM `attach` WHERE `uid` = %d AND `id` = %d LIMIT 1", intval($profile_uid), intval($attach)); if (count($r)) { $r = q("UPDATE `attach` SET `allow_cid` = '%s', `allow_gid` = '%s', `deny_cid` = '%s', `deny_gid` = '%s'\n\t\t\t\t\t\tWHERE `uid` = %d AND `id` = %d", dbesc($str_contact_allow), dbesc($str_group_allow), dbesc($str_contact_deny), dbesc($str_group_deny), intval($profile_uid), intval($attach)); } } } } // embedded bookmark in post? set bookmark flag $bookmark = 0; if (preg_match_all("/\\[bookmark\\=([^\\]]*)\\](.*?)\\[\\/bookmark\\]/ism", $body, $match, PREG_SET_ORDER)) { $objecttype = ACTIVITY_OBJ_BOOKMARK; $bookmark = 1; } $body = bb_translate_video($body); /** * Fold multi-line [code] sequences */ $body = preg_replace('/\\[\\/code\\]\\s*\\[code\\]/ism', "\n", $body); $body = scale_external_images($body, false); // Setting the object type if not defined before if (!$objecttype) { $objecttype = ACTIVITY_OBJ_NOTE; // Default value require_once "include/plaintext.php"; $objectdata = get_attached_data($body); if ($post["type"] == "link") { $objecttype = ACTIVITY_OBJ_BOOKMARK; } elseif ($post["type"] == "video") { $objecttype = ACTIVITY_OBJ_VIDEO; } elseif ($post["type"] == "photo") { $objecttype = ACTIVITY_OBJ_IMAGE; } } /** * Look for any tags and linkify them */ $str_tags = ''; $inform = ''; $tags = get_tags($body); /** * add a statusnet style reply tag if the original post was from there * and we are replying, and there isn't one already */ if ($parent_contact && $parent_contact['network'] === NETWORK_OSTATUS && $parent_contact['nick'] && !in_array('@' . $parent_contact['nick'], $tags)) { $body = '@' . $parent_contact['nick'] . ' ' . $body; $tags[] = '@' . $parent_contact['nick']; } $tagged = array(); $private_forum = false; if (count($tags)) { foreach ($tags as $tag) { if (strpos($tag, '#') === 0) { continue; } // If we already tagged 'Robert Johnson', don't try and tag 'Robert'. // Robert Johnson should be first in the $tags array $fullnametagged = false; for ($x = 0; $x < count($tagged); $x++) { if (stristr($tagged[$x], $tag . ' ')) { $fullnametagged = true; break; } } if ($fullnametagged) { continue; } $success = handle_tag($a, $body, $inform, $str_tags, local_user() ? local_user() : $profile_uid, $tag, $network); if ($success['replaced']) { $tagged[] = $tag; } if (is_array($success['contact']) && intval($success['contact']['prv'])) { $private_forum = true; $private_id = $success['contact']['id']; } } } if ($private_forum && !$parent && !$private) { // we tagged a private forum in a top level post and the message was public. // Restrict it. $private = 1; $str_contact_allow = '<' . $private_id . '>'; } $attachments = ''; $match = false; if (preg_match_all('/(\\[attachment\\]([0-9]+)\\[\\/attachment\\])/', $body, $match)) { foreach ($match[2] as $mtch) { $r = q("SELECT `id`,`filename`,`filesize`,`filetype` FROM `attach` WHERE `uid` = %d AND `id` = %d LIMIT 1", intval($profile_uid), intval($mtch)); if (count($r)) { if (strlen($attachments)) { $attachments .= ','; } $attachments .= '[attach]href="' . $a->get_baseurl() . '/attach/' . $r[0]['id'] . '" length="' . $r[0]['filesize'] . '" type="' . $r[0]['filetype'] . '" title="' . ($r[0]['filename'] ? $r[0]['filename'] : '') . '"[/attach]'; } $body = str_replace($match[1], '', $body); } } $wall = 0; if ($post_type === 'wall' || $post_type === 'wall-comment') { $wall = 1; } if (!strlen($verb)) { $verb = ACTIVITY_POST; } if ($network == "") { $network = NETWORK_DFRN; } $gravity = $parent ? 6 : 0; // even if the post arrived via API we are considering that it // originated on this site by default for determining relayability. $origin = x($_REQUEST, 'origin') ? intval($_REQUEST['origin']) : 1; $notify_type = $parent ? 'comment-new' : 'wall-new'; $uri = $message_id ? $message_id : item_new_uri($a->get_hostname(), $profile_uid); // Fallback so that we alway have a thr-parent if (!$thr_parent) { $thr_parent = $uri; } $datarray = array(); $datarray['uid'] = $profile_uid; $datarray['type'] = $post_type; $datarray['wall'] = $wall; $datarray['gravity'] = $gravity; $datarray['network'] = $network; $datarray['contact-id'] = $contact_id; $datarray['owner-name'] = $contact_record['name']; $datarray['owner-link'] = $contact_record['url']; $datarray['owner-avatar'] = $contact_record['thumb']; $datarray['author-name'] = $author['name']; $datarray['author-link'] = $author['url']; $datarray['author-avatar'] = $author['thumb']; $datarray['created'] = datetime_convert(); $datarray['edited'] = datetime_convert(); $datarray['commented'] = datetime_convert(); $datarray['received'] = datetime_convert(); $datarray['changed'] = datetime_convert(); $datarray['extid'] = $extid; $datarray['guid'] = $guid; $datarray['uri'] = $uri; $datarray['title'] = $title; $datarray['body'] = $body; $datarray['app'] = $app; $datarray['location'] = $location; $datarray['coord'] = $coord; $datarray['tag'] = $str_tags; $datarray['file'] = $categories; $datarray['inform'] = $inform; $datarray['verb'] = $verb; $datarray['object-type'] = $objecttype; $datarray['allow_cid'] = $str_contact_allow; $datarray['allow_gid'] = $str_group_allow; $datarray['deny_cid'] = $str_contact_deny; $datarray['deny_gid'] = $str_group_deny; $datarray['private'] = $private; $datarray['pubmail'] = $pubmail_enable; $datarray['attach'] = $attachments; $datarray['bookmark'] = intval($bookmark); $datarray['thr-parent'] = $thr_parent; $datarray['postopts'] = $postopts; $datarray['origin'] = $origin; $datarray['moderated'] = $allow_moderated; /** * These fields are for the convenience of plugins... * 'self' if true indicates the owner is posting on their own wall * If parent is 0 it is a top-level post. */ $datarray['parent'] = $parent; $datarray['self'] = $self; // $datarray['prvnets'] = $user['prvnets']; if ($orig_post) { $datarray['edit'] = true; } // Search for hashtags item_body_set_hashtags($datarray); // preview mode - prepare the body for display and send it via json if ($preview) { require_once 'include/conversation.php'; $o = conversation($a, array(array_merge($contact_record, $datarray)), 'search', false, true); logger('preview: ' . $o); echo json_encode(array('preview' => $o)); killme(); } call_hooks('post_local', $datarray); if (x($datarray, 'cancel')) { logger('mod_item: post cancelled by plugin.'); if ($return_path) { goaway($a->get_baseurl() . "/" . $return_path); } $json = array('cancel' => 1); if (x($_REQUEST, 'jsreload') && strlen($_REQUEST['jsreload'])) { $json['reload'] = $a->get_baseurl() . '/' . $_REQUEST['jsreload']; } echo json_encode($json); killme(); } // Fill the cache field put_item_in_cache($datarray); if ($orig_post) { $r = q("UPDATE `item` SET `title` = '%s', `body` = '%s', `tag` = '%s', `attach` = '%s', `file` = '%s', `rendered-html` = '%s', `rendered-hash` = '%s', `edited` = '%s', `changed` = '%s' WHERE `id` = %d AND `uid` = %d", dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['tag']), dbesc($datarray['attach']), dbesc($datarray['file']), dbesc($datarray['rendered-html']), dbesc($datarray['rendered-hash']), dbesc(datetime_convert()), dbesc(datetime_convert()), intval($post_id), intval($profile_uid)); create_tags_from_item($post_id); create_files_from_item($post_id); update_thread($post_id); // update filetags in pconfig file_tag_update_pconfig($uid, $categories_old, $categories_new, 'category'); proc_run('php', "include/notifier.php", 'edit_post', "{$post_id}"); if (x($_REQUEST, 'return') && strlen($return_path)) { logger('return: ' . $return_path); goaway($a->get_baseurl() . "/" . $return_path); } killme(); } else { $post_id = 0; } $r = q("INSERT INTO `item` (`guid`, `extid`, `uid`,`type`,`wall`,`gravity`, `network`, `contact-id`,`owner-name`,`owner-link`,`owner-avatar`, `author-name`, `author-link`, `author-avatar`,\n\t\t`created`, `edited`, `commented`, `received`, `changed`, `uri`, `thr-parent`, `title`, `body`, `app`, `location`, `coord`, `tag`, `inform`, `verb`, `object-type`, `postopts`,\n\t\t`allow_cid`, `allow_gid`, `deny_cid`, `deny_gid`, `private`, `pubmail`, `attach`, `bookmark`,`origin`, `moderated`, `file`, `rendered-html`, `rendered-hash`)\n\t\tVALUES( '%s', '%s', %d, '%s', %d, %d, '%s', %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, '%s', %d, %d, %d, '%s', '%s', '%s')", dbesc($datarray['guid']), dbesc($datarray['extid']), intval($datarray['uid']), dbesc($datarray['type']), intval($datarray['wall']), intval($datarray['gravity']), dbesc($datarray['network']), intval($datarray['contact-id']), dbesc($datarray['owner-name']), dbesc($datarray['owner-link']), dbesc($datarray['owner-avatar']), dbesc($datarray['author-name']), dbesc($datarray['author-link']), dbesc($datarray['author-avatar']), dbesc($datarray['created']), dbesc($datarray['edited']), dbesc($datarray['commented']), dbesc($datarray['received']), dbesc($datarray['changed']), dbesc($datarray['uri']), dbesc($datarray['thr-parent']), dbesc($datarray['title']), dbesc($datarray['body']), dbesc($datarray['app']), dbesc($datarray['location']), dbesc($datarray['coord']), dbesc($datarray['tag']), dbesc($datarray['inform']), dbesc($datarray['verb']), dbesc($datarray['object-type']), dbesc($datarray['postopts']), dbesc($datarray['allow_cid']), dbesc($datarray['allow_gid']), dbesc($datarray['deny_cid']), dbesc($datarray['deny_gid']), intval($datarray['private']), intval($datarray['pubmail']), dbesc($datarray['attach']), intval($datarray['bookmark']), intval($datarray['origin']), intval($datarray['moderated']), dbesc($datarray['file']), dbesc($datarray['rendered-html']), dbesc($datarray['rendered-hash'])); $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' LIMIT 1", dbesc($datarray['uri'])); if (!count($r)) { logger('mod_item: unable to retrieve post that was just stored.'); notice(t('System error. Post not saved.') . EOL); goaway($a->get_baseurl() . "/" . $return_path); // NOTREACHED } $post_id = $r[0]['id']; logger('mod_item: saved item ' . $post_id); $datarray["id"] = $post_id; $datarray["plink"] = $a->get_baseurl() . '/display/' . urlencode($datarray["guid"]); // update filetags in pconfig file_tag_update_pconfig($uid, $categories_old, $categories_new, 'category'); if ($parent) { // This item is the last leaf and gets the comment box, clear any ancestors $r = q("UPDATE `item` SET `last-child` = 0, `changed` = '%s' WHERE `parent` = %d ", dbesc(datetime_convert()), intval($parent)); update_thread($parent, true); // Inherit ACLs from the parent item. $r = q("UPDATE `item` SET `allow_cid` = '%s', `allow_gid` = '%s', `deny_cid` = '%s', `deny_gid` = '%s', `private` = %d\n\t\t\tWHERE `id` = %d", dbesc($parent_item['allow_cid']), dbesc($parent_item['allow_gid']), dbesc($parent_item['deny_cid']), dbesc($parent_item['deny_gid']), intval($parent_item['private']), intval($post_id)); if ($contact_record != $author) { notification(array('type' => NOTIFY_COMMENT, 'notify_flags' => $user['notify-flags'], 'language' => $user['language'], 'to_name' => $user['username'], 'to_email' => $user['email'], 'uid' => $user['uid'], 'item' => $datarray, 'link' => $a->get_baseurl() . '/display/' . urlencode($datarray['guid']), 'source_name' => $datarray['author-name'], 'source_link' => $datarray['author-link'], 'source_photo' => $datarray['author-avatar'], 'verb' => ACTIVITY_POST, 'otype' => 'item', 'parent' => $parent, 'parent_uri' => $parent_item['uri'])); } // Store the comment signature information in case we need to relay to Diaspora store_diaspora_comment_sig($datarray, $author, $self ? $a->user['prvkey'] : false, $parent_item, $post_id); } else { $parent = $post_id; if ($contact_record != $author) { notification(array('type' => NOTIFY_WALL, 'notify_flags' => $user['notify-flags'], 'language' => $user['language'], 'to_name' => $user['username'], 'to_email' => $user['email'], 'uid' => $user['uid'], 'item' => $datarray, 'link' => $a->get_baseurl() . '/display/' . urlencode($datarray['guid']), 'source_name' => $datarray['author-name'], 'source_link' => $datarray['author-link'], 'source_photo' => $datarray['author-avatar'], 'verb' => ACTIVITY_POST, 'otype' => 'item')); } } // fallback so that parent always gets set to non-zero. if (!$parent) { $parent = $post_id; } $r = q("UPDATE `item` SET `parent` = %d, `parent-uri` = '%s', `plink` = '%s', `changed` = '%s', `last-child` = 1, `visible` = 1\n\t\tWHERE `id` = %d", intval($parent), dbesc($parent == $post_id ? $uri : $parent_item['uri']), dbesc($a->get_baseurl() . '/display/' . urlencode($datarray['guid'])), dbesc(datetime_convert()), intval($post_id)); // photo comments turn the corresponding item visible to the profile wall // This way we don't see every picture in your new photo album posted to your wall at once. // They will show up as people comment on them. if (!$parent_item['visible']) { $r = q("UPDATE `item` SET `visible` = 1 WHERE `id` = %d", intval($parent_item['id'])); update_thread($parent_item['id']); } // update the commented timestamp on the parent q("UPDATE `item` set `commented` = '%s', `changed` = '%s' WHERE `id` = %d", dbesc(datetime_convert()), dbesc(datetime_convert()), intval($parent)); if ($post_id != $parent) { update_thread($parent); } call_hooks('post_local_end', $datarray); if (strlen($emailcc) && $profile_uid == local_user()) { $erecips = explode(',', $emailcc); if (count($erecips)) { foreach ($erecips as $recip) { $addr = trim($recip); if (!strlen($addr)) { continue; } $disclaimer = '<hr />' . sprintf(t('This message was sent to you by %s, a member of the Friendica social network.'), $a->user['username']) . '<br />'; $disclaimer .= sprintf(t('You may visit them online at %s'), $a->get_baseurl() . '/profile/' . $a->user['nickname']) . EOL; $disclaimer .= t('Please contact the sender by replying to this post if you do not wish to receive these messages.') . EOL; if (!$datarray['title'] == '') { $subject = email_header_encode($datarray['title'], 'UTF-8'); } else { $subject = email_header_encode('[Friendica]' . ' ' . sprintf(t('%s posted an update.'), $a->user['username']), 'UTF-8'); } $link = '<a href="' . $a->get_baseurl() . '/profile/' . $a->user['nickname'] . '"><img src="' . $author['thumb'] . '" alt="' . $a->user['username'] . '" /></a><br /><br />'; $html = prepare_body($datarray); $message = '<html><body>' . $link . $html . $disclaimer . '</body></html>'; include_once 'include/html2plain.php'; $params = array('fromName' => $a->user['username'], 'fromEmail' => $a->user['email'], 'toEmail' => $addr, 'replyTo' => $a->user['email'], 'messageSubject' => $subject, 'htmlVersion' => $message, 'textVersion' => html2plain($html . $disclaimer)); Emailer::send($params); } } } create_tags_from_item($post_id); create_files_from_item($post_id); if ($post_id == $parent) { add_thread($post_id); } // This is a real juggling act on shared hosting services which kill your processes // e.g. dreamhost. We used to start delivery to our native delivery agents in the background // and then run our plugin delivery from the foreground. We're now doing plugin delivery first, // because as soon as you start loading up a bunch of remote delivey processes, *this* page is // likely to get killed off. If you end up looking at an /item URL and a blank page, // it's very likely the delivery got killed before all your friends could be notified. // Currently the only realistic fixes are to use a reliable server - which precludes shared hosting, // or cut back on plugins which do remote deliveries. proc_run('php', "include/notifier.php", $notify_type, "{$post_id}"); logger('post_complete'); item_post_return($a->get_baseurl(), $api_source, $return_path); // NOTREACHED }
function update_rss_feed($feed, $ignore_daemon = false, $no_cache = false, $rss = false) { $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']; _debug_suppress(!$debug_enabled); _debug("start", $debug_enabled); $result = db_query("SELECT id,update_interval,auth_login,\n\t\t\tfeed_url,auth_pass,cache_images,\n\t\t\tmark_unread_on_update, owner_uid,\n\t\t\tpubsub_state, auth_pass_encrypted,\n\t\t\t(SELECT max(date_entered) FROM\n\t\t\t\tttrss_entries, ttrss_user_entries where ref_id = id AND feed_id = '{$feed}') AS last_article_timestamp\n\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'"); if (db_num_rows($result) == 0) { _debug("feed {$feed} NOT FOUND/SKIPPED", $debug_enabled); return false; } $last_article_timestamp = @strtotime(db_fetch_result($result, 0, "last_article_timestamp")); if (defined('_DISABLE_HTTP_304')) { $last_article_timestamp = 0; } $owner_uid = db_fetch_result($result, 0, "owner_uid"); $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update")); $pubsub_state = db_fetch_result($result, 0, "pubsub_state"); $auth_pass_encrypted = sql_bool_to_bool(db_fetch_result($result, 0, "auth_pass_encrypted")); db_query("UPDATE ttrss_feeds SET last_update_started = NOW()\n\t\t\tWHERE id = '{$feed}'"); $auth_login = db_fetch_result($result, 0, "auth_login"); $auth_pass = db_fetch_result($result, 0, "auth_pass"); if ($auth_pass_encrypted) { require_once "crypt.php"; $auth_pass = decrypt_string($auth_pass); } $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); $fetch_url = db_fetch_result($result, 0, "feed_url"); $feed = db_escape_string($feed); $date_feed_processed = date('Y-m-d H:i'); $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".xml"; $pluginhost = new PluginHost(); $pluginhost->set_debug($debug_enabled); $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid); $pluginhost->load(PLUGINS, PluginHost::KIND_ALL); $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid); $pluginhost->load_data(); if ($rss && is_object($rss) && get_class($rss) == "FeedParser") { _debug("using previously initialized parser object"); } else { $rss_hash = false; $force_refetch = isset($_REQUEST["force_refetch"]); foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) { $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, $last_article_timestamp, $auth_login, $auth_pass); } // try cache if (!$feed_data && file_exists($cache_filename) && is_readable($cache_filename) && !$auth_login && !$auth_pass && filemtime($cache_filename) > time() - 30) { _debug("using local cache [{$cache_filename}].", $debug_enabled); @($feed_data = file_get_contents($cache_filename)); if ($feed_data) { $rss_hash = sha1($feed_data); } } else { _debug("local cache will not be used for this feed", $debug_enabled); } // fetch feed from source if (!$feed_data) { _debug("fetching [{$fetch_url}]...", $debug_enabled); _debug("If-Modified-Since: " . gmdate('D, d M Y H:i:s \\G\\M\\T', $last_article_timestamp), $debug_enabled); $feed_data = fetch_file_contents($fetch_url, false, $auth_login, $auth_pass, false, $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT, $force_refetch ? 0 : $last_article_timestamp); global $fetch_curl_used; if (!$fetch_curl_used) { $tmp = @gzdecode($feed_data); if ($tmp) { $feed_data = $tmp; } } $feed_data = trim($feed_data); _debug("fetch done.", $debug_enabled); /* if ($feed_data) { $error = verify_feed_xml($feed_data); if ($error) { _debug("error verifying XML, code: " . $error->code, $debug_enabled); if ($error->code == 26) { _debug("got error 26, trying to decode entities...", $debug_enabled); $feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8'); $error = verify_feed_xml($feed_data); if ($error) $feed_data = ''; } } } */ // cache vanilla feed data for re-use if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) { $new_rss_hash = sha1($feed_data); if ($new_rss_hash != $rss_hash) { _debug("saving {$cache_filename}", $debug_enabled); @file_put_contents($cache_filename, $feed_data); } } } if (!$feed_data) { global $fetch_last_error; global $fetch_last_error_code; _debug("unable to fetch: {$fetch_last_error} [{$fetch_last_error_code}]", $debug_enabled); $error_escaped = ''; // If-Modified-Since if ($fetch_last_error_code != 304) { $error_escaped = db_escape_string($fetch_last_error); } else { _debug("source claims data not modified, nothing to do.", $debug_enabled); } db_query("UPDATE ttrss_feeds SET last_error = '{$error_escaped}',\n\t\t\t\t\t\tlast_updated = NOW() WHERE id = '{$feed}'"); return; } } foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) { $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed); } // set last update to now so if anything *simplepie* crashes later we won't be // continuously failing on the same feed //db_query("UPDATE ttrss_feeds SET last_updated = NOW() WHERE id = '$feed'"); if (!$rss) { $rss = new FeedParser($feed_data); $rss->init(); } if (DETECT_ARTICLE_LANGUAGE) { require_once "lib/languagedetect/LanguageDetect.php"; $lang = new Text_LanguageDetect(); $lang->setNameMode(2); } // print_r($rss); $feed = db_escape_string($feed); if (!$rss->error()) { // We use local pluginhost here because we need to load different per-user feed plugins $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss); _debug("processing feed data...", $debug_enabled); // db_query("BEGIN"); if (DB_TYPE == "pgsql") { $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'"; } else { $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)"; } $result = db_query("SELECT title,site_url,owner_uid,favicon_avg_color,\n\t\t\t\t(favicon_last_checked IS NULL OR {$favicon_interval_qpart}) AS\n\t\t\t\t\t\tfavicon_needs_check\n\t\t\t\tFROM ttrss_feeds WHERE id = '{$feed}'"); $registered_title = db_fetch_result($result, 0, "title"); $orig_site_url = db_fetch_result($result, 0, "site_url"); $favicon_needs_check = sql_bool_to_bool(db_fetch_result($result, 0, "favicon_needs_check")); $favicon_avg_color = db_fetch_result($result, 0, "favicon_avg_color"); $owner_uid = db_fetch_result($result, 0, "owner_uid"); $site_url = db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)); _debug("site_url: {$site_url}", $debug_enabled); _debug("feed_title: " . $rss->get_title(), $debug_enabled); if ($favicon_needs_check || $force_refetch) { /* terrible hack: if we crash on floicon shit here, we won't check * the icon avgcolor again (unless the icon got updated) */ $favicon_file = ICONS_DIR . "/{$feed}.ico"; $favicon_modified = @filemtime($favicon_file); _debug("checking favicon...", $debug_enabled); check_feed_favicon($site_url, $feed); $favicon_modified_new = @filemtime($favicon_file); if ($favicon_modified_new > $favicon_modified) { $favicon_avg_color = ''; } if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') { require_once "colors.php"; db_query("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE\n\t\t\t\t\t\t\tid = '{$feed}'"); $favicon_color = db_escape_string(calculate_avg_color($favicon_file)); $favicon_colorstring = ",favicon_avg_color = '" . $favicon_color . "'"; } else { if ($favicon_avg_color == 'fail') { _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled); } } db_query("UPDATE ttrss_feeds SET favicon_last_checked = NOW()\n\t\t\t\t\t{$favicon_colorstring}\n\t\t\t\t\tWHERE id = '{$feed}'"); } if (!$registered_title || $registered_title == "[Unknown]") { $feed_title = db_escape_string(mb_substr($rss->get_title(), 0, 199)); if ($feed_title) { _debug("registering title: {$feed_title}", $debug_enabled); db_query("UPDATE ttrss_feeds SET\n\t\t\t\t\t\ttitle = '{$feed_title}' WHERE id = '{$feed}'"); } } if ($site_url && $orig_site_url != $site_url) { db_query("UPDATE ttrss_feeds SET\n\t\t\t\t\tsite_url = '{$site_url}' WHERE id = '{$feed}'"); } _debug("loading filters & labels...", $debug_enabled); $filters = load_filters($feed, $owner_uid); $labels = get_all_labels($owner_uid); _debug("" . count($filters) . " filters loaded.", $debug_enabled); $items = $rss->get_items(); if (!is_array($items)) { _debug("no articles found.", $debug_enabled); db_query("UPDATE ttrss_feeds\n\t\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'"); return; // no articles } if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) { _debug("checking for PUSH hub...", $debug_enabled); $feed_hub_url = false; $links = $rss->get_links('hub'); if ($links && is_array($links)) { foreach ($links as $l) { $feed_hub_url = $l; break; } } _debug("feed hub url: {$feed_hub_url}", $debug_enabled); $feed_self_url = $fetch_url; $links = $rss->get_links('self'); if ($links && is_array($links)) { foreach ($links as $l) { $feed_self_url = $l; break; } } _debug("feed self url = {$feed_self_url}"); if ($feed_hub_url && $feed_self_url && function_exists('curl_init') && !ini_get("open_basedir")) { require_once 'lib/pubsubhubbub/subscriber.php'; $callback_url = get_self_url_prefix() . "/public.php?op=pubsub&id={$feed}"; $s = new Subscriber($feed_hub_url, $callback_url); $rc = $s->subscribe($feed_self_url); _debug("feed hub url found, subscribe request sent. [rc={$rc}]", $debug_enabled); db_query("UPDATE ttrss_feeds SET pubsub_state = 1\n\t\t\t\t\t\tWHERE id = '{$feed}'"); } } _debug("processing articles...", $debug_enabled); foreach ($items as $item) { if ($_REQUEST['xdebug'] == 3) { print_r($item); } $entry_guid = $item->get_id(); if (!$entry_guid) { $entry_guid = $item->get_link(); } if (!$entry_guid) { $entry_guid = make_guid_from_title($item->get_title()); } if (!$entry_guid) { continue; } $entry_guid = "{$owner_uid},{$entry_guid}"; $entry_guid_hashed = db_escape_string('SHA1:' . sha1($entry_guid)); _debug("guid {$entry_guid} / {$entry_guid_hashed}", $debug_enabled); $entry_timestamp = ""; $entry_timestamp = $item->get_date(); _debug("orig date: " . $item->get_date(), $debug_enabled); if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) { $entry_timestamp = time(); } $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); _debug("date {$entry_timestamp} [{$entry_timestamp_fmt}]", $debug_enabled); // $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8'); // $entry_title = decode_numeric_entities($entry_title); $entry_title = $item->get_title(); $entry_link = rewrite_relative_url($site_url, $item->get_link()); _debug("title {$entry_title}", $debug_enabled); _debug("link {$entry_link}", $debug_enabled); if (!$entry_title) { $entry_title = date("Y-m-d H:i:s", $entry_timestamp); } $entry_content = $item->get_content(); if (!$entry_content) { $entry_content = $item->get_description(); } if ($_REQUEST["xdebug"] == 2) { print "content: "; print $entry_content; print "\n"; } $entry_language = ""; if (DETECT_ARTICLE_LANGUAGE) { $entry_language = $lang->detect($entry_title . " " . $entry_content, 1); if (count($entry_language) > 0) { $possible = array_keys($entry_language); $entry_language = $possible[0]; _debug("detected language: {$entry_language}", $debug_enabled); } else { $entry_language = ""; } } $entry_comments = $item->get_comments_url(); $entry_author = $item->get_author(); $entry_guid = db_escape_string(mb_substr($entry_guid, 0, 245)); $entry_comments = db_escape_string(mb_substr(trim($entry_comments), 0, 245)); $entry_author = db_escape_string(mb_substr(trim($entry_author), 0, 245)); $num_comments = (int) $item->get_comments_count(); _debug("author {$entry_author}", $debug_enabled); _debug("num_comments: {$num_comments}", $debug_enabled); _debug("looking for tags...", $debug_enabled); // parse <category> entries into tags $additional_tags = array(); $additional_tags_src = $item->get_categories(); if (is_array($additional_tags_src)) { foreach ($additional_tags_src as $tobj) { array_push($additional_tags, $tobj); } } $entry_tags = array_unique($additional_tags); for ($i = 0; $i < count($entry_tags); $i++) { $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8'); } _debug("tags found: " . join(",", $entry_tags), $debug_enabled); _debug("done collecting data.", $debug_enabled); $result = db_query("SELECT id, content_hash FROM ttrss_entries\n\t\t\t\t\tWHERE guid = '" . db_escape_string($entry_guid) . "' OR guid = '{$entry_guid_hashed}'"); if (db_num_rows($result) != 0) { $base_entry_id = db_fetch_result($result, 0, "id"); $entry_stored_hash = db_fetch_result($result, 0, "content_hash"); } else { $base_entry_id = false; $entry_stored_hash = ""; } $article = array("owner_uid" => $owner_uid, "guid" => $entry_guid, "title" => $entry_title, "content" => $entry_content, "link" => $entry_link, "tags" => $entry_tags, "author" => $entry_author, "language" => $entry_language, "feed" => array("id" => $feed, "fetch_url" => $fetch_url, "site_url" => $site_url)); $entry_plugin_data = ""; $entry_current_hash = calculate_article_hash($article, $pluginhost); _debug("article hash: {$entry_current_hash} [stored={$entry_stored_hash}]", $debug_enabled); if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) { _debug("stored article seems up to date [IID: {$base_entry_id}], updating timestamp only", $debug_enabled); // we keep encountering the entry in feeds, so we need to // update date_updated column so that we don't get horrible // dupes when the entry gets purged and reinserted again e.g. // in the case of SLOW SLOW OMG SLOW updating feeds $base_entry_id = db_fetch_result($result, 0, "id"); db_query("UPDATE ttrss_entries SET date_updated = NOW()\n\t\t\t\t\t\tWHERE id = '{$base_entry_id}'"); // if we allow duplicate posts, we have to continue to // create the user entries for this feed if (!get_pref("ALLOW_DUPLICATE_POSTS", $owner_uid, false)) { continue; } } _debug("hash differs, applying plugin filters:", $debug_enabled); foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) { _debug("... " . get_class($plugin), $debug_enabled); $start = microtime(true); $article = $plugin->hook_article_filter($article); _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled); $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ","; } $entry_plugin_data = db_escape_string($entry_plugin_data); _debug("plugin data: {$entry_plugin_data}", $debug_enabled); $entry_tags = $article["tags"]; $entry_guid = db_escape_string($entry_guid); $entry_title = db_escape_string($article["title"]); $entry_author = db_escape_string($article["author"]); $entry_link = db_escape_string($article["link"]); $entry_content = $article["content"]; // escaped below if ($cache_images && is_writable(CACHE_DIR . '/images')) { cache_images($entry_content, $site_url, $debug_enabled); } $entry_content = db_escape_string($entry_content, false); db_query("BEGIN"); $result = db_query("SELECT id FROM\tttrss_entries\n\t\t\t\t\tWHERE (guid = '{$entry_guid}' OR guid = '{$entry_guid_hashed}')"); if (db_num_rows($result) == 0) { _debug("base guid [{$entry_guid}] not found", $debug_enabled); // base post entry does not exist, create it $result = db_query("INSERT INTO ttrss_entries\n\t\t\t\t\t\t\t(title,\n\t\t\t\t\t\t\tguid,\n\t\t\t\t\t\t\tlink,\n\t\t\t\t\t\t\tupdated,\n\t\t\t\t\t\t\tcontent,\n\t\t\t\t\t\t\tcontent_hash,\n\t\t\t\t\t\t\tno_orig_date,\n\t\t\t\t\t\t\tdate_updated,\n\t\t\t\t\t\t\tdate_entered,\n\t\t\t\t\t\t\tcomments,\n\t\t\t\t\t\t\tnum_comments,\n\t\t\t\t\t\t\tplugin_data,\n\t\t\t\t\t\t\tlang,\n\t\t\t\t\t\t\tauthor)\n\t\t\t\t\t\tVALUES\n\t\t\t\t\t\t\t('{$entry_title}',\n\t\t\t\t\t\t\t'{$entry_guid_hashed}',\n\t\t\t\t\t\t\t'{$entry_link}',\n\t\t\t\t\t\t\t'{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\t'{$entry_content}',\n\t\t\t\t\t\t\t'{$entry_current_hash}',\n\t\t\t\t\t\t\tfalse,\n\t\t\t\t\t\t\tNOW(),\n\t\t\t\t\t\t\t'{$date_feed_processed}',\n\t\t\t\t\t\t\t'{$entry_comments}',\n\t\t\t\t\t\t\t'{$num_comments}',\n\t\t\t\t\t\t\t'{$entry_plugin_data}',\n\t\t\t\t\t\t\t'{$entry_language}',\n\t\t\t\t\t\t\t'{$entry_author}')"); $article_labels = array(); } else { $base_entry_id = db_fetch_result($result, 0, "id"); $article_labels = get_article_labels($base_entry_id, $owner_uid); } // now it should exist, if not - bad luck then $result = db_query("SELECT id FROM ttrss_entries\n\t\t\t\t\tWHERE guid = '{$entry_guid}' OR guid = '{$entry_guid_hashed}'"); $entry_ref_id = 0; $entry_int_id = 0; if (db_num_rows($result) == 1) { _debug("base guid found, checking for user record", $debug_enabled); $ref_id = db_fetch_result($result, 0, "id"); $entry_ref_id = $ref_id; /* $stored_guid = db_fetch_result($result, 0, "guid"); if ($stored_guid != $entry_guid_hashed) { if ($debug_enabled) _debug("upgrading compat guid to hashed one", $debug_enabled); db_query("UPDATE ttrss_entries SET guid = '$entry_guid_hashed' WHERE id = '$ref_id'"); } */ // check for user post link to main table // do we allow duplicate posts with same GUID in different feeds? if (get_pref("ALLOW_DUPLICATE_POSTS", $owner_uid, false)) { $dupcheck_qpart = "AND (feed_id = '{$feed}' OR feed_id IS NULL)"; } else { $dupcheck_qpart = ""; } /* Collect article tags here so we could filter by them: */ $article_filters = get_article_filters($filters, $entry_title, $entry_content, $entry_link, $entry_timestamp, $entry_author, $entry_tags); if ($debug_enabled) { _debug("article filters: ", $debug_enabled); if (count($article_filters) != 0) { print_r($article_filters); } } if (find_article_filter($article_filters, "filter")) { db_query("COMMIT"); // close transaction in progress continue; } $score = calculate_article_score($article_filters); _debug("initial score: {$score}", $debug_enabled); $query = "SELECT ref_id, int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}'\n\t\t\t\t\t\t\t{$dupcheck_qpart}"; // if ($_REQUEST["xdebug"]) print "$query\n"; $result = db_query($query); // okay it doesn't exist - create user entry if (db_num_rows($result) == 0) { _debug("user record not found, creating...", $debug_enabled); if ($score >= -500 && !find_article_filter($article_filters, 'catchup')) { $unread = 'true'; $last_read_qpart = 'NULL'; } else { $unread = 'false'; $last_read_qpart = 'NOW()'; } if (find_article_filter($article_filters, 'mark') || $score > 1000) { $marked = 'true'; } else { $marked = 'false'; } if (find_article_filter($article_filters, 'publish')) { $published = 'true'; } else { $published = 'false'; } // N-grams if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { $result = db_query("SELECT COUNT(*) AS similar FROM\n\t\t\t\t\t\t\t\t\tttrss_entries,ttrss_user_entries\n\t\t\t\t\t\t\t\tWHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'\n\t\t\t\t\t\t\t\t\tAND similarity(title, '{$entry_title}') >= " . _NGRAM_TITLE_DUPLICATE_THRESHOLD . "\n\t\t\t\t\t\t\t\t\tAND owner_uid = {$owner_uid}"); $ngram_similar = db_fetch_result($result, 0, "similar"); _debug("N-gram similar results: {$ngram_similar}", $debug_enabled); if ($ngram_similar > 0) { $unread = 'false'; } } $last_marked = $marked == 'true' ? 'NOW()' : 'NULL'; $last_published = $published == 'true' ? 'NOW()' : 'NULL'; $result = db_query("INSERT INTO ttrss_user_entries\n\t\t\t\t\t\t\t\t(ref_id, owner_uid, feed_id, unread, last_read, marked,\n\t\t\t\t\t\t\t\tpublished, score, tag_cache, label_cache, uuid,\n\t\t\t\t\t\t\t\tlast_marked, last_published)\n\t\t\t\t\t\t\tVALUES ('{$ref_id}', '{$owner_uid}', '{$feed}', {$unread},\n\t\t\t\t\t\t\t\t{$last_read_qpart}, {$marked}, {$published}, '{$score}', '', '',\n\t\t\t\t\t\t\t\t'', {$last_marked}, {$last_published})"); if (PUBSUBHUBBUB_HUB && $published == 'true') { $rss_link = get_self_url_prefix() . "/public.php?op=rss&id=-2&key=" . get_feed_access_key(-2, false, $owner_uid); $p = new Publisher(PUBSUBHUBBUB_HUB); /* $pubsub_result = */ $p->publish_update($rss_link); } $result = db_query("SELECT int_id FROM ttrss_user_entries WHERE\n\t\t\t\t\t\t\t\tref_id = '{$ref_id}' AND owner_uid = '{$owner_uid}' AND\n\t\t\t\t\t\t\t\tfeed_id = '{$feed}' LIMIT 1"); if (db_num_rows($result) == 1) { $entry_int_id = db_fetch_result($result, 0, "int_id"); } } else { _debug("user record FOUND", $debug_enabled); $entry_ref_id = db_fetch_result($result, 0, "ref_id"); $entry_int_id = db_fetch_result($result, 0, "int_id"); } _debug("RID: {$entry_ref_id}, IID: {$entry_int_id}", $debug_enabled); db_query("UPDATE ttrss_entries\n\t\t\t\t\t\tSET title = '{$entry_title}',\n\t\t\t\t\t\t\tcontent = '{$entry_content}',\n\t\t\t\t\t\t\tcontent_hash = '{$entry_current_hash}',\n\t\t\t\t\t\t\tupdated = '{$entry_timestamp_fmt}',\n\t\t\t\t\t\t\tnum_comments = '{$num_comments}',\n\t\t\t\t\t\t\tplugin_data = '{$entry_plugin_data}',\n\t\t\t\t\t\t\tauthor = '{$entry_author}',\n\t\t\t\t\t\t\tlang = '{$entry_language}'\n\t\t\t\t\t\tWHERE id = '{$ref_id}'"); if ($mark_unread_on_update) { db_query("UPDATE ttrss_user_entries\n\t\t\t\t\t\t\tSET last_read = null, unread = true WHERE ref_id = '{$ref_id}'"); } } db_query("COMMIT"); _debug("assigning labels...", $debug_enabled); assign_article_to_label_filters($entry_ref_id, $article_filters, $owner_uid, $article_labels); _debug("looking for enclosures...", $debug_enabled); // enclosures $enclosures = array(); $encs = $item->get_enclosures(); if (is_array($encs)) { foreach ($encs as $e) { $e_item = array($e->link, $e->type, $e->length, $e->title, $e->width, $e->height); array_push($enclosures, $e_item); } } if ($debug_enabled) { _debug("article enclosures:", $debug_enabled); print_r($enclosures); } db_query("BEGIN"); // debugging // db_query("DELETE FROM ttrss_enclosures WHERE post_id = '$entry_ref_id'"); foreach ($enclosures as $enc) { $enc_url = db_escape_string($enc[0]); $enc_type = db_escape_string($enc[1]); $enc_dur = db_escape_string($enc[2]); $enc_title = db_escape_string($enc[3]); $enc_width = intval($enc[4]); $enc_height = intval($enc[5]); $result = db_query("SELECT id FROM ttrss_enclosures\n\t\t\t\t\t\tWHERE content_url = '{$enc_url}' AND post_id = '{$entry_ref_id}'"); if (db_num_rows($result) == 0) { db_query("INSERT INTO ttrss_enclosures\n\t\t\t\t\t\t\t(content_url, content_type, title, duration, post_id, width, height) VALUES\n\t\t\t\t\t\t\t('{$enc_url}', '{$enc_type}', '{$enc_title}', '{$enc_dur}', '{$entry_ref_id}', {$enc_width}, {$enc_height})"); } } db_query("COMMIT"); // check for manual tags (we have to do it here since they're loaded from filters) foreach ($article_filters as $f) { if ($f["type"] == "tag") { $manual_tags = trim_array(explode(",", $f["param"])); foreach ($manual_tags as $tag) { if (tag_is_valid($tag)) { array_push($entry_tags, $tag); } } } } // Skip boring tags $boring_tags = trim_array(explode(",", mb_strtolower(get_pref('BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8'))); $filtered_tags = array(); $tags_to_cache = array(); if ($entry_tags && is_array($entry_tags)) { foreach ($entry_tags as $tag) { if (array_search($tag, $boring_tags) === false) { array_push($filtered_tags, $tag); } } } $filtered_tags = array_unique($filtered_tags); if ($debug_enabled) { _debug("filtered article tags:", $debug_enabled); print_r($filtered_tags); } // Save article tags in the database if (count($filtered_tags) > 0) { db_query("BEGIN"); foreach ($filtered_tags as $tag) { $tag = sanitize_tag($tag); $tag = db_escape_string($tag); if (!tag_is_valid($tag)) { continue; } $result = db_query("SELECT id FROM ttrss_tags\n\t\t\t\t\t\t\tWHERE tag_name = '{$tag}' AND post_int_id = '{$entry_int_id}' AND\n\t\t\t\t\t\t\towner_uid = '{$owner_uid}' LIMIT 1"); if ($result && db_num_rows($result) == 0) { db_query("INSERT INTO ttrss_tags\n\t\t\t\t\t\t\t\t\t(owner_uid,tag_name,post_int_id)\n\t\t\t\t\t\t\t\t\tVALUES ('{$owner_uid}','{$tag}', '{$entry_int_id}')"); } array_push($tags_to_cache, $tag); } /* update the cache */ $tags_to_cache = array_unique($tags_to_cache); $tags_str = db_escape_string(join(",", $tags_to_cache)); db_query("UPDATE ttrss_user_entries\n\t\t\t\t\t\tSET tag_cache = '{$tags_str}' WHERE ref_id = '{$entry_ref_id}'\n\t\t\t\t\t\tAND owner_uid = {$owner_uid}"); db_query("COMMIT"); } if (get_pref("AUTO_ASSIGN_LABELS", $owner_uid, false)) { _debug("auto-assigning labels...", $debug_enabled); foreach ($labels as $label) { $caption = preg_quote($label["caption"]); if ($caption && preg_match("/\\b{$caption}\\b/i", "{$tags_str} " . strip_tags($entry_content) . " {$entry_title}")) { if (!labels_contains_caption($article_labels, $caption)) { label_add_article($entry_ref_id, $caption, $owner_uid); } } } } _debug("article processed", $debug_enabled); } _debug("purging feed...", $debug_enabled); purge_feed($feed, 0, $debug_enabled); db_query("UPDATE ttrss_feeds\n\t\t\t\tSET last_updated = NOW(), last_error = '' WHERE id = '{$feed}'"); // db_query("COMMIT"); } else { $error_msg = db_escape_string(mb_substr($rss->error(), 0, 245)); _debug("fetch error: {$error_msg}", $debug_enabled); if (count($rss->errors()) > 1) { foreach ($rss->errors() as $error) { _debug("+ {$error}"); } } db_query("UPDATE ttrss_feeds SET last_error = '{$error_msg}',\n\t\t\t\tlast_updated = NOW() WHERE id = '{$feed}'"); unset($rss); } _debug("done", $debug_enabled); return $rss; }
<?php header('Content-type:text/html; charset=utf-8'); require_once 'Text/LanguageDetect.php'; $l = new Text_LanguageDetect(); $a = json_decode(file_get_contents(dirname(__FILE__) . '/data/text.js')); $ok = 0; $total = count($a); $start = microtime(true); for ($i = 0, $iM = $total; $i < $iM; $i++) { $r = $l->detect($a[$i]->text); $k = array_keys($r); if ($r[$k[0]] > 0.2) { $ok++; } } $end = microtime(true); $time = round($end - $start, 3); echo "{$iM} items processed in {$time} secs ({$ok} with a score > 0.2)\n";
<?php /** * example usage (CLI) * * @package Text_LanguageDetect * @version CVS: $Id: example_clui.php 322305 2012-01-15 00:04:17Z clockwerx $ */ require_once 'Text/LanguageDetect.php'; $l = new Text_LanguageDetect(); $stdin = fopen('php://stdin', 'r'); echo "Supported languages:\n"; $langs = $l->getLanguages(); sort($langs); echo join(', ', $langs); echo "\ntotal ", count($langs), "\n\n"; while ($line = fgets($stdin)) { $result = $l->detect($line, 4); print_r($result); $blocks = $l->detectUnicodeBlocks($line, true); print_r($blocks); } fclose($stdin); unset($l); /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
function item_store($arr, $force_parent = false) { // If a Diaspora signature structure was passed in, pull it out of the // item array and set it aside for later storage. $dsprsig = null; if (x($arr, 'dsprsig')) { $dsprsig = json_decode(base64_decode($arr['dsprsig'])); unset($arr['dsprsig']); } // if an OStatus conversation url was passed in, it is stored and then // removed from the array. $ostatus_conversation = null; if (isset($arr["ostatus_conversation"])) { $ostatus_conversation = $arr["ostatus_conversation"]; unset($arr["ostatus_conversation"]); } if (x($arr, 'gravity')) { $arr['gravity'] = intval($arr['gravity']); } elseif ($arr['parent-uri'] === $arr['uri']) { $arr['gravity'] = 0; } elseif (activity_match($arr['verb'], ACTIVITY_POST)) { $arr['gravity'] = 6; } else { $arr['gravity'] = 6; } // extensible catchall if (!x($arr, 'type')) { $arr['type'] = 'remote'; } // Shouldn't happen but we want to make absolutely sure it doesn't leak from a plugin. if (strpos($arr['body'], '<') !== false || strpos($arr['body'], '>') !== false) { $arr['body'] = strip_tags($arr['body']); } if (version_compare(PHP_VERSION, '5.3.0', '>=')) { require_once 'library/langdet/Text/LanguageDetect.php'; $naked_body = preg_replace('/\\[(.+?)\\]/', '', $arr['body']); $l = new Text_LanguageDetect(); //$lng = $l->detectConfidence($naked_body); //$arr['postopts'] = (($lng['language']) ? 'lang=' . $lng['language'] . ';' . $lng['confidence'] : ''); $lng = $l->detect($naked_body, 3); if (sizeof($lng) > 0) { $postopts = ""; foreach ($lng as $language => $score) { if ($postopts == "") { $postopts = "lang="; } else { $postopts .= ":"; } $postopts .= $language . ";" . $score; } $arr['postopts'] = $postopts; } } $arr['wall'] = x($arr, 'wall') ? intval($arr['wall']) : 0; $arr['uri'] = x($arr, 'uri') ? notags(trim($arr['uri'])) : random_string(); $arr['extid'] = x($arr, 'extid') ? notags(trim($arr['extid'])) : ''; $arr['author-name'] = x($arr, 'author-name') ? notags(trim($arr['author-name'])) : ''; $arr['author-link'] = x($arr, 'author-link') ? notags(trim($arr['author-link'])) : ''; $arr['author-avatar'] = x($arr, 'author-avatar') ? notags(trim($arr['author-avatar'])) : ''; $arr['owner-name'] = x($arr, 'owner-name') ? notags(trim($arr['owner-name'])) : ''; $arr['owner-link'] = x($arr, 'owner-link') ? notags(trim($arr['owner-link'])) : ''; $arr['owner-avatar'] = x($arr, 'owner-avatar') ? notags(trim($arr['owner-avatar'])) : ''; $arr['created'] = x($arr, 'created') !== false ? datetime_convert('UTC', 'UTC', $arr['created']) : datetime_convert(); $arr['edited'] = x($arr, 'edited') !== false ? datetime_convert('UTC', 'UTC', $arr['edited']) : datetime_convert(); $arr['commented'] = datetime_convert(); $arr['received'] = datetime_convert(); $arr['changed'] = datetime_convert(); $arr['title'] = x($arr, 'title') ? notags(trim($arr['title'])) : ''; $arr['location'] = x($arr, 'location') ? notags(trim($arr['location'])) : ''; $arr['coord'] = x($arr, 'coord') ? notags(trim($arr['coord'])) : ''; $arr['last-child'] = x($arr, 'last-child') ? intval($arr['last-child']) : 0; $arr['visible'] = x($arr, 'visible') !== false ? intval($arr['visible']) : 1; $arr['deleted'] = 0; $arr['parent-uri'] = x($arr, 'parent-uri') ? notags(trim($arr['parent-uri'])) : ''; $arr['verb'] = x($arr, 'verb') ? notags(trim($arr['verb'])) : ''; $arr['object-type'] = x($arr, 'object-type') ? notags(trim($arr['object-type'])) : ''; $arr['object'] = x($arr, 'object') ? trim($arr['object']) : ''; $arr['target-type'] = x($arr, 'target-type') ? notags(trim($arr['target-type'])) : ''; $arr['target'] = x($arr, 'target') ? trim($arr['target']) : ''; $arr['plink'] = x($arr, 'plink') ? notags(trim($arr['plink'])) : ''; $arr['allow_cid'] = x($arr, 'allow_cid') ? trim($arr['allow_cid']) : ''; $arr['allow_gid'] = x($arr, 'allow_gid') ? trim($arr['allow_gid']) : ''; $arr['deny_cid'] = x($arr, 'deny_cid') ? trim($arr['deny_cid']) : ''; $arr['deny_gid'] = x($arr, 'deny_gid') ? trim($arr['deny_gid']) : ''; $arr['private'] = x($arr, 'private') ? intval($arr['private']) : 0; $arr['bookmark'] = x($arr, 'bookmark') ? intval($arr['bookmark']) : 0; $arr['body'] = x($arr, 'body') ? trim($arr['body']) : ''; $arr['tag'] = x($arr, 'tag') ? notags(trim($arr['tag'])) : ''; $arr['attach'] = x($arr, 'attach') ? notags(trim($arr['attach'])) : ''; $arr['app'] = x($arr, 'app') ? notags(trim($arr['app'])) : ''; $arr['origin'] = x($arr, 'origin') ? intval($arr['origin']) : 0; $arr['guid'] = x($arr, 'guid') ? notags(trim($arr['guid'])) : get_guid(); $arr['thr-parent'] = $arr['parent-uri']; if ($arr['parent-uri'] === $arr['uri']) { $parent_id = 0; $parent_deleted = 0; $allow_cid = $arr['allow_cid']; $allow_gid = $arr['allow_gid']; $deny_cid = $arr['deny_cid']; $deny_gid = $arr['deny_gid']; } else { // find the parent and snarf the item id and ACLs // and anything else we need to inherit $r = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d ORDER BY `id` ASC LIMIT 1", dbesc($arr['parent-uri']), intval($arr['uid'])); if (count($r)) { // is the new message multi-level threaded? // even though we don't support it now, preserve the info // and re-attach to the conversation parent. if ($r[0]['uri'] != $r[0]['parent-uri']) { $arr['parent-uri'] = $r[0]['parent-uri']; $z = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `parent-uri` = '%s' AND `uid` = %d \n\t\t\t\t\tORDER BY `id` ASC LIMIT 1", dbesc($r[0]['parent-uri']), dbesc($r[0]['parent-uri']), intval($arr['uid'])); if ($z && count($z)) { $r = $z; } } $parent_id = $r[0]['id']; $parent_deleted = $r[0]['deleted']; $allow_cid = $r[0]['allow_cid']; $allow_gid = $r[0]['allow_gid']; $deny_cid = $r[0]['deny_cid']; $deny_gid = $r[0]['deny_gid']; $arr['wall'] = $r[0]['wall']; // if the parent is private, force privacy for the entire conversation // This differs from the above settings as it subtly allows comments from // email correspondents to be private even if the overall thread is not. if ($r[0]['private']) { $arr['private'] = $r[0]['private']; } // Edge case. We host a public forum that was originally posted to privately. // The original author commented, but as this is a comment, the permissions // weren't fixed up so it will still show the comment as private unless we fix it here. if (intval($r[0]['forum_mode']) == 1 && !$r[0]['private']) { $arr['private'] = 0; } } else { // Allow one to see reply tweets from status.net even when // we don't have or can't see the original post. if ($force_parent) { logger('item_store: $force_parent=true, reply converted to top-level post.'); $parent_id = 0; $arr['parent-uri'] = $arr['uri']; $arr['gravity'] = 0; } else { logger('item_store: item parent was not found - ignoring item'); return 0; } $parent_deleted = 0; } } $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($arr['uri']), intval($arr['uid'])); if ($r && count($r)) { logger('item-store: duplicate item ignored. ' . print_r($arr, true)); return 0; } call_hooks('post_remote', $arr); if (x($arr, 'cancel')) { logger('item_store: post cancelled by plugin.'); return 0; } dbesc_array($arr); logger('item_store: ' . print_r($arr, true), LOGGER_DATA); $r = dbq("INSERT INTO `item` (`" . implode("`, `", array_keys($arr)) . "`) VALUES ('" . implode("', '", array_values($arr)) . "')"); // find the item we just created $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d ORDER BY `id` ASC ", $arr['uri'], intval($arr['uid'])); if (count($r)) { $current_post = $r[0]['id']; logger('item_store: created item ' . $current_post); create_tags_from_item($r[0]['id']); } else { logger('item_store: could not locate created item'); return 0; } if (count($r) > 1) { logger('item_store: duplicated post occurred. Removing duplicates.'); q("DELETE FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `id` != %d ", $arr['uri'], intval($arr['uid']), intval($current_post)); } if (!$parent_id || $arr['parent-uri'] === $arr['uri']) { $parent_id = $current_post; } if (strlen($allow_cid) || strlen($allow_gid) || strlen($deny_cid) || strlen($deny_gid)) { $private = 1; } else { $private = $arr['private']; } // Set parent id - and also make sure to inherit the parent's ACL's. $r = q("UPDATE `item` SET `parent` = %d, `allow_cid` = '%s', `allow_gid` = '%s',\n\t\t`deny_cid` = '%s', `deny_gid` = '%s', `private` = %d, `deleted` = %d WHERE `id` = %d LIMIT 1", intval($parent_id), dbesc($allow_cid), dbesc($allow_gid), dbesc($deny_cid), dbesc($deny_gid), intval($private), intval($parent_deleted), intval($current_post)); create_tags_from_item($current_post); // Complete ostatus threads if ($ostatus_conversation) { complete_conversation($current_post, $ostatus_conversation); } $arr['id'] = $current_post; $arr['parent'] = $parent_id; $arr['allow_cid'] = $allow_cid; $arr['allow_gid'] = $allow_gid; $arr['deny_cid'] = $deny_cid; $arr['deny_gid'] = $deny_gid; $arr['private'] = $private; $arr['deleted'] = $parent_deleted; // update the commented timestamp on the parent q("UPDATE `item` set `commented` = '%s', `changed` = '%s' WHERE `id` = %d LIMIT 1", dbesc(datetime_convert()), dbesc(datetime_convert()), intval($parent_id)); if ($dsprsig) { q("insert into sign (`iid`,`signed_text`,`signature`,`signer`) values (%d,'%s','%s','%s') ", intval($current_post), dbesc($dsprsig->signed_text), dbesc($dsprsig->signature), dbesc($dsprsig->signer)); } /** * If this is now the last-child, force all _other_ children of this parent to *not* be last-child */ if ($arr['last-child']) { $r = q("UPDATE `item` SET `last-child` = 0 WHERE `parent-uri` = '%s' AND `uid` = %d AND `id` != %d", dbesc($arr['uri']), intval($arr['uid']), intval($current_post)); } tag_deliver($arr['uid'], $current_post); // Store the fresh generated item into the cache $cachefile = get_cachefile($arr["guid"] . "-" . hash("md5", $arr['body'])); if ($cachefile != '' and !file_exists($cachefile)) { $s = prepare_text($arr['body']); $a = get_app(); $stamp1 = microtime(true); file_put_contents($cachefile, $s); $a->save_timestamp($stamp1, "file"); logger('item_store: put item ' . $current_post . ' into cachefile ' . $cachefile); } $r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post)); if (count($r) == 1) { call_hooks('post_remote_end', $r[0]); } else { logger('item_store: new item not found in DB, id ' . $current_post); } return $current_post; }
$raw_suggestion = html_entity_decode($title_suggestion, ENT_QUOTES, "UTF-8"); // Load noise dictionary, for tag generation $noise = split_lines(file_get_contents_cached("english.dic")->data); $noise = arraytolower($noise); foreach (explode(" ", $raw_suggestion) as $tag) { $tag = trim(clean_tag($tag)); if (strlen(trim($tag)) > 1 && in_array(strtolower(trim($tag)), $noise) === false) { $tag_list[] = strtolower($tag); } } $tag_list = array_unique($tag_list); $tags_suggestion = utf8_entities_if_needed(implode(", ", $tag_list)); } if ($detect_language) { require_once "Text/LanguageDetect.php"; $detector = new Text_LanguageDetect(); $detected_language = $detector->detectSimple(strip_tags($request->result)); } else { $detected_language = "English"; } ?> <h2>Submit a related site</h2> <form method="POST" action="/related-sites/add/submit/" class="submission"> <input type="hidden" name="url" value="<?php echo utf8entities($_POST['url']); ?> "> <h4>Website Title</h4>
function item_store($arr, $force_parent = false, $notify = false) { // If it is a posting where users should get notifications, then define it as wall posting if ($notify) { $arr['wall'] = 1; $arr['type'] = 'wall'; $arr['origin'] = 1; $arr['last-child'] = 1; $arr['network'] = NETWORK_DFRN; } // If a Diaspora signature structure was passed in, pull it out of the // item array and set it aside for later storage. $dsprsig = null; if (x($arr, 'dsprsig')) { $dsprsig = json_decode(base64_decode($arr['dsprsig'])); unset($arr['dsprsig']); } // if an OStatus conversation url was passed in, it is stored and then // removed from the array. $ostatus_conversation = null; if (isset($arr["ostatus_conversation"])) { $ostatus_conversation = $arr["ostatus_conversation"]; unset($arr["ostatus_conversation"]); } if (x($arr, 'gravity')) { $arr['gravity'] = intval($arr['gravity']); } elseif ($arr['parent-uri'] === $arr['uri']) { $arr['gravity'] = 0; } elseif (activity_match($arr['verb'], ACTIVITY_POST)) { $arr['gravity'] = 6; } else { $arr['gravity'] = 6; } // extensible catchall if (!x($arr, 'type')) { $arr['type'] = 'remote'; } /* check for create date and expire time */ $uid = intval($arr['uid']); $r = q("SELECT expire FROM user WHERE uid = %d", $uid); if (count($r)) { $expire_interval = $r[0]['expire']; if ($expire_interval > 0) { $expire_date = new DateTime('- ' . $expire_interval . ' days', new DateTimeZone('UTC')); $created_date = new DateTime($arr['created'], new DateTimeZone('UTC')); if ($created_date < $expire_date) { logger('item-store: item created (' . $arr['created'] . ') before expiration time (' . $expire_date->format(DateTime::W3C) . '). ignored. ' . print_r($arr, true), LOGGER_DEBUG); return 0; } } } // If there is no guid then take the same guid that was taken before for the same uri if (trim($arr['guid']) == "" and trim($arr['uri']) != "") { logger('item_store: checking for an existing guid for uri ' . $arr['uri'], LOGGER_DEBUG); $r = q("SELECT `guid` FROM `item` WHERE `uri` = '%s' AND `guid` != '' LIMIT 1", dbesc(trim($arr['uri']))); if (count($r)) { $arr['guid'] = $r[0]["guid"]; logger('item_store: found guid ' . $arr['guid'] . ' for uri ' . $arr['uri'], LOGGER_DEBUG); } } // Shouldn't happen but we want to make absolutely sure it doesn't leak from a plugin. // Deactivated, since the bbcode parser can handle with it - and it destroys posts with some smileys that contain "<" //if((strpos($arr['body'],'<') !== false) || (strpos($arr['body'],'>') !== false)) // $arr['body'] = strip_tags($arr['body']); if (version_compare(PHP_VERSION, '5.3.0', '>=')) { require_once 'library/langdet/Text/LanguageDetect.php'; $naked_body = preg_replace('/\\[(.+?)\\]/', '', $arr['body']); $l = new Text_LanguageDetect(); //$lng = $l->detectConfidence($naked_body); //$arr['postopts'] = (($lng['language']) ? 'lang=' . $lng['language'] . ';' . $lng['confidence'] : ''); $lng = $l->detect($naked_body, 3); if (sizeof($lng) > 0) { $postopts = ""; foreach ($lng as $language => $score) { if ($postopts == "") { $postopts = "lang="; } else { $postopts .= ":"; } $postopts .= $language . ";" . $score; } $arr['postopts'] = $postopts; } } $arr['wall'] = x($arr, 'wall') ? intval($arr['wall']) : 0; $arr['uri'] = x($arr, 'uri') ? notags(trim($arr['uri'])) : random_string(); $arr['extid'] = x($arr, 'extid') ? notags(trim($arr['extid'])) : ''; $arr['author-name'] = x($arr, 'author-name') ? notags(trim($arr['author-name'])) : ''; $arr['author-link'] = x($arr, 'author-link') ? notags(trim($arr['author-link'])) : ''; $arr['author-avatar'] = x($arr, 'author-avatar') ? notags(trim($arr['author-avatar'])) : ''; $arr['owner-name'] = x($arr, 'owner-name') ? notags(trim($arr['owner-name'])) : ''; $arr['owner-link'] = x($arr, 'owner-link') ? notags(trim($arr['owner-link'])) : ''; $arr['owner-avatar'] = x($arr, 'owner-avatar') ? notags(trim($arr['owner-avatar'])) : ''; $arr['created'] = x($arr, 'created') !== false ? datetime_convert('UTC', 'UTC', $arr['created']) : datetime_convert(); $arr['edited'] = x($arr, 'edited') !== false ? datetime_convert('UTC', 'UTC', $arr['edited']) : datetime_convert(); $arr['commented'] = datetime_convert(); $arr['received'] = datetime_convert(); $arr['changed'] = datetime_convert(); $arr['title'] = x($arr, 'title') ? notags(trim($arr['title'])) : ''; $arr['location'] = x($arr, 'location') ? notags(trim($arr['location'])) : ''; $arr['coord'] = x($arr, 'coord') ? notags(trim($arr['coord'])) : ''; $arr['last-child'] = x($arr, 'last-child') ? intval($arr['last-child']) : 0; $arr['visible'] = x($arr, 'visible') !== false ? intval($arr['visible']) : 1; $arr['deleted'] = 0; $arr['parent-uri'] = x($arr, 'parent-uri') ? notags(trim($arr['parent-uri'])) : ''; $arr['verb'] = x($arr, 'verb') ? notags(trim($arr['verb'])) : ''; $arr['object-type'] = x($arr, 'object-type') ? notags(trim($arr['object-type'])) : ''; $arr['object'] = x($arr, 'object') ? trim($arr['object']) : ''; $arr['target-type'] = x($arr, 'target-type') ? notags(trim($arr['target-type'])) : ''; $arr['target'] = x($arr, 'target') ? trim($arr['target']) : ''; $arr['plink'] = x($arr, 'plink') ? notags(trim($arr['plink'])) : ''; $arr['allow_cid'] = x($arr, 'allow_cid') ? trim($arr['allow_cid']) : ''; $arr['allow_gid'] = x($arr, 'allow_gid') ? trim($arr['allow_gid']) : ''; $arr['deny_cid'] = x($arr, 'deny_cid') ? trim($arr['deny_cid']) : ''; $arr['deny_gid'] = x($arr, 'deny_gid') ? trim($arr['deny_gid']) : ''; $arr['private'] = x($arr, 'private') ? intval($arr['private']) : 0; $arr['bookmark'] = x($arr, 'bookmark') ? intval($arr['bookmark']) : 0; $arr['body'] = x($arr, 'body') ? trim($arr['body']) : ''; $arr['tag'] = x($arr, 'tag') ? notags(trim($arr['tag'])) : ''; $arr['attach'] = x($arr, 'attach') ? notags(trim($arr['attach'])) : ''; $arr['app'] = x($arr, 'app') ? notags(trim($arr['app'])) : ''; $arr['origin'] = x($arr, 'origin') ? intval($arr['origin']) : 0; $arr['guid'] = x($arr, 'guid') ? notags(trim($arr['guid'])) : get_guid(30); $arr['network'] = x($arr, 'network') ? trim($arr['network']) : ''; if ($arr['plink'] == "") { $a = get_app(); $arr['plink'] = $a->get_baseurl() . '/display/' . urlencode($arr['guid']); } if ($arr['network'] == "") { $r = q("SELECT `network` FROM `contact` WHERE `id` = %d AND `uid` = %d LIMIT 1", intval($arr['contact-id']), intval($arr['uid'])); if (count($r)) { $arr['network'] = $r[0]["network"]; } // Fallback to friendica (why is it empty in some cases?) if ($arr['network'] == "") { $arr['network'] = NETWORK_DFRN; } logger("item_store: Set network to " . $arr["network"] . " for " . $arr["uri"], LOGGER_DEBUG); } $arr['thr-parent'] = $arr['parent-uri']; if ($arr['parent-uri'] === $arr['uri']) { $parent_id = 0; $parent_deleted = 0; $allow_cid = $arr['allow_cid']; $allow_gid = $arr['allow_gid']; $deny_cid = $arr['deny_cid']; $deny_gid = $arr['deny_gid']; $notify_type = 'wall-new'; } else { // find the parent and snarf the item id and ACLs // and anything else we need to inherit $r = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d ORDER BY `id` ASC LIMIT 1", dbesc($arr['parent-uri']), intval($arr['uid'])); if (count($r)) { // is the new message multi-level threaded? // even though we don't support it now, preserve the info // and re-attach to the conversation parent. if ($r[0]['uri'] != $r[0]['parent-uri']) { $arr['parent-uri'] = $r[0]['parent-uri']; $z = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `parent-uri` = '%s' AND `uid` = %d\n\t\t\t\t\tORDER BY `id` ASC LIMIT 1", dbesc($r[0]['parent-uri']), dbesc($r[0]['parent-uri']), intval($arr['uid'])); if ($z && count($z)) { $r = $z; } } $parent_id = $r[0]['id']; $parent_deleted = $r[0]['deleted']; $allow_cid = $r[0]['allow_cid']; $allow_gid = $r[0]['allow_gid']; $deny_cid = $r[0]['deny_cid']; $deny_gid = $r[0]['deny_gid']; $arr['wall'] = $r[0]['wall']; $notify_type = 'comment-new'; // if the parent is private, force privacy for the entire conversation // This differs from the above settings as it subtly allows comments from // email correspondents to be private even if the overall thread is not. if ($r[0]['private']) { $arr['private'] = $r[0]['private']; } // Edge case. We host a public forum that was originally posted to privately. // The original author commented, but as this is a comment, the permissions // weren't fixed up so it will still show the comment as private unless we fix it here. if (intval($r[0]['forum_mode']) == 1 && !$r[0]['private']) { $arr['private'] = 0; } // If its a post from myself then tag the thread as "mention" logger("item_store: Checking if parent " . $parent_id . " has to be tagged as mention for user " . $arr['uid'], LOGGER_DEBUG); $u = q("select * from user where uid = %d limit 1", intval($arr['uid'])); if (count($u)) { $a = get_app(); $self = normalise_link($a->get_baseurl() . '/profile/' . $u[0]['nickname']); logger("item_store: 'myself' is " . $self . " for parent " . $parent_id . " checking against " . $arr['author-link'] . " and " . $arr['owner-link'], LOGGER_DEBUG); if (normalise_link($arr['author-link']) == $self or normalise_link($arr['owner-link']) == $self) { q("UPDATE `thread` SET `mention` = 1 WHERE `iid` = %d", intval($parent_id)); logger("item_store: tagged thread " . $parent_id . " as mention for user " . $self, LOGGER_DEBUG); } } } else { // Allow one to see reply tweets from status.net even when // we don't have or can't see the original post. if ($force_parent) { logger('item_store: $force_parent=true, reply converted to top-level post.'); $parent_id = 0; $arr['parent-uri'] = $arr['uri']; $arr['gravity'] = 0; } else { logger('item_store: item parent was not found - ignoring item'); return 0; } $parent_deleted = 0; } } $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", dbesc($arr['uri']), intval($arr['uid'])); if ($r && count($r)) { logger('item-store: duplicate item ignored. ' . print_r($arr, true)); return 0; } call_hooks('post_remote', $arr); if (x($arr, 'cancel')) { logger('item_store: post cancelled by plugin.'); return 0; } dbesc_array($arr); logger('item_store: ' . print_r($arr, true), LOGGER_DATA); $r = dbq("INSERT INTO `item` (`" . implode("`, `", array_keys($arr)) . "`) VALUES ('" . implode("', '", array_values($arr)) . "')"); // find the item we just created $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d ORDER BY `id` ASC ", $arr['uri'], intval($arr['uid'])); if (count($r)) { $current_post = $r[0]['id']; logger('item_store: created item ' . $current_post); // Only check for notifications on start posts if ($arr['parent-uri'] === $arr['uri']) { add_thread($r[0]['id']); logger('item_store: Check notification for contact ' . $arr['contact-id'] . ' and post ' . $current_post, LOGGER_DEBUG); // Send a notification for every new post? $r = q("SELECT `notify_new_posts` FROM `contact` WHERE `id` = %d AND `uid` = %d AND `notify_new_posts` LIMIT 1", intval($arr['contact-id']), intval($arr['uid'])); if (count($r)) { logger('item_store: Send notification for contact ' . $arr['contact-id'] . ' and post ' . $current_post, LOGGER_DEBUG); $u = q("SELECT * FROM user WHERE uid = %d LIMIT 1", intval($arr['uid'])); $item = q("SELECT * FROM `item` WHERE `id` = %d AND `uid` = %d", intval($current_post), intval($arr['uid'])); $a = get_app(); require_once 'include/enotify.php'; notification(array('type' => NOTIFY_SHARE, 'notify_flags' => $u[0]['notify-flags'], 'language' => $u[0]['language'], 'to_name' => $u[0]['username'], 'to_email' => $u[0]['email'], 'uid' => $u[0]['uid'], 'item' => $item[0], 'link' => $a->get_baseurl() . '/display/' . urlencode($arr['guid']), 'source_name' => $item[0]['author-name'], 'source_link' => $item[0]['author-link'], 'source_photo' => $item[0]['author-avatar'], 'verb' => ACTIVITY_TAG, 'otype' => 'item')); logger('item_store: Notification sent for contact ' . $arr['contact-id'] . ' and post ' . $current_post, LOGGER_DEBUG); } } } else { logger('item_store: could not locate created item'); return 0; } if (count($r) > 1) { logger('item_store: duplicated post occurred. Removing duplicates.'); q("DELETE FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `id` != %d ", $arr['uri'], intval($arr['uid']), intval($current_post)); } if (!$parent_id || $arr['parent-uri'] === $arr['uri']) { $parent_id = $current_post; } if (strlen($allow_cid) || strlen($allow_gid) || strlen($deny_cid) || strlen($deny_gid)) { $private = 1; } else { $private = $arr['private']; } // Set parent id - and also make sure to inherit the parent's ACLs. $r = q("UPDATE `item` SET `parent` = %d, `allow_cid` = '%s', `allow_gid` = '%s',\n\t\t`deny_cid` = '%s', `deny_gid` = '%s', `private` = %d, `deleted` = %d WHERE `id` = %d", intval($parent_id), dbesc($allow_cid), dbesc($allow_gid), dbesc($deny_cid), dbesc($deny_gid), intval($private), intval($parent_deleted), intval($current_post)); // Complete ostatus threads if ($ostatus_conversation) { complete_conversation($current_post, $ostatus_conversation); } $arr['id'] = $current_post; $arr['parent'] = $parent_id; $arr['allow_cid'] = $allow_cid; $arr['allow_gid'] = $allow_gid; $arr['deny_cid'] = $deny_cid; $arr['deny_gid'] = $deny_gid; $arr['private'] = $private; $arr['deleted'] = $parent_deleted; // update the commented timestamp on the parent q("UPDATE `item` set `commented` = '%s', `changed` = '%s' WHERE `id` = %d", dbesc(datetime_convert()), dbesc(datetime_convert()), intval($parent_id)); update_thread($parent_id); if ($dsprsig) { q("insert into sign (`iid`,`signed_text`,`signature`,`signer`) values (%d,'%s','%s','%s') ", intval($current_post), dbesc($dsprsig->signed_text), dbesc($dsprsig->signature), dbesc($dsprsig->signer)); } /** * If this is now the last-child, force all _other_ children of this parent to *not* be last-child */ if ($arr['last-child']) { $r = q("UPDATE `item` SET `last-child` = 0 WHERE `parent-uri` = '%s' AND `uid` = %d AND `id` != %d", dbesc($arr['uri']), intval($arr['uid']), intval($current_post)); } $deleted = tag_deliver($arr['uid'], $current_post); // current post can be deleted if is for a communuty page and no mention are // in it. if (!$deleted) { // Store the fresh generated item into the cache $cachefile = get_cachefile($arr["guid"] . "-" . hash("md5", $arr['body'])); if ($cachefile != '' and !file_exists($cachefile)) { $s = prepare_text($arr['body']); $a = get_app(); $stamp1 = microtime(true); file_put_contents($cachefile, $s); $a->save_timestamp($stamp1, "file"); logger('item_store: put item ' . $current_post . ' into cachefile ' . $cachefile); } $r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post)); if (count($r) == 1) { call_hooks('post_remote_end', $r[0]); } else { logger('item_store: new item not found in DB, id ' . $current_post); } } create_tags_from_item($current_post); create_files_from_item($current_post); if ($notify) { proc_run('php', "include/notifier.php", $notify_type, $current_post); } return $current_post; }
function test_unicode_off() { // see what happens when you turn the unicode setting off $myobj = new Text_LanguageDetect(); $str = 'This is a delightful sample of English text'; $myobj->useUnicodeBlocks(true); $result1 = $myobj->detectConfidence($str); $myobj->useUnicodeBlocks(false); $result2 = $myobj->detectConfidence($str); $this->assertEquals($result1, $result2); // note this test doesn't tell if unicode narrowing was actually used or not }
<?php /** * example usage (web) * * @package Text_LanguageDetect * @version CVS: $Id: example_web.php 205493 2006-01-18 00:26:57Z taak $ */ // browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded header('Content-type: text/html; charset=utf-8', true); require_once 'Text/LanguageDetect.php'; $l = new Text_LanguageDetect(); if (isset($_REQUEST['q'])) { $q = stripslashes($_REQUEST['q']); } ?> <html> <head> <title>Text_LanguageDetect demonstration</title> </head> <body> <h2>Text_LanguageDetect</h2> <?php echo "<small>Supported languages:\n"; $langs = $l->getLanguages(); sort($langs); foreach ($langs as $lang) { echo ucfirst($lang), ', '; $i++; } echo "<br />total {$i}</small><br /><br />";