/**
 * Checks whether the specified URL is valid.
 *
 * Valid URLs must start with a protocol, even a relative one
 * (e.g. "http://example.com" or "//example.com"). The scheme check is
 * case-sensitive on its first character, which must be a lowercase letter.
 *
 * Non-string input is rejected up front, before any trimming takes place,
 * so objects and arrays never reach trim() or the regular expression.
 *
 * @since 4.7.3
 * @see wprss_validate_url()
 *
 * @param string $url A URL to validate.
 * @return bool True if the specified string is a valid URL; false otherwise.
 */
public function is_valid_url($url)
{
    // trim() always yields a string, so a type check performed after it
    // could never fail. Guard first instead.
    if (!is_string($url)) {
        return false;
    }

    $url = trim($url);

    // Optional "scheme:" followed by a mandatory "//".
    $protocol_pattern = '!^([a-z][\\w-]+:)?//!';

    return preg_match($protocol_pattern, $url) === 1
        && wprss_validate_url($url);
}
/**
 * The main feed fetching function.
 * Fetches the feed items from the source provided and inserts them into the DB.
 *
 * Called on hook 'wprss_fetch_single_feed_hook'.
 *
 * Outline of the import:
 *  1. Bail out if the feed source is paused and no fetch was forced.
 *  2. Flag the source as updating and resolve its (filtered) URL.
 *  3. Resolve the import limit: per-feed limit, else global limit, else none.
 *  4. Fetch the items, cut them to the limit, and drop those that are
 *     blacklisted, already imported (by permalink), or duplicate by title.
 *  5. When a limit applies, delete the oldest stored items to make room.
 *  6. Record the update time and insert the new items.
 *  7. Reschedule if requested and flag the source as idle.
 *
 * @since 3.2
 *
 * @param int $feed_ID The ID of the feed source whose items are to be imported.
 * @return void
 */
function wprss_fetch_insert_single_feed_items($feed_ID)
{
    wprss_log_obj('Starting import of feed', $feed_ID, null, WPRSS_LOG_LEVEL_INFO);

    // Publish the ID of the feed being imported before registering the shutdown
    // handler. NOTE(review): presumably read by wprss_detect_exec_timeout(),
    // which is not visible here; confirm.
    global $wprss_importing_feed;
    $wprss_importing_feed = $feed_ID;
    register_shutdown_function('wprss_detect_exec_timeout');

    // A paused feed source is only imported when a fetch has been forced.
    if (wprss_is_feed_source_active($feed_ID) === false && wprss_feed_source_force_next_fetch($feed_ID) === false) {
        wprss_log('Feed is not active and not forced. Import cancelled.', null, WPRSS_LOG_LEVEL_INFO);
        return;
    }

    // The force flag is one-shot: clear it once it has been honoured.
    if (wprss_feed_source_force_next_fetch($feed_ID)) {
        delete_post_meta($feed_ID, 'wprss_force_next_fetch');
        wprss_log('Force feed flag removed', null, WPRSS_LOG_LEVEL_SYSTEM);
    }

    $start_of_update = wprss_flag_feed_as_updating($feed_ID);
    wprss_log_obj('Start of import time updated', date('Y-m-d H:i:s', $start_of_update), null, WPRSS_LOG_LEVEL_SYSTEM);

    // Get the feed source URL from post meta, and filter it.
    $feed_url = get_post_meta($feed_ID, 'wprss_url', true);
    wprss_log_obj('Original feed source URL', $feed_url, null, WPRSS_LOG_LEVEL_SYSTEM);
    $feed_url = apply_filters('wprss_feed_source_url', $feed_url, $feed_ID);
    wprss_log_obj('Actual feed source URL', $feed_url, null, WPRSS_LOG_LEVEL_INFO);

    // Get the feed limit from post meta.
    $feed_limit = get_post_meta($feed_ID, 'wprss_limit', true);
    wprss_log_obj('Feed limit value is', $feed_limit, null, WPRSS_LOG_LEVEL_SYSTEM);

    // If the feed has no individual limit, fall back to the global one.
    if ($feed_limit === '' || intval($feed_limit) <= 0) {
        wprss_log_obj('Using global limit', $feed_limit, null, WPRSS_LOG_LEVEL_NOTICE);
        $global_limit = wprss_get_general_setting('limit_feed_items_imported');

        // No usable global limit either: null means "unlimited".
        $has_global_limit = !($global_limit === '' || intval($global_limit) <= 0);
        $feed_limit = $has_global_limit ? $global_limit : null;
    }

    // The limit was validated through intval(), so use that very same integer
    // from here on, rather than handing the raw meta string to array_slice()
    // and to the arithmetic further below.
    if ($feed_limit !== null) {
        $feed_limit = intval($feed_limit);
    }
    wprss_log_obj('Feed import limit', $feed_limit, null, WPRSS_LOG_LEVEL_INFO);

    // Filter the URL for validity.
    if (wprss_validate_url($feed_url)) {
        wprss_log_obj('Feed URL is valid', $feed_url, null, WPRSS_LOG_LEVEL_INFO);

        // Get the feed items from the source.
        $items = wprss_get_feed_items($feed_url, $feed_ID);

        // If got NULL, convert to an empty array.
        if ($items === null) {
            $items = array();
            wprss_log('Items were NULL. Using empty array', null, WPRSS_LOG_LEVEL_WARNING);
        }

        // If using a limit, only consider the first $feed_limit fetched items.
        if ($feed_limit === null) {
            $items_to_insert = $items;
        } else {
            $items_to_insert = array_slice($items, 0, $feed_limit);
            wprss_log_obj('Sliced a segment of items', count($items_to_insert), null, WPRSS_LOG_LEVEL_SYSTEM);
        }

        // Gather the permalinks of existing feed items related to this feed source.
        $existing_permalinks = wprss_get_existing_permalinks($feed_ID);
        wprss_log_obj('Retrieved existing permalinks', count($existing_permalinks), null, WPRSS_LOG_LEVEL_SYSTEM);

        // Check if we should only import uniquely-titled feed items.
        // The global setting takes precedence over the per-feed setting.
        $existing_titles = array();
        $unique_titles = false;
        if (wprss_get_general_setting('unique_titles')) {
            $unique_titles = true;
            $existing_titles = wprss_get_existing_titles();
            wprss_log_obj('Retrieved existing titles from global', count($existing_titles), null, WPRSS_LOG_LEVEL_SYSTEM);
        } elseif (get_post_meta($feed_ID, 'wprss_unique_titles', true) === 'true') {
            $unique_titles = true;
            $existing_titles = wprss_get_existing_titles($feed_ID);
            wprss_log_obj('Retrieved existing titles from feed source', count($existing_titles), null, WPRSS_LOG_LEVEL_SYSTEM);
        }

        // Generate a list of items fetched, that are not already in the DB.
        $new_items = array();
        foreach ($items_to_insert as $item) {
            $raw_permalink = $item->get_permalink();
            $permalink = wprss_normalize_permalink($raw_permalink);
            wprss_log_obj('Normalizing permalink', sprintf('%1$s -> %2$s', $raw_permalink, $permalink), null, WPRSS_LOG_LEVEL_SYSTEM);

            // Check if not blacklisted and not already imported.
            $is_blacklisted = wprss_is_blacklisted($permalink);
            $permalink_exists = array_key_exists($permalink, $existing_permalinks);
            $title = $item->get_title();
            $title_exists = array_key_exists($title, $existing_titles);

            if ($is_blacklisted === false && $permalink_exists === false && $title_exists === false) {
                $new_items[] = $item;
                wprss_log_obj('Permalink OK', $permalink, null, WPRSS_LOG_LEVEL_SYSTEM);

                // Remember the title so that a later item in this same batch
                // with an identical title is rejected as well.
                if ($unique_titles) {
                    $existing_titles[$title] = 1;
                }
                continue;
            }

            if ($is_blacklisted) {
                wprss_log('Permalink blacklisted', null, WPRSS_LOG_LEVEL_SYSTEM);
            }
            if ($permalink_exists) {
                wprss_log('Permalink already exists', null, WPRSS_LOG_LEVEL_SYSTEM);
            }
            if ($title_exists) {
                wprss_log('Title already exists', null, WPRSS_LOG_LEVEL_SYSTEM);
            }
        }

        $original_count = count($items_to_insert);
        $new_count = count($new_items);
        if ($new_count !== $original_count) {
            wprss_log_obj('Items filtered out', $original_count - $new_count, null, WPRSS_LOG_LEVEL_SYSTEM);
        } else {
            wprss_log('Items to import remained untouched. Not items already exist or are blacklisted.', null, WPRSS_LOG_LEVEL_SYSTEM);
        }

        $items_to_insert = $new_items;

        // If using a limit - delete any excess items to make room for the new items.
        if ($feed_limit !== null) {
            wprss_log_obj('Some items may be deleted due to limit', $feed_limit, null, WPRSS_LOG_LEVEL_SYSTEM);

            // Get the feed items in DB, and their count.
            $db_feed_items = wprss_get_feed_items_for_source($feed_ID);
            $num_db_feed_items = $db_feed_items->post_count;

            // Get the number of feed items we can store until we reach the limit.
            $num_can_insert = $feed_limit - $num_db_feed_items;

            // Calculate how many feed items we must delete before importing, to keep to the limit.
            $num_feed_items_to_delete = max(0, $new_count - $num_can_insert);

            // The stored items are reversed so that the ones at the end of the
            // query result are deleted first (oldest first, per the query order
            // this code relies on).
            $db_feed_items_reversed = array_reverse($db_feed_items->posts);
            $feed_items_to_delete = array_slice($db_feed_items_reversed, 0, $num_feed_items_to_delete);

            wprss_log(sprintf('There already are %1$d items in the database. %2$d items can be inserted. %3$d items will be deleted', $num_db_feed_items, $num_can_insert, $num_feed_items_to_delete), null, WPRSS_LOG_LEVEL_SYSTEM);

            // Permanently delete the surplus items (bypassing the trash).
            foreach ($feed_items_to_delete as $old_item) {
                wp_delete_post($old_item->ID, true);
            }

            $deleted_items_count = count($feed_items_to_delete);
            if ($deleted_items_count) {
                wprss_log_obj('Items deleted due to limit', $deleted_items_count, null, WPRSS_LOG_LEVEL_NOTICE);
            }
        }

        // Record the time of this update and reset the imported items counter.
        $last_update_time = time();
        update_post_meta($feed_ID, 'wprss_last_update', $last_update_time);
        update_post_meta($feed_ID, 'wprss_last_update_items', 0);
        wprss_log_obj('Last import time updated', $last_update_time, null, WPRSS_LOG_LEVEL_SYSTEM);

        // Insert the items into the DB.
        if (!empty($items_to_insert)) {
            wprss_log_obj('There are items to insert', count($items_to_insert), null, WPRSS_LOG_LEVEL_INFO);
            wprss_items_insert_post($items_to_insert, $feed_ID);
        }
    } else {
        wprss_log_obj('The feed URL is not valid! Please recheck', $feed_url);
    }

    // Honour a pending reschedule request, whether or not the URL was valid.
    $next_scheduled = get_post_meta($feed_ID, 'wprss_reschedule_event', true);
    if ($next_scheduled !== '') {
        wprss_feed_source_update_start_schedule($feed_ID);
        delete_post_meta($feed_ID, 'wprss_reschedule_event');
        wprss_log('Next update rescheduled', null, WPRSS_LOG_LEVEL_SYSTEM);
    }

    wprss_flag_feed_as_idle($feed_ID);
    wprss_log_obj('Import complete', $feed_ID, __FUNCTION__, WPRSS_LOG_LEVEL_INFO);
}