Example #1
0
/**
 * Get all posts with 'item_id' set to a given item id
 *
 * Runs a WP_Query on the 'item_id' meta key and logs how many posts matched.
 *
 * $post_type carries no default value: it precedes the required $item_id, so
 * PHP never applied a default to it (and PHP 8 raises a deprecation notice for
 * such a declaration). Pass false to search the default post types.
 *
 * @since 1.7
 *
 * @param string|array|bool $post_type The post type(s) to limit results to. Any falsy value
 *                                     searches 'post' plus the feed item post type.
 * @param string            $item_id   The origin item id.
 * @param bool              $ids_only  Set to true if you want only an array of IDs returned in the query.
 *
 * @return WP_Query The executed query object.
 */
function pf_get_posts_by_id_for_check($post_type, $item_id, $ids_only = false)
{
    $query_args = array(
        'meta_key' => 'item_id',
        'meta_value' => $item_id,
        'post_type' => array('post', pf_feed_item_post_type()),
    );
    if ($ids_only) {
        # Only IDs are wanted: skip the row count and the post cache priming.
        $query_args['fields'] = 'ids';
        $query_args['no_found_rows'] = true;
        $query_args['cache_results'] = false;
    }
    if ($post_type) {
        # A caller-supplied post type replaces the default pair.
        $query_args['post_type'] = $post_type;
    }
    $matches = new WP_Query($query_args);
    pf_log(' Checking for posts with item ID ' . $item_id . ' returned query with ' . $matches->post_count . ' items.');
    return $matches;
}
Example #2
0
 /**
  * Import one chunk of feed items into the database as feed-item posts.
  *
  * For every item in the feed object this method:
  *  1. skips the item when a feed-item post with the same 'item_id' meta exists;
  *  2. otherwise looks for feed-item posts with the same 'item_link' meta and, when the
  *     title or link matches, works out a 'source_repeat' count (an item that is not newer
  *     than the stored post is treated as a duplicate and skipped);
  *  3. otherwise sanitizes the item, creates the post via self::create(), retries once
  *     with more aggressive sanitizing if self::post_inserted() reports failure, and
  *     attaches the item's meta via pf_meta_establish_post().
  *
  * The PF_SLUG . '_ready_to_chunk' option is set to 0 while the chunk is processed and
  * back to 1 afterwards; finally pressforward()->pf_retrieve->advance_feeds() is called.
  *
  * @param array|int $feedObj Array of feed items (each an array with keys such as 'item_id',
  *                           'item_title', 'item_link', ...) plus a 'parent_feed_id' entry.
  *                           When 0 (the default) it is fetched with self::get_the_feed_object().
  *
  * @return void
  */
 public static function assemble_feed_for_pull($feedObj = 0)
 {
     pf_log('Invoked: PF_Feed_Item::assemble_feed_for_pull()');
     ignore_user_abort(true);
     set_time_limit(0);
     # Chunking control, the goal here is to ensure that no feed assembly occurs while a feed assembly is already occurring.
     # Option: If true (1), the system is ready to assemble a chunk. If false (0), the system is already assembling a chunk.
     $ready_for_chunk_assembly = get_option(PF_SLUG . '_ready_to_chunk', 1);
     # NOTE(review): this is a strict comparison against integer 0. If get_option() hands back
     # the stored value as the string '0', this guard never trips - confirm before relying on it.
     if ($ready_for_chunk_assembly === 0) {
         pf_log('Chunk already in progress.');
         return;
     }
     pf_log('Beginning next import chunk.');
     pf_log('The chunk state is set?');
     $chunk_state = update_option(PF_SLUG . '_ready_to_chunk', 0);
     pf_log($chunk_state);
     if ($feedObj == 0) {
         $feedObj = self::get_the_feed_object();
     }
     # We'll need this for our custom queries.
     global $wpdb;
     # Since rss_object places all the feed items into an array of arrays whose structure is standardized throughout,
     # we can do stuff with it, using the same structure of items as we do everywhere else.
     pf_log('Now beginning check and processing for entering items into the database.');
     $parent = $feedObj['parent_feed_id'];
     unset($feedObj['parent_feed_id']);
     foreach ($feedObj as $item) {
         # Counts how many existing posts make this item a duplicate; 0 means "insert it".
         $thepostscheck = 0;
         $thePostsDoubleCheck = 0;
         $item_id = $item['item_id'];
         $sourceRepeat = 0;
         # First pass: query only for feed-item posts carrying this exact item ID.
         # Fewer query results, less time.
         $querystr = $wpdb->prepare("\n\t\t\t\tSELECT {$wpdb->posts}.*, {$wpdb->postmeta}.*\n\t\t\t\tFROM {$wpdb->posts}, {$wpdb->postmeta}\n\t\t\t\tWHERE {$wpdb->posts}.ID = {$wpdb->postmeta}.post_id\n\t\t\t\tAND {$wpdb->postmeta}.meta_key = 'item_id'\n\t\t\t\tAND {$wpdb->postmeta}.meta_value = %s\n\t\t\t\tAND {$wpdb->posts}.post_type = %s\n\t\t\t\tORDER BY {$wpdb->posts}.post_date DESC\n\t\t\t ", $item_id, pf_feed_item_post_type());
         $checkposts = $wpdb->get_results($querystr, OBJECT);
         if ($checkposts) {
             global $post;
             foreach ($checkposts as $post) {
                 setup_postdata($post);
                 # get_post_meta() takes ( $post_id, $key, $single ); request the single stored value.
                 if (get_post_meta($post->ID, 'item_id', true) === $item_id) {
                     $thepostscheck++;
                     pf_log('We already have post ' . $item_id);
                 }
             }
         }
         wp_reset_query();
         if ($thepostscheck === 0) {
             # Second pass: the ID is new, but the same link may already be stored from another source.
             $queryMoreStr = $wpdb->prepare("\n\t\t\t\t\t\tSELECT {$wpdb->posts}.*, {$wpdb->postmeta}.*\n\t\t\t\t\t\tFROM {$wpdb->posts}, {$wpdb->postmeta}\n\t\t\t\t\t\tWHERE {$wpdb->posts}.ID = {$wpdb->postmeta}.post_id\n\t\t\t\t\t\tAND {$wpdb->postmeta}.meta_key = 'item_link'\n\t\t\t\t\t\tAND {$wpdb->postmeta}.meta_value = %s\n\t\t\t\t\t\tAND {$wpdb->posts}.post_type = %s\n\t\t\t\t\t\tORDER BY {$wpdb->posts}.post_date DESC\n\t\t\t\t\t ", $item['item_link'], pf_feed_item_post_type());
             $checkpoststwo = $wpdb->get_results($queryMoreStr, OBJECT);
             if ($checkpoststwo) {
                 pf_log('Check for posts with the same link.');
                 # Item comparative values (the same for every candidate post).
                 $itemDate = strtotime($item['item_date']);
                 $itemTitle = $item['item_title'];
                 $itemLink = $item['item_link'];
                 foreach ($checkpoststwo as $post) {
                     setup_postdata($post);
                     # Post comparative values.
                     $theTitle = $post->post_title;
                     $postID = $post->ID;
                     $postDate = strtotime($post->post_date);
                     $postItemLink = get_post_meta($post->ID, 'item_link', true);
                     if ($theTitle == $itemTitle || $postItemLink == $itemLink) {
                         $thePostsDoubleCheck++;
                         pf_log('We already have the post ' . $theTitle . ' with the link ' . $itemLink);
                         # The meta may be missing (empty string), so normalise to an integer before counting.
                         $sourceRepeat = (int) get_post_meta($postID, 'source_repeat', true);
                         # Either way the item has been seen once more.
                         $sourceRepeat++;
                         if ($itemDate <= $postDate) {
                             # The incoming item is not more recent than the stored post:
                             # record the repeat on the stored post and skip the insert.
                             if ($thePostsDoubleCheck > $sourceRepeat) {
                                 update_post_meta($postID, 'source_repeat', $sourceRepeat);
                             }
                             $thepostscheck++;
                         }
                         # Otherwise the incoming item is more recent, so it becomes the new
                         # dominant post and carries the incremented count with it.
                     } else {
                         # If it isn't duplicated at all, then we need to give it a source repeat count of 0
                         $sourceRepeat = 0;
                     }
                 }
             }
         }
         wp_reset_query();
         # Why an increment here instead of a bool?
         # If errors start appearing, the count shows how many times an item is in the database,
         # and could be used to clean up duplicates created by simultaneous refreshes.
         if ($thepostscheck === 0) {
             $item_title = $item['item_title'];
             $item_content = $item['item_content'];
             $item_feat_img = $item['item_feat_img'];
             $source_title = $item['source_title'];
             $item_date = $item['item_date'];
             $item_author = $item['item_author'];
             $item_link = $item['item_link'];
             $item_wp_date = $item['item_wp_date'];
             $item_tags = $item['item_tags'];
             if (!isset($item['parent_feed_id']) || !$item['parent_feed_id']) {
                 $item['parent_feed_id'] = $parent;
             }
             $feed_obj_id = $item['parent_feed_id'];
             $source_repeat = $sourceRepeat;
             # Trying to prevent bad or malformed HTML from entering the database.
             $item_title = strip_tags($item_title);
             $item_content = strip_tags($item_content, '<p> <strong> <bold> <i> <em> <emphasis> <del> <h1> <h2> <h3> <h4> <h5> <a> <img>');
             # Do we want or need the post_status to be published?
             $data = array(
                 'post_status' => 'publish',
                 'post_type' => pf_feed_item_post_type(),
                 'item_title' => $item_title,
                 'post_parent' => $feed_obj_id,
                 'item_content' => $item_content,
                 'item_link' => $item_link,
                 'source_title' => $source_title,
                 'item_wp_date' => $item_wp_date,
                 'item_tags' => $item_tags,
             );
             # The post gets created here, the $newNomID variable contains the new post's ID.
             $newNomID = self::create($data);
             $post_inserted_bool = self::post_inserted($newNomID, $data);
             if (!$post_inserted_bool) {
                 # It's the end of the world! Let's throw everything at this.
                 pf_log('Post will not go into the database. We will try again.');
                 $item_content = htmlentities(strip_tags($item_content), ENT_QUOTES, "UTF-8");
                 $item_content = wp_kses(stripslashes($item_content), array('p', 'a', 'b', 'em', 'strong'));
                 $item_content = self::extra_special_sanatize($item_content, true);
                 $item_content = wpautop($item_content);
                 $item_title = self::extra_special_sanatize($item_title, true);
                 # Both cleaned-up values must reach the retry, not just the content.
                 $data['item_title'] = $item_title;
                 $data['item_content'] = $item_content;
                 $newNomID = self::create($data);
                 $post_inserted_bool = self::post_inserted($newNomID, $data);
             }
             pf_log('End of wp_insert_post process.');
             # Somewhere in the process links with complex queries at the end (joined by ampersands) are getting encoded.
             # I don't want that, so I turn it back here.
             # For some reason this is only happening to the ampersands, so that's the only thing I'm changing.
             $item_link = str_replace('&amp;', '&', $item_link);
             # Featured-image lookup: only attempted when the request posted an 'item_feat_img' value.
             # set_ext_as_featured is deliberately not used here, as that should only occur when items
             # are nominated. Before nominations, the featured image should remain a meta field with an external link.
             if (!empty($_POST['item_feat_img'])) {
                 # NOTE(review): the cache key previously used an undefined $itemUID; the item's ID is
                 # assumed to be the intended unique key - confirm.
                 if (false === ($itemFeatImg = get_transient('feed_img_' . $item_id))) {
                     set_time_limit(0);
                     # If it forces the issue when we try and get the image, there's nothing we can do.
                     # Fetch the OpenGraph data once and reuse the result.
                     $node = pressforward()->og_reader->fetch($item_link);
                     if ($node) {
                         # If there is no featured image passed, let's try and grab the opengraph image.
                         $itemFeatImg = $node->image;
                     }
                     # Possible future work: when no OpenGraph image is found, pull the first
                     # image from the body of the post.
                     # Most RSS feed readers don't store the image locally. Should we?
                     set_transient('feed_img_' . $item_id, $itemFeatImg, 60 * 60 * 24);
                 }
             }
             # adding the meta info about the feed item to the post's meta.
             $pf_meta_args = array(
                 pf_meta_for_entry('item_id', $item_id),
                 pf_meta_for_entry('source_title', $source_title),
                 pf_meta_for_entry('item_date', $item_date),
                 pf_meta_for_entry('item_author', $item_author),
                 pf_meta_for_entry('item_link', $item_link),
                 pf_meta_for_entry('item_feat_img', $item_feat_img),
                 pf_meta_for_entry('item_wp_date', $item_wp_date),
                 pf_meta_for_entry('sortable_item_date', strtotime($item_date)),
                 pf_meta_for_entry('item_tags', $item_tags),
                 pf_meta_for_entry('source_repeat', $source_repeat),
                 pf_meta_for_entry('revertible_feed_text', $item_content),
             );
             pf_meta_establish_post($newNomID, $pf_meta_args);
         }
     }
     # Release the chunk flag and move on to the next feed.
     update_option(PF_SLUG . '_ready_to_chunk', 1);
     pressforward()->pf_retrieve->advance_feeds();
 }