/**
 * Convenience wrapper around SearchWPIndexer::extract_pdf_text().
 *
 * A fresh SearchWPIndexer is created on every call; the supplied ID is
 * normalised with absint() before being handed to the indexer.
 *
 * @param mixed $post_id ID forwarded (after absint()) to the indexer.
 *
 * @return mixed Whatever SearchWPIndexer::extract_pdf_text() returns.
 */
function searchwp_extract_pdf_text($post_id)
{
    $searchwp_indexer = new SearchWPIndexer();
    $normalized_id = absint($post_id);

    return $searchwp_indexer->extract_pdf_text($normalized_id);
}
/**
 * Tap in to the SearchWP indexing process and check whether a WP Document Revisions post is being indexed.
 * If it is, retrieve the most recent revision, extract the PDF content of the file it points to and store
 * that text under the pseudo Custom Field key swp_wp_document_revision in the metadata array.
 *
 * The metadata is returned untouched when the post is not of type 'document', when the SearchWPIndexer
 * class is unavailable, or when the latest revision or the post it references cannot be resolved.
 *
 * @param array  $post_metadata Metadata array for the post being indexed; a key is added to it.
 * @param object $post_to_index Post being indexed (presumably a WP_Post; only post_type is read here).
 *
 * @return array The metadata array, extended with swp_wp_document_revision when extraction ran.
 */
function index_wp_document_revisions($post_metadata, $post_to_index)
{
    // make sure it's a WP Document Revisions Document
    if ('document' !== $post_to_index->post_type) {
        return $post_metadata;
    }

    if (!class_exists('SearchWPIndexer')) {
        return $post_metadata;
    }

    // get the latest Revision
    $rev_id = $this->mywpdr_get_latest_revision($post_to_index);

    // get_post() falls back to the global $post when handed an empty value,
    // so bail out rather than risk indexing an unrelated post
    if (empty($rev_id)) {
        return $post_metadata;
    }

    $rev_post = get_post($rev_id);
    if (empty($rev_post) || empty($rev_post->post_content)) {
        return $post_metadata;
    }

    // the revision's post_content is used as the ID of the post whose file gets extracted
    $revision = get_post($rev_post->post_content);
    if (empty($revision)) {
        return $post_metadata;
    }

    // grab the PDF content from Xpdf
    $indexer = new SearchWPIndexer();
    $indexer->set_post($revision);
    $pdf_content = $indexer->extract_pdf_text(absint($revision->ID));

    // add it to the pseudo-metadata array
    $post_metadata['swp_wp_document_revision'] = $pdf_content;

    return $post_metadata;
}