/**
 * Load the configured scrape target, read any stored scrapes and register the admin hooks.
 *
 * Reads the 'scrapeURL' option. When it holds a URL with a scheme and a host, the page is
 * downloaded and passed to a spider whose div_meta result is stored in $this->meta_structure.
 * Then, if the plugin table exists, every row is loaded into $this->scrapes, and the
 * admin menu, admin init and AJAX callbacks are registered.
 */
public function __construct() {
    $URL = get_option('scrapeURL');
    if ($URL != '') {
        $path_parts = parse_url($URL);
        if ($path_parts) {
            // NOTE(review): nothing visible in this method uses file.php any more; the include is
            // kept in case other parts of the plugin rely on it having been loaded here - confirm.
            if (!function_exists('request_filesystem_credentials')) {
                require_once ABSPATH . 'wp-admin/includes/file.php';
            }

            // parse_url() only returns the components that are present, so a value such as
            // "example.com/page" has neither scheme nor host. Skip the fetch in that case
            // instead of building a bogus "://" domain.
            if (isset($path_parts['scheme'], $path_parts['host'])) {
                $this->domain = $path_parts['scheme'] . '://' . $path_parts['host'];
                // Keep an explicit port, otherwise the request goes to the default port.
                if (isset($path_parts['port'])) {
                    $this->domain .= ':' . $path_parts['port'];
                }

                $this->URL = '';
                if (isset($path_parts['path'])) {
                    $this->URL .= $path_parts['path'];
                }
                if (isset($path_parts['query']) && $path_parts['query'] != '') {
                    $this->URL .= '?' . $path_parts['query'];
                }

                $target = $this->domain . $this->URL;

                // Warnings are suppressed because an unreachable host is an expected condition.
                $file_headers = @get_headers($target);

                // Match the status code only, so "HTTP/1.0 404 ..." or a different reason
                // phrase is recognised as well as the literal "HTTP/1.1 404 Not Found".
                $not_found = is_array($file_headers)
                    && isset($file_headers[0])
                    && preg_match('#^HTTP/\S+\s+404\b#', $file_headers[0]) === 1;

                // As before, the download is still attempted when the header probe itself failed.
                if (!$not_found) {
                    $this->HTML = file_get_contents($target);

                    // file_get_contents() returns false on failure; only hand real content to the spider.
                    if ($this->HTML !== false) {
                        $spider = new spider($this->domain, $this->URL);
                        $spider->calculate_scrape_details($this->HTML);
                        $this->meta_structure = $spider->div_meta;
                    }
                }
            }
        }
    }

    global $wpdb;
    $table = $wpdb->prefix . self::$db_table;
    // Only query the table when it exists.
    if ($wpdb->get_var('SHOW TABLES LIKE \'' . $table . '\' ') == $table) {
        $this->scrapes = $wpdb->get_results('SELECT * FROM ' . $table);
    }

    add_action('admin_menu', array($this, 'site_importer_menu'));
    add_action('admin_init', array($this, 'site_importer_init'));
    // Only the logged-in AJAX hook is registered; there is no wp_ajax_nopriv_ counterpart.
    add_action('wp_ajax_esi_update_option', array($this, 'esi_update_option_callback'));
}
/**
 * Setup the settings fields for the form and in the database.
 *
 * Steps performed:
 *  1. Reads the 'scrapeURL' option and, when it is a URL with a scheme and host, downloads
 *     the page and stores the spider's div_meta in $this->meta_structure (used as the
 *     options of the "Main HTML block" select).
 *  2. Declares the settings sections and fields for the 'site_spider', 'site_scrape' and
 *     'site_importer' pages.
 *  3. Registers every option with the Settings API, attaching the sanitise callbacks for
 *     scrapeURL, scrapeDepth, startHTML and endHTML.
 *
 * NOTE(review): the constructor performs the same download; confirm whether fetching again
 * here is required.
 */
public function site_importer_init() {
    $URL = get_option('scrapeURL');
    if ($URL != '') {
        $path_parts = parse_url($URL);
        if ($path_parts) {
            // NOTE(review): nothing visible in this method uses file.php any more; the include is
            // kept in case other parts of the plugin rely on it having been loaded here - confirm.
            if (!function_exists('request_filesystem_credentials')) {
                require_once ABSPATH . 'wp-admin/includes/file.php';
            }

            // parse_url() only returns the components that are present; without a scheme and
            // a host there is nothing sensible to fetch.
            if (isset($path_parts['scheme'], $path_parts['host'])) {
                $this->domain = $path_parts['scheme'] . '://' . $path_parts['host'];
                // Keep an explicit port, otherwise the request goes to the default port.
                if (isset($path_parts['port'])) {
                    $this->domain .= ':' . $path_parts['port'];
                }

                $this->URL = '';
                if (isset($path_parts['path'])) {
                    $this->URL .= $path_parts['path'];
                }
                if (isset($path_parts['query']) && $path_parts['query'] != '') {
                    $this->URL .= '?' . $path_parts['query'];
                }

                $target = $this->domain . $this->URL;

                // Warnings are suppressed because an unreachable host is an expected condition.
                $file_headers = @get_headers($target);

                // Match the status code only, so any protocol version / reason phrase is recognised.
                $not_found = is_array($file_headers)
                    && isset($file_headers[0])
                    && preg_match('#^HTTP/\S+\s+404\b#', $file_headers[0]) === 1;

                // As before, the download is still attempted when the header probe itself failed.
                if (!$not_found) {
                    $this->HTML = file_get_contents($target);

                    // file_get_contents() returns false on failure; only hand real content to the spider.
                    if ($this->HTML !== false) {
                        $spider = new spider($this->domain, $this->URL);
                        $spider->calculate_scrape_details($this->HTML);
                        $this->meta_structure = $spider->div_meta;
                    }
                }
            }
        }
    }

    // ---- Crawler settings (page: site_spider) ----
    add_settings_section('scrape_url', 'Site crawler settings', array($this, 'site_url_details'), 'site_spider');
    add_settings_field('scrapeURL', 'Website URL', array($this, 'text_field'), 'site_spider', 'scrape_url', array(
        'name' => 'scrapeURL',
        'label_for' => 'Website URL',
    ));
    add_settings_field('scrapeDepth', 'Max depth', array($this, 'text_field'), 'site_spider', 'scrape_url', array(
        'name' => 'scrapeDepth',
        'label_for' => 'Max depth',
        'tutor' => 'This field indicates how far into the site structure from the "Website URL" to crawl.<br/><br/><img src="' . plugins_url('/img/structure.png', dirname(__FILE__)) . '" width="70" height="47" alt="Site Structure" />',
    ));

    // ---- Scrape settings (page: site_scrape) ----
    add_settings_section('scrape_details', 'Scrape settings', array($this, 'scrape_details'), 'site_scrape');
    add_settings_field('mainHTMLBlock', 'Main HTML block', array($this, 'select_box'), 'site_scrape', 'scrape_details', array(
        'name' => 'mainHTMLBlock',
        'label_for' => 'Main HTML block',
        'options' => $this->meta_structure,
        'options_name' => 'name',
        'default' => '<body>',
        'tutor' => 'These have been calculated from the Website URL and should only contain divs/sections which contain text content',
    ));

    // The include-start / include-end checkboxes are passed to the text fields through 'extra'.
    $extra = '<label for="Include the start HTML">Include the start HTML</label>' . $this->checkbox_field(array(
        'name' => 'includeStart',
        'label_for' => 'Include the start HTML',
        'tutor' => 'If ticked this will scrape this HTML in as well, otherwise it will strip it',
    ), true);
    add_settings_field('startHTML', 'Start of HTML to scrape', array($this, 'text_field'), 'site_scrape', 'scrape_details', array(
        'name' => 'startHTML',
        'label_for' => 'Start of HTML to scrape',
        'extra' => $extra,
        'tutor' => 'Within the Main HTML block specify the start of the html to start scraping',
    ));

    $extra = '<label for="Include the end HTML">Include the End HTML</label>' . $this->checkbox_field(array(
        'name' => 'includeEnd',
        'label_for' => 'Include the end HTML',
        'tutor' => 'If ticked this will scrape this HTML in as well, otherwise it will strip it',
    ), true);
    add_settings_field('endHTML', 'End of HTML to scrape', array($this, 'text_field'), 'site_scrape', 'scrape_details', array(
        'name' => 'endHTML',
        'label_for' => 'End of HTML to scrape',
        'extra' => $extra,
        'tutor' => 'Within the Main HTML block specifty where to stop scraping the content',
    ));

    // ---- HTML adjustments (page: site_scrape) ----
    add_settings_section('filter_details', 'HTML adjustments', array($this, 'filter_details'), 'site_scrape');
    add_settings_field('stripCSS', 'Strip inline CSS', array($this, 'checkbox_field'), 'site_scrape', 'filter_details', array(
        'name' => 'stripCSS',
        'label_for' => 'Strip inline CSS',
        'tutor' => 'Tick this box to remove any inline CSS during the scraping process eg.<h1 style="color:blue"> would become <h1>',
    ));
    add_settings_field('stripClass', 'Strip all classes', array($this, 'checkbox_field'), 'site_scrape', 'filter_details', array(
        'name' => 'stripClass',
        'label_for' => 'Strip all classes',
        'tutor' => 'Classes may not be relevant once imported so tick here to have them removed eg.<h1 class"old_class_name"> would become <h1>',
    ));
    add_settings_field('stripDiv', 'Strip all div tags', array($this, 'checkbox_field'), 'site_scrape', 'filter_details', array(
        'name' => 'stripDiv',
        'label_for' => 'Strip all div tags',
        'tutor' => 'Tick here to have Divs removed eg.<div id="col1"> welcome </div> would become "welcome"',
    ));
    add_settings_field('stripSpan', 'Strip all span tags', array($this, 'checkbox_field'), 'site_scrape', 'filter_details', array(
        'name' => 'stripSpan',
        'label_for' => 'Strip all span tags',
        'tutor' => 'Tick here to have spans removed eg.<span class="red"> welcome </span> would become "welcome>"',
    ));
    add_settings_field('replaceDomain', 'Remove scraped domain name from images and links', array($this, 'checkbox_field'), 'site_scrape', 'filter_details', array(
        'name' => 'replaceDomain',
        'label_for' => 'Remove scraped domain name from images and links',
        'tutor' => 'If the site has absolute links to the old domain then these can be replace eg. www.oldsite.com/img/logo.png would become /img/logo.png',
    ));

    // ---- WordPress settings (page: site_importer) ----
    add_settings_section('wordpress_settings', 'Wordpress Settings', false, 'site_importer');
    add_settings_field('postType', 'Import into post type', array($this, 'select_box'), 'site_importer', 'wordpress_settings', array(
        'name' => 'postType',
        'label_for' => 'Import into post type',
        'options_name' => 'name',
        'options' => array(array('name' => 'post')),
        'default' => 'page',
        'tutor' => 'Set the post type to import items into either the Posts or as Pages',
    ));
    add_settings_field('postNameRemove', 'Remove string from name', array($this, 'text_field'), 'site_importer', 'wordpress_settings', array(
        'name' => 'postNameRemove',
        'label_for' => 'Remove string from name',
        'options_name' => 'name',
        'tutor' => 'This string will be removed from the created page or post name eg. index or index,blog,en-gb ',
    ));

    // ---- Image settings (page: site_importer) ----
    add_settings_section('image_settings', 'Image Settings', array($this, 'image_settings'), 'site_importer');
    add_settings_field('importLocal', 'Import images on spidering domain', array($this, 'checkbox_field'), 'site_importer', 'image_settings', array(
        'name' => 'importLocal',
        'label_for' => 'Import images on spidering domain',
        'tutor' => 'Images held within the Website URL will be imported into the media library and the HTML will be changed to point to the new image',
    ));
    add_settings_field('importRemote', 'Import images on other domains', array($this, 'checkbox_field'), 'site_importer', 'image_settings', array(
        'name' => 'importRemote',
        'label_for' => 'Import remote images',
        'tutor' => 'Images found on any domain apart from the main Website URL will be imported into the media library and the HTML will be changed to point to the new image',
    ));
    add_settings_field('copyDuplicates', 'Copy duplicate images', array($this, 'checkbox_field'), 'site_importer', 'image_settings', array(
        'name' => 'copyDuplicates',
        'label_for' => 'Copy duplicate images',
        'tutor' => 'If this image name appears in the media library then tick if you still want the image copied',
    ));

    // ---- SEO settings (page: site_importer) ----
    add_settings_section('seo_settings', 'Seo Settings', array($this, 'seo_settings'), 'site_importer');

    // is_plugin_active() is declared in wp-admin/includes/plugin.php; load it if it is not there yet.
    if (!function_exists('is_plugin_active')) {
        require_once ABSPATH . 'wp-admin/includes/plugin.php';
    }

    // Collect the display names of the SEO plugins that are active, in a fixed order.
    $detected = array();
    if (is_plugin_active('wordpress-seo/wp-seo.php')) {
        $detected[] = 'Yoast SEO plugin';
    }
    if (is_plugin_active('all-in-one-seo-pack/all_in_one_seo_pack.php')) {
        $detected[] = 'All in one SEO pack plugin';
    }
    if (is_plugin_active('add-meta-tags/add-meta-tags.php')) {
        $detected[] = 'Add Meta Tags SEO plugin';
    }
    $seo_plugin = implode(' and the ', $detected);

    // Without an SEO plugin the two checkboxes are shown disabled with an explanatory hint.
    if ($seo_plugin != '') {
        $seo_disabled = false;
        $title_tutor = 'Import the title tag from the remote site directly into the ' . $seo_plugin . ' for the page';
        $description_tutor = 'Import the meta description tag from the remote site directly into the ' . $seo_plugin . ' for the page';
    } else {
        $seo_disabled = true;
        $title_tutor = 'No SEO plugins have been detected so the title tag can not be imported';
        $description_tutor = 'No SEO plugins have been detected so the Meta Description tag can not be imported';
    }
    add_settings_field('importTitle', 'Import the title tag', array($this, 'checkbox_field'), 'site_importer', 'seo_settings', array(
        'name' => 'importTitle',
        'label_for' => 'Import the title tag',
        'disabled' => $seo_disabled,
        'tutor' => $title_tutor,
    ));
    add_settings_field('importDescription', 'Import the meta description', array($this, 'checkbox_field'), 'site_importer', 'seo_settings', array(
        'name' => 'importDescription',
        'label_for' => 'Import the meta description',
        'disabled' => $seo_disabled,
        'tutor' => $description_tutor,
    ));

    // ---- Option registration ----
    register_setting('site_spider', 'scrapeURL', array($this, 'check_site_url'));
    register_setting('site_spider', 'scrapeDepth', array($this, 'check_scrape_depth'));

    register_setting('site_scrape', 'mainHTMLBlock');
    register_setting('site_scrape', 'startHTML', array($this, 'check_start_html_included'));
    register_setting('site_scrape', 'endHTML', array($this, 'check_end_html_included'));
    $scrape_options = array('includeStart', 'includeEnd', 'stripCSS', 'stripClass', 'stripDiv', 'stripSpan', 'replaceDomain');
    foreach ($scrape_options as $option_name) {
        register_setting('site_scrape', $option_name);
    }

    $importer_options = array('importLocal', 'importRemote', 'copyDuplicates', 'postType', 'postNameRemove', 'importTitle', 'importDescription');
    foreach ($importer_options as $option_name) {
        register_setting('site_importer', $option_name);
    }
}