pq() публичный статический Метод

public static pq ( $arg1, $context = null )
Пример #1
0
 /**
  * Parses the owner's template source with phpQuery, wraps the inner HTML of
  * every server-side component in a numbered "{<template name>_<n>}...{/}"
  * region, then reloads the owner's template from the rewritten markup.
  *
  * Stores the phpQuery instance in $this->pq, the parsed document in
  * $this->dom, and increments $this->spots once per wrapped component.
  */
 function createSpots()
 {
     // TODO :: Some caching ??
     $query = new phpQuery();
     $this->pq = $query;
     $document = $query->newDocument($this->owner->template->template_source);
     $this->dom = $document;

     $owner_is_frontend = $this->owner instanceof \Frontend;
     if (!$owner_is_frontend) {
         // When the owner is not a \Frontend, the document itself is flagged as page content.
         $query->pq($document)->attr('xepan-page-content', 'true');
         $query->pq($document)->addClass('xepan-page-content');
     }

     foreach ($document['.xepan-component'] as $node) {
         $component = $query->pq($node);
         if ($component->hasClass('xepan-serverside-component')) {
             // Only server-side components receive a spot; the counter advances per spot.
             $spot_number = $this->spots++;
             $opening_tag = '{' . $this->owner->template->name . '_' . $spot_number . '}';
             $component->html($opening_tag . $component->html() . '{/}');
         }
     }

     // Drop the comment markers that surround the ATK header section.
     $markup = str_replace(
         array('<!--xEpan-ATK-Header-Start', 'xEpan-ATK-Header-End-->'),
         '',
         $this->updateBaseHrefForTemplates()
     );

     $template = $this->owner->template;
     $template->loadTemplateFromString($markup);
     $template->trySet($this->app->page . '_active', 'active');
 }
Пример #2
0
 /**
  * Replaces every element marked data-is-serverside-component=true in the page
  * content with the HTML produced by the view named in its
  * data-responsible-namespace / data-responsible-view attributes.
  *
  * @param mixed $obj  Not used by this method.
  * @param mixed $page Page record read and written through $page['content'].
  *                    NOTE(review): the write-back only reaches the caller if
  *                    $page is an object (not a plain array) - confirm.
  */
 function Plugins_RunServerSideComponent($obj, $page)
 {
     include_once getcwd() . '/lib/phpQuery.php';
     $pq = new \phpQuery();
     $doc = $pq->newDocument($page['content']);
     foreach ($doc['[data-is-serverside-component=true]'] as $ssc) {
         // Wrap the DOM element once and reuse the wrapper for every attribute read.
         $component = $pq->pq($ssc);
         $options = array();
         foreach ($ssc->attributes as $attrName => $attrNode) {
             $options[$attrName] = $component->attr($attrName);
         }
         $namespace = $component->attr('data-responsible-namespace');
         $view = $component->attr('data-responsible-view');
         $path = getcwd() . DS . 'epan-components' . DS . $namespace . DS . 'lib' . DS . 'View' . DS . 'Tools' . DS . str_replace("View_Tools_", "", $view) . '.php';
         if (!file_exists($path)) {
             // The view class file is missing: render an error view in its place.
             $temp_view = $this->owner->add('View_Error')->set("Server Side Component Not Found :: {$namespace}/{$view}");
         } else {
             $temp_view = $this->owner->add("{$namespace}/{$view}", array('html_attributes' => $options, 'data_options' => $component->attr('data-options')));
         }
         // empty() treats a missing cut_object / cut_page exactly like a falsy one,
         // without raising an undefined-index warning.
         if (empty($_GET['cut_object']) && empty($_GET['cut_page'])) {
             $html = $temp_view->getHTML();
             $component->html("")->append($html);
         }
     }
     $page['content'] = $doc->htmlOuter();
 }
Пример #3
0
    /**
     * Builds $this->arFields for every "table.fileinfo" element found in
     * $this->dom (name and link come from the table's second anchor) and
     * calls Add() once per table.
     *
     * @return bool Always true.
     */
    public function go()
    {
        try {
            $this->checker = \Ns\Bitrix\Helper::Create('iblock')->useVariant('checker');
        } catch (\Exception $e) {
            // A failure to obtain the checker is only reported; the import continues.
            prentExpection($e->getMessage());
        }

        foreach ($this->dom->find("table.fileinfo") as $node) {
            // Every element starts from a clean field set.
            $this->arFields = array("IBLOCK_ID" => self::ALFADOCUMENTS_IBLOCK_ID);

            $table = \phpQuery::pq($node);
            $this->arFields["NAME"] = $table->find('a:eq(1)')->text();
            $this->arFields["PROPERTY_VALUES"] = array(
                "LINK" => $table->find('a:eq(1)')->attr("href"),
            );

            prent($this->arFields);
            $this->Add();
        }

        return true;
    }
Пример #4
0
 /**
  * Renders the menu element through the parent walker, then injects the
  * "background image" and "background orientation" fields in front of the
  * element's "a.item-delete" link.
  *
  * $output is left exactly as the parent produced it when the id of the last
  * "li.menu-item" does not match $object->ID.
  */
 function start_el(&$output, $object, $depth = 0, $args = array(), $current_object_id = 0)
 {
     // The parent appends the markup of the current menu element to $output.
     parent::start_el($output, $object, $depth, $args, $current_object_id);

     // phpQuery is loaded as late as possible to minimise the chance of conflicts.
     if (!class_exists('phpQuery')) {
         require_once 'phpQuery-onefile.php';
     }

     $document = phpQuery::newDocumentHTML($output);
     // ":last" matters: $output also contains every menu element rendered before this one.
     $item = phpQuery::pq('li.menu-item:last');

     // Safety net: only touch the markup when the last <li> really is the element being rendered.
     $menu_item_id = str_replace('menu-item-', '', $item->attr('id'));
     if ($menu_item_id == $object->ID) {
         // Previously saved meta of the menu item (a menu item is stored as a post).
         $curr_bg = esc_attr(get_post_meta($menu_item_id, 'snpshpwp_menu_item_bg', TRUE));
         $curr_bg_pos = esc_attr(get_post_meta($menu_item_id, 'snpshpwp_menu_item_bg_pos', TRUE));
         $curr_upldr = '<span class="button media_upload_button" id="snpshpwp_upload_' . $menu_item_id . '">' . __('Upload', 'snpshpwp') . '</span>';

         // Insert the extra form fields before the delete link.
         $item->find('a.item-delete')->before("\n\t\t\t\t\t<p class='snpshpwp_menu_item_bg description description-thin'>\n\t\t\t\t\t<label for='snpshpwp_menu_item_bg_{$menu_item_id}'>" . __('Background image', 'snpshpwp') . "<br/>\n\t\t\t\t\t<input type='text' value='{$curr_bg}' name='snpshpwp_menu_item_bg_{$menu_item_id}' /><br/>\n\t\t\t\t\t</label>\n\t\t\t\t\t{$curr_upldr}\n\t\t\t\t\t</p>\n\t\t\t\t\t<p class='snpshpwp_menu_item_bg_pos description description-thin'>\n\t\t\t\t\t<label for='snpshpwp_menu_item_bg_{$menu_item_id}'>" . __('Background orientation', 'snpshpwp') . "<br/>\n\t\t\t\t\t<select name='snpshpwp_menu_item_bg_pos_{$menu_item_id}'>\n\t\t\t\t\t\t<option value='left-landscape'" . ($curr_bg_pos == 'left-landscape' ? ' selected' : '') . ">" . __('Left Landscape', 'snpshpwp') . "</option>\n\t\t\t\t\t\t<option value='left-portraid'" . ($curr_bg_pos == 'left-portraid' ? ' selected' : '') . ">" . __('Left Portraid', 'snpshpwp') . "</option>\n\t\t\t\t\t\t<option value='right-landscape'" . ($curr_bg_pos == 'right-landscape' ? ' selected' : '') . ">" . __('Right Landscape', 'snpshpwp') . "</option>\n\t\t\t\t\t\t<option value='right-portraid'" . ($curr_bg_pos == 'right-portraid' ? ' selected' : '') . ">" . __('Right Portraid', 'snpshpwp') . "</option>\n\t\t\t\t\t\t<option value='pattern-repeat'" . ($curr_bg_pos == 'pattern-repeat' ? ' selected' : '') . ">" . __('Pattern', 'snpshpwp') . "</option>\n\t\t\t\t\t\t<option value='framed-full'" . ($curr_bg_pos == 'framed-full' ? ' selected' : '') . ">" . __('Framed', 'snpshpwp') . "</option>\n\t\t\t\t\t</select>\n\t\t\t\t\t</label>\n\t\t\t\t\t</p>\n\t\t\t\t\t");

         // Hand the modified markup back to the caller.
         $output = $document->html();
     }
 }
Пример #5
0
 /**
  * Saves the page markup posted in $_POST['body_html'] to the current page and,
  * when $_POST['take_snapshot'] is 'Y', stores a snapshot of it as well.
  *
  * The posted length and CRC32 are compared with values recomputed from the
  * body; a mismatch is reported to the client. Elements marked
  * data-is-serverside-component=true are emptied before the content is stored.
  * On completion the method prints "saved" and terminates the request.
  *
  * NOTE(review): the guards below do not return; they appear to rely on
  * execute() ending the request - confirm.
  */
 function init()
 {
     parent::init();
     if (!$this->api->auth->isLoggedIn()) {
         $this->js()->univ()->errorMessage('You Are Not Logged In')->execute();
     }
     // Integrity mismatches are failures, so they are reported with errorMessage().
     if ($_POST['length'] != strlen($_POST['body_html'])) {
         $this->js()->univ()->errorMessage('Length send ' . $_POST['length'] . " AND Length calculated again is " . strlen($_POST['body_html']))->execute();
     }
     if ($_POST['crc32'] != sprintf("%u", crc32($_POST['body_html']))) {
         $this->js()->univ()->errorMessage('CRC send ' . $_POST['crc32'] . " AND CRC calculated again is " . sprintf("%u", crc32($_POST['body_html'])))->execute();
     }
     try {
         $content = $_POST['body_html'];
         include_once getcwd() . '/lib/phpQuery.php';
         $pq = new \phpQuery();
         // Plain assignment: a function result cannot be bound by reference.
         $doc = $pq->newDocument(trim($content));
         foreach ($doc['[data-is-serverside-component=true]'] as $ssc) {
             // Server-side components are stored empty (the original appended an undefined variable here).
             $pq->pq($ssc)->html("");
         }
         $content = $doc->htmlOuter();
         $this->api->current_page['content'] = urldecode(trim($content));
         $this->api->current_page['body_attributes'] = urldecode($_POST['body_attributes']);
         $this->api->exec_plugins('epan-page-before-save', $this->api->current_page);
         $this->api->current_page->save();
         $this->api->exec_plugins('epan-page-after-save', $this->api->current_page);
         if (isset($_POST['take_snapshot']) && $_POST['take_snapshot'] == 'Y') {
             // The 'epan-page-before-snapshot' / 'epan-page-after-snapshot' hooks are currently not fired.
             $new_version = $this->api->current_page->ref('EpanPageSnapshots');
             foreach (array('title', 'keywords', 'description', 'body_attributes', 'content') as $field) {
                 $new_version[$field] = $this->api->current_page[$field];
             }
             $new_version->save();
         }
     } catch (Exception_StopInit $e) {
         // Deliberately ignored: the request still finishes with "saved" below.
     }
     // Any other exception propagates to the caller, as it did before
     // (the old handler re-threw it as its first statement).
     echo "saved";
     exit;
 }
Пример #6
0
    /**
     * Walks every "span.cat_block" in $this->dom, resolves the section ids for
     * the block and prints the field set of each "table.fileinfo" inside it.
     *
     * Adding the elements is currently disabled (the Add() call is commented out).
     *
     * @return bool Always true.
     */
    public function go()
    {
        try
        {
            $this->checker = \Ns\Bitrix\Helper::Create('iblock')->useVariant('checker');
        }
        catch (\Exception $e)
        {
            // A failure to obtain the checker is only reported; processing continues.
            prentExpection($e->getMessage());
        }
        // Parent section of the most recent block that had a <strong> heading;
        // defined up front so the first block never reads an undefined variable.
        $mainSection = null;
        foreach ($this->dom->find("span.cat_block") as $span)
        {
            $this->arFields = array();
            $this->arFields["IBLOCK_ID"] = self::ALFATAXES_IBLOCK_ID;
            $span = \phpQuery::pq($span);
            $heading = $span->find("strong");
            // find() always returns an object (truthy even with no match),
            // so the number of matched elements is what has to be tested.
            if ($heading->size() > 0)
            {
                $mainSection = $this->findOrCreateSection($heading->text());
            }
            $this->arFields["IBLOCK_SECTION_ID"] = $this->findOrCreateSection($span->find("a:eq(0)")->text(), $mainSection);
            foreach ($span->find('table.fileinfo') as $table)
            {
                $table = \phpQuery::pq($table);
                // Name and link both come from the table's second anchor.
                $this->arFields["NAME"] = $table->find("a:eq(1)")->text();
                $this->arFields["PROPERTY_VALUES"]["LINK"] = $table->find("a:eq(1)")->attr("href");
                prent($this->arFields);
                // $this->Add();
            }
        }

        return true;
    }
Пример #7
0
Файл: page.php Проект: xepan/cms
 /**
  * Writes the markup posted in $_POST['body_html'] to $_POST['file_path'].
  *
  * The posted length and CRC32 are compared with values recomputed from the
  * body, and the target must resolve to a location inside the current
  * website's directory. Absolute URLs pointing at the website's www/ folder
  * are made relative; when the file lives under www/layout/, the content of
  * every ".xepan-page-wrapper" element is replaced with the {$Content} tag.
  *
  * NOTE(review): the guards below do not return; they appear to rely on
  * execute() ending the request - confirm.
  */
 function init()
 {
     parent::init();
     if (!$this->api->auth->isLoggedIn()) {
         $this->js()->univ()->errorMessage('You Are Not Logged In')->execute();
     }
     if ($_POST['length'] != strlen($_POST['body_html'])) {
         $this->js()->univ()->errorMessage('Length send ' . $_POST['length'] . " AND Length calculated again is " . strlen($_POST['body_html']))->execute();
     }
     if ($_POST['crc32'] != sprintf("%u", crc32($_POST['body_html']))) {
         $this->js()->univ()->errorMessage('CRC send ' . $_POST['crc32'] . " AND CRC calculated again is " . sprintf("%u", crc32($_POST['body_html'])))->execute();
     }
     // Both sides are resolved with realpath() so "../" segments and symlinks cannot
     // escape the website directory. The trailing separator keeps "site" from matching "site2".
     $website_root = realpath('websites/' . $this->app->current_website_name);
     $target_dir = realpath(dirname($_POST['file_path']));
     if ($website_root === false
         || $target_dir === false
         || strpos($target_dir . DIRECTORY_SEPARATOR, $website_root . DIRECTORY_SEPARATOR) !== 0
     ) {
         $this->js()->univ()->errorMessage('You cannot save in this location')->execute();
     }
     $html_content = urldecode(trim($_POST['body_html']));
     // convert all absolute url to relative
     $domain = $this->app->pm->base_url . $this->app->pm->base_path . 'websites/' . $this->app->current_website_name . '/www/';
     $html_content = str_replace($domain, '', $html_content);
     // add {$Content} tag if its template being saved
     $layout_dir = $this->app->pm->base_path . 'websites/' . $this->app->current_website_name . '/www/layout/';
     // strpos() returns 0 for a match at the very start, so compare against false explicitly.
     if (strpos($_POST['file_path'], $layout_dir) !== false) {
         $this->pq = $pq = new phpQuery();
         $this->dom = $dom = $pq->newDocument($html_content);
         foreach ($dom['.xepan-page-wrapper'] as $d) {
             $pq->pq($d)->html('{$Content}');
         }
         $html_content = $dom->html();
     }
     try {
         // file_put_contents() signals failure by returning false rather than throwing,
         // so the failure is converted into an exception for the handler below.
         if (file_put_contents($_POST['file_path'], $html_content) === false) {
             throw new \Exception('Could not write ' . $_POST['file_path']);
         }
         $this->js()->_selectorDocument()->univ()->successMessage("Content Saved")->execute();
     } catch (\Exception $e) {
         $this->js()->_selectorDocument()->univ()->errorMessage($e->getMessage())->execute();
     }
 }
Пример #8
0
    /**
     * Reads every row of "table.mainfnt" in $this->dom into
     * $this->arFields["PROPERTY_VALUES"] (one property per cell, cells 0-5) and
     * calls Add() for each row, except rows whose third cell is the column caption.
     *
     * @return bool Always true.
     */
    public function go()
    {
        try {
            $this->checker = \Ns\Bitrix\Helper::Create('iblock')->useVariant('checker');
        } catch (\Exception $e) {
            // A failure to obtain the checker is only reported; the import continues.
            prentExpection($e->getMessage());
        }

        // Property name => zero-based index of the cell it is read from.
        $cell_of_property = array(
            "CITY" => 0,
            "ADDRESS" => 1,
            "LOCATION" => 2,
            "CURRENCY" => 3,
            "OPERATION_TIME" => 4,
            "STATUS" => 5,
        );

        $rows = \phpQuery::pq($this->dom->find("table.mainfnt"))->find("tr");
        foreach ($rows as $node) {
            $row = \phpQuery::pq($node);
            foreach ($cell_of_property as $property => $cell) {
                $this->arFields["PROPERTY_VALUES"][$property] = $row->find('td:eq(' . $cell . ')')->text();
            }

            // The header row carries the caption text instead of data and is not added.
            if ($this->arFields["PROPERTY_VALUES"]["LOCATION"] != "РАСПОЛОЖЕНИЕ") {
                $this->arFields["NAME"] = $this->composeName();
                $this->Add();
            }
        }

        return true;
    }
Пример #9
0
 /**
  * Looks up $selector in this object's document, passes the found elements to
  * merge() and returns the result of newInstance().
  *
  * The current element list is copied to $this->elementsBackup before merging.
  * An empty $selector is a no-op that returns this object unchanged.
  *
  * @param string|phpQueryObject $selector
  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
  */
 public function add($selector = null)
 {
     if (!$selector) {
         return $this;
     }
     $this->elementsBackup = $this->elements;
     $found = phpQuery::pq($selector, $this->getDocumentID());
     $this->merge($found->elements);
     return $this->newInstance();
 }
Пример #10
0
 /**
  * Handles a submit event: serializes the targeted form, sends it through
  * phpQuery::ajax() and, when the last response is successful and a callback
  * is available, runs the callback with the result of browserReceive().
  *
  * Nothing happens unless the event target is a form with an action attribute.
  *
  * @param object $e        Event; target, relatedTarget and data are read from it.
  * @param mixed  $callback Optional callback; $e->data[1] is used instead when this
  *                         is empty or not a Callback instance and $e->data[1] is set.
  * @TODO trigger submit for form after form's  submit button has a click event
  */
 public static function handleSubmit($e, $callback = null)
 {
     $form = phpQuery::pq($e->target);
     if (!($form->is('form') && $form->is('[action]'))) {
         return;
     }

     // TODO document.location
     $xhr = null;
     if (isset($form->document->xhr)) {
         $xhr = $form->document->xhr;
     }

     // Prefer the element that triggered the event when it is a submit control,
     // otherwise fall back to the form's first submit control.
     if (pq($e->relatedTarget)->is(':submit')) {
         $submit = $e->relatedTarget;
     } else {
         $submit = $form->find('*:submit:first')->get(0);
     }

     // Flatten the serialized fields into a name => value map.
     // (Open question kept from the original author: would
     // $node->not(':submit')->add($submit) be a better source?)
     $fields = array();
     foreach ($form->serializeArray($submit) as $pair) {
         $fields[$pair['name']] = $pair['value'];
     }

     $method = $form->attr('method');
     $options = array();
     $options['type'] = $method ? $method : 'GET';
     $options['url'] = resolve_url($e->data[0], $form->attr('action'));
     $options['data'] = $fields;
     $options['referer'] = $form->document->location;
     if ($form->attr('enctype')) {
         $options['contentType'] = $form->attr('enctype');
     }

     $xhr = phpQuery::ajax($options, $xhr);

     $callback_unusable = !$callback || !$callback instanceof Callback;
     if ($callback_unusable && $e->data[1]) {
         $callback = $e->data[1];
     }

     if ($xhr->getLastResponse()->isSuccessful() && $callback) {
         phpQuery::callbackRun($callback, array(self::browserReceive($xhr)));
     }
 }
Пример #11
0
 /**
  * Extracts content from $html according to $data['rule'].
  *
  * With $data['dom'] set, the rule is a phpQuery selector in one of two forms:
  * "selector@attribute" (reads the attribute) or "selector\nmethod\nargument"
  * (calls the named method, "html" when none is given). Otherwise the rule is
  * either the literal "<%content%>" (returns $html unchanged), a regular
  * expression whose "content" group is returned, or a literal value returned
  * as is. With $data['multi'] set, all matches are joined with
  * "#--iCMS.PageBreak--#"; collection stops at the first repeated match.
  *
  * @param string $html Markup to search.
  * @param array  $data Reads the keys 'dom', 'rule' and 'multi'.
  * @param mixed  $rule Not used by this method.
  * @return mixed Extracted content; null when a single regex match has no "content" group.
  */
 public static function match($html, $data, $rule)
 {
     $match_hash = array();
     if ($data['dom']) {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         spider::$dataTest && !empty($_GET['pq_debug']) && (phpQuery::$debug = 1);
         $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
         // array_pad() keeps list() from reading missing offsets when the rule has fewer parts.
         if (strpos($data['rule'], '@') !== false) {
             list($content_dom, $content_attr) = array_pad(explode("@", $data['rule']), 2, '');
             $content_fun = 'attr';
         } else {
             list($content_dom, $content_fun, $content_attr) = array_pad(explode("\n", $data['rule']), 3, '');
         }
         $content_dom = trim($content_dom);
         $content_fun = trim($content_fun);
         $content_attr = trim($content_attr);
         $content_fun or $content_fun = 'html';
         if ($data['multi']) {
             $conArray = array();
             foreach ($doc[$content_dom] as $doc_key => $doc_value) {
                 if ($content_attr) {
                     $_content = phpQuery::pq($doc_value)->{$content_fun}($content_attr);
                 } else {
                     $_content = phpQuery::pq($doc_value)->{$content_fun}();
                 }
                 $cmd5 = md5($_content);
                 // A repeated match ends the collection.
                 if (isset($match_hash[$cmd5])) {
                     break;
                 }
                 $conArray[$doc_key] = $_content;
                 $match_hash[$cmd5] = true;
             }
             if (spider::$dataTest) {
                 echo "<b>多条匹配结果:</b><pre>";
                 print_r($match_hash);
                 echo "</pre><hr />";
             }
             $content = implode('#--iCMS.PageBreak--#', $conArray);
         } else {
             if ($content_attr) {
                 $content = $doc[$content_dom]->{$content_fun}($content_attr);
             } else {
                 $content = $doc[$content_dom]->{$content_fun}();
             }
         }
         phpQuery::unloadDocuments($doc->getDocumentID());
         unset($doc);
     } else {
         if (trim($data['rule']) == '<%content%>') {
             $content = $html;
         } else {
             $data_rule = spiderTools::pregTag($data['rule']);
             // Only rules that contain pattern syntax are executed as a regular expression.
             if (preg_match('/(<\\w+>|\\.\\*|\\.\\+|\\\\d|\\\\w)/i', $data_rule)) {
                 if ($data['multi']) {
                     preg_match_all('|' . $data_rule . '|is', $html, $matches, PREG_SET_ORDER);
                     $conArray = array();
                     foreach ((array) $matches as $mkey => $mat) {
                         $piece = isset($mat['content']) ? $mat['content'] : '';
                         $cmd5 = md5($piece);
                         // A repeated match ends the collection.
                         if (isset($match_hash[$cmd5])) {
                             break;
                         }
                         $conArray[$mkey] = $piece;
                         $match_hash[$cmd5] = true;
                     }
                     if (spider::$dataTest) {
                         echo "<b>多条匹配结果:</b><pre>";
                         print_r($match_hash);
                         echo "</pre><hr />";
                     }
                     $content = implode('#--iCMS.PageBreak--#', $conArray);
                 } else {
                     // preg_match() takes no PREG_SET_ORDER flag; the old code passed an undefined variable here.
                     preg_match('|' . $data_rule . '|is', $html, $matches);
                     $content = isset($matches['content']) ? $matches['content'] : null;
                 }
             } else {
                 $content = $data_rule;
             }
         }
     }
     return $content;
 }
Пример #12
0
 /**
  * Collects e-mail addresses from a page, from the pages its selected links
  * point to, and from the "contact" links found on those pages.
  *
  * Addresses are stored in $this->grabbed_data[<host>][<path . query>].
  * Any exception ends the run silently (best effort).
  *
  * @param string $url            Address of the page $content was fetched from.
  * @param string $content        Markup of that page.
  * @param string $regex_selector phpQuery selector choosing the links to follow.
  */
 function grab($url, $content, $regex_selector)
 {
     try {
         $pattern = '/[a-z0-9_\\-\\+\\.]{1,80}+@[a-z0-9\\-]{1,80}+\\.([a-z]{2,3})(?:\\.[a-z]{2})?/i';

         // Level 0: the page itself.
         $parsed_url = $this->grabStoreEmails($url, $content, $pattern);
         $pq = new phpQuery();
         $doc = @$pq->newDocumentHTML($content);
         $unique_filtered_links = $this->grabCollectLinks($pq, $doc[$regex_selector], $parsed_url, array());

         // Level 1: pages behind the selected links; their "contact" links are gathered as well.
         $results = $this->multi_request($unique_filtered_links);
         $contact_us_pages = array();
         foreach ($unique_filtered_links as $id => $site_url) {
             // somehow if no result was found just carry on
             if (empty($results[$id])) {
                 continue;
             }
             $site_parts = $this->grabStoreEmails($site_url, $results[$id], $pattern);
             $doc = @$pq->newDocumentHTML($results[$id]);
             $contact_us_pages = $this->grabCollectLinks($pq, $doc['a:contains("contact")'], $site_parts, $contact_us_pages);
         }

         // Level 2: the contact pages.
         $results = $this->multi_request($contact_us_pages);
         foreach ($results as $id => $contact_page_content) {
             if (!$contact_page_content) {
                 continue;
             }
             $contact_url = isset($contact_us_pages[$id]) ? $contact_us_pages[$id] : '';
             $this->grabStoreEmails($contact_url, $contact_page_content, $pattern);
         }
     } catch (Exception $e) {
         // Deliberate best effort: a failing run simply yields whatever was stored so far.
         return;
     }
 }

 /**
  * Stores every e-mail address found in $content under the host and
  * path . query of $page_url, and returns the parsed URL parts.
  *
  * @param string $page_url
  * @param string $content
  * @param string $pattern  Regular expression matching one e-mail address.
  * @return array Result of parse_url(), or an empty array when the URL cannot be parsed.
  */
 private function grabStoreEmails($page_url, $content, $pattern)
 {
     $parts = parse_url($page_url);
     if (!is_array($parts)) {
         $parts = array();
     }
     preg_match_all($pattern, $content, $email_found);
     $host = isset($parts['host']) ? $parts['host'] : '';
     $key = (isset($parts['path']) ? $parts['path'] : '') . (isset($parts['query']) ? $parts['query'] : '');
     $this->grabbed_data[$host][$key] = $email_found[0];
     return $parts;
 }

 /**
  * Converts the href of each anchor into an absolute URL and appends the ones
  * not yet present to $collected. Links to binary/media files and hrefs that
  * cannot be parsed are skipped.
  *
  * @param phpQuery $pq        Instance used to wrap the anchors.
  * @param mixed    $anchors   Iterable set of anchor elements.
  * @param array    $base      parse_url() parts supplying a missing scheme or host.
  * @param array    $collected URLs gathered so far.
  * @return array $collected extended with the new URLs.
  */
 private function grabCollectLinks($pq, $anchors, $base, $collected)
 {
     foreach ($anchors as $a) {
         $href = (string) $pq->pq($a)->attr('href');
         if (preg_match('/(\\.pdf|\\.exe|\\.msi|\\.zip|\\.rar|\\.gz|\\.tar|\\.flv|\\.mov|\\.mpg|\\.mpeg)/i', $href)) {
             continue;
         }
         $parts = parse_url($href);
         if ($parts === false) {
             continue;
         }
         if (empty($parts['scheme'])) {
             $parts['scheme'] = isset($base['scheme']) ? $base['scheme'] : '';
         }
         if (empty($parts['host'])) {
             $parts['host'] = isset($base['host']) ? $base['host'] : '';
         }
         // Exactly one "/" between host and path, and a "?" in front of a non-empty query.
         $path = isset($parts['path']) ? ltrim($parts['path'], '/') : '';
         $query = (isset($parts['query']) && $parts['query'] !== '') ? '?' . $parts['query'] : '';
         $new_url = $parts['scheme'] . '://' . $parts['host'] . '/' . $path . $query;
         if (!in_array($new_url, $collected)) {
             $collected[] = $new_url;
         }
     }
     return $collected;
 }
Пример #13
0
 /**
  * Validates $content against spider::$content_right_code (must be present)
  * and spider::$content_error_code (must be absent).
  *
  * A code containing "DOM::" is treated as a phpQuery selector evaluated
  * against the content; any other code is searched for as a plain substring.
  *
  * @param string $content Markup to validate.
  * @return bool False when the required code is missing or the error code is found.
  */
 public static function check_content_code($content)
 {
     if (spider::$content_right_code) {
         if (strpos(spider::$content_right_code, 'DOM::') !== false) {
             iPHP::import(iPHP_LIB . '/phpQuery.php');
             $doc = phpQuery::newDocumentHTML($content, 'UTF-8');
             $pq_dom = str_replace('DOM::', '', spider::$content_right_code);
             $matches = (bool) (string) phpQuery::pq($pq_dom);
             phpQuery::unloadDocuments($doc->getDocumentID());
             unset($doc);
         } else {
             $matches = strpos($content, spider::$content_right_code);
         }
         if ($matches === false) {
             return false;
         }
     }
     // $content is intentionally kept alive up to here: the error-code check below
     // needs it (it used to be unset above, so this check ran on an undefined variable).
     if (spider::$content_error_code) {
         if (strpos(spider::$content_error_code, 'DOM::') !== false) {
             iPHP::import(iPHP_LIB . '/phpQuery.php');
             $doc = phpQuery::newDocumentHTML($content, 'UTF-8');
             $pq_dom = str_replace('DOM::', '', spider::$content_error_code);
             $_matches = (bool) (string) phpQuery::pq($pq_dom);
             phpQuery::unloadDocuments($doc->getDocumentID());
             unset($doc);
         } else {
             $_matches = strpos($content, spider::$content_error_code);
         }
         if ($_matches !== false) {
             return false;
         }
     }
     return true;
 }
Пример #14
0
    /**
     * Imports one infoblock element per data row of "div#catalog table#tbl":
     * the row supplies name, state, city, address and operating hours, and the
     * detail page linked from the first cell supplies currencies and the map point.
     *
     * The first row of the table is skipped.
     *
     * NOTE(review): the result of file_get_contents() is not checked; a failed
     * download is passed on to phpQuery as false - confirm this is acceptable.
     *
     * @return bool Always true.
     */
    public function go()
    {
        try
        {
            $this->checker = \Ns\Bitrix\Helper::Create('iblock')->useVariant('checker');
        }
        catch (\Exception $e)
        {
            // A failure to obtain the checker is only reported; the import continues.
            prentExpection($e->getMessage());
        }
        $table = \phpQuery::pq($this->dom->find("div#catalog")->find("table#tbl"));
        $index = 0;
        foreach ($table->find("tr") as $tr)
        {
            var_dump($index);
            // Skip the first row of the table.
            if (++$index == 1) {
                continue;
            }
            $this->arFields = array();
            $this->arFields["IBLOCK_ID"] = self::ALFATERMINALS_IBLOCK_ID;
            $this->arFields["IBLOCK_SECTION_ID"] = ($this->type == "alfa") ? self::ALFATERMINALS_SECTION_ID : self::SBSTERMINALS_SECTION_ID;

            // Read the terminal details from the cells of the row.
            $tr = \phpQuery::pq($tr);
            // 1. Name, plus the link to the detail page
            $this->arFields["NAME"] = $tr->find('td:eq(0)')->find("a")->text();
            $infoLink = $tr->find('td:eq(0)')->find("a")->attr("href");
            // 2. State
            $this->arFields["PROPERTY_VALUES"]["STATE"] = $tr->find('td:eq(1)')->text();
            // 3. City resolved through findCity()
            $this->arFields["PROPERTY_VALUES"]["BIND_CITY"] = $this->findCity($tr->find('td:eq(2)')->text());
            // 4. City text
            $this->arFields["PROPERTY_VALUES"]["CITY"] = $tr->find('td:eq(2)')->text();
            // 5. Address
            $this->arFields["PROPERTY_VALUES"]["ADDRESS"] = $tr->find('td:eq(3)')->text();
            // 6. Operating mode
            $this->arFields["PROPERTY_VALUES"]["OPERATION_TIME"] = $tr->find('td:eq(4)')->text();
            // Work types are derived from the operating-mode text; a failure is only reported.
            try
            {
                $this->arFields["PROPERTY_VALUES"]["WORK_TYPES"] = $this->findWorkType($this->arFields["PROPERTY_VALUES"]["OPERATION_TIME"]);
            }
            catch (\Exception $e)
            {
                prentExpection($e->getMessage());
            }

            $html = file_get_contents("http://www.alfabank.by" . $infoLink);
            $fullInfo = \phpQuery::newDocumentHTML($html);

            $info = \phpQuery::pq($fullInfo->find("div.content"));

            // Compose the currency string; it starts empty for every row so that
            // nothing is appended to an undefined variable.
            $strCurrency = "";
            foreach ($info->find("div.section.s4")->find('ul')->find('li') as $li) {
                $strCurrency .= \phpQuery::pq($li)->text() . " ";
            }
            $this->arFields["PROPERTY_VALUES"]["CURRENCY"] = trim($strCurrency);

            // Map point, stored as the raw text of the "ya_map_data" element.
            $coordinates = $info->find("div.section:eq(5)")->find("div.ya_map_data")->text();
            $this->arFields["PROPERTY_VALUES"]["POINT"] = $coordinates;

            prent($this->arFields);
            $this->Add();
        }

        return true;
    }
Пример #15
0
 /**
  * Validates $content against $this->content_right_code (must be present) and
  * $this->content_error_code (must be absent).
  *
  * A code containing "DOM::" is treated as a phpQuery selector evaluated
  * against the content; any other code is searched for as a plain substring.
  *
  * @param string $content Markup to validate.
  * @return array|false array('content' => $content, 'match' => true) when the
  *                     content passes both checks, false otherwise.
  */
 function check_content_code($content)
 {
     if ($this->content_right_code) {
         if (strpos($this->content_right_code, 'DOM::') !== false) {
             iPHP::import(iPHP_LIB . '/phpQuery.php');
             $doc = phpQuery::newDocumentHTML($content, 'UTF-8');
             $pq_dom = str_replace('DOM::', '', $this->content_right_code);
             $matches = (bool) (string) phpQuery::pq($pq_dom);
         } else {
             $matches = strpos($content, $this->content_right_code);
         }
         if ($matches === false) {
             return false;
         }
     }
     if ($this->content_error_code) {
         // The error check must inspect the error code itself; it previously tested
         // and evaluated content_right_code in the DOM branch (copy-paste slip).
         if (strpos($this->content_error_code, 'DOM::') !== false) {
             iPHP::import(iPHP_LIB . '/phpQuery.php');
             $doc = phpQuery::newDocumentHTML($content, 'UTF-8');
             $pq_dom = str_replace('DOM::', '', $this->content_error_code);
             $_matches = (bool) (string) phpQuery::pq($pq_dom);
         } else {
             $_matches = strpos($content, $this->content_error_code);
         }
         if ($_matches !== false) {
             return false;
         }
     }
     return array('content' => $content, 'match' => true);
 }
Пример #16
0
 /**
  * Crawl the list pages of a spider rule and process the entries found on them.
  *
  * The list URLs come from (lowest to highest precedence) the rule, the project,
  * spiderUrls::$urls and the $_urls argument. Two URL notations are expanded first:
  *   - `RULE@rid@url`  : url is crawled with rule [rid] and the resulting entry URLs are used;
  *   - `...<a,b,c,...>...` : the comma-separated values are handed to spiderTools::mkurls().
  *
  * @param string|null $work     Mode: 'shell', 'WEB@AUTO', 'WEB@MANUAL' or 'DATA@RULE'.
  * @param mixed       $pid      Project id; NULL falls back to spider::$pid.
  * @param mixed       $_rid     Rule id used when neither the project nor spider::$rid provides one.
  * @param string|null $_urls    Newline-separated list URLs overriding every other URL source.
  * @param string|null $callback 'CALLBACK@URL' makes the call return the entry URLs of the
  *                              first list page that yields HTML.
  * @return mixed 'WEB@AUTO': list of URL records; 'DATA@RULE': crawled content keyed by list key;
  *               'WEB@MANUAL': array with cid/rid/pid/sid/work/rule/listsArray;
  *               'CALLBACK@URL' callback: list of entry URLs;
  *               'shell': nothing (statistics are printed), or false when the URL list is empty.
  */
 public static function crawl($work = NULL, $pid = NULL, $_rid = NULL, $_urls = null, $callback = null)
 {
     $pid === NULL && ($pid = spider::$pid);
     // Defaults for values that only some of the paths below assign.
     $project = array();
     $prule_list_url = null;
     $lastupdate = null;
     // NOTE(review): $sid is never assigned in this method; it is only echoed back for WEB@MANUAL.
     $sid = null;
     $listsArray = array();
     $contentArray = array();
     if ($pid) {
         $project = spider::project($pid);
         $cid = $project['cid'];
         $rid = $project['rid'];
         $prule_list_url = $project['list_url'];
         $lastupdate = $project['lastupdate'];
     } else {
         $cid = spider::$cid;
         $rid = spider::$rid;
     }
     if (empty($rid) && $_rid !== NULL) {
         $rid = $_rid;
     }
     if ($work == 'shell') {
         // Throttle: skip the run while fewer than psleep seconds have passed since the last update.
         if (!empty($project['psleep']) && time() - $lastupdate < $project['psleep']) {
             echo '采集方案[' . $pid . "]:" . format_date($lastupdate) . "刚采集过了,请" . $project['psleep'] / 3600 . "小时后在继续采集\n";
             return;
         }
         echo "开始采集方案[" . $pid . "] 采集规则[" . $rid . "]\n";
     }
     $ruleA = spider::rule($rid);
     $rule = $ruleA['rule'];
     $urls = $rule['list_urls'];
     !empty($project['urls']) && ($urls = $project['urls']);
     spiderUrls::$urls && ($urls = spiderUrls::$urls);
     $_urls && ($urls = $_urls);
     $urlsArray = array_filter(explode("\n", $urls));
     // Iterate over a copy because expanded entries are removed from $urlsArray inside the loop.
     $_urlsArray = $urlsArray;
     $urlsList = array();
     if ($work == 'shell') {
         print_r($urlsArray);
     }
     foreach ($_urlsArray as $_key => $_url) {
         $_url = htmlspecialchars_decode($_url);
         $_urlsList = array();
         if (strpos($_url, 'RULE@') !== false) {
             // RULE@rid@url: crawl url with rule [rid] and use the list it returns.
             // Limit the split to 3 parts so an '@' inside the URL is preserved, and use
             // dedicated locals so the $_rid/$_urls parameters are not overwritten.
             list($___s, $sub_rid, $sub_urls) = array_pad(explode('@', $_url, 3), 3, null);
             if (spider::$ruleTest) {
                 print_r('<b>使用[rid:' . $sub_rid . ']规则抓取列表</b>:' . $sub_urls);
                 echo "<hr />";
             }
             $_urlsList = spiderUrls::crawl($work, false, $sub_rid, $sub_urls, 'CALLBACK@URL');
             // The nested call may return a non-array (e.g. false for an empty list in shell mode).
             if (is_array($_urlsList)) {
                 $urlsList = array_merge($urlsList, $_urlsList);
             }
             unset($urlsArray[$_key]);
         } else {
             preg_match('|.*<(.*)>.*|is', $_url, $_matches);
             if ($_matches) {
                 // Missing trailing values are passed on as null.
                 list($format, $begin, $num, $step, $zeroize, $reverse) = array_pad(explode(',', $_matches[1]), 6, null);
                 $url = str_replace($_matches[1], '*', trim($_matches[0]));
                 $_urlsList = spiderTools::mkurls($url, $format, $begin, $num, $step, $zeroize, $reverse);
                 unset($urlsArray[$_key]);
                 $urlsList = array_merge($urlsList, $_urlsList);
             }
         }
     }
     $urlsList && ($urlsArray = array_merge($urlsArray, $urlsList));
     unset($_urlsArray, $_key, $_url, $_matches, $_urlsList, $urlsList);
     $urlsArray = array_unique($urlsArray);
     if (empty($urlsArray)) {
         if ($work == 'shell') {
             echo "采集列表为空!请填写!\n";
             return false;
         }
         iPHP::alert('采集列表为空!请填写!', 'js:parent.window.iCMS_MODAL.destroy();');
     }
     // Mode "2" extracts the list with phpQuery selectors; any other mode uses regular expressions.
     if ($rule['mode'] == "2") {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         spider::$ruleTest && !empty($_GET['pq_debug']) && (phpQuery::$debug = 1);
     }
     $pubArray = array();
     $pubCount = array();
     $pubAllCount = array('count' => 0, 'success' => 0, 'error' => 0, 'published' => 0);
     spider::$curl_proxy = $rule['proxy'];
     spider::$urlslast = null;
     foreach ($urlsArray as $key => $url) {
         $url = trim($url);
         spider::$urlslast = $url;
         if ($work == 'shell') {
             echo '开始采集列表:' . $url . "\n";
         }
         if (spider::$ruleTest) {
             echo '<b>抓取列表:</b>' . $url . "<br />";
         }
         $html = spiderTools::remote($url);
         if (empty($html)) {
             continue;
         }
         if ($rule['mode'] == "2") {
             $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
             $list_area = $doc[trim($rule['list_area_rule'])];
             if ($rule['list_area_format']) {
                 $list_area_format = trim($rule['list_area_format']);
                 if (strpos($list_area_format, 'ARRAY::') !== false) {
                     // ARRAY:: applies the selector to every list-area node separately.
                     $list_area_format = str_replace('ARRAY::', '', $list_area_format);
                     $lists = array();
                     foreach ($list_area as $la) {
                         $lists[] = phpQuery::pq($list_area_format, $la);
                     }
                 } else {
                     $lists = phpQuery::pq($list_area_format, $list_area);
                 }
             } else {
                 $lists = $list_area;
             }
         } else {
             $list_area_rule = spiderTools::pregTag($rule['list_area_rule']);
             if ($list_area_rule) {
                 // The original passed an undefined $PREG_SET_ORDER variable as flags; no flag is needed.
                 preg_match('|' . $list_area_rule . '|is', $html, $matches);
                 $list_area = isset($matches['content']) ? $matches['content'] : null;
             } else {
                 $list_area = $html;
             }
             $html = null;
             unset($html);
             if (spider::$ruleTest) {
                 echo iS::escapeStr($rule['list_area_rule']);
                 echo "<hr />";
             }
             if ($rule['list_area_format']) {
                 $list_area = spiderTools::dataClean($rule['list_area_format'], $list_area);
             }
             preg_match_all('|' . spiderTools::pregTag($rule['list_url_rule']) . '|is', $list_area, $lists, PREG_SET_ORDER);
             // Release the (possibly large) list area, but keep the variable defined because the
             // rule-test output below still reads it.
             // NOTE(review): in this mode the "list area result" debug line is therefore always empty.
             $list_area = null;
             // sort "1" is intentionally a no-op (the descending sort was disabled upstream).
             if ($rule['sort'] == "2") {
                 asort($lists);
             } elseif ($rule['sort'] == "3") {
                 shuffle($lists);
             }
         }
         if (spider::$ruleTest) {
             echo '<b>列表区域规则:</b>' . iS::escapeStr($rule['list_area_rule']);
             echo "<hr />";
             echo '<b>列表区域抓取结果:</b>' . iS::escapeStr($list_area);
             echo "<hr />";
             echo '<b>列表链接规则:</b>' . iS::escapeStr($rule['list_url_rule']);
             echo "<hr />";
             echo '<b>网址合成规则:</b>' . iS::escapeStr($rule['list_url']);
             echo "<hr />";
         }
         // A project-level URL composition rule overrides the rule's own.
         if ($prule_list_url) {
             $rule['list_url'] = $prule_list_url;
         }
         // Callback mode: return the entry URLs of this list page to the caller.
         if ($callback == 'CALLBACK@URL') {
             $cbListUrl = array();
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $cbListUrl[] = spider::$url;
             }
             return $cbListUrl;
         }
         if ($work == "shell") {
             if (!isset($pubCount[$url])) {
                 $pubCount[$url] = array('count' => 0, 'success' => 0, 'error' => 0, 'published' => 0);
             }
             $pubCount[$url]['count'] = count($lists);
             $pubAllCount['count'] += $pubCount[$url]['count'];
             echo "开始采集:" . $url . " 列表 " . $pubCount[$url]['count'] . "条记录\n";
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 echo "title:" . spider::$title . "\n";
                 echo "url:" . spider::$url . "\n";
                 spider::$rid = $rid;
                 $checker = spider::checker($work);
                 if ($checker === true) {
                     echo "开始采集....";
                     // Keep the publish result in its own variable so $callback is not overwritten.
                     $published = spider::publish("shell");
                     if ($published['code'] == "1001") {
                         $pubCount[$url]['success']++;
                         $pubAllCount['success']++;
                         echo "....√\n";
                         if (!empty($project['sleep'])) {
                             echo "sleep:" . $project['sleep'] . "s\n";
                             if ($rule['mode'] != "2") {
                                 unset($lists[$lkey]);
                             }
                             gc_collect_cycles();
                             sleep($project['sleep']);
                         }
                     } else {
                         $pubCount[$url]['error']++;
                         $pubAllCount['error']++;
                         echo "error\n\n";
                         continue;
                     }
                 }
                 $pubCount[$url]['published']++;
                 $pubAllCount['published']++;
             }
             if ($rule['mode'] == "2") {
                 phpQuery::unloadDocuments($doc->getDocumentID());
             } else {
                 unset($lists);
             }
         }
         if ($work == "WEB@MANUAL") {
             $listsArray[$url] = $lists;
         }
         if ($work == "WEB@AUTO" || $work == 'DATA@RULE') {
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $hash = md5(spider::$url);
                 if (spider::$ruleTest) {
                     echo '<b>列表抓取结果:</b>' . $lkey . '<br />';
                     echo spider::$title . ' (<a href="' . APP_URI . '&do=testdata' . '&url=' . urlencode(spider::$url) . '&rid=' . $rid . '&pid=' . $pid . '&title=' . urlencode(spider::$title) . '" target="_blank">测试内容规则</a>) <br />';
                     echo spider::$url . "<br />";
                     echo $hash . "<br /><hr />";
                 } else {
                     if (spider::checker($work) === true || spider::$dataTest) {
                         $suData = array('sid' => 0, 'url' => spider::$url, 'title' => spider::$title, 'cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'hash' => $hash);
                         switch ($work) {
                             case 'DATA@RULE':
                                 $contentArray[$lkey] = spiderData::crawl();
                                 unset($suData['sid']);
                                 $suData['title'] = addslashes($suData['title']);
                                 $suData += array('addtime' => time(), 'status' => '2', 'publish' => '2', 'indexid' => '0', 'pubdate' => '0');
                                 // In data-test mode nothing is inserted, so there is no row id to report.
                                 $suid = spider::$dataTest ? null : iDB::insert('spider_url', $suData);
                                 $contentArray[$lkey]['spider_url'] = $suid;
                                 break;
                             case 'WEB@AUTO':
                                 $pubArray[] = $suData;
                                 break;
                         }
                     }
                 }
             }
         }
     }
     $lists = null;
     unset($lists);
     gc_collect_cycles();
     switch ($work) {
         case 'WEB@AUTO':
             return $pubArray;
         case 'DATA@RULE':
             return $contentArray;
         case 'WEB@MANUAL':
             return array('cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'sid' => $sid, 'work' => $work, 'rule' => $rule, 'listsArray' => $listsArray);
         case "shell":
             echo "采集数据统结果:\n";
             print_r($pubCount);
             print_r($pubAllCount);
             echo "全部采集完成....\n";
             iDB::update('spider_project', array('lastupdate' => time()), array('id' => $pid));
             break;
     }
 }