Esempio n. 1
0
/**
 * Reads the content found at an RSS feed location and returns it as a string.
 *
 * When the global $opac_curl_available flag is truthy the resource is fetched
 * with cURL (connection timeout taken from the global $pmb_curl_timeout, 5
 * seconds when that setting is missing, non-numeric or not positive).
 * Otherwise the resource is opened with fopen() and read to the end.
 *
 * @param string $url_fichier URL (or path) of the resource to read.
 *
 * @return string The content that was read, or "" when nothing could be read.
 */
function lit_fichier_rss($url_fichier)
{
    global $opac_curl_available, $pmb_curl_timeout;
    $res = "";
    if ($opac_curl_available) {
        // Cast explicitly: arithmetic on a non-numeric setting would fail on recent PHP versions.
        $timeout = is_numeric($pmb_curl_timeout) ? (int) $pmb_curl_timeout : 0;
        if ($timeout <= 0) {
            $timeout = 5;
        }
        $ch = curl_init($url_fichier);
        if ($ch === false) {
            return $res;
        }
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        // NOTE(review): peer certificate verification is switched off here, so
        // HTTPS feeds are fetched without authenticating the server.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        configurer_proxy_curl($ch, $url_fichier);
        $contenu = curl_exec($ch);
        curl_close($ch);
        // curl_exec() yields false on failure; keep the return type a string
        // so both branches of this function behave the same way.
        if (is_string($contenu)) {
            $res = $contenu;
        }
    } else {
        $fp = fopen($url_fichier, "r");
        if ($fp) {
            // stream_get_contents() stops on EOF *and* on read errors, whereas
            // a manual feof()/fread() loop can spin forever on a broken stream.
            $contenu = stream_get_contents($fp);
            if ($contenu !== false) {
                $res = $contenu;
            }
            fclose($fp);
        }
    }
    return $res;
}
Esempio n. 2
0
/**
 * Reads the content found at an RSS feed location and returns it as a string.
 *
 * When the global $opac_curl_available flag is truthy the resource is fetched
 * with cURL; otherwise it is opened with fopen() and read to the end.
 *
 * @param string $url_fichier URL (or path) of the resource to read.
 *
 * @return string The content that was read, or "" when nothing could be read.
 */
function lit_fichier_rss($url_fichier)
{
    global $opac_curl_available;
    $res = "";
    if ($opac_curl_available) {
        $ch = curl_init($url_fichier);
        if ($ch === false) {
            return $res;
        }
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        configurer_proxy_curl($ch);
        $contenu = curl_exec($ch);
        curl_close($ch);
        // curl_exec() yields false on failure; keep the return type a string
        // so both branches of this function behave the same way.
        if (is_string($contenu)) {
            $res = $contenu;
        }
    } else {
        $fp = fopen($url_fichier, "r");
        if ($fp) {
            // stream_get_contents() stops on EOF *and* on read errors, whereas
            // a manual feof()/fread() loop can spin forever on a broken stream.
            $contenu = stream_get_contents($fp);
            if ($contenu !== false) {
                $res = $contenu;
            }
            fclose($fp);
        }
    }
    return $res;
}
Esempio n. 3
0
 /**
  * Queries the remote search service configured for a source and records every
  * notice found.
  *
  * The result list is fetched from "<url>/cgi-bin/basic_connect"; for each
  * entry the abstract page on adsabs.harvard.edu is fetched and scraped into
  * an associative array, which is converted with notice_2_uni() and stored
  * with rec_record().
  *
  * Error reporting: when the source has no $url parameter, $this->error is set
  * to 1 with the "nasa_ads_unconfigured" message; when a fetch fails,
  * $this->error is set to true and $this->error_message carries the cURL error.
  *
  * @param mixed $source_id Identifier of the source whose parameters are loaded.
  * @param array $query     Search terms; each element exposes a "values" array.
  * @param mixed $search_id Search identifier handed to rec_record().
  *
  * @return void
  */
 function search($source_id, $query, $search_id)
 {
     global $base_path, $charset;
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // Every stored parameter becomes a global variable named after its
         // key; this is how $url becomes visible below.
         // NOTE(review): unserialize() must only ever see trusted, locally
         // stored data here.
         $vars = unserialize($params["PARAMETERS"]);
         if (is_array($vars)) {
             foreach ($vars as $key => $val) {
                 global ${$key};
                 ${$key} = $val;
             }
         }
     }
     if (!isset($url)) {
         $this->error_message = $this->msg["nasa_ads_unconfigured"];
         $this->error = 1;
         return;
     }
     // Flatten all term values into one string, spaces turned into "+".
     $boolsearch = "";
     foreach ($query as $element) {
         $boolsearch .= implode(" ", $element->values) . " ";
     }
     $boolsearch = str_replace(" ", "+", $boolsearch);
     $addr = $url . "/cgi-bin/basic_connect?version=1&qsearch=" . rawurlencode($boolsearch);
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $addr);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     configurer_proxy_curl($ch, $addr);
     $cexec = curl_exec($ch);
     if (!$cexec) {
         $this->error = true;
         $this->error_message = "Can't get Lob answer : " . curl_error($ch);
         curl_close($ch);
         return;
     }
     curl_close($ch);
     if (strtoupper($charset) != "UTF-8") {
         $cexec = utf8_decode($cexec);
     }
     // The result table is cut into one HTML fragment per notice.
     $sep_notices = "<tr><td colspan=6><HR></td></tr>";
     $liste_html = get_field_betwen_2sep($cexec, $sep_notices, "</table>");
     $notices_html = explode($sep_notices, $liste_html);
     // Opening markup shared by every labelled cell of the abstract page.
     $cellule = "</b></td><td><br></td><td align=\"left\" valign=\"top\">";
     // Counter used to cap the number of notices recorded in one search.
     $nb = 0;
     foreach ($notices_html as $notice_html) {
         $data_notice = array();
         $tab_tr = explode("</tr>", $notice_html);
         $tab_td_0 = explode("</td>", $tab_tr[0]);
         $id = get_field_betwen_2sep($notice_html, "value=\"", "\"");
         $data_notice["id"] = $id;
         // Sixth cell of the first row holds the document links; the last
         // character of each link chunk is used as the document type.
         $links = isset($tab_td_0[5]) ? explode("</a>", $tab_td_0[5]) : array();
         foreach ($links as $link) {
             $doc_type = substr($link, -1);
             $doc_link = get_field_betwen_2sep($link, "href=\"", "\"");
             if ($doc_link) {
                 $data_notice["doc_links"][$doc_type]["link"] = $doc_link;
                 $data_notice["doc_links"][$doc_type]["label"] = $this->msg["nasa_ads_doc_" . strtolower($doc_type)];
             }
         }
         $url_notice = "http://adsabs.harvard.edu/abs/{$id}";
         $data_notice["Url_notice"] = $url_notice;
         $ch_notice = curl_init();
         curl_setopt($ch_notice, CURLOPT_URL, $url_notice);
         curl_setopt($ch_notice, CURLOPT_RETURNTRANSFER, true);
         configurer_proxy_curl($ch_notice, $url_notice);
         $cexec_notice = curl_exec($ch_notice);
         if (!$cexec_notice) {
             $this->error = true;
             $this->error_message = "Can't get Lob answer : " . curl_error($ch_notice);
             curl_close($ch_notice);
             continue;
         }
         // Release the handle right away so that no exit path of the loop
         // (including the cap below) can leave it open.
         curl_close($ch_notice);
         if (strtoupper($charset) != "UTF-8") {
             $cexec_notice = utf8_decode($cexec_notice);
         }
         $cexec_notice = get_field_betwen_2sep($cexec_notice, "Translate This Page</a></strong>", "<form method");
         $data_notice["Title"] = get_field_betwen_2sep($cexec_notice, "Title:" . $cellule, "</td></tr>", 1);
         $Authors = get_field_betwen_2sep($cexec_notice, "Authors:" . $cellule, "</td></tr>");
         foreach (explode("</a>", $Authors) as $aut) {
             $autor = get_field_from_sep($aut, ">", 1);
             if ($autor) {
                 $data_notice["Authors"][] = $autor;
             }
         }
         $data_notice["Affiliation"] = get_field_betwen_2sep($cexec_notice, "Affiliation:" . $cellule, "</td></tr>", 1);
         $data_notice["Publication"] = get_field_betwen_2sep($cexec_notice, "Publication:" . $cellule, "</td></tr>");
         $data_notice["Publication_Date"] = get_field_betwen_2sep($cexec_notice, "Publication Date:" . $cellule, "</td></tr>");
         $Origin = get_field_betwen_2sep($cexec_notice, "Origin:" . $cellule, "</td></tr>", 1);
         $data_notice["Origin"] = get_field_betwen_2sep($Origin, "\">", "</a>");
         $data_notice["Keywords"] = get_field_betwen_2sep($cexec_notice, "Keywords:" . $cellule, "</td></tr>");
         $DOI = get_field_betwen_2sep($cexec_notice, "DOI:" . $cellule, "</td></tr>");
         $data_notice["DOI"] = get_field_betwen_2sep($DOI, "\">", "</a>", 1);
         $Bibliographic_Code = get_field_betwen_2sep($cexec_notice, "Bibliographic Code:" . $cellule, "</td></tr>");
         $data_notice["Bibliographic_Code"] = get_field_betwen_2sep($Bibliographic_Code, "href=\"", "\"");
         $data_notice["Abstract"] = get_field_betwen_2sep($cexec_notice, "Abstract</h3>", "<hr>", 1, "<p><sup><sub>");
         // Collapse line breaks inside the abstract into single spaces.
         $data_notice["Abstract"] = str_replace(array("\r\n", "\n", "\r"), " ", $data_notice["Abstract"]);
         $this->rec_record($this->notice_2_uni($data_notice), $source_id, $search_id);
         // Stop once the counter, read before being incremented, exceeds 20.
         if ($nb++ > 20) {
             break;
         }
     }
 }
 /**
  * Fetches $url with cURL and stores the raw response in $this->data.
  *
  * The request is repeated (at most three extra times) while it succeeds and
  * $this->retry_after is non-empty, sleeping that many seconds in between.
  * On failure $this->error is set to true, $this->error_message receives the
  * cURL error followed by " - " and the URL, and $this->data is left untouched.
  *
  * @param string $url Address to fetch.
  *
  * @return void
  */
 function get_data($url)
 {
     // Reset the error state
     $this->error = false;
     $this->error_message = "";
     // Create the cURL handle
     $ch = curl_init();
     // Configure the cURL options
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_WRITEFUNCTION, array(&$this, "parse_xml"));
     curl_setopt($ch, CURLOPT_HEADERFUNCTION, array(&$this, "verif_header"));
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     if ($this->time_out) {
         curl_setopt($ch, CURLOPT_TIMEOUT, $this->time_out);
     }
     // Reset "retry_after" (presumably filled in again by the verif_header
     // callback when the server asks to retry later -- TODO confirm)
     $this->retry_after = "";
     // Split the query-string arguments, for servers that do not follow the standard
     $query = substr($url, strpos($url, "?") + 1);
     $query = explode("&", $query);
     for ($i = 0; $i < count($query); $i++) {
         if (strpos($query[$i], "operation") !== false) {
             // NOTE(review): $operation is not read anywhere else in this method.
             $operation = substr($query[$i], 9);
             break;
         }
     }
     // Create the XML parser (no handlers are attached to it in this method)
     $this->xml_parser = xml_parser_create("utf-8");
     xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, 0);
     xml_parser_set_option($this->xml_parser, XML_OPTION_SKIP_WHITE, 1);
     // NOTE(review): RETURNTRANSFER is enabled after WRITEFUNCTION was set;
     // the code below relies on curl_exec() returning the body, so presumably
     // this setting takes precedence over the write callback -- confirm.
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     configurer_proxy_curl($ch, $url);
     $n_try = 0;
     $data = $cexec = curl_exec($ch);
     // Retry while the transfer succeeded but a retry delay was requested
     while ($cexec && $this->retry_after && $n_try < 3) {
         $n_try++;
         sleep((int) $this->retry_after * 1);
         $this->retry_after = "";
         $data = $cexec = curl_exec($ch);
     }
     if (!$cexec) {
         $this->error = true;
         $this->error_message = curl_error($ch);
     }
     // Release the parser and the cURL handle before reporting the outcome
     xml_parser_free($this->xml_parser);
     $this->xml_parser = "";
     curl_close($ch);
     if ($this->error) {
         $this->error_message .= " - " . $url;
         // NOTE(review): $s is never defined in this method; this unset() is a no-op.
         unset($s);
         return;
     }
     $this->data = $data;
 }
 /**
  * Sends an OAI request, feeds the response through the XML parser and copies
  * the parsed result into this object's response fields.
  *
  * The response is streamed to the "parse_xml" write callback; parsing state
  * is kept in an oai_parser instance. After a successful transfer the method
  * fills $this->response_date, $this->url_base, $this->request, $this->verb,
  * $this->rtoken, $this->records and, when a resumption token is present,
  * $this->next_request.
  *
  * Error reporting: on a transport failure $this->error is true and
  * $this->error_message holds the cURL error followed by " - " and the URL.
  * When the third parsed element is named "error", $this->error is true and
  * $this->error_message / $this->error_oai_code are filled from it.
  *
  * @param string $url       Full request URL.
  * @param mixed  $rcallback Record callback handed to oai_parser; when it is
  *                          empty the parsed records are copied to $this->records.
  *
  * @return void
  */
 function analyse_response($url, $rcallback = "")
 {
     // Reset the error state
     $this->error = false;
     $this->error_message = "";
     // Reset the stored records unless this URL is the pending follow-up request
     if ($url != $this->next_request) {
         $this->records = array();
     }
     $this->next_request = "";
     $this->rtoken = "";
     $this->remainder = '';
     // Create and configure the cURL handle
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_WRITEFUNCTION, array(&$this, "parse_xml"));
     curl_setopt($ch, CURLOPT_HEADERFUNCTION, array(&$this, "verif_header"));
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     if ($this->time_out) {
         curl_setopt($ch, CURLOPT_TIMEOUT, $this->time_out);
     }
     // Reset "retry_after"
     $this->retry_after = "";
     configurer_proxy_curl($ch, $url);
     // Pull the verb out of the query string, for servers that do not echo it
     // back as the standard requires. It stays empty when the URL has none.
     $verb = "";
     $query = substr($url, strpos($url, "?") + 1);
     $query = explode("&", $query);
     for ($i = 0; $i < count($query); $i++) {
         if (strpos($query[$i], "verb") !== false) {
             $verb = substr($query[$i], 5);
             break;
         }
     }
     // Parser state object
     $s = new oai_parser($rcallback, $this->charset);
     // Pre-fill the verb when the URL provided one
     if ($verb) {
         $s->verb = $verb;
     }
     // Create the XML parser and attach the state object's handlers
     $this->xml_parser = xml_parser_create("utf-8");
     xml_set_object($this->xml_parser, $s);
     xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, 0);
     xml_parser_set_option($this->xml_parser, XML_OPTION_SKIP_WHITE, 1);
     xml_set_element_handler($this->xml_parser, "oai_startElement", "oai_endElement");
     xml_set_character_data_handler($this->xml_parser, "oai_charElement");
     $n_try = 0;
     $cexec = curl_exec($ch);
     // Retry (at most three times) while the transfer succeeded but a retry
     // delay was recorded in $this->retry_after
     while ($cexec && $this->retry_after && $n_try < 3) {
         $n_try++;
         sleep((int) $this->retry_after * 1);
         $this->retry_after = "";
         $cexec = curl_exec($ch);
     }
     if (!$cexec) {
         $this->error = true;
         $this->error_message = curl_error($ch);
     }
     xml_parser_free($this->xml_parser);
     $this->xml_parser = "";
     curl_close($ch);
     if ($this->error) {
         $this->error_message .= " - " . $url;
         unset($s);
         return;
     }
     // Parsed values are decoded from UTF-8 when the target charset is ISO-8859-1
     $c = stripos($this->charset, 'iso-8859-1') !== false;
     // Optionally strip the query string from the reported base URL
     if ($this->clean_base_url) {
         $p = strpos($s->tree[1][1]["CHAR"], "?");
         if ($p !== false) {
             $s->tree[1][1]["CHAR"] = substr($s->tree[1][1]["CHAR"], 0, $p);
         }
     }
     $this->response_date = $c ? utf8_decode($s->tree[1][0]["CHAR"]) : $s->tree[1][0]["CHAR"];
     $this->url_base = $c ? utf8_decode($s->tree[1][1]["CHAR"]) : $s->tree[1][1]["CHAR"];
     $this->request["URL_BASE"] = $c ? utf8_decode($s->tree[1][1]["CHAR"]) : $s->tree[1][1]["CHAR"];
     // Copy the request attributes, leaving the resumption token out
     if (isset($s->tree[1][1]["ATTRIB"]) && is_array($s->tree[1][1]["ATTRIB"])) {
         foreach ($s->tree[1][1]["ATTRIB"] as $key => $val) {
             if ($key != "resumptionToken") {
                 $this->request["ATTRIBS"][$key] = $c ? utf8_decode($val) : $val;
             }
         }
     }
     $this->verb = $c ? utf8_decode($s->tree[1][1]["ATTRIB"]["verb"]) : $s->tree[1][1]["ATTRIB"]["verb"];
     $this->rtoken = $s->rtoken;
     if ($s->tree[1][2]["NAME"] == "error") {
         $this->error = true;
         $this->error_message = "OAI Error, the server tell : " . $s->tree[1][2]["ATTRIB"]["code"] . " : " . $s->tree[1][2]["CHAR"];
         $this->error_oai_code = $s->tree[1][2]["ATTRIB"]["code"];
     }
     // An Identify response is stored as a single record
     if ($this->verb == "Identify") {
         $this->records[0] = $c ? utf8_decode($s->cur_elt) : $s->cur_elt;
     } else {
         // Without a record callback the parsed records are appended here
         if (!$rcallback) {
             for ($i = 0; $i < count($s->records); $i++) {
                 $this->records[] = $c ? utf8_decode($s->records[$i]) : $s->records[$i];
             }
         }
     }
     // Build the follow-up request when a resumption token was returned
     if (is_array($this->rtoken) && $this->rtoken["token"]) {
         $t_nr = explode('?', $this->request['URL_BASE']);
         $this->next_request = $t_nr[0] . "?verb=" . $s->verb . "&resumptionToken=" . rawurlencode($this->rtoken["token"]);
     }
     // Drop the parser state object
     unset($s);
 }
 /**
  * Refreshes a source's repository from the XML feed configured as its $url.
  *
  * The document is downloaded with cURL and loaded into a DomDocument; for
  * every <cours> element an array keyed by child node name is built, converted
  * with notice_2_uni() and stored with rec_record().
  *
  * @param mixed  $source_id         Identifier of the source to refresh.
  * @param mixed  $callback_progress Stored in $this->callback_progress.
  * @param bool   $recover           Not read by this method.
  * @param string $recover_env       Not read by this method.
  *
  * @return int|null $this->n_recu once all elements were processed; 0 when
  *                  the feed could not be downloaded or parsed (a download
  *                  failure also sets $this->error and $this->error_message);
  *                  null when the source has no $url parameter.
  */
 function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
 {
     global $base_path, $charset;
     $this->n_recu = 0;
     $this->callback_progress = $callback_progress;
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // Every stored parameter becomes a global variable named after its
         // key; this is how $url becomes visible below.
         // NOTE(review): unserialize() must only ever see trusted, locally
         // stored data here.
         $vars = unserialize($params["PARAMETERS"]);
         if (is_array($vars)) {
             foreach ($vars as $key => $val) {
                 global ${$key};
                 ${$key} = $val;
             }
         }
     }
     if (!isset($url)) {
         $this->error_message = $this->msg["toutapprendre_unconfigured"];
         $this->error = 1;
         return;
     }
     // Look up the most recent import date, shifted by one day when present.
     // NOTE(review): $last_date is not read anywhere further down.
     $requete = "select unix_timestamp(max(date_import)) from entrepot_source_" . $source_id . " where 1;";
     $resultat = pmb_mysql_query($requete);
     if ($resultat && pmb_mysql_num_rows($resultat)) {
         $last_date = pmb_mysql_result($resultat, 0, 0);
         if ($last_date) {
             $last_date += 3600 * 24;
         }
     }
     $ch = curl_init();
     $addr = $url;
     curl_setopt($ch, CURLOPT_URL, $addr);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     configurer_proxy_curl($ch, $addr);
     $xml = curl_exec($ch);
     // Capture the outcome and release the handle immediately so that none
     // of the early returns below can leak it.
     $erreur_curl = ($xml === false) ? curl_error($ch) : "";
     curl_close($ch);
     if ($xml === false) {
         $this->error = true;
         $this->error_message = $erreur_curl;
         return 0;
     }
     if ($charset == 'utf-8') {
         // Replace every byte sequence matched by this pattern (control
         // characters and sequences it treats as malformed UTF-8) with "?".
         $xml = preg_replace(
             '/[\\x00-\\x08\\x10\\x0B\\x0C\\x0E-\\x19\\x7F]'
             . '|[\\x00-\\x7F][\\x80-\\xBF]+'
             . '|([\\xC0\\xC1]|[\\xF0-\\xFF])[\\x80-\\xBF]*'
             . '|[\\xC2-\\xDF]((?![\\x80-\\xBF])|[\\x80-\\xBF]{2,})'
             . '|[\\xE0-\\xEF](([\\x80-\\xBF](?![\\x80-\\xBF]))|(?![\\x80-\\xBF]{2})|[\\x80-\\xBF]{3,})/S',
             '?',
             $xml
         );
     }
     // loadXML() rejects an empty source (with an exception on recent PHP
     // versions), and preg_replace() returns null on a PCRE error.
     if (!is_string($xml) || $xml === "") {
         return 0;
     }
     @ini_set("zend.ze1_compatibility_mode", "0");
     $this->dom = new DomDocument();
     $this->dom->encoding = $charset;
     if (!@$this->dom->loadXML($xml)) {
         return 0;
     }
     $cours = $this->dom->getElementsByTagName('cours');
     foreach ($cours as $cour) {
         $data_notice = array();
         if ($cour->childNodes->length) {
             foreach ($cour->childNodes as $i) {
                 // Node values are decoded from UTF-8 unless that is the target charset
                 if ($charset != 'utf-8') {
                     $val = utf8_decode($i->nodeValue);
                 } else {
                     $val = $i->nodeValue;
                 }
                 $data_notice[$i->nodeName] = $val;
             }
         }
         $this->rec_record($this->notice_2_uni($data_notice), $source_id);
     }
     return $this->n_recu;
 }
Esempio n. 7
0
 /**
  * Harvests publication notices from the listing page configured as the
  * source's $url and stores them in that source's repository.
  *
  * The listing page is cut into one fragment per list item; for each fragment
  * the linked detail page is fetched from travail-emploi.gouv.fr, scraped into
  * an array, converted with notice_2_uni() and handed to rec_record(). The
  * loop stops as soon as rec_record() returns a truthy value.
  *
  * @param mixed  $source_id         Identifier of the source to refresh.
  * @param mixed  $callback_progress Stored in $this->callback_progress.
  * @param bool   $recover           Not read by this method.
  * @param string $recover_env       Not read by this method.
  *
  * @return int|null $this->n_recu, or null when the source has no $url parameter
  *                  (in which case $this->error is set to 1).
  */
 function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
 {
     global $base_path, $charset;

     $this->n_recu = 0;
     $this->callback_progress = $callback_progress;
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // Every stored parameter becomes a global variable named after its key.
         $vars = unserialize($params["PARAMETERS"]);
         foreach ($vars as $key => $val) {
             global ${$key};
             ${$key} = $val;
         }
     }
     if (!isset($url)) {
         $this->error_message = $this->msg["emploi_unconfigured"];
         $this->error = 1;
         return;
     }

     // Look up the most recent import date, shifted by one day when present.
     $resultat = mysql_query("select unix_timestamp(max(date_import)) from entrepot_source_" . $source_id . " where 1;");
     if (mysql_num_rows($resultat)) {
         $last_date = mysql_result($resultat, 0, 0);
         if ($last_date) {
             $last_date = $last_date + 86400;
         }
     }

     // Prefix put in front of every relative link scraped from the site.
     $site = "http://travail-emploi.gouv.fr/";

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     configurer_proxy_curl($ch);
     $page_liste = curl_exec($ch);
     if ($page_liste) {
         // Narrow the page down to the list of publications.
         $bloc = $this->get_field_betwen_2sep($page_liste, "<h2 class=\"smaller\">", "</ul>");
         $bloc = $this->get_field_betwen_2sep($bloc, "<ul>", "<ul>");
         foreach (explode("</li>\n", $bloc) as $fragment) {
             $lien = $site . $this->get_field_betwen_2sep($fragment, "<a href=\"", "\">");
             // The same handle is reused for every detail page.
             curl_setopt($ch, CURLOPT_URL, $lien);
             curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
             configurer_proxy_curl($ch);
             $page_notice = curl_exec($ch);
             if (!$page_notice) {
                 // Detail page unavailable: skip this entry.
                 continue;
             }
             $page_notice = $this->get_field_from_sep($page_notice, "<div class=\"gris clearfix\">");
             $data_notice = array(
                 "Url_notice" => $lien,
                 "Publication_Date" => $this->get_field_betwen_2sep($page_notice, "<span class=\"date\">", "</span>"),
                 "Title" => $this->get_field_betwen_2sep($page_notice, "<h1>", "</h1>"),
                 "Abstract" => str_replace(
                     "&#8217;",
                     "'",
                     $this->get_field_betwen_2sep($page_notice, "<div class=\"texteencadre-spip spip\">", "<div class=\"listdoc\">", 1, 1)
                 ),
             );
             // Attached document: its link also provides the notice identifier.
             $zone_docnum = $this->get_field_from_sep($page_notice, " spip_documents spip_lien_ok\">");
             $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
             if ($link_docnum) {
                 $data_notice["doc_links"][0]["link"] = $site . $link_docnum;
                 $data_notice["id"] = $this->get_field_betwen_2sep($link_docnum, "IMG/pdf/", ".pdf");
             }
             // A truthy result ends the harvest (per the original author's
             // note: the notice is already in the repository).
             if ($this->rec_record($this->notice_2_uni($data_notice), $source_id)) {
                 break;
             }
         }
     }
     curl_close($ch);
     return $this->n_recu;
 }
Esempio n. 8
0
 /**
  * Runs a boolean search against the Lehmanns XML service and records the
  * results.
  *
  * Each query term is translated into a Lehmanns field expression according
  * to its "ufield", terms are joined with their "inter" operator, and the
  * optional $themes source parameter adds a "FT=" filter. Result pages are
  * requested 40 titles apart; every page containing "<LBook>" is transformed
  * with the lehmanns2uni.xsl stylesheet and stored with rec_records().
  *
  * On a transport failure $this->error is set to true, $this->error_message
  * receives the cURL error and paging stops.
  *
  * @param mixed $source_id Identifier of the source whose parameters are loaded.
  * @param array $query     Search terms; each exposes "ufield", "inter" and "values".
  * @param mixed $search_id Search identifier handed to rec_records().
  *
  * @return void
  */
 function search($source_id, $query, $search_id)
 {
     global $charset;
     global $opac_curl_proxy;
     global $base_path;
     $this->error = false;
     $this->error_message = "";
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // Every stored parameter becomes a global variable named after its
         // key; this is how $max_return and $themes become visible below.
         // NOTE(review): unserialize() must only ever see trusted, locally
         // stored data here.
         $vars = unserialize($params["PARAMETERS"]);
         if (is_array($vars)) {
             foreach ($vars as $key => $val) {
                 global ${$key};
                 ${$key} = $val;
             }
         }
     }
     if (empty($max_return)) {
         $max_return = 100;
     }
     // Build the boolean search string
     $boolsearch = "";
     $nb_termes = count($query);
     for ($i = 0; $i < $nb_termes; $i++) {
         $term = $query[$i];
         // The service uses "%" as wildcard where the caller uses "*".
         $valeur = isset($term->values[0]) ? str_replace("*", "%", $term->values[0]) : "";
         $chaine = "";
         switch ($term->ufield) {
             case "200\$a":
                 $chaine = "(TI=" . $valeur . " or ST=" . $valeur . ")";
                 break;
             case "010\$a":
                 $chaine = "IS=" . $valeur;
                 break;
             case "7XX":
                 $chaine = "AU=" . $valeur;
                 break;
             case "210\$c":
                 $chaine = "PU=" . $valeur;
                 break;
             case "210\$d":
                 $chaine = "PY=" . $valeur;
                 break;
             case "300":
             case "327":
             case "330":
             case "3XX":
                 $chaine = "ME=" . $valeur;
                 break;
             case "60X":
                 $chaine = "FT=" . $valeur;
                 break;
             case "XXX":
                 // "All fields": the value is searched in every supported field.
                 $chaine = "(TI=" . $valeur . " or ST=" . $valeur . " or AU=" . $valeur . " or FT=" . $valeur . " or ME=" . $valeur . " or IS=" . $valeur . " or PU=" . $valeur . ")";
                 break;
         }
         // The operator is only emitted in front of a non-empty, non-first term.
         if ($chaine != "" && $i != 0) {
             switch ($term->inter) {
                 case "and":
                     $boolsearch .= " and ";
                     break;
                 case "or":
                     $boolsearch .= " or ";
                     break;
                 case "ex":
                     $boolsearch .= " and not ";
                     break;
             }
         }
         $boolsearch .= $chaine;
     }
     // Optional theme filter; $themes only exists when the source defines it,
     // so it must be checked before being counted.
     $boolthemes = "";
     if (isset($themes) && is_array($themes) && count($themes)) {
         $boolthemes = "(FT=" . implode(" or FT=", $themes) . ")";
     }
     if ($boolthemes) {
         $boolsearch = "(" . $boolsearch . ") and " . $boolthemes;
     }
     // NOTE(review): with $max_return below 40 this yields zero pages and no
     // request is sent at all -- confirm that this is intended.
     $nb_pages = floor($max_return / 40);
     for ($i = 0; $i < $nb_pages; $i++) {
         $addr = "http://sun2.lehmanns.de/cgi-bin/work/xmlboolsearch?mode=xmlboolsearch&titcount={$max_return}&titnext=" . ($i * 40) . "&boolstring=" . rawurlencode($boolsearch);
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $addr);
         if (!empty($params["TIMEOUT"])) {
             curl_setopt($ch, CURLOPT_TIMEOUT, (int) $params["TIMEOUT"]);
         }
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
         configurer_proxy_curl($ch);
         $cexec = curl_exec($ch);
         // Read the error before closing, and close on every path so that a
         // failed page cannot leak the handle.
         $erreur_curl = $cexec ? "" : curl_error($ch);
         curl_close($ch);
         if (!$cexec) {
             $this->error = true;
             $this->error_message = "Can't get Lob answer : " . $erreur_curl;
             break;
         }
         // A page without any <LBook> element ends the paging.
         if (strpos($cexec, "<LBook>") === false) {
             break;
         }
         $unixml = $this->apply_xsl_to_xml($cexec, file_get_contents($base_path . "/admin/connecteurs/in/lehmanns/xslt/lehmanns2uni.xsl"));
         $this->rec_records($unixml, $source_id, $search_id);
     }
 }
Esempio n. 9
0
 /**
  * Harvests book notices from the paginated listing configured as the
  * source's $url and stores them in that source's repository.
  *
  * Each listing page is cut into table rows; rows whose type is "Livre" are
  * scraped into an array, converted with notice_2_uni() and handed to
  * rec_record(). The row loop of a page stops as soon as rec_record() returns
  * a truthy value. The "next page" link is then followed, until no such link
  * is found, a page cannot be fetched, or the page counter passes its cap.
  *
  * @param mixed  $source_id         Identifier of the source to refresh.
  * @param mixed  $callback_progress Stored in $this->callback_progress.
  * @param bool   $recover           Not read by this method.
  * @param string $recover_env       Not read by this method.
  *
  * @return int|null $this->n_recu, or null when the source has no $url parameter
  *                  (in which case $this->error is set to 1).
  */
 function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
 {
     global $base_path, $charset;

     $this->n_recu = 0;
     $this->callback_progress = $callback_progress;
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // Every stored parameter becomes a global variable named after its key.
         $vars = unserialize($params["PARAMETERS"]);
         foreach ($vars as $key => $val) {
             global ${$key};
             ${$key} = $val;
         }
     }
     if (!isset($url)) {
         $this->error_message = $this->msg["oecd_unconfigured"];
         $this->error = 1;
         return;
     }

     // Look up the most recent import date, shifted by one day when present.
     $resultat = mysql_query("select unix_timestamp(max(date_import)) from entrepot_source_" . $source_id . " where 1;");
     if (mysql_num_rows($resultat)) {
         $last_date = mysql_result($resultat, 0, 0);
         if ($last_date) {
             $last_date = $last_date + 86400;
         }
     }

     // Prefix put in front of every relative link scraped from the site.
     $site = "http://www.oecd-ilibrary.org";

     $ch = curl_init();
     $addr = $url;
     $page = 0;
     do {
         // Unless a next page is found below, this iteration is the last one.
         $fini = true;

         curl_setopt($ch, CURLOPT_URL, $addr);
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         configurer_proxy_curl($ch);
         $html = curl_exec($ch);
         if ($html) {
             if (strtoupper($charset) != "UTF-8") {
                 $html = utf8_decode($html);
             }
             $notice_list = $this->get_field_betwen_2sep($html, "</thead>\n<tbody>", "</tbody>\n</table>");
             foreach (explode("</tr>\n<tr>", $notice_list) as $notice_html) {
                 // Only rows describing a book are harvested.
                 $type = $this->get_field_betwen_2sep($notice_html, "\"type nowrap box3\"><strong>", " </strong>");
                 if ($type != "Livre") {
                     continue;
                 }
                 $link = $site . $this->get_field_betwen_2sep($notice_html, "<strong>\n<a href=\"", "\" title=\"");
                 $data_notice = array(
                     "Url_notice" => $link,
                     "Title" => $this->get_field_betwen_2sep(
                         $this->get_field_betwen_2sep($notice_html, "</ul>\n<strong>\n", "</strong>"),
                         "rel=\"\"><span>",
                         "</span>",
                         1
                     ),
                     "Abstract" => $this->get_field_betwen_2sep($notice_html, "class=\"abstract \"><span>", "</span>"),
                     "Publication_Date" => $this->get_field_betwen_2sep(
                         $this->get_field_betwen_2sep($notice_html, "nowrap box2\">\n", "&nbsp;"),
                         "nowrap box2\">\n",
                         "\n"
                     ),
                     "numberofpages" => $this->get_field_betwen_2sep($notice_html, " Pages: ", "\n"),
                     "Authors" => array(
                         0 => $this->get_field_betwen_2sep($notice_html, "</a></strong><br />\n", ", Pages: "),
                     ),
                 );
                 // Attached document, when the row offers one.
                 $zone_docnum = $this->get_field_betwen_2sep($notice_html, "<li class=\"last\">\n", "</li>");
                 $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
                 if ($link_docnum) {
                     $data_notice["doc_links"][0]["link"] = $site . $link_docnum;
                 }
                 // The identifier is cut out of the notice URL.
                 $data_notice["id"] = $this->get_field_betwen_2sep($data_notice["Url_notice"], "_", ";jsessionid");
                 // A truthy result ends this page's row loop (per the original
                 // author's note: the notice is already in the repository).
                 if ($this->rec_record($this->notice_2_uni($data_notice), $source_id)) {
                     break;
                 }
             }

             // Follow the pager's "next page" link when there is one.
             $page_zone = $this->get_field_betwen_2sep($html, "bobby-inline pager", "</ul>");
             if ($page_zone) {
                 $next_page_link = $this->get_field_betwen_2sep($page_zone, "<li>|\n<a href=\"", "\" title=\"next page\"");
                 if ($next_page_link) {
                     $addr = $site . $next_page_link;
                     $fini = false;
                 }
             }
         }

         // Hard cap on the number of pages visited.
         if ($page++ > 20) {
             $fini = true;
         }
     } while (!$fini);
     curl_close($ch);
     return $this->n_recu;
 }
Esempio n. 10
0
 /**
  * Performs one HTTP request with cURL and returns the wrapped response.
  *
  * The handle is created from scratch, configured from this object's settings
  * (timeout, cookie file, referer, user agent, custom headers, extra options),
  * executed and closed again.
  *
  * @param string       $method HTTP method; 'GET' and 'POST' use the dedicated
  *                             cURL options, anything else is sent as a custom request.
  * @param string       $url    Target URL; any "#fragment" is removed and
  *                             spaces are replaced by "%20" before use.
  * @param array|string $vars   Request fields; an array is turned into a query string.
  *
  * @return mixed A CurlResponse built from the response when it is truthy;
  *               otherwise the falsy response itself, with $this->error set
  *               to "<errno> - <error message>".
  */
 function request($method, $url, $vars = array())
 {
     $this->handle = curl_init();
     $h = $this->handle;

     // The fragment is a browser-side anchor and must not be sent; spaces are
     // percent-encoded. Both would otherwise make a valid URL fail validation.
     $cible = str_replace(" ", "%20", preg_replace("/#.*\$/", "", $url));
     $champs = is_array($vars) ? http_build_query($vars, '', '&') : $vars;

     # Default options
     if ($this->timeout) {
         curl_setopt($h, CURLOPT_CONNECTTIMEOUT, $this->timeout);
         curl_setopt($h, CURLOPT_TIMEOUT, $this->timeout);
     }
     curl_setopt($h, CURLOPT_COOKIEFILE, $this->cookie_file);
     curl_setopt($h, CURLOPT_COOKIEJAR, $this->cookie_file);
     @curl_setopt($h, CURLOPT_FOLLOWLOCATION, true);
     curl_setopt($h, CURLOPT_HEADER, true);
     curl_setopt($h, CURLOPT_POSTFIELDS, $champs);
     curl_setopt($h, CURLOPT_REFERER, $this->referer);
     curl_setopt($h, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($h, CURLOPT_URL, $cible);
     curl_setopt($h, CURLOPT_USERAGENT, $this->user_agent);

     # Optional body handlers; the file writer, set last, wins when both apply
     if ($this->limit) {
         curl_setopt($h, CURLOPT_WRITEFUNCTION, array(&$this, 'getBodyOverflow'));
     }
     if ($this->save_file_name) {
         $this->header_detect = 0;
         curl_setopt($h, CURLOPT_WRITEFUNCTION, array(&$this, 'saveBodyInFile'));
     }
     configurer_proxy_curl($h, $cible);

     # Custom headers, formatted as "Name: value"
     $lignes_entete = array();
     foreach ($this->headers as $nom => $valeur) {
         $lignes_entete[] = $nom . ': ' . $valeur;
     }
     curl_setopt($h, CURLOPT_HTTPHEADER, $lignes_entete);

     # Request method
     if ($method == 'GET') {
         curl_setopt($h, CURLOPT_HTTPGET, true);
     } elseif ($method == 'POST') {
         curl_setopt($h, CURLOPT_POST, true);
     } else {
         curl_setopt($h, CURLOPT_CUSTOMREQUEST, $method);
     }

     # Extra options, named with or without the "CURLOPT_" prefix
     foreach ($this->options as $option => $value) {
         $nom_constante = 'CURLOPT_' . str_replace('CURLOPT_', '', strtoupper($option));
         curl_setopt($h, constant($nom_constante), $value);
     }

     $this->body_overflow = "";
     $response = curl_exec($h);
     // With a size limit the body is taken from the overflow buffer instead.
     if ($this->limit) {
         $response = $this->body_overflow;
     }
     if (!$response) {
         $this->error = curl_errno($h) . ' - ' . curl_error($h);
     } else {
         $response = new CurlResponse($response);
     }
     curl_close($this->handle);
     return $response;
 }
Esempio n. 11
0
 /**
  * Makes sure the thumbnail, front and back cover images of an ISBN are cached in
  * $this->image_folder and returns their public URLs.
  *
  * An image already present on disk is reused without any network access. A missing
  * image is downloaded from the matching URL template ("!!isbn!!" is replaced by
  * $isbn) and stored as "<isbn>_<kind>.jpg". A download is kept only when the
  * transfer succeeded, the body is not empty, the HTTP status (when there is one)
  * is 2xx, and the file could be written; otherwise nothing is cached and the
  * entry stays empty so a later call can retry.
  *
  * NOTE(review): $isbn is concatenated into the file path as-is; callers are
  * presumably expected to pass a validated ISBN - confirm.
  *
  * @param string $isbn ISBN used both in the remote URL and in the local file name.
  * @return array|null Array with keys "thumb", "front" and "back" (URL, or "" when
  *                    unavailable); null when the image folder does not exist.
  */
 function fetch_and_record_images($isbn)
 {
     if (!is_dir($this->image_folder)) {
         return;
     }
     $result = array("thumb" => "", "front" => "", "back" => "");
     // Image kind => name of the property holding its URL template. The property
     // is only read when the image actually has to be downloaded.
     $template_properties = array(
         "thumb" => "image_thumb_url",
         "front" => "image_front",
         "back" => "image_back",
     );
     $folder = $this->image_folder;
     $folder_url = $this->image_folder_url;

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

     foreach ($template_properties as $kind => $property) {
         $file_name = $isbn . "_" . $kind . ".jpg";
         $local_path = $folder . "/" . $file_name;

         if (!file_exists($local_path)) {
             $url = str_replace("!!isbn!!", $isbn, $this->{$property});
             curl_setopt($ch, CURLOPT_URL, $url);
             configurer_proxy_curl($ch, $url);
             $buffer = curl_exec($ch);

             // Transport failure or empty body: nothing worth caching.
             if (curl_error($ch) || !is_string($buffer) || $buffer === "") {
                 continue;
             }
             // An HTTP error or redirect page must not be stored as a .jpg, or it
             // would be served from the cache forever. A code of 0 means no HTTP
             // status was reported, which is accepted.
             $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
             if ($http_code !== 0 && ($http_code < 200 || $http_code >= 300)) {
                 continue;
             }
             // Do not advertise a URL for a file that could not be written.
             if (file_put_contents($local_path, $buffer) === false) {
                 continue;
             }
         }
         $result[$kind] = $folder_url . "/" . $file_name;
     }

     curl_close($ch);
     return $result;
 }
Esempio n. 12
0
 /**
  * Harvests notices from the listing page configured for a source and records them.
  *
  * The source parameters are unserialized and each entry is published as a global
  * variable; the listing address is expected in $url. The listing page is fetched,
  * split into list items, and the page linked from each item is fetched in turn,
  * converted with notice_2_uni() and stored through rec_record().
  *
  * The $query argument is currently not applied: the listing page is fetched as-is.
  *
  * @param mixed $source_id Identifier of the source whose parameters are loaded.
  * @param mixed $query     Search terms (unused, see above).
  * @param mixed $search_id Identifier passed on to rec_record().
  * @return void On missing configuration, $this->error is set to 1 and
  *              $this->error_message is filled in.
  */
 function search($source_id, $query, $search_id)
 {
     global $base_path, $charset;
     $params = $this->get_source_params($source_id);
     $this->fetch_global_properties();
     if ($params["PARAMETERS"]) {
         // NOTE(review): unserialize() plus variable-variable globals is only safe
         // if PARAMETERS comes from trusted configuration - confirm.
         $vars = unserialize($params["PARAMETERS"]);
         // unserialize() returns false on corrupt data; do not iterate over it.
         if (is_array($vars) || is_object($vars)) {
             foreach ($vars as $key => $val) {
                 global ${$key};
                 ${$key} = $val;
             }
         }
     }
     if (!isset($url)) {
         $this->error_message = $this->msg["emploi_unconfigured"];
         $this->error = 1;
         return;
     }

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     configurer_proxy_curl($ch);
     $html = curl_exec($ch);

     if ($html) {
         // Narrow the page down to the list of publications, then cut it per item.
         $notice_list = $this->get_field_betwen_2sep($html, "<h2 class=\"smaller\">", "</ul>");
         $notice_list = $this->get_field_betwen_2sep($notice_list, "<ul>", "<ul>");
         $notices_html = explode("</li>\n", $notice_list);

         // Counts recorded notices; the loop stops once the counter has passed 20.
         $nb = 0;
         foreach ($notices_html as $notice_html) {
             $link = $this->get_field_betwen_2sep($notice_html, "<a href=\"", "\">");
             if (!$link) {
                 // No link extracted from this fragment: without this guard the bare
                 // site root would be fetched and recorded as a notice.
                 continue;
             }
             $link = "http://travail-emploi.gouv.fr/" . $link;

             // The handle is reused; CURLOPT_RETURNTRANSFER set above still applies.
             curl_setopt($ch, CURLOPT_URL, $link);
             configurer_proxy_curl($ch);
             $html_notice_complete = curl_exec($ch);
             if (!$html_notice_complete) {
                 continue;
             }

             $data_notice = array();
             $data_notice["Url_notice"] = $link;
             $html_notice_complete = $this->get_field_from_sep($html_notice_complete, "<div class=\"gris clearfix\">");
             $data_notice["Publication_Date"] = $this->get_field_betwen_2sep($html_notice_complete, "<span class=\"date\">", "</span>");
             $data_notice["Title"] = $this->get_field_betwen_2sep($html_notice_complete, "<h1>", "</h1>");
             $data_notice["Abstract"] = $this->get_field_betwen_2sep($html_notice_complete, "<div class=\"texteencadre-spip spip\">", "</div>", 1);

             // Attached document, e.g. http://travail-emploi.gouv.fr/IMG/pdf/2012-035.pdf:
             // the file name without extension is used as the notice id.
             $zone_docnum = $this->get_field_from_sep($html_notice_complete, " spip_documents spip_lien_ok\">");
             $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
             if ($link_docnum) {
                 $data_notice["doc_links"][0]["link"] = "http://travail-emploi.gouv.fr/" . $link_docnum;
                 $data_notice["id"] = $this->get_field_betwen_2sep($link_docnum, "IMG/pdf/", ".pdf");
             }

             $this->rec_record($this->notice_2_uni($data_notice), $source_id, $search_id);
             if ($nb++ > 20) {
                 break;
             }
         }
     }
     curl_close($ch);
 }