/**
 * Read the raw content of an RSS feed (or any location PHP can open).
 *
 * When the global $opac_curl_available flag is set the content is fetched
 * with cURL, otherwise it is read through fopen()/fread().
 *
 * @param string $url_fichier URL (or path) of the feed to read.
 *
 * @return string|false Content that was read. The stream branch yields ""
 *                      when the location cannot be opened; the cURL branch
 *                      returns the result of curl_exec() (false on failure).
 */
function lit_fichier_rss($url_fichier)
{
    global $opac_curl_available, $pmb_curl_timeout;

    $res = "";
    if ($opac_curl_available) {
        // Cast rather than multiply by 1: arithmetic on an empty or
        // non-numeric setting raises a TypeError on PHP 8.
        $timeout = (int) $pmb_curl_timeout;
        if ($timeout <= 0) {
            $timeout = 5;
        }

        $ch = curl_init($url_fichier);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        // NOTE(review): certificate verification is disabled here; kept as is
        // because feeds served with self-signed certificates may rely on it.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        configurer_proxy_curl($ch, $url_fichier);
        $res = curl_exec($ch);
        curl_close($ch);
    } else {
        $fp = fopen($url_fichier, "r");
        if ($fp) {
            while (!feof($fp)) {
                $chunk = fread($fp, 2048);
                if ($chunk === false) {
                    // A read error does not set EOF; stop instead of spinning.
                    break;
                }
                $res .= $chunk;
            }
            fclose($fp);
        }
    }

    return $res;
}
/**
 * Read the raw content of an RSS feed (or any location PHP can open).
 *
 * cURL is used when the global $opac_curl_available flag is set; otherwise
 * the content is read through fopen()/fread().
 *
 * @param string $url_fichier URL (or path) of the feed to read.
 *
 * @return string|false Content that was read. The stream branch yields ""
 *                      when the location cannot be opened; the cURL branch
 *                      returns the result of curl_exec() (false on failure).
 */
function lit_fichier_rss($url_fichier)
{
    global $opac_curl_available;

    $res = "";
    if ($opac_curl_available) {
        $ch = curl_init($url_fichier);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        configurer_proxy_curl($ch);
        $res = curl_exec($ch);
        curl_close($ch);

        return $res;
    }

    $fp = fopen($url_fichier, "r");
    if ($fp) {
        while (!feof($fp)) {
            $chunk = fread($fp, 2048);
            if ($chunk === false) {
                // A read error does not set EOF; stop instead of spinning.
                break;
            }
            $res .= $chunk;
        }
        fclose($fp);
    }

    return $res;
}
/**
 * Query the configured NASA ADS endpoint and record every hit.
 *
 * The result list is fetched from "$url/cgi-bin/basic_connect", split into
 * per-notice HTML fragments, and each notice's abstract page is then fetched
 * from adsabs.harvard.edu and scraped into a $data_notice array that is
 * handed to rec_record().
 *
 * @param mixed $source_id Source identifier, passed to get_source_params()
 *                         and rec_record().
 * @param array $query     Search terms; each element must expose a ->values
 *                         array, all values are joined into one query string.
 * @param mixed $search_id Search identifier, passed to rec_record().
 *
 * @return void Failures are reported through $this->error and
 *              $this->error_message.
 */
function search($source_id, $query, $search_id)
{
    global $base_path, $charset;

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $url comes from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (!isset($url)) {
        $this->error_message = $this->msg["nasa_ads_unconfigured"];
        $this->error = 1;
        return;
    }

    $boolsearch = "";
    foreach ($query as $element) {
        $boolsearch .= implode(" ", $element->values) . " ";
    }
    $boolsearch = str_replace(" ", "+", $boolsearch);
    $addr = $url . "/cgi-bin/basic_connect?version=1&qsearch=" . rawurlencode($boolsearch);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $addr);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    configurer_proxy_curl($ch, $addr);
    $cexec = curl_exec($ch);

    if (!$cexec) {
        $this->error = true;
        $this->error_message = "Can't get Lob answer : " . curl_error($ch);
    } else {
        if (strtoupper($charset) != "UTF-8") {
            $cexec = utf8_decode($cexec);
        }

        $sep_notices = "<tr><td colspan=6><HR></td></tr>";
        $result_table = get_field_betwen_2sep($cexec, $sep_notices, "</table>");
        $notices_html = explode($sep_notices, $result_table);

        // Markup shared by every labelled row of the abstract page.
        $cell = "</b></td><td><br></td><td align=\"left\" valign=\"top\">";
        $row_end = "</td></tr>";

        $nb = 0;
        foreach ($notices_html as $notice_html) {
            $data_notice = array();

            $tab_tr = explode("</tr>", $notice_html);
            $tab_td_0 = explode("</td>", $tab_tr[0]);

            $id = get_field_betwen_2sep($notice_html, "value=\"", "\"");
            $data_notice["id"] = $id;

            // Sixth cell of the first row holds the document links; the last
            // character before each "</a>" is used as the document type.
            $links = explode("</a>", isset($tab_td_0[5]) ? $tab_td_0[5] : "");
            foreach ($links as $link) {
                $doc_type = substr($link, -1);
                $doc_link = get_field_betwen_2sep($link, "href=\"", "\"");
                if ($doc_link) {
                    $data_notice["doc_links"][$doc_type]["link"] = $doc_link;
                    $data_notice["doc_links"][$doc_type]["label"] = $this->msg["nasa_ads_doc_" . strtolower($doc_type)];
                }
            }

            $url_notice = "http://adsabs.harvard.edu/abs/{$id}";
            $data_notice["Url_notice"] = $url_notice;

            $ch_notice = curl_init();
            curl_setopt($ch_notice, CURLOPT_URL, $url_notice);
            curl_setopt($ch_notice, CURLOPT_RETURNTRANSFER, true);
            configurer_proxy_curl($ch_notice, $url_notice);
            $cexec_notice = curl_exec($ch_notice);
            $notice_error = $cexec_notice ? "" : curl_error($ch_notice);
            // Release the handle right away so neither the error path nor the
            // "break" below can leave it open.
            curl_close($ch_notice);

            if (!$cexec_notice) {
                $this->error = true;
                $this->error_message = "Can't get Lob answer : " . $notice_error;
                continue;
            }

            if (strtoupper($charset) != "UTF-8") {
                $cexec_notice = utf8_decode($cexec_notice);
            }
            $cexec_notice = get_field_betwen_2sep($cexec_notice, "Translate This Page</a></strong>", "<form method");

            $data_notice["Title"] = get_field_betwen_2sep($cexec_notice, "Title:" . $cell, $row_end, 1);

            $Authors = get_field_betwen_2sep($cexec_notice, "Authors:" . $cell, $row_end);
            foreach (explode("</a>", $Authors) as $aut) {
                $autor = get_field_from_sep($aut, ">", 1);
                if ($autor) {
                    $data_notice["Authors"][] = $autor;
                }
            }

            $data_notice["Affiliation"] = get_field_betwen_2sep($cexec_notice, "Affiliation:" . $cell, $row_end, 1);
            $data_notice["Publication"] = get_field_betwen_2sep($cexec_notice, "Publication:" . $cell, $row_end);
            $data_notice["Publication_Date"] = get_field_betwen_2sep($cexec_notice, "Publication Date:" . $cell, $row_end);

            $Origin = get_field_betwen_2sep($cexec_notice, "Origin:" . $cell, $row_end, 1);
            $data_notice["Origin"] = get_field_betwen_2sep($Origin, "\">", "</a>");

            $data_notice["Keywords"] = get_field_betwen_2sep($cexec_notice, "Keywords:" . $cell, $row_end);

            $DOI = get_field_betwen_2sep($cexec_notice, "DOI:" . $cell, $row_end);
            $data_notice["DOI"] = get_field_betwen_2sep($DOI, "\">", "</a>", 1);

            $Bibliographic_Code = get_field_betwen_2sep($cexec_notice, "Bibliographic Code:" . $cell, $row_end);
            $data_notice["Bibliographic_Code"] = get_field_betwen_2sep($Bibliographic_Code, "href=\"", "\"");

            $data_notice["Abstract"] = get_field_betwen_2sep($cexec_notice, "Abstract</h3>", "<hr>", 1, "<p><sup><sub>");
            // Flatten the abstract onto a single line.
            $data_notice["Abstract"] = str_replace(array("\r\n", "\n", "\r"), " ", $data_notice["Abstract"]);

            $this->rec_record($this->notice_2_uni($data_notice), $source_id, $search_id);

            // Hard cap on the number of notices recorded per search.
            if ($nb++ > 20) {
                break;
            }
        }
    }

    curl_close($ch);
}
/**
 * Fetch $url with cURL and keep the raw response body in $this->data.
 *
 * The request is replayed (at most three times) while the header callback
 * leaves a value in $this->retry_after, sleeping that many seconds between
 * attempts.
 *
 * @param string $url Full request URL.
 *
 * @return void On failure $this->error is set to true and
 *              $this->error_message holds the cURL error followed by the
 *              URL; $this->data is then left untouched.
 */
function get_data($url)
{
    // Reset the error state
    $this->error = false;
    $this->error_message = "";

    // Configure the cURL handle
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, "parse_xml"));
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, "verif_header"));
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    if ($this->time_out) {
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->time_out);
    }

    // Reset "retry_after" before the header callback can set it again
    $this->retry_after = "";

    // Parser initialisation
    $this->xml_parser = xml_parser_create("utf-8");
    xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($this->xml_parser, XML_OPTION_SKIP_WHITE, 1);

    // NOTE(review): RETURNTRANSFER is deliberately still set AFTER
    // WRITEFUNCTION; the code below expects curl_exec() to return the body,
    // so the later option appears to take over the write handling. Confirm
    // before reordering these options.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    configurer_proxy_curl($ch, $url);

    $n_try = 0;
    $data = curl_exec($ch);
    while ($data && $this->retry_after && $n_try < 3) {
        $n_try++;
        sleep((int) $this->retry_after);
        $this->retry_after = "";
        $data = curl_exec($ch);
    }

    if (!$data) {
        $this->error = true;
        $this->error_message = curl_error($ch);
    }

    xml_parser_free($this->xml_parser);
    $this->xml_parser = "";
    curl_close($ch);

    if ($this->error) {
        $this->error_message .= " - " . $url;
        return;
    }

    $this->data = $data;
}
/**
 * Send an OAI request and load the parsed answer into this object.
 *
 * The response is streamed through the parse_xml/verif_header callbacks into
 * an oai_parser instance. On success the method fills response_date,
 * url_base, request, verb, rtoken, records and, when a resumption token was
 * returned, next_request.
 *
 * @param string $url       Full OAI request URL.
 * @param mixed  $rcallback Optional record callback handed to oai_parser;
 *                          when it is set, records are not copied into
 *                          $this->records.
 *
 * @return void Transport failures and OAI "error" answers are reported
 *              through $this->error / $this->error_message (and
 *              $this->error_oai_code for OAI errors).
 */
function analyse_response($url, $rcallback = "")
{
    // Reset the error state
    $this->error = false;
    $this->error_message = "";

    // Start from an empty record list unless this call continues the
    // previous one through its resumption request
    if ($url != $this->next_request) {
        $this->records = array();
    }
    $this->next_request = "";
    $this->rtoken = "";
    $this->remainder = '';

    // Configure the cURL handle
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, "parse_xml"));
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, "verif_header"));
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    if ($this->time_out) {
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->time_out);
    }

    // Reset "retry_after" before the header callback can set it again
    $this->retry_after = "";
    configurer_proxy_curl($ch, $url);

    // Pull the verb out of the query string, for servers that do not echo
    // it back as the protocol requires. Stays "" when the URL has none.
    $verb = "";
    $arguments = explode("&", substr($url, strpos($url, "?") + 1));
    foreach ($arguments as $argument) {
        if (strpos($argument, "verb") !== false) {
            $verb = substr($argument, 5);
            break;
        }
    }

    // Parser state object; pre-fill the verb when we know it
    $s = new oai_parser($rcallback, $this->charset);
    if ($verb) {
        $s->verb = $verb;
    }

    // Parser initialisation
    $this->xml_parser = xml_parser_create("utf-8");
    xml_set_object($this->xml_parser, $s);
    xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($this->xml_parser, XML_OPTION_SKIP_WHITE, 1);
    xml_set_element_handler($this->xml_parser, "oai_startElement", "oai_endElement");
    xml_set_character_data_handler($this->xml_parser, "oai_charElement");

    // Run the request, replaying it (at most three times) while the header
    // callback asks us to retry later
    $n_try = 0;
    $cexec = curl_exec($ch);
    while ($cexec && $this->retry_after && $n_try < 3) {
        $n_try++;
        sleep((int) $this->retry_after);
        $this->retry_after = "";
        $cexec = curl_exec($ch);
    }
    if (!$cexec) {
        $this->error = true;
        $this->error_message = curl_error($ch);
    }

    xml_parser_free($this->xml_parser);
    $this->xml_parser = "";
    curl_close($ch);

    if ($this->error) {
        $this->error_message .= " - " . $url;
        return;
    }

    // Values are converted from UTF-8 when the target charset is ISO-8859-1
    $c = (stripos($this->charset, 'iso-8859-1') !== false);

    // Optionally strip the query string from the base URL echoed by the server
    if ($this->clean_base_url) {
        $p = strpos($s->tree[1][1]["CHAR"], "?");
        if ($p !== false) {
            $s->tree[1][1]["CHAR"] = substr($s->tree[1][1]["CHAR"], 0, $p);
        }
    }

    $this->response_date = $c ? utf8_decode($s->tree[1][0]["CHAR"]) : $s->tree[1][0]["CHAR"];
    $this->url_base = $c ? utf8_decode($s->tree[1][1]["CHAR"]) : $s->tree[1][1]["CHAR"];
    $this->request["URL_BASE"] = $c ? utf8_decode($s->tree[1][1]["CHAR"]) : $s->tree[1][1]["CHAR"];

    if (isset($s->tree[1][1]["ATTRIB"]) && is_array($s->tree[1][1]["ATTRIB"])) {
        foreach ($s->tree[1][1]["ATTRIB"] as $key => $val) {
            if ($key != "resumptionToken") {
                $this->request["ATTRIBS"][$key] = $c ? utf8_decode($val) : $val;
            }
        }
    }

    $this->verb = $c ? utf8_decode($s->tree[1][1]["ATTRIB"]["verb"]) : $s->tree[1][1]["ATTRIB"]["verb"];
    $this->rtoken = $s->rtoken;

    if ($s->tree[1][2]["NAME"] == "error") {
        $this->error = true;
        $this->error_message = "OAI Error, the server tell : " . $s->tree[1][2]["ATTRIB"]["code"] . " : " . $s->tree[1][2]["CHAR"];
        $this->error_oai_code = $s->tree[1][2]["ATTRIB"]["code"];
    }

    if ($this->verb == "Identify") {
        // Identify answers carry a single element
        $this->records[0] = $c ? utf8_decode($s->cur_elt) : $s->cur_elt;
    } elseif (!$rcallback && is_array($s->records)) {
        foreach ($s->records as $record) {
            $this->records[] = $c ? utf8_decode($record) : $record;
        }
    }

    // Build the follow-up request when a resumption token was returned
    if (is_array($this->rtoken) && $this->rtoken["token"]) {
        $t_nr = explode('?', $this->request['URL_BASE']);
        $this->next_request = $t_nr[0] . "?verb=" . $s->verb . "&resumptionToken=" . rawurlencode($this->rtoken["token"]);
    }
}
/**
 * Refresh the repository of a source from its configured XML catalogue.
 *
 * The document at $url is downloaded, scrubbed of byte sequences that are
 * not valid UTF-8 (when the application charset is UTF-8), parsed, and every
 * <cours> element is turned into a flat name => value array that is handed
 * to rec_record().
 *
 * @param mixed $source_id         Source identifier, passed to
 *                                 get_source_params() and rec_record().
 * @param mixed $callback_progress Stored in $this->callback_progress.
 * @param bool  $recover           Not used by this implementation.
 * @param mixed $recover_env       Not used by this implementation.
 *
 * @return int|null $this->n_recu after the import, 0 when the document could
 *                  not be fetched or parsed, null when the source has no URL
 *                  configured.
 */
function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
{
    global $base_path, $charset;

    $this->n_recu = 0;
    $this->callback_progress = $callback_progress;

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $url comes from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (!isset($url)) {
        $this->error_message = $this->msg["toutapprendre_unconfigured"];
        $this->error = 1;
        return;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    configurer_proxy_curl($ch, $url);
    $xml = curl_exec($ch);
    $fetch_error = curl_error($ch);
    // Close immediately so that no early return below can leak the handle.
    curl_close($ch);

    if ($xml === false || $xml === "") {
        // DOMDocument::loadXML() rejects an empty source outright on PHP 8,
        // so stop here and report the transport problem instead.
        $this->error = true;
        $this->error_message = $fetch_error;
        return 0;
    }

    // Compared case-insensitively so "UTF-8" and "utf-8" behave the same.
    $is_utf8 = (strtolower($charset) == 'utf-8');

    if ($is_utf8) {
        // Replace control characters and malformed UTF-8 sequences with "?"
        $xml = preg_replace(
            '/[\\x00-\\x08\\x10\\x0B\\x0C\\x0E-\\x19\\x7F]'
            . '|[\\x00-\\x7F][\\x80-\\xBF]+'
            . '|([\\xC0\\xC1]|[\\xF0-\\xFF])[\\x80-\\xBF]*'
            . '|[\\xC2-\\xDF]((?![\\x80-\\xBF])|[\\x80-\\xBF]{2,})'
            . '|[\\xE0-\\xEF](([\\x80-\\xBF](?![\\x80-\\xBF]))|(?![\\x80-\\xBF]{2})|[\\x80-\\xBF]{3,})/S',
            '?',
            $xml
        );
    }

    @ini_set("zend.ze1_compatibility_mode", "0");
    $this->dom = new DomDocument();
    $this->dom->encoding = $charset;
    if (!@$this->dom->loadXML($xml)) {
        return 0;
    }

    $cours = $this->dom->getElementsByTagName('cours');
    foreach ($cours as $cour) {
        $data_notice = array();
        if ($cour->childNodes->length) {
            foreach ($cour->childNodes as $child) {
                // DOM values are UTF-8; convert when the application is not
                $data_notice[$child->nodeName] = $is_utf8 ? $child->nodeValue : utf8_decode($child->nodeValue);
            }
        }
        $this->rec_record($this->notice_2_uni($data_notice), $source_id);
    }

    return $this->n_recu;
}
/**
 * Refresh the repository of a source from the travail-emploi.gouv.fr listing.
 *
 * The listing page at $url is downloaded and split into per-notice
 * fragments; each linked notice page is then fetched and scraped into a
 * $data_notice array handed to rec_record(). The import stops at the first
 * notice for which rec_record() returns a truthy value.
 *
 * @param mixed $source_id         Source identifier, passed to
 *                                 get_source_params() and rec_record().
 * @param mixed $callback_progress Stored in $this->callback_progress.
 * @param bool  $recover           Not used by this implementation.
 * @param mixed $recover_env       Not used by this implementation.
 *
 * @return int|null $this->n_recu after the import, or null when the source
 *                  has no URL configured.
 */
function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
{
    global $base_path, $charset;

    $this->n_recu = 0;
    $this->callback_progress = $callback_progress;

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $url comes from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (!isset($url)) {
        $this->error_message = $this->msg["emploi_unconfigured"];
        $this->error = 1;
        return;
    }

    // One handle is reused for the listing and for every notice page.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    configurer_proxy_curl($ch);
    $html = curl_exec($ch);

    if ($html) {
        $notice_list = $this->get_field_betwen_2sep($html, "<h2 class=\"smaller\">", "</ul>");
        $notice_list = $this->get_field_betwen_2sep($notice_list, "<ul>", "<ul>");
        $notices_html = explode("</li>\n", $notice_list);

        foreach ($notices_html as $notice_html) {
            $data_notice = array();

            $link = $this->get_field_betwen_2sep($notice_html, "<a href=\"", "\">");
            if (!$link) {
                // Fragment without a link (e.g. what follows the last item):
                // nothing to fetch, and requesting the bare site root would
                // only produce a bogus notice.
                continue;
            }
            $link = "http://travail-emploi.gouv.fr/" . $link;

            curl_setopt($ch, CURLOPT_URL, $link);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            configurer_proxy_curl($ch);
            $html_notice_complete = curl_exec($ch);
            if (!$html_notice_complete) {
                continue;
            }

            $data_notice["Url_notice"] = $link;
            $html_notice_complete = $this->get_field_from_sep($html_notice_complete, "<div class=\"gris clearfix\">");
            $data_notice["Publication_Date"] = $this->get_field_betwen_2sep($html_notice_complete, "<span class=\"date\">", "</span>");
            $data_notice["Title"] = $this->get_field_betwen_2sep($html_notice_complete, "<h1>", "</h1>");
            $data_notice["Abstract"] = $this->get_field_betwen_2sep($html_notice_complete, "<div class=\"texteencadre-spip spip\">", "<div class=\"listdoc\">", 1, 1);
            $data_notice["Abstract"] = str_replace("’", "'", $data_notice["Abstract"]);

            // Attached document, e.g. IMG/pdf/2012-035.pdf; its file name
            // (without extension) is used as the notice id.
            $zone_docnum = $this->get_field_from_sep($html_notice_complete, " spip_documents spip_lien_ok\">");
            $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
            if ($link_docnum) {
                $data_notice["doc_links"][0]["link"] = "http://travail-emploi.gouv.fr/" . $link_docnum;
                $data_notice["id"] = $this->get_field_betwen_2sep($link_docnum, "IMG/pdf/", ".pdf");
            }

            if ($this->rec_record($this->notice_2_uni($data_notice), $source_id)) {
                // Notice already in the repository: do not fetch the next ones
                break;
            }
        }
    }

    curl_close($ch);

    return $this->n_recu;
}
/**
 * Run a search against the Lehmanns XML boolean-search service.
 *
 * Each query term is translated to the service's field syntax according to
 * its UNIMARC field code, terms are chained with their boolean operator, an
 * optional theme filter is appended, and result pages are requested 40
 * titles at a time. Every page containing "<LBook>" is converted with the
 * lehmanns2uni.xsl stylesheet and stored through rec_records().
 *
 * @param mixed $source_id Source identifier, passed to get_source_params()
 *                         and rec_records().
 * @param array $query     List of term objects exposing ->ufield, ->values
 *                         and ->inter ("and", "or" or "ex").
 * @param mixed $search_id Search identifier, passed to rec_records().
 *
 * @return void Transport failures are reported through $this->error and
 *              $this->error_message.
 */
function search($source_id, $query, $search_id)
{
    global $charset;
    global $base_path;

    $this->error = false;
    $this->error_message = "";

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $max_return and $themes come from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (empty($max_return)) {
        $max_return = 100;
    }

    // Build the boolean search expression
    $boolsearch = "";
    $nb_terms = count($query);
    for ($i = 0; $i < $nb_terms; $i++) {
        $term = $query[$i];
        // The service uses "%" as its wildcard instead of "*"
        $value = isset($term->values[0]) ? str_replace("*", "%", $term->values[0]) : "";

        $chaine = "";
        switch ($term->ufield) {
            case '200$a':
                $chaine = "(TI=" . $value . " or ST=" . $value . ")";
                break;
            case '010$a':
                $chaine = "IS=" . $value;
                break;
            case "7XX":
                $chaine = "AU=" . $value;
                break;
            case '210$c':
                $chaine = "PU=" . $value;
                break;
            case '210$d':
                $chaine = "PY=" . $value;
                break;
            case "300":
            case "327":
            case "330":
            case "3XX":
                $chaine = "ME=" . $value;
                break;
            case "60X":
                $chaine = "FT=" . $value;
                break;
            case "XXX":
                $chaine = "(TI=" . $value . " or ST=" . $value . " or AU=" . $value . " or FT=" . $value
                    . " or ME=" . $value . " or IS=" . $value . " or PU=" . $value . ")";
                break;
        }

        // Only emit an operator when something already precedes this term;
        // otherwise an unsupported first term would leave a dangling
        // " and "/" or " at the start of the expression.
        if ($chaine != "" && $boolsearch != "") {
            switch ($term->inter) {
                case "and":
                    $boolsearch .= " and ";
                    break;
                case "or":
                    $boolsearch .= " or ";
                    break;
                case "ex":
                    $boolsearch .= " and not ";
                    break;
            }
        }
        $boolsearch .= $chaine;
    }

    // Optional theme filter: (FT=a or FT=b ...)
    $boolthemes = "";
    if (!empty($themes) && is_array($themes)) {
        foreach ($themes as $theme) {
            if ($boolthemes != "") {
                $boolthemes .= " or ";
            }
            $boolthemes .= "FT=" . $theme;
        }
        $boolthemes = "(" . $boolthemes . ")";
    }
    if ($boolthemes) {
        $boolsearch = "(" . $boolsearch . ") and " . $boolthemes;
    }

    // Pages hold 40 titles; always request at least one page so that a
    // $max_return below 40 does not silently skip the search altogether.
    $nb_pages = max(1, (int) floor($max_return / 40));
    $xsl = null;

    for ($page = 0; $page < $nb_pages; $page++) {
        $addr = "http://sun2.lehmanns.de/cgi-bin/work/xmlboolsearch?mode=xmlboolsearch&titcount={$max_return}&titnext="
            . ($page * 40) . "&boolstring=" . rawurlencode($boolsearch);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $addr);
        if (!empty($params["TIMEOUT"])) {
            curl_setopt($ch, CURLOPT_TIMEOUT, (int) $params["TIMEOUT"]);
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        configurer_proxy_curl($ch);
        $cexec = curl_exec($ch);
        $fetch_error = $cexec ? "" : curl_error($ch);
        // Close before any "break" so the handle is never leaked.
        curl_close($ch);

        if (!$cexec) {
            $this->error = true;
            $this->error_message = "Can't get Lob answer : " . $fetch_error;
            break;
        }
        if (strpos($cexec, "<LBook>") === false) {
            // No more books in the answer: stop paging
            break;
        }

        if ($xsl === null) {
            // Loaded once, on the first page that actually needs it
            $xsl = file_get_contents($base_path . "/admin/connecteurs/in/lehmanns/xslt/lehmanns2uni.xsl");
        }
        $unixml = $this->apply_xsl_to_xml($cexec, $xsl);
        $this->rec_records($unixml, $source_id, $search_id);
    }
}
/**
 * Refresh the repository of a source from an OECD iLibrary listing.
 *
 * Starting at $url, each listing page is downloaded and split into table
 * rows; rows whose type is "Livre" are scraped into a $data_notice array and
 * handed to rec_record(). The "next page" link is then followed, for a
 * bounded number of pages.
 *
 * @param mixed $source_id         Source identifier, passed to
 *                                 get_source_params() and rec_record().
 * @param mixed $callback_progress Stored in $this->callback_progress.
 * @param bool  $recover           Not used by this implementation.
 * @param mixed $recover_env       Not used by this implementation.
 *
 * @return int|null $this->n_recu after the import, or null when the source
 *                  has no URL configured.
 */
function maj_entrepot($source_id, $callback_progress = "", $recover = false, $recover_env = "")
{
    global $base_path, $charset;

    $this->n_recu = 0;
    $this->callback_progress = $callback_progress;

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $url comes from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (!isset($url)) {
        $this->error_message = $this->msg["oecd_unconfigured"];
        $this->error = 1;
        return;
    }

    // Example listings:
    //   http://www.oecd-ilibrary.org/fr/emploi/livres/2012
    //   http://www.oecd-ilibrary.org/fr/questionssociales/livres/2012
    $ch = curl_init();
    $addr = $url;
    $sortir = 0; // set to 1 to leave the paging loop
    $page = 0;

    do {
        curl_setopt($ch, CURLOPT_URL, $addr);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        configurer_proxy_curl($ch);
        $html = curl_exec($ch);

        if (!$html) {
            $sortir = 1;
        } else {
            if (strtoupper($charset) != "UTF-8") {
                $html = utf8_decode($html);
            }

            $notice_list = $this->get_field_betwen_2sep($html, "</thead>\n<tbody>", "</tbody>\n</table>");
            $notices_html = explode("</tr>\n<tr>", $notice_list);

            foreach ($notices_html as $notice_html) {
                $type = $this->get_field_betwen_2sep($notice_html, "\"type nowrap box3\"><strong>", " </strong>");
                if ($type != "Livre") {
                    // Only books are imported
                    continue;
                }

                $data_notice = array();

                $link = $this->get_field_betwen_2sep($notice_html, "<strong>\n<a href=\"", "\" title=\"");
                $data_notice["Url_notice"] = "http://www.oecd-ilibrary.org" . $link;

                $zone_title = $this->get_field_betwen_2sep($notice_html, "</ul>\n<strong>\n", "</strong>");
                $data_notice["Title"] = $this->get_field_betwen_2sep($zone_title, "rel=\"\"><span>", "</span>", 1);
                $data_notice["Abstract"] = $this->get_field_betwen_2sep($notice_html, "class=\"abstract \"><span>", "</span>");

                $date_zone = $this->get_field_betwen_2sep($notice_html, "nowrap box2\">\n", " ");
                $data_notice["Publication_Date"] = $this->get_field_betwen_2sep($date_zone, "nowrap box2\">\n", "\n");

                $data_notice["numberofpages"] = $this->get_field_betwen_2sep($notice_html, " Pages: ", "\n");
                $data_notice["Authors"][0] = $this->get_field_betwen_2sep($notice_html, "</a></strong><br />\n", ", Pages: ");

                $zone_docnum = $this->get_field_betwen_2sep($notice_html, "<li class=\"last\">\n", "</li>");
                $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
                if ($link_docnum) {
                    $data_notice["doc_links"][0]["link"] = "http://www.oecd-ilibrary.org" . $link_docnum;
                }

                // The id is the part of the notice URL between "_" and ";jsessionid"
                $data_notice["id"] = $this->get_field_betwen_2sep($data_notice["Url_notice"], "_", ";jsessionid");

                if ($this->rec_record($this->notice_2_uni($data_notice), $source_id)) {
                    // Notice already in the repository: skip the rest of this page.
                    // NOTE(review): this only leaves the current page; paging
                    // still continues below. Confirm whether the whole import
                    // was meant to stop here.
                    break;
                }
            }
        }

        if (!$sortir) {
            // Stop unless a "next page" link is found
            $sortir = 1;
            $page_zone = $this->get_field_betwen_2sep($html, "bobby-inline pager", "</ul>");
            if ($page_zone) {
                $next_page_link = $this->get_field_betwen_2sep($page_zone, "<li>|\n<a href=\"", "\" title=\"next page\"");
                if ($next_page_link) {
                    $addr = "http://www.oecd-ilibrary.org" . $next_page_link;
                    $sortir = 0;
                }
            }
        }

        // Safety net against endless paging
        if ($page++ > 20) {
            $sortir = 1;
        }
    } while (!$sortir);

    curl_close($ch);

    return $this->n_recu;
}
/**
 * Send an HTTP request with cURL and wrap what comes back.
 *
 * @param string       $method HTTP verb; 'GET' and 'POST' use the dedicated
 *                             cURL options, anything else is sent as a
 *                             custom request.
 * @param string       $url    Target URL. Any "#fragment" is removed and
 *                             spaces are replaced with "%20" before use.
 * @param array|string $vars   Request fields; an array is encoded with
 *                             http_build_query(), a string is sent as is.
 *
 * @return mixed A CurlResponse built from the response when it is truthy;
 *               otherwise the falsy response itself, with $this->error set
 *               to "<errno> - <message>".
 */
function request($method, $url, $vars = array())
{
    // Drop everything after "#": it is a browser-side anchor, and leaving it
    // in makes a valid link be reported as invalid. Spaces become %20 for
    // the same reason.
    $target = str_replace(" ", "%20", preg_replace("/#.*\$/", "", $url));

    $this->handle = curl_init();
    $curl = $this->handle;

    // Default cURL options
    if ($this->timeout) {
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);
    }
    curl_setopt($curl, CURLOPT_COOKIEFILE, $this->cookie_file);
    curl_setopt($curl, CURLOPT_COOKIEJAR, $this->cookie_file);
    @curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_HEADER, true);
    if (is_array($vars)) {
        curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($vars, '', '&'));
    } else {
        curl_setopt($curl, CURLOPT_POSTFIELDS, $vars);
    }
    curl_setopt($curl, CURLOPT_REFERER, $this->referer);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_URL, $target);
    curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

    // Optional body handlers; when both apply, the file writer is set last
    if ($this->limit) {
        curl_setopt($curl, CURLOPT_WRITEFUNCTION, array($this, 'getBodyOverflow'));
    }
    if ($this->save_file_name) {
        $this->header_detect = 0;
        curl_setopt($curl, CURLOPT_WRITEFUNCTION, array($this, 'saveBodyInFile'));
    }

    configurer_proxy_curl($curl, $target);

    // Custom headers, formatted as "Name: value"
    $header_lines = array();
    foreach ($this->headers as $name => $content) {
        $header_lines[] = $name . ': ' . $content;
    }
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header_lines);

    // Request method
    if ($method == 'GET') {
        curl_setopt($curl, CURLOPT_HTTPGET, true);
    } elseif ($method == 'POST') {
        curl_setopt($curl, CURLOPT_POST, true);
    } else {
        curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $method);
    }

    // Caller-supplied cURL options, named with or without the CURLOPT_ prefix
    foreach ($this->options as $option => $value) {
        $constant_name = 'CURLOPT_' . str_replace('CURLOPT_', '', strtoupper($option));
        curl_setopt($curl, constant($constant_name), $value);
    }

    $this->body_overflow = "";
    $response = curl_exec($curl);
    if ($this->limit) {
        // With a size limit the body is collected by the write callback
        $response = $this->body_overflow;
    }

    if (!$response) {
        $this->error = curl_errno($curl) . ' - ' . curl_error($curl);
    } else {
        $response = new CurlResponse($response);
    }

    curl_close($curl);

    return $response;
}
/**
 * Make sure the thumbnail, front and back cover images of an ISBN are
 * present in the local image folder, downloading the missing ones.
 *
 * Files are stored as "<isbn>_thumb.jpg", "<isbn>_front.jpg" and
 * "<isbn>_back.jpg" in $this->image_folder. Download URLs are built by
 * replacing "!!isbn!!" in the corresponding URL template.
 *
 * @param string $isbn ISBN used in the file names and URL templates.
 *
 * @return array|null Map with the keys "thumb", "front" and "back"; each
 *                    value is the public URL of the local file, or "" when
 *                    the image is unavailable. Null when the image folder
 *                    does not exist.
 */
function fetch_and_record_images($isbn)
{
    if (!is_dir($this->image_folder)) {
        return;
    }

    $result = array("thumb" => "", "front" => "", "back" => "");
    $templates = array(
        "thumb" => $this->image_thumb_url,
        "front" => $this->image_front,
        "back" => $this->image_back,
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    foreach ($templates as $kind => $template) {
        $file_name = $isbn . "_" . $kind . ".jpg";
        $local_path = $this->image_folder . "/" . $file_name;
        $public_url = $this->image_folder_url . "/" . $file_name;

        if (file_exists($local_path)) {
            // Already downloaded
            $result[$kind] = $public_url;
            continue;
        }

        $url = str_replace("!!isbn!!", $isbn, $template);
        curl_setopt($ch, CURLOPT_URL, $url);
        configurer_proxy_curl($ch, $url);
        $buffer = curl_exec($ch);

        if (curl_error($ch) || $buffer === false || $buffer === "") {
            continue;
        }
        if (curl_getinfo($ch, CURLINFO_HTTP_CODE) >= 400) {
            // Do not store an HTTP error page as if it were the image
            continue;
        }
        if (file_put_contents($local_path, $buffer) !== false) {
            $result[$kind] = $public_url;
        }
    }

    curl_close($ch);

    return $result;
}
/**
 * Fetch the travail-emploi.gouv.fr listing and record its notices for a
 * search.
 *
 * The listing page at $url is downloaded and split into per-notice
 * fragments; each linked notice page is fetched and scraped into a
 * $data_notice array handed to rec_record(). The loop stops once the notice
 * counter exceeds 20.
 *
 * NOTE(review): $query is not used to build the request; the listing at
 * $url is fetched as is.
 *
 * @param mixed $source_id Source identifier, passed to get_source_params()
 *                         and rec_record().
 * @param array $query     Search terms (currently unused, see note above).
 * @param mixed $search_id Search identifier, passed to rec_record().
 *
 * @return void A missing URL is reported through $this->error and
 *              $this->error_message.
 */
function search($source_id, $query, $search_id)
{
    global $base_path, $charset;

    $params = $this->get_source_params($source_id);
    $this->fetch_global_properties();
    if ($params["PARAMETERS"]) {
        // Stored source parameters are published as global variables
        // (this is where $url comes from).
        $vars = unserialize($params["PARAMETERS"]);
        if (is_array($vars)) {
            foreach ($vars as $key => $val) {
                global ${$key};
                ${$key} = $val;
            }
        }
    }
    if (!isset($url)) {
        $this->error_message = $this->msg["emploi_unconfigured"];
        $this->error = 1;
        return;
    }

    // One handle is reused for the listing and for every notice page.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    configurer_proxy_curl($ch);
    $html = curl_exec($ch);

    if ($html) {
        $notice_list = $this->get_field_betwen_2sep($html, "<h2 class=\"smaller\">", "</ul>");
        $notice_list = $this->get_field_betwen_2sep($notice_list, "<ul>", "<ul>");
        $notices_html = explode("</li>\n", $notice_list);

        $nb = 0;
        foreach ($notices_html as $notice_html) {
            $data_notice = array();

            $link = $this->get_field_betwen_2sep($notice_html, "<a href=\"", "\">");
            if (!$link) {
                // Fragment without a link (e.g. what follows the last item):
                // nothing to fetch, and requesting the bare site root would
                // only produce a bogus notice.
                continue;
            }
            $link = "http://travail-emploi.gouv.fr/" . $link;

            curl_setopt($ch, CURLOPT_URL, $link);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            configurer_proxy_curl($ch);
            $html_notice_complete = curl_exec($ch);
            if (!$html_notice_complete) {
                continue;
            }

            $data_notice["Url_notice"] = $link;
            $html_notice_complete = $this->get_field_from_sep($html_notice_complete, "<div class=\"gris clearfix\">");
            $data_notice["Publication_Date"] = $this->get_field_betwen_2sep($html_notice_complete, "<span class=\"date\">", "</span>");
            $data_notice["Title"] = $this->get_field_betwen_2sep($html_notice_complete, "<h1>", "</h1>");
            $data_notice["Abstract"] = $this->get_field_betwen_2sep($html_notice_complete, "<div class=\"texteencadre-spip spip\">", "</div>", 1);

            // Attached document, e.g. IMG/pdf/2012-035.pdf; its file name
            // (without extension) is used as the notice id.
            $zone_docnum = $this->get_field_from_sep($html_notice_complete, " spip_documents spip_lien_ok\">");
            $link_docnum = $this->get_field_betwen_2sep($zone_docnum, "<a href=\"", "\" ");
            if ($link_docnum) {
                $data_notice["doc_links"][0]["link"] = "http://travail-emploi.gouv.fr/" . $link_docnum;
                $data_notice["id"] = $this->get_field_betwen_2sep($link_docnum, "IMG/pdf/", ".pdf");
            }

            $this->rec_record($this->notice_2_uni($data_notice), $source_id, $search_id);

            // Hard cap on the number of notices recorded per search.
            if ($nb++ > 20) {
                break;
            }
        }
    }

    curl_close($ch);
}