/**
 * Downloads a page and returns the decoded, trimmed text of its <title> tag.
 *
 * @param array $redirect_data Array providing the "url" to fetch and the
 *                             "extra_headers" to send with the request.
 *
 * @return string|false The title text, or False when the URL cannot be split
 *                      into host/URI/port or when no non-empty title is found.
 */
function get_raw_title($redirect_data)
{
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($redirect_data["url"], $host, $uri, $port) == False) {
        term_echo("get_host_and_uri=false");
        return False;
    }

    # PHP source handed to wget(): true once the closing title tag has been
    # seen or at least 10000 bytes were received.
    # NOTE(review): presumably wget() evaluates this to stop reading early - confirm.
    $breakcode = "return ((strpos(strtolower(\$response),\"</title>\")!==False) or (strlen(\$response)>=10000));";

    # NOTE(review): 20 and 256 are positional wget() arguments whose meaning is
    # not visible here (timeout / chunk size?) - confirm against wget().
    $response = wget($host, $uri, $port, ICEWEASEL_UA, $redirect_data["extra_headers"], 20, $breakcode, 256);

    $title = extract_raw_tag(strip_headers($response), "title");
    if ($title !== False) {
        # decoded twice so that double-encoded entities are resolved as well
        $title = trim(html_decode(html_decode($title)));
    }
    if ($title === False or $title == "") {
        term_echo(" get_raw_title: title is empty");
        return False;
    }
    return $title;
}
/**
 * Splits a document on "<story" and builds one item per story element.
 *
 * Each returned item is an array with the keys "type" (always "xml_story"),
 * "title", "url" (after following redirects) and "timestamp" (time of
 * parsing). Stories without a title or url tag, and stories whose url cannot
 * be resolved, are left out.
 *
 * @param string $html Raw document text containing <story> elements.
 *
 * @return array List of item arrays, possibly empty.
 */
function parse_xml($html)
{
    $stories = explode("<story", $html);
    # everything before the first "<story" is not a story
    array_shift($stories);

    $items = array();
    foreach ($stories as $story) {
        $title = extract_raw_tag($story, "title");
        $raw_url = extract_raw_tag($story, "url");
        # Test for missing tags before any string function turns False into "",
        # and before spending a network round trip on the redirect lookup.
        if ($title === False or $raw_url === False) {
            continue;
        }

        # decoded twice so that double-encoded entities are resolved as well
        $title = html_decode(html_decode($title));
        $title = replace_ctrl_chars($title, " ");
        # control characters became spaces above; collapse the resulting runs
        $title = preg_replace("/ {2,}/", " ", $title);

        # the url is XML-escaped inside the feed
        $url = str_replace("&amp;", "&", strip_ctrl_chars($raw_url));
        term_echo("*** raw story url: " . $url);
        $url = get_redirected_url($url);
        if ($url === False) {
            continue;
        }

        $items[] = array(
            "type" => "xml_story",
            "title" => $title,
            "url" => $url,
            "timestamp" => time()
        );
    }
    return $items;
}
} $url = get_redirected_url($trailing); if ($url === False) { privmsg("error: unable to download source (get_redirected_url)"); return; } $host = ""; $uri = ""; $port = 80; if (get_host_and_uri($url, $host, $uri, $port) == False) { privmsg("error: unable to download source (get_host_and_uri)"); return; } $response = wget($host, $uri, $port); $source_html = strip_headers($response); $source_title = extract_raw_tag($source_html, "title"); term_echo($source_title); $i = strpos($source_title, "--"); if ($i !== False) { $source_title = trim(substr($source_title, 0, $i)); } $i = strpos($source_title, "|"); if ($i !== False) { $source_title = trim(substr($source_title, 0, $i)); } $i = strpos($source_title, " - "); if ($i !== False) { $source_title = trim(substr($source_title, 0, $i)); } $i = strpos($source_title, " : "); if ($i !== False) {
/**
 * Downloads the page at $url, derives a title and body text from it and
 * posts both as a story submission to dev.soylentnews.org.
 *
 * Progress and failures are reported through privmsg().
 *
 * @param string $url Address of the source article.
 *
 * @return bool True when the site's confirmation text is found in the reply
 *              to the submission, False on any failure (or when $url is empty).
 */
function sn_submit($url)
{
    if ($url == "") {
        return False;
    }
    $url = get_redirected_url($url);
    if ($url === False) {
        privmsg("error: unable to download source (get_redirected_url)");
        return False;
    }
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        privmsg("error: unable to download source (get_host_and_uri)");
        return False;
    }
    $response = wget($host, $uri, $port);
    # check the download itself; previously get_host_and_uri() was re-run here
    if ($response === False or $response == "") {
        privmsg("error: unable to download source (wget)");
        return False;
    }
    $source_html = strip_headers($response);

    # Title: keep only the part in front of the first site-name style delimiter.
    $source_title = extract_raw_tag($source_html, "title");
    if ($source_title !== False) {
        $delimiters = array("--", "|", " - ", " : ", " — ", " • ");
        foreach ($delimiters as $delimiter) {
            $cut = strpos($source_title, $delimiter);
            if ($cut !== False) {
                $source_title = trim(substr($source_title, 0, $cut));
            }
        }
    }
    if ($source_title === False or $source_title == "") {
        privmsg("error: title not found or empty");
        return False;
    }
    # decoded twice so that double-encoded entities are resolved as well
    $source_title = html_decode(html_decode($source_title));

    # A description meta tag must exist for the page to be accepted. Its text
    # is only used as a gate: the submitted body is built from the paragraphs.
    $description = extract_meta_content($source_html, "description");
    if ($description === False or $description == "") {
        $description = extract_meta_content($source_html, "og:description", "property");
        if ($description === False or $description == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }

    # Body: prefer the <article> element when the page has one.
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);

    $host_parts = explode(".", $host);
    $body_parts = array();
    foreach (explode("<p", $html) as $paragraph) {
        # drop the remainder of the opening tag, i.e. everything up to the first ">"
        $pieces = explode(">", $paragraph);
        if (count($pieces) >= 2) {
            array_shift($pieces);
            $paragraph = implode(">", $pieces);
        }
        $paragraph = clean_text(strip_tags($paragraph));

        # skip paragraphs that mention the source site's own name
        $lowered = strtolower($paragraph);
        foreach ($host_parts as $host_part) {
            if (strlen($host_part) > 3 and strpos($lowered, strtolower($host_part)) !== False) {
                continue 2;
            }
        }
        # skip paragraphs for which filter() finds any of the digits 0-9
        if (filter($paragraph, "0123456789") != "") {
            continue;
        }
        # keep only paragraphs longer than 100 bytes that end in a full stop
        $length = strlen($paragraph);
        if ($length <= 100 or $paragraph[$length - 1] != ".") {
            continue;
        }
        $body_parts[] = $paragraph;
    }
    $source_body = implode("\n\n", $body_parts);
    $source_body = html_decode(html_decode($source_body));

    # Fetch the submission form to obtain its hidden "reskey" value.
    $submit_host = "dev.soylentnews.org";
    $submit_port = 443;
    $submit_uri = "/submit.pl";
    $response = wget($submit_host, $submit_uri, $submit_port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }

    # NOTE(review): fixed pause between loading the form and posting it;
    # presumably the site rejects faster submissions - confirm.
    sleep(25);

    $params = array();
    $params["reskey"] = $reskey;
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($submit_host, $submit_uri, $submit_port, ICEWEASEL_UA, $params);

    # Reduce the reply to plain text and look for the site's confirmation.
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) {
        privmsg("submission successful - https://{$submit_host}/submit.pl?op=list");
        return True;
    }
    privmsg("error: something went wrong with your submission");
    return False;
}
/**
 * Looks up the weather for a location by scraping a Google search result
 * page and returns either a one-line summary or the current conditions.
 *
 * @param string $location Passed by reference. Input: the requested location
 *                         (may be empty when get_location() can resolve one
 *                         for $nick). Output: first the resolved location,
 *                         then - once a page was parsed - the location name
 *                         taken from the result's <h3> heading.
 * @param string $nick     Nick whose stored location and preferences
 *                         ("unit", "color") are applied.
 * @param bool   $getdata  When not False, unit preferences are ignored and an
 *                         array with the keys "tempF", "tempC", "cond",
 *                         "wind", "humidity" and "location" is returned
 *                         instead of the summary string.
 *
 * @return string|array|false Summary string prefixed with an IRC colour code,
 *                            the data array (see $getdata), or False when no
 *                            location is available or the page does not have
 *                            the expected structure.
 */
function process_weather(&$location, $nick, $getdata = False)
{
    $loc = get_location($location, $nick);
    term_echo("*** WEATHER LOCATION LOOKUP: {$loc}");
    if ($loc === False) {
        if ($location == "") {
            return False;
        }
        $loc = $location;
    }
    $location = $loc;
    $loc_query = filter($loc, VALID_UPPERCASE . VALID_LOWERCASE . VALID_NUMERIC . " ");

    # Unit preference: fheit=1 is requested by default; "metric" switches it to 0.
    $prefs = get_prefs($nick);
    $fheit = "1";
    $use_unit_pref = False;
    if (isset($prefs["unit"]) == True and $getdata == False) {
        if ($prefs["unit"] == "metric") {
            $use_unit_pref = True;
            $fheit = "0";
        }
        if ($prefs["unit"] == "imperial") {
            $use_unit_pref = True;
        }
    }

    # https://www.google.com/search?gbv=1&q=weather+traralgon
    $request_uri = "/search?gbv=1&fheit={$fheit}&q=weather+" . urlencode($loc_query);
    term_echo("http://www.google.com.au" . $request_uri);
    $response = wget("www.google.com.au", $request_uri, 80, ICEWEASEL_UA, "", 60);

    # Isolate the weather box of the result page.
    $html = extract_text(strip_headers($response), "<div class=\"e\">", "</table>");
    if ($html === False) {
        return False;
    }
    $html = replace_ctrl_chars($html, " ");
    # control characters became spaces above; collapse the resulting runs
    $html = preg_replace("/ {2,}/", " ", $html);
    # decoded twice so that double-encoded entities are resolved as well
    $html = html_decode(html_decode($html));

    $location = trim(strip_tags(extract_raw_tag($html, "h3")));
    if (substr($location, 0, 12) == "Weather for ") {
        $location = substr($location, 12);
    }
    $wind = trim(strip_tags(extract_text_nofalse($html, "style=\"white-space:nowrap;padding-right:15px;color:#666\">Wind: ", "</span>")));
    $humidity = extract_text($html, "style=\"white-space:nowrap;padding-right:0px;vertical-align:top;color:#666\">Humidity: ", "</td>");

    # Walk the table cells and collect conditions, temperatures and day labels
    # in document order. The text before the first "<td" is not a cell.
    $temps = array();
    $tempsC = array();
    $conds = array();
    $days = array();
    foreach (array_slice(explode("<td", $html), 1) as $cell) {
        $cond = extract_text($cell, "alt=\"", "\"");
        $temp = extract_text($cell, "<span class=\"wob_t\" style=\"display:inline\">", "</span>");
        $day = extract_text($cell, "colspan=\"2\" style=\"vertical-align:top;text-align:center\">", "</td>");
        if ($cond !== False) {
            $conds[] = strtolower($cond);
        }
        if ($temp !== False) {
            $temps[] = $temp;
            # floatval() reads the leading number and ignores the unit suffix,
            # whatever its byte length
            $tempsC[] = sprintf("%.0f", (floatval($temp) - 32) * 5 / 9) . "°C";
        }
        if ($day !== False) {
            $days[] = $day;
        }
    }

    # Without a wind entry one temperature fewer is expected and the forecast
    # indexes shift down by one.
    $offset = 0;
    $wind_caption = ", wind " . $wind;
    if ($wind == "") {
        $offset = 1;
        $wind_caption = "";
    }
    if (count($conds) != 5 or count($temps) != 10 - $offset or count($tempsC) != 10 - $offset or count($days) != 4) {
        return False;
    }

    if ($getdata != False) {
        $data = array();
        $data["tempF"] = $temps[0];
        $data["tempC"] = $tempsC[0];
        $data["cond"] = $conds[0];
        $data["wind"] = $wind_caption;
        $data["humidity"] = $humidity;
        $data["location"] = $location;
        return $data;
    }

    # Current conditions; both scales are shown unless a unit preference is set.
    $current = $temps[0];
    if ($use_unit_pref == False) {
        $current = $current . " / " . $tempsC[0];
    }
    $result = $location . " - currently " . $current . ", " . $conds[0] . $wind_caption . ", humidity " . $humidity . " - ";

    # Four forecast days, each shown as "(second value:first value)" of its pair.
    $fulldays = array("Sun." => "Sunday", "Mon." => "Monday", "Tue." => "Tuesday", "Wed." => "Wednesday", "Thu." => "Thursday", "Fri." => "Friday", "Sat." => "Saturday");
    $forecasts = array();
    for ($i = 1; $i <= 4; $i++) {
        $day = $days[$i - 1];
        # fall back to the scraped label when it is not a known abbreviation
        if (isset($fulldays[$day]) == True) {
            $day = $fulldays[$day];
        }
        $first = $i * 2 + 1 - $offset;
        $second = $i * 2 - $offset;
        $range = $temps[$first] . ":" . $temps[$second];
        if ($use_unit_pref == False) {
            $range = $range . " / " . $tempsC[$first] . ":" . $tempsC[$second];
        }
        $forecasts[] = $day . " " . $conds[$i] . " (" . $range . ")";
    }
    $result = $result . implode(", ", $forecasts);

    # chr(3) followed by a colour number is the IRC colour prefix
    $color = "10";
    if (isset($prefs["color"]) == True) {
        $color = $prefs["color"];
    }
    return chr(3) . $color . $result;
}