/**
 * Translate a message by scraping the Google Translate web page.
 *
 * The page is fetched over SSL, most markup is stripped, and the text found
 * between "TRANSLATED_TEXT='" and "';" in the remainder is returned.
 *
 * @param string $lang_from source language code (sent as the "sl" parameter)
 * @param string $lang_to   target language code (sent as the "tl" parameter)
 * @param string $msg       text to translate
 * @return string the translated text, or "" when the page could not be
 *                fetched or the expected delimiters were not present
 */
function translate($lang_from, $lang_to, $msg)
{
    $uri = "/?sl=" . urlencode($lang_from) . "&tl=" . urlencode($lang_to) . "&js=n&ie=UTF-8&text=" . urlencode($msg);
    $html = wget_ssl("translate.google.com", $uri);
    $html = strip_headers($html);
    if ($html === False) {
        return "";
    }
    # strip_all_tag() is used for its effect on $html (its return value is not used)
    strip_all_tag($html, "head");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html, "<div>");
    $delim1 = "TRANSLATED_TEXT='";
    $delim2 = "';";
    # The strpos() result has to be tested BEFORE the delimiter length is added:
    # False + int is an int, so a combined expression can never compare === False
    # and a missing delimiter would silently cut the page at offset strlen($delim1).
    $start = strpos($html, $delim1);
    if ($start === False) {
        return "";
    }
    $html = substr($html, $start + strlen($delim1));
    $end = strpos($html, $delim2);
    if ($end === False) {
        return "";
    }
    $result = trim(substr($html, 0, $end));
    # the page escapes ampersands as the literal four characters \x26
    $result = str_replace("\\x26", "&", $result);
    # decoded twice, presumably to unwrap double-encoded entities
    $result = html_decode($result);
    $result = html_decode($result);
    return $result;
}
/**
 * Run a Google web search and return the cited result URLs.
 *
 * The result page is fetched over SSL, <head>/<script>/<style> are removed and
 * the contents of every <cite class="_Rm"> element are collected with any
 * nested tags stripped.
 *
 * @param string $query search terms
 * @return array|bool list of cite texts, or False when the page contains none
 */
function google_search($query)
{
    $raw = wget_ssl("www.google.com.au", "/search?source=hp&q=" . urlencode($query));
    $html = strip_headers($raw);
    foreach (array("head", "script", "style") as $tag) {
        strip_all_tag($html, $tag);
    }
    # everything before the first opening <cite> marker is page chrome
    $chunks = explode("<cite class=\"_Rm\">", $html);
    array_shift($chunks);
    if (count($chunks) == 0) {
        return False;
    }
    $cites = array();
    foreach ($chunks as $chunk) {
        $pieces = explode("</cite>", $chunk);
        $cites[] = strip_tags($pieces[0]);
    }
    return $cites;
}
/**
 * Search YouTube and return one summary line per video result.
 *
 * The query is posted to /results on www.youtube.com (port 443) using the
 * ICEWEASEL_UA user agent. Each video tile inside the "item-section" list is
 * reduced to "<url> - <title> - <duration>".
 *
 * @param string $query search terms
 * @return array|bool list of summary strings, or False when no video tiles
 *                    were found in the response
 */
function youtube_search($query)
{
    $host = "www.youtube.com";
    $uri = "/results";
    $port = 443;
    $params = array("search_query" => $query);
    $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params);
    $html = strip_headers($response);
    foreach (array("head", "script", "style") as $tag) {
        strip_all_tag($html, $tag);
    }
    $html = extract_text_nofalse($html, "class=\"item-section\">", "</ol>");
    $tiles = explode("<li><div class=\"yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile\"", $html);
    array_shift($tiles);
    if (count($tiles) == 0) {
        return False;
    }
    $lines = array();
    foreach ($tiles as $tile) {
        # drop the remainder of the opening <div ...> tag (everything up to the first ">")
        $split = explode(">", $tile, 2);
        $inner = isset($split[1]) ? $split[1] : "";
        $heading = extract_text_nofalse($inner, "<h3 class=\"yt-lockup-title \">", "</h3>");
        $link = "https://www.youtube.com" . extract_text_nofalse($heading, "<a href=\"", "\" ");
        $caption = extract_text_nofalse($heading, "dir=\"ltr\">", "</a>");
        $caption = html_decode(html_decode($caption));
        $duration = extract_text_nofalse($heading, "> - Duration: ", ".</span>");
        $lines[] = $link . " - " . $caption . " - " . $duration;
    }
    return $lines;
}
/**
 * Fetch the plain text of a wiki page, or of a single section of it, through
 * the MediaWiki parse API on WIKI_HOST.
 *
 * When $section is given, the page's section list is queried first and the
 * title is matched case-insensitively to find the section index; the returned
 * HTML must then contain the matching "mw-headline" span. The HTML is reduced
 * to plain text (comments, h2/h3 headings and tags removed, entities decoded).
 *
 * @param string $title   page title (spaces are converted to underscores)
 * @param string $section section heading, or "" for the whole page
 * @param mixed  $return  passed unchanged as the first argument of every
 *                        wiki_privmsg() call (its meaning is defined there)
 * @param bool   $return_lines_array False: send the text (cut to 400 bytes)
 *                        and the page URL via wiki_privmsg() and return the
 *                        text; True: return an array of lines (each cut to 300
 *                        bytes) with the page URL appended, sending nothing
 * @return string|array|bool see $return_lines_array; False on any failure
 */
function get_text($title, $section, $return = False, $return_lines_array = False)
{
    if ($title == "") {
        wiki_privmsg($return, "wiki: get_text=invalid title");
        return False;
    }
    $index = -1;
    $title = str_replace(" ", "_", $title);
    if ($section != "") {
        $uri = "/w/api.php?action=parse&format=php&page=" . urlencode($title) . "&prop=sections";
        $response = wget(WIKI_HOST, $uri, 80, WIKI_USER_AGENT);
        # NOTE(review): unserialize() on data received over plain HTTP is unsafe if
        # the response can be tampered with; format=json + json_decode() would be
        # the safer choice. Left as is to keep the data shape unchanged.
        $data = unserialize(strip_headers($response));
        if (isset($data["parse"]["sections"]) == False) {
            wiki_privmsg($return, "wiki: get_text=error getting sections for page \"" . $title . "\"");
            return False;
        }
        $wanted = strtolower($section);
        foreach ($data["parse"]["sections"] as $entry) {
            # strict comparison: loose == would treat numeric-looking headings
            # such as "1.0" and "1" as equal
            if ($wanted === strtolower($entry["line"])) {
                $index = $entry["index"];
                break;
            }
        }
    }
    $uri = "/w/api.php?action=parse&format=php&page=" . urlencode($title) . "&prop=text";
    if ($index > 0) {
        # the API parameter is "section"; the previous text here was a mangled
        # "&sect" entity, so the section was never actually requested
        $uri = $uri . "&section={$index}";
    }
    $response = wget(WIKI_HOST, $uri, 80, WIKI_USER_AGENT);
    # NOTE(review): same unserialize() caveat as above
    $data = unserialize(strip_headers($response));
    if (isset($data["parse"]["text"]["*"]) == False) {
        wiki_privmsg($return, "wiki: get_text=section not found");
        return False;
    }
    $text = $data["parse"]["text"]["*"];
    if ($section != "") {
        # rebuild the anchor id used for the heading span
        $id = str_replace(" ", "_", $section);
        $id = str_replace("~", ".7E", $id);
        $id = str_replace("(", ".28", $id);
        $id = str_replace(")", ".29", $id);
        $head = "<span class=\"mw-headline\" id=\"{$id}\">{$section}</span>";
        if (strpos($text, $head) === False) {
            wiki_privmsg($return, "wiki: get_text=section span not found");
            return False;
        }
    }
    strip_comments($text);
    strip_all_tag($text, "h2");
    strip_all_tag($text, "h3");
    $text = strip_tags($text);
    $text = trim($text, " \t\n\r\v\"");
    # newlines are swapped for a random placeholder so that line boundaries
    # survive replace_ctrl_chars()/clean_text() and can be restored afterwards
    $br = random_string(30);
    $text = str_replace("\n", $br, $text);
    $text = replace_ctrl_chars($text, " ");
    $text = html_decode($text);
    $text = clean_text($text);
    $url = "http://wiki.soylentnews.org/wiki/" . urlencode($title);
    if ($section != "") {
        $url = $url . "#{$id}";
    }
    if ($return_lines_array == False) {
        $text = str_replace($br, " ", $text);
        $text = clean_text($text);
        if (strlen($text) > 400) {
            $text = trim(substr($text, 0, 400)) . "...";
        }
        bot_ignore_next();
        wiki_privmsg($return, $text);
        wiki_privmsg($return, $url);
        return $text;
    }
    $result = explode($br, $text);
    for ($i = 0; $i < count($result); $i++) {
        $result[$i] = trim($result[$i]);
        if (strlen($result[$i]) > 300) {
            $result[$i] = trim(substr($result[$i], 0, 300)) . "...";
        }
    }
    delete_empty_elements($result);
    $result[] = $url;
    return $result;
}
return; } sleep(25); $params = array(); $params["reskey"] = $reskey; #$params["name"]=trim(substr($nick,0,50)); $params["name"] = get_bot_nick(); $params["email"] = ""; $params["subj"] = trim(substr($source_title, 0, 100)); $params["primaryskid"] = "1"; $params["tid"] = "6"; $params["sub_type"] = "plain"; $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC"; $params["op"] = "SubmitStory"; $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params); $html = strip_headers($response); strip_all_tag($html, "head"); strip_all_tag($html, "script"); strip_all_tag($html, "style"); strip_all_tag($html, "a"); $html = strip_tags($html); $html = clean_text($html); var_dump($html); # TODO: extract success/error message and output to IRC if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) { privmsg("submission successful - https://{$host}/submit.pl?op=list"); } else { privmsg("error: something went wrong with your submission"); } # TODO: testing... much more testing #####################################################################################################
/**
 * Look up a term on one definition source and send the result to IRC.
 *
 * The request URI is built by substituting the url-encoded term for
 * $params["template"] inside $params["uri"]. The definition is the text
 * between $params["delim_start"] and $params["delim_end"] in the response,
 * with tags stripped and cut to MAX_DEF_LENGTH. If no definition is found but
 * the response carries a "location" header, the term is re-read from that
 * header (using $params["get_param"]) and the lookup is retried.
 *
 * @param string $host      host name of the definition source
 * @param string $term      term to define
 * @param array  $params    source settings; keys read here: "space_delim",
 *                          "template", "uri", "port", "delim_start",
 *                          "delim_end", "get_param", "ignore", "name"
 * @param int    $redirects number of redirects already followed; callers
 *                          normally omit it (used to stop redirect loops)
 * @return bool True when a definition was sent with privmsg(), False otherwise
 */
function source_define($host, $term, $params, $redirects = 0)
{
    global $debug;
    $max_redirects = 5;
    $sterm = $term;
    if ($params["space_delim"] != "") {
        $sterm = str_replace(" ", $params["space_delim"], $sterm);
    }
    $uri = str_replace($params["template"], urlencode($sterm), $params["uri"]);
    term_echo("*** DEFINE: trying {$host}{$uri} on port " . $params["port"]);
    $response = wget($host, $uri, $params["port"], ICEWEASEL_UA, "", 20);
    $html = strip_headers($response);
    $html = replace_ctrl_chars($html, " ");
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    if ($debug == "ON") {
        privmsg("debug [{$host}]: uri = \"{$uri}\"");
        privmsg("debug [{$host}]: html length = \"" . strlen($html) . "\"");
        privmsg("debug [{$host}]: delim_start = \"" . $params["delim_start"] . "\"");
        # opening quote added so this line matches the delim_start line above
        privmsg("debug [{$host}]: delim_end = \"" . $params["delim_end"] . "\"");
    }
    $def = "";
    $i = strpos($html, $params["delim_start"]);
    if ($i !== False) {
        if ($debug == "ON") {
            privmsg("debug [{$host}]: delim_start pos = \"{$i}\"");
        }
        $html = substr($html, $i + strlen($params["delim_start"]));
        $i = strpos($html, $params["delim_end"]);
        if ($i !== False) {
            if ($debug == "ON") {
                privmsg("debug [{$host}]: delim_end pos = \"{$i}\"");
            }
            $def = trim(strip_tags(substr($html, 0, $i)));
            $def = str_replace(array("\n", "\r"), " ", $def);
            # NOTE(review): this was a single-space -> single-space replace (a
            # no-op); collapsing runs of spaces is the presumed intent
            $def = preg_replace('/ {2,}/', ' ', $def);
            if (strlen($def) > MAX_DEF_LENGTH) {
                $def = trim(substr($def, 0, MAX_DEF_LENGTH)) . "...";
            }
        }
    }
    if ($def == "") {
        # no definition in the body: follow a redirect to a different term, if any
        $location = exec_get_header($response, "location");
        if ($location == "") {
            return False;
        }
        $new_term = extract_text($location, $params["get_param"], "&", True);
        # extract_text() can yield False; never recurse on a missing/empty term
        if ($new_term === False or $new_term == "" or $new_term == $term) {
            return False;
        }
        # guard against redirect cycles (A -> B -> A), which would otherwise
        # recurse without end
        if ($redirects >= $max_redirects) {
            term_echo("*** DEFINE: too many redirects for {$host}, giving up");
            return False;
        }
        term_echo("redirecting to \"{$location}\"");
        if ($debug == "ON") {
            privmsg("debug [{$host}]: redirecting to \"{$location}\"");
        }
        return source_define($host, $new_term, $params, $redirects + 1);
    }
    if ($params["ignore"] != "" and strpos($def, $params["ignore"]) !== False) {
        return False;
    }
    if (strpos($def, "There aren't any definitions") !== False) {
        return False;
    }
    # chr(3) . "03" is sent around the term (IRC colour control code)
    privmsg("[" . $params["name"] . "] " . chr(3) . "03{$term}" . chr(3) . ": " . html_decode($def));
    return True;
}
/**
 * Submit the article at $url as a story to dev.soylentnews.org.
 *
 * Steps: resolve redirects, download the page, derive a subject from the
 * <title> (cut at common site-name delimiters), require a description meta
 * tag, build the story body from the page's <p> paragraphs, fetch the
 * submission form to obtain its "reskey", then post the story. Progress and
 * errors are reported with privmsg().
 *
 * @param string $url address of the article to submit
 * @return bool True when the site's confirmation text was found in the
 *              response to the post, False on any failure
 */
function sn_submit($url)
{
    if ($url == "") {
        return False;
    }
    $url = get_redirected_url($url);
    if ($url === False) {
        privmsg("error: unable to download source (get_redirected_url)");
        return False;
    }
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        privmsg("error: unable to download source (get_host_and_uri)");
        return False;
    }
    $response = wget($host, $uri, $port);
    $source_html = strip_headers($response);
    # This check used to repeat the get_host_and_uri() call above, so a failed
    # download was never noticed. NOTE(review): assumes a failed fetch shows up
    # as False from wget() or strip_headers() - confirm against those helpers.
    if ($response === False or $source_html === False) {
        privmsg("error: unable to download source (wget)");
        return False;
    }

    # --- subject: page title, cut at the first site-name style delimiter ---
    $source_title = extract_raw_tag($source_html, "title");
    if ($source_title === False) {
        privmsg("error: title not found or empty");
        return False;
    }
    $delimiters = array("--", "|", " - ", " : ", " — ", " • ");
    foreach ($delimiters as $delimiter) {
        $j = strpos($source_title, $delimiter);
        if ($j !== False) {
            $source_title = trim(substr($source_title, 0, $j));
        }
    }
    if ($source_title == "") {
        privmsg("error: title not found or empty");
        return False;
    }
    $source_title = html_decode($source_title);
    $source_title = html_decode($source_title);

    # --- description meta tag is required (also used as fallback body below) ---
    $description = extract_meta_content($source_html, "description");
    if ($description === False or $description == "") {
        $description = extract_meta_content($source_html, "og:description", "property");
        if ($description === False or $description == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }

    # --- body: qualifying <p> paragraphs of the <article> (or whole page) ---
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    # <a> elements are deliberately not stripped here (that call was disabled)
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);
    $paragraphs = explode("<p", $html);
    $host_parts = explode(".", $host);
    $body = array();
    foreach ($paragraphs as $paragraph) {
        # drop the rest of the opening tag (up to the first ">")
        $parts = explode(">", $paragraph);
        if (count($parts) >= 2) {
            array_shift($parts);
            $paragraph = implode(">", $parts);
        }
        $paragraph = clean_text(strip_tags($paragraph));
        # skip paragraphs that mention a part of the source host name (> 3 chars)
        $lower = strtolower($paragraph);
        foreach ($host_parts as $host_part) {
            if (strlen($host_part) > 3 and strpos($lower, strtolower($host_part)) !== False) {
                continue 2;
            }
        }
        # skip paragraphs for which filter() yields anything for the digit set
        # (presumably: paragraphs containing digits - verify against filter())
        if (filter($paragraph, "0123456789") != "") {
            continue;
        }
        # keep only paragraphs longer than 100 bytes that end with a full stop
        if (strlen($paragraph) <= 100 or substr($paragraph, -1) != ".") {
            continue;
        }
        $body[] = $paragraph;
    }
    $source_body = implode("\n\n", $body);
    $source_body = html_decode($source_body);
    $source_body = html_decode($source_body);
    if (trim($source_body) == "") {
        # no paragraph qualified: use the (already validated) meta description
        # rather than submitting a story with an empty body
        $source_body = html_decode(html_decode($description));
    }

    # --- fetch the submission form to obtain its reskey ---
    $submit_host = "dev.soylentnews.org";
    $submit_port = 443;
    $submit_uri = "/submit.pl";
    $response = wget($submit_host, $submit_uri, $submit_port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }
    # NOTE(review): presumably the site rejects posts made too soon after the
    # form key is issued - confirm before shortening this delay
    sleep(25);

    # --- post the story ---
    $params = array();
    $params["reskey"] = $reskey;
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($submit_host, $submit_uri, $submit_port, ICEWEASEL_UA, $params);
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    # the marker must be one line with single spaces; a line break inside the
    # literal could never match the cleaned page text
    $success_marker = "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.";
    if (strpos($html, $success_marker) !== False) {
        privmsg("submission successful - https://{$submit_host}/submit.pl?op=list");
        return True;
    }
    privmsg("error: something went wrong with your submission");
    return False;
}