コード例 #1
0
ファイル: feeds_lib.php プロジェクト: cmn32480/exec-irc-bot
function parse_xml($html)
{
    # Builds a list of story items from a feed document that wraps each story
    # in a "<story" element.
    #
    # $html  raw feed text
    #
    # Returns an array of items; each item is an array with the keys
    # "type" (always "xml_story"), "title", "url" (after following redirects)
    # and "timestamp" (time the item was parsed). Stories without a title or
    # without a resolvable url are left out.
    $parts = explode("<story", $html);
    # everything before the first "<story" is not a story
    array_shift($parts);
    $items = array();
    foreach ($parts as $part) {
        $title = extract_raw_tag($part, "title");
        # Test for a missing title BEFORE cleaning it up: the cleanup helpers
        # below presumably hand back a string even for False input, which would
        # make a later "=== False" test unreachable (TODO confirm against the
        # helpers). Skipping here also avoids a pointless network round trip.
        if ($title === False) {
            continue;
        }
        # decoded twice on purpose, as in the original, to undo double-encoded entities
        $title = html_decode($title);
        $title = html_decode($title);
        $title = replace_ctrl_chars($title, " ");
        $title = str_replace("  ", " ", $title);
        $url = str_replace("&amp;", "&", strip_ctrl_chars(extract_raw_tag($part, "url")));
        term_echo("*** raw story url: " . $url);
        $resolved_url = get_redirected_url($url);
        if ($resolved_url === False) {
            continue;
        }
        $item = array();
        $item["type"] = "xml_story";
        $item["title"] = $title;
        $item["url"] = $resolved_url;
        $item["timestamp"] = time();
        $items[] = $item;
    }
    return $items;
}
コード例 #2
0
ファイル: submit.php プロジェクト: cmn32480/exec-irc-bot
# Script behind the "~submit <url>" bot command (per the usage text below).
# The visible part resolves the given url, downloads the page and logs its <title>.
# TODO: INCORPORATE HYPERLINK IN SUMMARY TEXT
# TODO: USE #rss-bot FOR MASS TEST INPUTS BUT DON'T ACTUALLY SUBMIT TO SITE
require_once "lib.php";
# Command-line arguments supplied by the caller; the names suggest message text,
# destination, sender nick and the alias that triggered the script (TODO confirm
# against the bot's exec configuration). No check is made that all four exist.
$trailing = $argv[1];
$dest = $argv[2];
$nick = $argv[3];
$alias = $argv[4];
# The advert alias only posts a hint about the command to #soylent and stops.
if ($alias == "~submit-advert") {
    pm("#soylent", "*** to try automagically submitting a story to SoylentNews: ~submit <url>");
    return;
}
# No argument given: show usage and stop.
if ($trailing == "") {
    privmsg("usage: ~submit <url>");
    return;
}
# Follow redirects first so the page is fetched from its final location.
$url = get_redirected_url($trailing);
if ($url === False) {
    privmsg("error: unable to download source (get_redirected_url)");
    return;
}
# Defaults handed to get_host_and_uri; presumably it overwrites them by
# reference with the parts of $url (TODO confirm in lib).
$host = "";
$uri = "";
$port = 80;
if (get_host_and_uri($url, $host, $uri, $port) == False) {
    privmsg("error: unable to download source (get_host_and_uri)");
    return;
}
# NOTE(review): the result of wget is not checked for failure before it is used.
$response = wget($host, $uri, $port);
$source_html = strip_headers($response);
$source_title = extract_raw_tag($source_html, "title");
# Log the extracted title to the terminal.
term_echo($source_title);
コード例 #3
0
ファイル: lib_http.php プロジェクト: cmn32480/exec-irc-bot
function get_redirected_url($from_url, $url_list = "", $last_loc = "", $cookies = "")
{
    # Follows HTTP "Location" redirects starting at $from_url and returns the
    # url the chain ends on.
    #
    # $from_url  url to start from (surrounding whitespace is trimmed)
    # $url_list  array of urls already visited in this chain, or "" on the
    #            first call; used to resolve host-less urls and to stop loops
    # $last_loc  raw Location header value that led to this call; a response
    #            that repeats it ends the chain
    # $cookies   pass an array (host => cookie name => value) to collect and
    #            resend cookies along the chain; a non-array disables this
    #
    # Returns False on failure. Otherwise returns the final url as a string
    # when $cookies is not an array, or an array with the keys "url",
    # "cookies" and "extra_headers" when it is.
    #
    # Only the Location header is followed; <meta http-equiv="refresh">
    # redirects are not.
    $url = trim($from_url);
    if ($url == "") {
        term_echo("get_redirected_url: empty url");
        return False;
    }
    $comp = parse_url($url);
    $host = "";
    if (isset($comp["host"]) == False) {
        # host-less url: borrow scheme and host from the url visited last
        if (is_array($url_list) == True and count($url_list) > 0) {
            $prev_url = $url_list[count($url_list) - 1];
            $prev_host = parse_url($prev_url, PHP_URL_HOST);
            $prev_scheme = parse_url($prev_url, PHP_URL_SCHEME);
            if ($prev_host != "") {
                if ($prev_scheme == "") {
                    $prev_scheme = "http";
                }
                # keep host and path apart for references such as "page.html"
                if ($url[0] != "/") {
                    $url = "/" . $url;
                }
                $url = $prev_scheme . "://" . $prev_host . $url;
                # re-parse so that uri and port below describe the rebuilt url
                # (otherwise an https base would be fetched on port 80)
                $comp = parse_url($url);
                $host = $prev_host;
            }
        }
    } else {
        $host = $comp["host"];
    }
    if ($host == "") {
        term_echo("get_redirected_url: redirect without host: " . $url);
        return False;
    }
    $uri = "/";
    if (isset($comp["path"]) == True) {
        $uri = $comp["path"];
    }
    if (isset($comp["query"]) == True and $comp["query"] != "") {
        $uri = $uri . "?" . $comp["query"];
    }
    if (isset($comp["fragment"]) == True and $comp["fragment"] != "") {
        $uri = $uri . "#" . $comp["fragment"];
    }
    $scheme = "http";
    if (isset($comp["scheme"]) == True and $comp["scheme"] != "") {
        $scheme = strtolower($comp["scheme"]);
    }
    # only the scheme selects the port; an explicit port in the url is not used
    $port = 80;
    if ($scheme == "https") {
        $port = 443;
    }
    if ($host == "" or $uri == "") {
        term_echo("get_redirected_url: empty host or uri");
        return False;
    }
    # resend cookies collected earlier for this host
    $extra_headers = "";
    if (isset($cookies[$host]) == True) {
        $cookie_strings = array();
        foreach ($cookies[$host] as $key => $value) {
            $cookie_strings[] = $key . "=" . $value;
        }
        $extra_headers = array();
        $extra_headers["Cookie"] = implode("; ", $cookie_strings);
    }
    # code string handed to wget: stop reading after 10000 bytes or once the
    # response ends with "</head>"
    $breakcode = "return ((strlen(\$response)>10000) or (substr(\$response,strlen(\$response)-7)==\"</head>\"));";
    $response = wget($host, $uri, $port, ICEWEASEL_UA, $extra_headers, 10, $breakcode);
    if (is_array($cookies) == True) {
        # remember cookies set by this response, keyed by host then cookie name
        $new_cookies = exec_get_cookies($response);
        for ($i = 0; $i < count($new_cookies); $i++) {
            # the first "; "-separated part is "name=value"; the rest are attributes
            $parts = explode("; ", $new_cookies[$i]);
            $keyval = explode("=", $parts[0]);
            if (count($keyval) >= 2) {
                $key = array_shift($keyval);
                # the value itself may contain "="
                $cookies[$host][$key] = implode("=", $keyval);
            }
        }
    }
    # value returned whenever the chain stops at the current url
    if (is_array($cookies) == True) {
        $final = array("url" => $url, "cookies" => $cookies, "extra_headers" => $extra_headers);
    } else {
        $final = $url;
    }
    $loc_header = trim(exec_get_header($response, "location", False));
    $location = $loc_header;
    if ($location == "" or $location == $last_loc) {
        # no redirect, or the same redirect again: this url is the destination
        return $final;
    }
    # Resolve scheme-relative ("//host/path") and root-relative ("/path")
    # targets against the current scheme and host, not against the whole
    # current url (appending to the full url would keep the old path).
    if (substr($location, 0, 2) == "//") {
        $location = $scheme . ":" . $location;
    } elseif ($location[0] == "/") {
        $authority = $host;
        if (isset($comp["port"]) == True) {
            $authority = $authority . ":" . $comp["port"];
        }
        $location = $scheme . "://" . $authority . $location;
    }
    if (is_array($url_list) == False) {
        # first hop: start the visited list with the current url
        return get_redirected_url($location, array($url), $loc_header, $cookies);
    }
    # loop guard: count how often the CURRENT url was already visited
    $visits = 0;
    foreach ($url_list as $seen_url) {
        if ($seen_url === $url) {
            $visits++;
        }
    }
    if ($visits > 1) {
        term_echo("get_redirected_url: redirected url already been visited twice");
        return False;
    }
    $list = $url_list;
    $list[] = $url;
    if (count($list) < 10) {
        return get_redirected_url($location, $list, $loc_header, $cookies);
    }
    # chain too long: give up following and report the current url
    return $final;
}
コード例 #4
0
ファイル: sn_lib.php プロジェクト: cmn32480/exec-irc-bot
function sn_submit($url)
{
    # Downloads the page at $url, derives a title and body text from it and
    # posts both as a story submission to dev.soylentnews.org.
    #
    # $url  address of the article to submit
    #
    # Returns True when the site's confirmation text is found in the reply,
    # False otherwise. Apart from an empty $url, each failure is reported
    # through privmsg before returning.
    if ($url == "") {
        return False;
    }
    $url = get_redirected_url($url);
    if ($url === False) {
        privmsg("error: unable to download source (get_redirected_url)");
        return False;
    }
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        privmsg("error: unable to download source (get_host_and_uri)");
        return False;
    }
    $response = wget($host, $uri, $port);
    # Check the download itself; repeating the get_host_and_uri test here could
    # never report a wget failure. Assumes wget signals failure with False or
    # an empty string (TODO confirm); other values fall through as before.
    if ($response === False or $response == "") {
        privmsg("error: unable to download source (wget)");
        return False;
    }
    $source_html = strip_headers($response);

    # --- title: <title> content, cut at the first site-name style delimiter ---
    $source_title = extract_raw_tag($source_html, "title");
    if ($source_title === False) {
        privmsg("error: title not found or empty");
        return False;
    }
    $delimiters = array("--", "|", " - ", " : ", " — ", " • ");
    foreach ($delimiters as $delimiter) {
        $cut = strpos($source_title, $delimiter);
        if ($cut !== False) {
            $source_title = trim(substr($source_title, 0, $cut));
        }
    }
    if ($source_title == "") {
        privmsg("error: title not found or empty");
        return False;
    }
    # decoded twice on purpose, as in the original, to undo double-encoded entities
    $source_title = html_decode($source_title);
    $source_title = html_decode($source_title);

    # --- description: required to be present, but only used as a gate ---
    # NOTE(review): the description text is never submitted; the body is built
    # from the page's paragraphs below. Confirm whether it was meant as a
    # fallback when no paragraph qualifies.
    $description = extract_meta_content($source_html, "description");
    if ($description === False or $description == "") {
        $description = extract_meta_content($source_html, "og:description", "property");
        if ($description === False or $description == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }

    # --- body: qualifying <p> paragraphs, preferably from inside <article> ---
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);
    $paragraphs = explode("<p", $html);
    # the host does not change inside the loop, so split it once
    $host_parts = explode(".", $host);
    $source_body = array();
    foreach ($paragraphs as $paragraph) {
        # drop the remainder of the opening <p ...> tag
        $parts = explode(">", $paragraph);
        if (count($parts) >= 2) {
            array_shift($parts);
            $paragraph = implode(">", $parts);
        }
        $paragraph = strip_tags($paragraph);
        $paragraph = clean_text($paragraph);
        # skip paragraphs naming the source site (any host label longer than 3 chars)
        $mentions_host = False;
        foreach ($host_parts as $host_part) {
            if (strlen($host_part) > 3 and strpos(strtolower($paragraph), strtolower($host_part)) !== False) {
                $mentions_host = True;
                break;
            }
        }
        if ($mentions_host == True) {
            continue;
        }
        # skip paragraphs for which filter() finds any of the digit characters
        if (filter($paragraph, "0123456789") != "") {
            continue;
        }
        # keep only text ending in a full stop; the original's follow-up loop
        # that trimmed back to the last "." could never remove anything after
        # this check, so it is omitted
        if (strlen($paragraph) > 1 and $paragraph[strlen($paragraph) - 1] != ".") {
            continue;
        }
        if (strlen($paragraph) > 100) {
            $source_body[] = $paragraph;
        }
    }
    $source_body = implode("\n\n", $source_body);
    $source_body = html_decode($source_body);
    $source_body = html_decode($source_body);

    # --- submission: fetch the form for its reskey, then post the story ---
    $host = "dev.soylentnews.org";
    $port = 443;
    $uri = "/submit.pl";
    $response = wget($host, $uri, $port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }
    # wait between loading the form and posting it; presumably the site
    # rejects a reskey that is used too quickly (TODO confirm)
    sleep(25);
    $params = array();
    $params["reskey"] = $reskey;
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params);
    # reduce the reply to plain text and look for the site's confirmation sentence
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) {
        privmsg("submission successful - https://{$host}/submit.pl?op=list");
        return True;
    }
    privmsg("error: something went wrong with your submission");
    return False;
}
コード例 #5
0
ファイル: title_lib.php プロジェクト: cmn32480/exec-irc-bot
function title_privmsg($trailing, $channel, $show_rd)
{
    # Looks up the page title of up to four links found in $trailing and posts
    # them to $channel as a small tree, one line per title.
    #
    # $trailing  message text to scan for links
    # $channel   channel the titles are sent to
    # $show_rd   when true, append the redirected url if it differs from the link
    #
    # Links are gathered scheme by scheme: every "http://" link first, then
    # every "https://" link. A link ends at the first space.
    $links = array();
    foreach (array("http://", "https://") as $scheme) {
        $segments = explode($scheme, $trailing);
        # text in front of the first occurrence is not a link
        array_shift($segments);
        foreach ($segments as $segment) {
            $tokens = explode(" ", $segment);
            $links[] = $scheme . $tokens[0];
        }
    }
    $titles = array();
    foreach (array_slice($links, 0, 4) as $link) {
        $redirect_data = get_redirected_url($link, "", "", array());
        if ($redirect_data === False) {
            continue;
        }
        $final_url = $redirect_data["url"];
        # INCORPORATED THE FOLLOWING CONDITION TO ACCOMMODATE ohmibod YOUTUBE TITLES
        if ($channel == "##anime-japanese" and strpos($final_url, "youtube") !== False) {
            continue;
        }
        $raw = get_raw_title($redirect_data);
        if ($raw === False) {
            term_echo("title: get_raw_title returned false");
            continue;
        }
        $translated = translate("auto", "en", $raw);
        $line = chr(3) . "13" . $raw . chr(3);
        # show the translation only when it is non-empty and differs from the title
        if ($translated != $raw and $translated != "") {
            $line .= " [" . chr(3) . "04" . $translated . chr(3) . "]";
        }
        if ($show_rd == True and $final_url != $link) {
            $line .= " - " . chr(3) . "03" . $final_url;
        }
        $titles[] = $line;
    }
    if (count($titles) == 0) {
        term_echo("title: no titles to output");
    }
    $last_index = count($titles) - 1;
    foreach ($titles as $index => $line) {
        # the last line closes the tree, all others branch
        $prefix = "├─ ";
        if ($index == $last_index) {
            $prefix = "└─ ";
        }
        pm($channel, $prefix . $line);
    }
}
コード例 #6
0
ファイル: title.php プロジェクト: cmn32480/exec-irc-bot
     }
 } elseif (strtolower($trailing) == "off") {
     if ($bucket == "") {
         privmsg("  titles already disabled for " . chr(3) . "10{$dest}");
     } else {
         unset_bucket("<exec_title_{$dest}>");
         privmsg("  titles disabled for " . chr(3) . "10{$dest}");
     }
 } elseif (strtolower($trailing) == "url on") {
     set_bucket("<exec_title_url_{$dest}>", "on");
     privmsg("  enabled redirected url output for titles in " . chr(3) . "10{$dest}");
 } elseif (strtolower($trailing) == "url off") {
     unset_bucket("<exec_title_url_{$dest}>");
     privmsg("  disabled redirected url output for titles in " . chr(3) . "10{$dest}");
 } else {
     $redirect_data = get_redirected_url($trailing, "", "", array());
     if ($redirect_data === False) {
         term_echo("  title: get_redirected_url=false");
         return;
     }
     $rd_url = $redirect_data["url"];
     $raw = get_raw_title($redirect_data);
     if ($raw !== False) {
         $def = translate("auto", "en", $raw);
         $msg = chr(3) . "13" . $raw . chr(3);
         if ($def != $raw) {
             $msg = $msg . " [" . chr(3) . "04" . $def . chr(3) . "]";
         }
         if ($rd_url != $trailing) {
             $msg = $msg . " - " . chr(3) . "03" . $rd_url;
         }