Esempio n. 1
0
/**
 * Fetch a URL and return the text found between two delimiters.
 *
 * $trailing has the form "<url> <delim1> <> <delim2>": the first
 * space-separated token is the URL, the remainder is split on "<>" into the
 * start and end delimiters.
 *
 * @param string $trailing Raw command arguments.
 * @return string|false The extracted text with tags stripped, HTML entities
 *                      decoded twice and whitespace trimmed, or False when the
 *                      arguments are malformed, the URL cannot be parsed, the
 *                      delimiters are not found, or the result is empty.
 */
function quick_wget($trailing)
{
    $tokens = explode(" ", $trailing);
    delete_empty_elements($tokens);
    if (count($tokens) < 2) {
        return False;
    }

    # First token is the URL; everything after it carries the two delimiters.
    $url = $tokens[0];
    array_shift($tokens);
    $delims = explode("<>", implode(" ", $tokens));
    delete_empty_elements($delims);
    if (count($delims) < 2) {
        return False;
    }
    $start_delim = trim($delims[0]);
    $end_delim = trim($delims[1]);

    # get_host_and_uri() fills these in (presumably by reference - confirm).
    $host = "";
    $uri = "";
    $port = "";
    if (!get_host_and_uri($url, $host, $uri, $port)) {
        return False;
    }

    $response = wget_ssl($host, $uri, $port);
    $extracted = extract_text($response, $start_delim, $end_delim);
    if ($extracted === False) {
        return False;
    }

    # Entities are decoded twice, as in the rest of this code base.
    $text = trim(html_decode(html_decode(strip_tags($extracted))));
    if ($text === "") {
        return False;
    }
    return $text;
}
Esempio n. 2
0
		# Fragment from the body of an upload loop; the enclosing block opens
		# before this excerpt and is closed by the final brace below.
		# When a filename field is configured, store the uploaded file's name in it.
		if (isset($filename_field))
			{
			$filename = $uploadfiles[$n];
			if ($use_local)
				{
				# For local uploads the value is passed through mb_basename() first.
				$filename = mb_basename($filename);
				}
			update_field($ref,$filename_field, $filename);
			}

		# Get file metadata, unless the "no_exif" request value is set.
		if (getval("no_exif","")=="") {extract_exif_comment($ref,$extension);}
		
		# Extract text from documents (e.g. PDF, DOC).
		# NOTE(review): this tests $no_exif while the line above tests getval("no_exif");
		# $no_exif is not assigned in this excerpt - confirm it is set elsewhere.
		global $extracted_text_field;
		if (isset($extracted_text_field) && !$no_exif) {extract_text($ref,$extension);}

		# One more file processed.
		$done++;

		# Add to collection? The resource ref is remembered in $refs when a collection was given.
		if ($collection!="")
			{
			$refs[] = $ref;
			}

		# Log this upload in the daily statistics and the resource log.
		daily_stat("Resource upload",$ref);
		resource_log($ref,'u',0);

		}
Esempio n. 3
0
     // Fragment: success branch of an "if" that opens before this excerpt.
     //Save the data
     save_resource_data($ref,false);
     //Update creation times, extension and title
     sql_query("insert into resource_data (resource, resource_type_field, value) values ($ref,12,now()),($ref,148,now())");
     // NOTE(review): $file_extension and $ref are concatenated into the SQL without
     // escape_check(); only $filename is escaped - confirm they are sanitised upstream.
     sql_query("update resource set file_extension = '" . $file_extension . "', field12 = now(), title ='".escape_check($filename)."'WHERE ref = '" . $ref . "'");
     // Build the JSON payload reported back to the client.
     $data['success']=true;
     $data['status']="success";
     $data['error']=false;
     $data['textStatus']= $file . " - Successfully Added";
     $data['ref']=$ref;
 }else{
     // Failure branch: only an error flag and message are reported.
     $data['error']=true;
     $data['textStatus']="could not move file to tmp";
 }
 // NOTE(review): everything below runs on the failure path as well, so $ref is
 // used even when the branch above reported an error - confirm this is intended.
 // Resource type 2 additionally gets text extraction.
 if($resource_type==2){
     extract_text($ref,$file_extension);
 }
 if($resource_type != 2 && $resource_type != 3 && $resource_type != 4){
     //create preview files in that directory
     create_previews_using_im($ref,false,$file_extension);
     $nothumb = false;
 }else{
     // Types 2, 3 and 4: flag the resource as transcoding and hand the
     // preview generation to a background PHP process.
     $nothumb = true;
     sql_query("UPDATE resource SET is_transcoding = 1 WHERE ref = $ref");
     //Process previews in the background and continue
     $attempts = 1;
     // NOTE(review): the arguments are interpolated into a shell command without
     // escapeshellarg() - confirm they cannot contain user-controlled text.
     $command = "/usr/bin/php -q -f /var/www/plugins/mia_upload/pages/background_previews.php $resource_type $ref $file_extension $attempts";
     // Output is discarded and the trailing "&" detaches the process.
     exec("$command > /dev/null &", $arrOutput);
 }
 savetoelastic($ref);
 // Emit the status payload as JSON.
 echo(json_encode($data));
Esempio n. 4
0
/**
 * Relay messages between a channel on this server and a freenode channel.
 *
 * Three things happen on every call:
 *  1. relays older than ten minutes are removed from the relay bucket;
 *  2. if $nick belongs to an allowed account, ".relays" lists the active
 *     relays, and "#channel > message" sends the message through the minion
 *     and, when the message starts with "nick," registers a ten minute relay;
 *  3. if $channel is "#freenode", a line from a relayed nick/channel pair is
 *     forwarded to the channel stored for that relay.
 *
 * @param string $nick     Nick that issued the line ("" skips the command part).
 * @param string $channel  Channel the line was seen in.
 * @param string $trailing Message text.
 * @return void
 */
function minion_talk($nick, $channel, $trailing)
{
    $relays_bucket = "activity.php/minion_talk/relays";
    $relays = get_array_bucket($relays_bucket);
    $dirty = False;

    # flush all outdated relays
    foreach ($relays as $relay_nick => $relay_channels) {
        foreach ($relay_channels as $relay_channel => $entry) {
            $age = microtime(True) - $entry["timestamp"];
            if ($age > 10 * 60) {
                unset($relays[$relay_nick][$relay_channel]);
                $dirty = True;
            }
        }
    }

    $allowed = array("crutchy", "chromas", "mrcoolbp", "NCommander", "juggs", "TheMightyBuzzard");
    if ($nick != "" && in_array(users_get_account($nick), $allowed) == True) {
        if ($trailing == ".relays") {
            # List every active relay with its remaining lifetime in minutes.
            $listed = 0;
            foreach ($relays as $relay_nick => $relay_channels) {
                foreach ($relay_channels as $relay_channel => $entry) {
                    $minutes_left = round(($entry["timestamp"] + 10 * 60 - microtime(True)) / 60, 0);
                    pm($channel, chr(3) . "13  {$relay_nick}: {$relay_channel} => " . $entry["channel"] . " (unset in {$minutes_left} minutes)");
                    $listed++;
                }
            }
            if ($listed == 0) {
                pm($channel, chr(3) . "13  no channel relays currently active");
            }
            # Listing returns straight away; the bucket is not saved on this path.
            return;
        }

        # "#channel > message": everything before the first ">" is the target channel.
        $pieces = explode(">", $trailing);
        $target_channel = strtolower(trim($pieces[0]));
        if (count($pieces) >= 2 && substr($target_channel, 0, 1) == "#") {
            $msg = trim(implode(">", array_slice($pieces, 1)));
            if ($msg !== "") {
                $commands = array("~minion raw sylnt :sylnt PRIVMSG {$target_channel} :<{$nick}> {$msg}");
                internal_macro($commands);
                # A message of the form "nick, text" also opens a relay for that nick.
                $addressed = explode(",", $msg);
                $target_nick = strtolower(trim($addressed[0]));
                if (count($addressed) > 1 && strpos($target_nick, " ") === False) {
                    $relays[$target_nick][$target_channel] = array("channel" => $channel, "timestamp" => microtime(True));
                    pm($channel, chr(3) . "13  ten minute relay set for \"{$target_nick}\" in \"{$target_channel}\" on freenode to \"{$channel}\" on this server");
                    $dirty = True;
                }
            }
        }
    }

    if ($channel == "#freenode") {
        # Pull nick and channel out of the colour-coded relay line.
        $color = chr(3);
        $speaker = extract_text($trailing, $color . "03", $color . " [", False);
        $origin = extract_text($trailing, $color . " [" . $color . "02", $color . "] " . $color . "05", False);
        $speaker_key = strtolower($speaker);
        if (isset($relays[$speaker_key][$origin]) == True) {
            $text = extract_text($trailing, $color . "] " . $color . "05", $color, True);
            pm($relays[$speaker_key][$origin]["channel"], $color . "03" . $speaker . $color . " [" . $color . "02" . $origin . $color . "] " . $color . "05" . $text);
        }
    }

    # Persist the bucket only when a relay was added or flushed.
    if ($dirty == True) {
        set_array_bucket($relays, $relays_bucket);
    }
}
Esempio n. 5
0
/**
 * Look up $term on one definition source and announce the result via privmsg().
 *
 * The page is fetched with wget(), its headers, control characters and
 * <head>/<script> sections are removed, and the text between
 * $params["delim_start"] and $params["delim_end"] is taken as the definition.
 * When no definition is found but the response carries a "location" header,
 * the term is re-read from the redirect target and the lookup is retried.
 *
 * @param string $host   Host name of the definition source.
 * @param string $term   Term to define.
 * @param array  $params Source settings read here: "space_delim", "template",
 *                       "uri", "port", "delim_start", "delim_end", "get_param",
 *                       "ignore" and "name".
 * @return bool True when a definition was announced, False otherwise.
 */
function source_define($host, $term, $params)
{
    global $debug;
    $sterm = $term;
    if ($params["space_delim"] != "") {
        $sterm = str_replace(" ", $params["space_delim"], $sterm);
    }
    $uri = str_replace($params["template"], urlencode($sterm), $params["uri"]);
    term_echo("*** DEFINE: trying {$host}{$uri} on port " . $params["port"]);
    $response = wget($host, $uri, $params["port"], ICEWEASEL_UA, "", 20);
    $html = strip_headers($response);
    $html = replace_ctrl_chars($html, " ");
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    if ($debug == "ON") {
        privmsg("debug [{$host}]: uri = \"{$uri}\"");
        $L = strlen($html);
        privmsg("debug [{$host}]: html length = \"{$L}\"");
        unset($L);
        privmsg("debug [{$host}]: delim_start = \"" . $params["delim_start"] . "\"");
        # Fixed: the opening quote was missing, leaving the value unbalanced in the output.
        privmsg("debug [{$host}]: delim_end = \"" . $params["delim_end"] . "\"");
    }
    $i = strpos($html, $params["delim_start"]);
    $def = "";
    if ($i !== False) {
        if ($debug == "ON") {
            privmsg("debug [{$host}]: delim_start pos = \"{$i}\"");
        }
        # Continue searching after the start delimiter only.
        $html = substr($html, $i + strlen($params["delim_start"]));
        $i = strpos($html, $params["delim_end"]);
        if ($i !== False) {
            if ($debug == "ON") {
                privmsg("debug [{$host}]: delim_end pos = \"{$i}\"");
            }
            $def = trim(strip_tags(substr($html, 0, $i)));
            $def = str_replace(array("\n", "\r"), " ", $def);
            $def = str_replace("  ", " ", $def);
            if (strlen($def) > MAX_DEF_LENGTH) {
                $def = trim(substr($def, 0, MAX_DEF_LENGTH)) . "...";
            }
        }
    }
    if ($def == "") {
        # No definition on this page: follow a redirect if it names a different term.
        $location = exec_get_header($response, "location");
        if ($location == "") {
            return False;
        }
        $new_term = extract_text($location, $params["get_param"], "&", True);
        # Fixed: a redirect whose term could not be extracted used to recurse
        # with False as the term and fetch the source once more for nothing.
        if ($new_term === False || $new_term == $term) {
            return False;
        }
        term_echo("redirecting to \"{$location}\"");
        if ($debug == "ON") {
            privmsg("debug [{$host}]: redirecting to \"{$location}\"");
        }
        return source_define($host, $new_term, $params);
    }
    # Reject definitions containing the source's "ignore" marker or the
    # generic "no definitions" notice.
    if ($params["ignore"] != "" and strpos($def, $params["ignore"]) !== False) {
        return False;
    }
    if (strpos($def, "There aren't any definitions") !== False) {
        return False;
    }
    privmsg("[" . $params["name"] . "] " . chr(3) . "03{$term}" . chr(3) . ": " . html_decode($def));
    return True;
}
Esempio n. 6
0
# Fragment: start of a script that reads the SoylentNews journal list.
# NOTE(review): from the "TODO: DEBUG HERE" marker onwards the code belongs to a
# different script (a character editor using mysql_*); the braces do not
# balance, so this excerpt looks like two snippets spliced together.
$host = "soylentnews.org";
$list_uri = "/journal.pl?op=top";
$port = 80;
# Banner line; chr(3) and chr(2) are emitted as raw control characters around the text.
$msg = chr(3) . "08" . "********** " . chr(3) . "03" . chr(2) . "SOYLENTNEWS JOURNAL FEED" . chr(2) . chr(3) . "08" . " **********";
output($msg);
# Default journal id, replaced by the content of JOURNALS_ID_FILE when that file exists.
$last_id = 878;
if (file_exists(JOURNALS_ID_FILE) == True) {
    $last_id = file_get_contents(JOURNALS_ID_FILE);
}
$msg = "last journal = {$last_id}";
output($msg);
# Download the journal list and cut out the part between the two template markers.
$response = wget($host, $list_uri, $port, ICEWEASEL_UA, "", 60);
$html = strip_headers($response);
$delim1 = "<!-- start template: ID 60, journaltop;journal;default -->";
$delim2 = "<!-- end template: ID 60, journaltop;journal;default -->";
$html = extract_text($html, $delim1, $delim2);
if ($html === False) {
    output("error: journal list not found");
    return;
}
# Split into table rows and drop the first two pieces.
$rows = explode("<tr>", $html);
array_shift($rows);
array_shift($rows);
$item_count = 20;
# NOTE(review): max() makes the loop run at least $item_count times, so
# $rows[$i] is read past the end when there are fewer rows - min() looks intended.
for ($i = 0; $i < max($item_count, count($rows)); $i++) {
    $cells = explode("<td valign=\"top\">", $rows[$i]);
    if (count($cells) != 4) {
        term_echo("*** SN JOURNAL FEED: invalid number of cells for row {$i}");
        continue;
    }
    # TODO: DEBUG HERE
    # NOTE(review): the statements below read $_POST and build an UPDATE for a
    # `characters` table; $name, $cash and $key are not assigned in this excerpt.
    $faction = mysql_real_escape_string($_POST['faction']);
    $flags = mysql_real_escape_string($_POST['flags']);
    $model = htmlspecialchars($_POST['model']);
    $sql = "UPDATE  `characters` SET  `_Name` =  '{$name}',\n`_Cash` =  '{$cash}',\n`_Model` =  '{$model}',\n`_Flags` =  '{$flags}',\n`_Faction` =  '{$faction}' WHERE  `characters`.`_Key` = {$key}";
    # The generated statement is echoed into the page.
    echo "<div class='well'>\n<h2>Admin Edit Mode</h2>\n{$sql}\n</div>\n";
} else {
    # Non-admin path: replace the stored physical description with the posted one.
    $newphysicaldesc = mysql_real_escape_string($_POST['physdesc']);
    # NOTE(review): $gamemodecode and $key are concatenated into the query without escaping here.
    $result = mysql_query("SELECT * FROM  `characters` WHERE `_Schema` = '" . $gamemodecode . "' AND `_Key` LIKE  '" . $key . "'");
    if (mysql_error() == "") {
    } else {
        echo '<div class="alert alert-error"> ERROR' . mysql_error() . '</div>';
    }
    # Keep the values of the last matching row; the old description is cut out of the _Data column.
    while ($row = @mysql_fetch_array($result)) {
        $data = $row['_Data'];
        $name = $row['_Name'];
        $oldphysdesc = extract_text($data, '"PhysDesc":"', '","');
    }
    echo "<div class='well'><h2>Replacing</h2> <code>{$oldphysdesc}</code> <h2>with</h2> <code>{$newphysicaldesc}</code> <h2>on {$name}</h2></div>";
    # NOTE(review): $oldphysdesc comes back from the database and is placed in the
    # statement unescaped - confirm it cannot contain a double quote.
    $sql = "UPDATE characters set _Data= replace(_Data, \"{$oldphysdesc}\", \"{$newphysicaldesc}\") WHERE `characters`.`_Key` ={$key}";
}
# Run whichever UPDATE was built above and report the outcome.
mysql_query($sql);
if (mysql_error() == "") {
    echo '<div class="alert alert-success">Character Updated!</div><br>
<a href="index.php"class="btn btn-large btn-block btn-success">Return to dashboard</a>
';
} else {
    echo '<div class="alert alert-error">ERROR - ' . mysql_error() . '<br>If this keeps happening you should contact the Owner about this!</div><br>
<input type="button" class="btn btn-large btn-block btn-error" value="Go Back" onclick="goBack()">
';
}
?>
/**
 * Fetch one URL, index its content and queue the links found on it.
 *
 * Steps: check the URL status; on a relocation queue the redirect target;
 * download the page (throttled by $min_delay); skip unchanged or duplicate
 * content via its MD5 checksum; queue outgoing links in the temp table; and
 * finally insert or update the row in the links table together with its
 * keywords.
 *
 * @param string $url              URL to index.
 * @param int    $level            Level stored with queued links; the page
 *                                 itself is stored with $level - 1.
 * @param mixed  $site_id          Site id written to the links table.
 * @param string $md5sum           Checksum from the previous run; '' means the
 *                                 page has not been indexed before.
 * @param mixed  $domain           Not used by this function.
 * @param mixed  $indexdate        Not used by this function.
 * @param mixed  $sessid           Value of the temp table's id column.
 * @param mixed  $can_leave_domain Passed on to url_purify() and get_links().
 * @param int    $reindex          1 to index even when the checksum is unchanged.
 * @return void
 */
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $supdomain, $index_vpaths;
    global $user_agent, $tmp_urls, $delay_time, $domain_arr;
    global $db;
    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;
    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = $db->query("SELECT link FROM " . TABLE_PREFIX . "temp WHERE link=" . $db->quote($url) . " AND id=" . $db->quote($sessid));
            echo sql_errorstring(__FILE__, __LINE__);
            # Fixed: the redirect target used to be inserted only when it was
            # ALREADY in the temp table, so new targets were never queued and
            # known ones were duplicated. Queue it only when it is missing.
            $already_queued = false;
            if ($result) {
                $already_queued = $result->fetch() !== false;
                $result->closeCursor();
            }
            if (!$already_queued) {
                $db->exec("INSERT INTO " . TABLE_PREFIX . "temp (link, level, id) VALUES (" . $db->quote($url) . ", " . $db->quote($level) . ", " . $db->quote($sessid) . ")");
                echo sql_errorstring(__FILE__, __LINE__);
            }
        }
        # NOTE(review): the original ended this branch with the no-op statement
        # `$url_status['state'] == "redirected";`. An assignment was probably
        # intended, but that would change the state reported by printUrlStatus()
        # below, so the reported state is deliberately left untouched.
    }
    if (!$index_vpaths && $url_status['state'] == 'ok') {
        $url_parts = parse_url($url);
        # URLs without a path component (e.g. "http://host") have no 'path' key.
        $base = basename(isset($url_parts['path']) ? $url_parts['path'] : '');
        if (strstr($base, '.') == false) {
            $url_status['state'] = "directory listing or default redirect";
        }
    }
    ini_set("user_agent", $user_agent);
    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        # Throttle: wait until at least $min_delay seconds have passed since the last fetch.
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        $delay_time = time();
        $file = '';
        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                # Compare against false so a line consisting of "0" does not end the read.
                while (($buffer = @fgets($fl, 4096)) !== false) {
                    $file .= $buffer;
                }
                # Fixed: fclose() used to be called even when fopen() had failed.
                fclose($fl);
            } else {
                $file_read_error = 1;
            }
        }
        if ($file_read_error) {
            # Fall back to the project's own downloader.
            $contents = getFileContents($url);
            $file = $contents['file'];
        }
        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);
        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }
        printStandardReport('starting', $command_line);
        $newmd5sum = md5($file);
        if ($reindex == 0) {
            if ($md5sum == $newmd5sum) {
                printStandardReport('md5notChanged', $command_line);
                $OKtoIndex = 0;
            } else {
                if (isDuplicateMD5($newmd5sum)) {
                    $OKtoIndex = 0;
                    printStandardReport('duplicate', $command_line);
                }
            }
        }
        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            // remove link to css file
            //get all links from file
            $data = clean_file($file, $url, $url_status['content']);
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }
            $wordarray = unique_array(explode(" ", $data['content']));
            if ($data['nofollow'] != 1) {
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
                $all_links = count($links);
                $numoflinks = 0;
                //if there are any, add to the temp table, but only if there isnt such url already
                if (is_array($links)) {
                    # foreach replaces the each() loop; each() was removed in PHP 8.
                    foreach ($links as $link) {
                        if (!isset($tmp_urls[$link]) || $tmp_urls[$link] != 1) {
                            $tmp_urls[$link] = 1;
                            $numoflinks++;
                            $db->exec("INSERT INTO " . TABLE_PREFIX . "temp (link, level, id) VALUES (" . $db->quote($link) . ", " . $db->quote($level) . ", " . $db->quote($sessid) . ")");
                            echo sql_errorstring(__FILE__, __LINE__);
                        }
                    }
                }
            } else {
                printStandardReport('noFollow', $command_line);
            }
            if ($OKtoIndex == 1) {
                $title = $data['title'];
                $host = $data['host'];
                $path = $data['path'];
                $fulltxt = str_replace("\\'", "&quot;", $data['fulltext']);
                $desc = substr($data['description'], 0, 254);
                $language = substr($data['language'], 0, 2);
                $url_parts = parse_url($url);
                $domain_for_db = $url_parts['host'];
                # Look the domain id up in the in-memory cache, creating the row on first sight.
                if (isset($domain_arr[$domain_for_db])) {
                    $dom_id = $domain_arr[$domain_for_db];
                } else {
                    $db->exec("INSERT INTO " . TABLE_PREFIX . "domains (domain) VALUES (" . $db->quote($domain_for_db) . ")");
                    $dom_id = $db->lastInsertId();
                    $domain_arr[$domain_for_db] = $dom_id;
                }
                $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords']);
                $tstamp = "'" . date("Y-m-d") . "'";
                //if there are words to index, add the link to the database, get its id, and add the word + their relation
                if (is_array($wordarray) && count($wordarray) > $min_words_per_page) {
                    # From here on these variables hold SQL-quoted literals.
                    $site_id = $db->quote($site_id);
                    $url = $db->quote($url);
                    $title = $db->quote($title);
                    $desc = $db->quote($desc);
                    $language = $db->quote($language);
                    $fulltxt = $db->quote($fulltxt);
                    $pageSize = $db->quote($pageSize);
                    $Qmd5sum = $db->quote($newmd5sum);
                    if ($md5sum == '') {
                        # Page not indexed before: insert a new link row.
                        $db->exec("INSERT INTO " . TABLE_PREFIX . "links (site_id, url, title, description, language, fulltxt, indexdate, size, md5sum, level) VALUES ({$site_id}, {$url}, {$title}, {$desc}, {$language}, {$fulltxt}, {$tstamp}, {$pageSize}, {$Qmd5sum}, {$thislevel})");
                        $error = sql_errorstring(__FILE__, __LINE__);
                        if ($error) {
                            echo $error;
                            printStandardReport('skipped', $command_line);
                        } else {
                            $result = $db->query("SELECT link_id FROM " . TABLE_PREFIX . "links WHERE url={$url}");
                            echo sql_errorstring(__FILE__, __LINE__);
                            $row = $result->fetch();
                            $link_id = $row[0];
                            $result->closeCursor();
                            save_keywords($wordarray, $link_id, $dom_id);
                            printStandardReport('indexed', $command_line);
                        }
                    } else {
                        if ($md5sum != '' && $md5sum != $newmd5sum) {
                            //if page has changed, start updating
                            $result = $db->query("SELECT link_id FROM " . TABLE_PREFIX . "links WHERE url={$url}");
                            echo sql_errorstring(__FILE__, __LINE__);
                            $row = $result->fetch();
                            $link_id = $row[0];
                            $result->closeCursor();
                            # Keywords live in 16 tables suffixed with the hex digits 0-f.
                            for ($i = 0; $i <= 15; $i++) {
                                $char = dechex($i);
                                $db->exec("DELETE FROM " . TABLE_PREFIX . "link_keyword{$char} WHERE link_id={$link_id}");
                                echo sql_errorstring(__FILE__, __LINE__);
                            }
                            save_keywords($wordarray, $link_id, $dom_id);
                            $db->exec("UPDATE " . TABLE_PREFIX . "links SET title={$title}, description={$desc}, language={$language}, fulltxt={$fulltxt}, indexdate={$tstamp}, size={$pageSize}, md5sum={$Qmd5sum}, level={$thislevel} WHERE link_id={$link_id}");
                            echo sql_errorstring(__FILE__, __LINE__);
                            printStandardReport('re-indexed', $command_line);
                        }
                    }
                } else {
                    printStandardReport('minWords', $command_line);
                }
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }
    # During a re-index, pages that became unreachable or noindex are handed to check_for_removal().
    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }
    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    printLinksReport($numoflinks, $all_links, $command_line);
}
Esempio n. 9
0
/**
 * Build a story submission from a web page and post it to dev.soylentnews.org.
 *
 * The page behind $url is downloaded, its <title> (cut at the first common
 * site-name delimiter) becomes the subject, and paragraphs that look like
 * article prose become the body. The function then fetches the submit form
 * to obtain a "reskey", waits 25 seconds and posts the story.
 *
 * @param string $url Address of the article to submit.
 * @return bool True when the site confirms the submission, False on any
 *              error (an error message is sent through privmsg()).
 */
function sn_submit($url)
{
    if ($url == "") {
        return False;
    }
    $url = get_redirected_url($url);
    if ($url === False) {
        privmsg("error: unable to download source (get_redirected_url)");
        return False;
    }
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        privmsg("error: unable to download source (get_host_and_uri)");
        return False;
    }
    $response = wget($host, $uri, $port);
    # Fixed: the original repeated the get_host_and_uri() check here, so a
    # failed download was never detected. Assumes wget() yields a non-empty
    # string on success - TODO confirm its failure value.
    if (!is_string($response) || $response == "") {
        privmsg("error: unable to download source (wget)");
        return False;
    }
    $source_html = strip_headers($response);
    $source_title = extract_raw_tag($source_html, "title");
    # Checked before the delimiter loop so False is never handed to strpos().
    if ($source_title === False) {
        privmsg("error: title not found or empty");
        return False;
    }
    # Cut the title at each delimiter that commonly separates it from the site name.
    $delimiters = array("--", "|", " - ", " : ", " — ", " • ");
    foreach ($delimiters as $delimiter) {
        $j = strpos($source_title, $delimiter);
        if ($j !== False) {
            $source_title = trim(substr($source_title, 0, $j));
        }
    }
    if ($source_title == "") {
        privmsg("error: title not found or empty");
        return False;
    }
    $source_title = html_decode($source_title);
    $source_title = html_decode($source_title);
    # A description meta tag must exist, although the story body is built from
    # the page's paragraphs below and not from this value.
    $description = extract_meta_content($source_html, "description");
    if ($description === False or $description == "") {
        $description = extract_meta_content($source_html, "og:description", "property");
        if ($description === False or $description == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }
    # Prefer the <article> element when the page has one.
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    #strip_all_tag($html,"a");
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);
    $html = explode("<p", $html);
    $host_parts = explode(".", $host);
    $source_body = array();
    for ($i = 0; $i < count($html); $i++) {
        # Drop the remainder of the opening <p ...> tag.
        $parts = explode(">", $html[$i]);
        if (count($parts) >= 2) {
            array_shift($parts);
            $html[$i] = implode(">", $parts);
        }
        $html[$i] = strip_tags($html[$i]);
        $html[$i] = clean_text($html[$i]);
        # Skip paragraphs that mention any part of the host name longer than three characters.
        for ($j = 0; $j < count($host_parts); $j++) {
            if (strlen($host_parts[$j]) > 3) {
                if (strpos(strtolower($html[$i]), strtolower($host_parts[$j])) !== False) {
                    continue 2;
                }
            }
        }
        # Skip paragraphs for which filter() with the digit set returns anything.
        if (filter($html[$i], "0123456789") != "") {
            continue;
        }
        # Keep only paragraphs that end with a full stop. The original followed
        # this test with a trimming loop that could never remove anything,
        # because the last character is already "." at that point.
        if (strlen($html[$i]) > 1 && $html[$i][strlen($html[$i]) - 1] != ".") {
            continue;
        }
        if (strlen($html[$i]) > 100) {
            $source_body[] = $html[$i];
        }
    }
    $source_body = implode("\n\n", $source_body);
    $source_body = html_decode($source_body);
    $source_body = html_decode($source_body);
    # Fetch the submission form to obtain its hidden "reskey" value.
    $host = "dev.soylentnews.org";
    $port = 443;
    $uri = "/submit.pl";
    $response = wget($host, $uri, $port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }
    # NOTE(review): fixed pause between fetching the form and posting it;
    # the reason is not visible here - confirm before changing.
    sleep(25);
    $params = array();
    $params["reskey"] = $reskey;
    #$params["name"]=trim(substr($nick,0,50));
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params);
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    # Success is recognised by the confirmation sentence in the response page.
    if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) {
        privmsg("submission successful - https://{$host}/submit.pl?op=list");
        return True;
    } else {
        privmsg("error: something went wrong with your submission");
        return False;
    }
}
Esempio n. 10
0
 # Fragment from a comment parser; $pid_html, $pid_delim1, $parts, $j, $cid, $sid
 # and the $record source values are assigned before this excerpt.
 $pid_delim2 = "\">Parent";
 # Parent comment id: the text between the two delimiters, when present.
 $pid_test = extract_text($pid_html, $pid_delim1, $pid_delim2);
 $pid = "";
 $parent_url = "";
 if ($pid_test !== False) {
     $pid = $pid_test;
     $parent_url = "http://soylentnews.org/comments.pl?sid={$sid}&cid={$pid}";
 }
 # Subject: contents of the <h4><a name="cid"> anchor, tags stripped,
 # double spaces collapsed once and HTML entities decoded twice.
 $subject_delim1 = "<h4><a name=\"{$cid}\">";
 $subject_delim2 = "</a>";
 $subject = extract_text($parts[$j], $subject_delim1, $subject_delim2);
 $subject = trim(strip_tags($subject));
 $subject = str_replace("  ", " ", $subject);
 $subject = html_decode($subject);
 $subject = html_decode($subject);
 # Body: contents of the comment_body_<cid> div up to the first </div>.
 $comment_body = extract_text($parts[$j], "<div id=\"comment_body_{$cid}\">", "</div>");
 $comment_body = replace_ctrl_chars($comment_body, " ");
 # Paragraph and line-break tags become spaces so words do not run together
 # once the remaining tags are stripped.
 $comment_body = str_replace("</p>", " ", $comment_body);
 $comment_body = str_replace("<p>", " ", $comment_body);
 $comment_body = str_replace("<br>", " ", $comment_body);
 $comment_body = trim(strip_tags($comment_body));
 $comment_body = str_replace("  ", " ", $comment_body);
 $comment_body = html_decode($comment_body);
 $comment_body = html_decode($comment_body);
 # Collect the parsed values into one record (continues past this excerpt).
 $record = array();
 $record["user"] = $user;
 $record["uid"] = $uid;
 $record["score"] = $score;
 $record["score_num"] = $score_num;
 $record["subject"] = $subject;
 $record["title"] = $title;
Esempio n. 11
0
/**
 * Split $data into values keyed by an ordered list of field names.
 *
 * Each value is the text between "<key><suffix>" and "<next key><suffix>";
 * the value of the last key is extracted with an empty end delimiter.
 *
 * @param array  $keys   Field names in the order they appear in $data.
 * @param string $data   Text to parse.
 * @param string $suffix Text that follows every key (defaults to "=").
 * @return array|false Map of key => value, or False when $keys is empty or
 *                     any extraction fails.
 */
function parse_data($keys, $data, $suffix = "=")
{
    $last = count($keys) - 1;
    if ($last < 0) {
        return False;
    }
    $parsed = array();
    for ($idx = 0; $idx <= $last; $idx++) {
        $start = $keys[$idx] . $suffix;
        if ($idx < $last) {
            # Inner fields end where the next key begins.
            $end = $keys[$idx + 1] . $suffix;
            $value = extract_text($data, $start, $end);
        } else {
            # The final field has no following key.
            $value = extract_text($data, $start, "", True);
        }
        if ($value === False) {
            return False;
        }
        $parsed[$keys[$idx]] = $value;
    }
    return $parsed;
}
Esempio n. 12
0
/**
 * Return the content attribute of the first <meta> tag whose $key attribute
 * equals $name, e.g.
 * <meta name="description" content="Researchers have made a breakthrough." />
 *
 * Matching is done on a lower-cased copy of the document; the returned text
 * is cut from the original $html so its letter case is preserved.
 *
 * @param string $html Document to search.
 * @param string $name Attribute value to look for (compared in lower case).
 * @param string $key  Attribute that carries the name, "name" by default.
 * @return string|false The content value, or False when there is no meta tag,
 *                      no match, or the content is empty.
 */
function extract_meta_content($html, $name, $key = "name")
{
    $lower_html = strtolower($html);
    $wanted = strtolower($name);

    # Every piece after the first starts right behind a "<meta " opener.
    $tags = explode("<meta ", $lower_html);
    array_shift($tags);
    if (count($tags) == 0) {
        return False;
    }

    $content = "";
    foreach ($tags as $tag) {
        $attr = extract_text($tag, "{$key}=\"", "\"");
        if ($attr === False || $attr != $wanted) {
            continue;
        }
        # Only the first matching tag is considered.
        $content = extract_text($tag, "content=\"", "\"");
        break;
    }
    if ($content == "") {
        return False;
    }

    # Locate the lower-cased content in the document and cut the same span
    # out of the original text.
    $offset = strpos($lower_html, $content);
    if ($offset === False) {
        return False;
    }
    return substr($html, $offset, strlen($content));
}
Esempio n. 13
0
 // Fragment: writes a compiled note to the handle $fdc; $fdc, $titre and
 // $parties are set up before this excerpt and the outer loop is closed after it.
 // $cpt counts the source files copied so far.
 $cpt = 0;
 // XML prolog, stylesheet reference and the opening <note> element.
 fputs($fdc, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\r\n\t\t\t<?xml-stylesheet type=\"text/css\" href=\"style.css\" media=\"all\"?>\n\r\n\t\t\t<note uri=\"uri_note_" . $titre . "\">\n");
 foreach ($parties as $partie) {
     // One <partie> element per part, starting with its title.
     fputs($fdc, "<partie>");
     fputs($fdc, "<titrepartie>" . $partie['titre'] . "</titrepartie>");
     // Debug output, disabled:
     // echo "<pre>";
     // print_r($partie);
     // echo "</pre>";
     if ($partie['notes']) {
         foreach ($partie['notes'] as $file) {
             // NOTE(review): the handle is neither checked nor closed within this excerpt.
             $fd = fopen('xml/note/' . $file . '.note', 'r');
             while (!feof($fd)) {
                 $line1 = fgets($fd);
                 if ($cpt == 0) {
                     // First file only: its <titre> line is rewritten with a " [Compilation]" suffix.
                     if (match_tag('titre', $line1)) {
                         fputs($fdc, "<titre>" . extract_text('titre', $line1) . " [Compilation]</titre>\n");
                     } else {
                         // Copy the line unless it matches one of the listed wrapper/metadata tags or is a comment.
                         if (!match_tag('note', $line1) and !match_tag('meta_note', $line1) and !match_tag('statut', $line1) and !match_tag('date_creation', $line1) and !match_tag('date_modification', $line1) and !match_tag('auteur', $line1) and !match_tag('contributeurs', $line1) and !match_tag('contributeur', $line1) and !match_tag('relecteur', $line1) and !match_tag('nom', $line1) and !match_tag('prenom', $line1) and !isComment($line1)) {
                             fputs($fdc, $line1);
                         }
                     }
                 } else {
                     // Later files: same filter, and their <titre> lines are dropped as well.
                     if (!match_tag('note', $line1) and !match_tag('meta_note', $line1) and !match_tag('statut', $line1) and !match_tag('date_creation', $line1) and !match_tag('date_modification', $line1) and !match_tag('auteur', $line1) and !match_tag('contributeurs', $line1) and !match_tag('contributeur', $line1) and !match_tag('relecteur', $line1) and !match_tag('nom', $line1) and !match_tag('prenom', $line1) and !match_tag('titre', $line1) and !isComment($line1)) {
                         fputs($fdc, $line1);
                     }
                 }
             }
             $cpt++;
         }
     }
     fputs($fdc, '</partie>');
Esempio n. 14
0
/**
 * Recursively scan one folder below $syncdir and synchronise its files with the resource table.
 *
 * Sub-folders are processed by recursion unless they are listed in $nogo or carry the
 * alternatives suffix. Every regular file that is not yet present in $done is imported with
 * import_resource(), optionally filed in an automatically created themed collection, tagged
 * with metadata derived from its path ($staticsync_mapfolders) and given the alternative
 * files found in the sibling "<file><$staticsync_alternatives_suffix>" folder. Files that are
 * already in $done but have a newer modification time than recorded in $modtimes get their
 * metadata and previews regenerated.
 *
 * @param string $folder Folder to process, without trailing slash.
 * @return bool|null true once more than $staticsync_max_files files have been counted,
 *                   null otherwise.
 */
function ProcessFolder($folder)
    {
    global $lang, $syncdir, $nogo, $staticsync_max_files, $count, $done, $modtimes, $lastsync, $ffmpeg_preview_extension,
           $staticsync_autotheme, $staticsync_folder_structure, $staticsync_extension_mapping_default,
           $staticsync_extension_mapping, $staticsync_mapped_category_tree, $staticsync_title_includes_path,
           $staticsync_ingest, $staticsync_mapfolders, $staticsync_alternatives_suffix, $theme_category_levels, $staticsync_defaultstate;
    # $unoconv_path / $unoconv_extensions must be declared here, otherwise the check further
    # down always sees them as unset and text extraction is never left to the preview step.
    global $banned_extensions, $extracted_text_field, $filename_field, $unoconv_path, $unoconv_extensions;

    $collection = 0;

    echo "Processing Folder: $folder" . PHP_EOL;

    # List all files in this folder.
    $dh = opendir($folder);
    if ($dh === false)
        {
        # Nothing can be read from this folder; carry on with the rest of the tree.
        echo " *** Skipping folder - it could not be opened: $folder" . PHP_EOL;
        return;
        }

    while (($file = readdir($dh)) !== false)
        {
        if ($file == '.' || $file == '..')
            {
            continue;
            }
        $fullpath  = $folder . "/" . $file;
        $filetype  = filetype($fullpath);
        $shortpath = str_replace($syncdir . "/", '', $fullpath);

        # Work out extension
        $extension = explode(".", $file);
        if (count($extension) > 1)
            {
            $extension = trim(strtolower($extension[count($extension)-1]));
            }
        else
            {
            // No extension
            $extension = "";
            }

        if ($staticsync_mapped_category_tree)
            {
            $path_parts = explode("/", $shortpath);
            array_pop($path_parts);
            touch_category_tree_level($path_parts);
            }

        # -----FOLDERS-------------
        if ((($filetype == "dir") || $filetype == "link") &&
            (strpos($nogo, "[$file]") === false) &&
            (strpos($file, $staticsync_alternatives_suffix) === false))
            {
            # Recurse
            ProcessFolder($folder . "/" . $file);
            }

        # -------FILES---------------
        if (($filetype == "file") && (substr($file,0,1) != ".") && (strtolower($file) != "thumbs.db"))
            {
            # Do not add files with a banned extension. in_array() is required here:
            # array_search() returns index 0 (falsy) for the first entry of the list.
            if (in_array($extension, $banned_extensions)) { continue; }

            $count++;
            if ($count > $staticsync_max_files)
                {
                closedir($dh);
                return(true);
                }

            # Already exists?
            if (!isset($done[$shortpath]))
                {
                echo "Processing file: $fullpath" . PHP_EOL;

                if ($collection == 0 && $staticsync_autotheme)
                    {
                    # Make a new collection for this folder.
                    $e = explode("/", $shortpath);
                    $theme        = ucwords($e[0]);
                    $themesql     = "theme='" . ucwords(escape_check($e[0])) . "'";
                    $themecolumns = "theme";
                    $themevalues  = "'" . ucwords(escape_check($e[0])) . "'";

                    if ($staticsync_folder_structure)
                        {
                        # Intermediate folders (everything between the first folder and the
                        # file's own folder) become theme2, theme3, ...
                        for ($x=0;$x<count($e)-1;$x++)
                            {
                            if ($x != 0)
                                {
                                $themeindex = $x+1;
                                if ($themeindex > $theme_category_levels)
                                    {
                                    $theme_category_levels = $themeindex;
                                    if ($x == count($e)-2)
                                        {
                                        echo PHP_EOL . PHP_EOL .
                                             "UPDATE THEME_CATEGORY_LEVELS TO $themeindex IN CONFIG!!!!" .
                                             PHP_EOL . PHP_EOL;
                                        }
                                    }
                                $th_name       = ucwords(escape_check($e[$x]));
                                $themesql     .= " AND theme{$themeindex} = '$th_name'";
                                $themevalues  .= ",'$th_name'";
                                $themecolumns .= ",theme{$themeindex}";
                                }
                            }
                        }

                    $name = (count($e) == 1) ? '' : $e[count($e)-2];
                    echo "Collection $name, theme=$theme" . PHP_EOL;
                    $escaped_name = escape_check($name);
                    $collection = sql_value("SELECT ref value FROM collection WHERE name='$escaped_name' AND $themesql", 0);
                    if ($collection == 0)
                        {
                        sql_query("INSERT INTO collection (name,created,public,$themecolumns,allow_changes) 
                                                   VALUES ('$escaped_name', NOW(), 1, $themevalues, 0)");
                        $collection = sql_insert_id();
                        }
                    }

                # Work out a resource type based on the extension.
                $type = $staticsync_extension_mapping_default;
                foreach ($staticsync_extension_mapping as $rt => $extensions)
                    {
                    if (in_array($extension,$extensions)) { $type = $rt; }
                    }
                $modified_type = hook('modify_type', 'staticsync', array( $type ));
                if (is_numeric($modified_type)) { $type = $modified_type; }

                # Formulate a title
                if ($staticsync_title_includes_path)
                    {
                    $title_find = array('/',   '_', ".$extension" );
                    $title_repl = array(' - ', ' ', '');
                    $title      = ucfirst(str_ireplace($title_find, $title_repl, $shortpath));
                    }
                else
                    {
                    $title = str_ireplace(".$extension", '', $file);
                    }
                $modified_title = hook('modify_title', 'staticsync', array( $title ));
                if ($modified_title !== false) { $title = $modified_title; }

                # Import this file
                $r = import_resource($shortpath, $type, $title, $staticsync_ingest);
                if ($r !== false)
                    {
                    # Add to mapped category tree (if configured). The test mirrors the one
                    # that filled $path_parts above, so $path_parts is always defined here.
                    if (!empty($staticsync_mapped_category_tree))
                        {
                        $basepath = '';
                        # Save tree position to category tree field

                        # For each node level, expand it back to the root so the full path is stored.
                        for ($n=0;$n<count($path_parts);$n++)
                            {
                            if ($basepath != '')
                                {
                                $basepath .= "~";
                                }
                            $basepath .= $path_parts[$n];
                            $path_parts[$n] = $basepath;
                            }

                        update_field($r, $staticsync_mapped_category_tree, "," . join(",", $path_parts));
                        }

                    # default access level. This may be overridden by metadata mapping.
                    $accessval = 0;

                    # StaticSync path / metadata mapping
                    # Extract metadata from the file path as per $staticsync_mapfolders in config.php
                    if (isset($staticsync_mapfolders))
                        {
                        foreach ($staticsync_mapfolders as $mapfolder)
                            {
                            $match = $mapfolder["match"];
                            $field = $mapfolder["field"];
                            $level = $mapfolder["level"];

                            if (strpos("/" . $shortpath, $match) !== false)
                                {
                                # Match. Extract metadata.
                                $path_parts = explode("/", $shortpath);
                                if ($level < count($path_parts))
                                    {
                                    // special cases first.
                                    if ($field == 'access')
                                        {
                                        # access level is a special case
                                        # first determine if the value matches a defined access level

                                        $value = $path_parts[$level-1];

                                        for ($n=0; $n<3; $n++)
                                            {
                                            # if we get an exact match or a match except for case
                                            if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n]))
                                                {
                                                $accessval = $n;
                                                echo "Will set access level to " . $lang['access' . $n] . " ($n)" . PHP_EOL;
                                                }
                                            }
                                        }
                                    else
                                        {
                                        # Save the value
                                        print_r($path_parts);
                                        $value = $path_parts[$level-1];
                                        update_field ($r, $field, $value);
                                        echo " - Extracted metadata from path: $value" . PHP_EOL;
                                        }
                                    }
                                }
                            }
                        }

                    # update access level
                    sql_query("UPDATE resource SET access = '$accessval',archive='$staticsync_defaultstate' WHERE ref = '$r'");

                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && file_exists($altpath))
                        {
                        $adh = opendir($altpath);
                        if ($adh !== false)
                            {
                            while (($altfile = readdir($adh)) !== false)
                                {
                                # The checks must look at the alternative file itself, not at
                                # the parent $file, or hidden files and thumbs.db get ingested.
                                $altfiletype = filetype($altpath . "/" . $altfile);
                                if (($altfiletype == "file") && (substr($altfile,0,1) != ".") && (strtolower($altfile) != "thumbs.db"))
                                    {
                                    # Create alternative file
                                    # Find extension
                                    $ext = explode(".", $altfile);
                                    $ext = $ext[count($ext)-1];

                                    $description = str_replace("?", strtoupper($ext), $lang["originalfileoftype"]);
                                    $file_size   = filesize_unlimited($altpath . "/" . $altfile);

                                    $aref = add_alternative_file($r, $altfile, $description, $altfile, $ext, $file_size);
                                    $path = get_resource_path($r, true, '', true, $ext, -1, 1, false, '', $aref);
                                    rename($altpath . "/" . $altfile,$path); # Move alternative file
                                    }
                                }
                            closedir($adh);
                            }
                        }

                    # Add to collection
                    if ($staticsync_autotheme)
                        {
                        $test = sql_query("SELECT * FROM collection_resource WHERE collection='$collection' AND resource='$r'");
                        if (count($test) == 0)
                            {
                            sql_query("INSERT INTO collection_resource (collection, resource, date_added) 
                                            VALUES ('$collection', '$r', NOW())");
                            }
                        }
                    }
                else
                    {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?)" . PHP_EOL;
                    }
                }
            else
                {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath,$modtimes) && ($filemod > strtotime($modtimes[$shortpath])))
                    {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("SELECT ref, has_image, file_modified, file_extension FROM resource 
                                        WHERE file_path='" . escape_check($shortpath) . "'");
                    if (count($rd) > 0)
                        {
                        $rd   = $rd[0];
                        $rref = $rd["ref"];

                        echo "Resource $rref has changed, regenerating previews: $fullpath" . PHP_EOL;
                        extract_exif_comment($rref,$rd["file_extension"]);

                        # extract text from documents (e.g. PDF, DOC).
                        if (isset($extracted_text_field))
                            {
                            $handled_by_unoconv = isset($unoconv_path) && is_array($unoconv_extensions)
                                && in_array($extension, $unoconv_extensions);
                            # When unoconv handles this extension the text is extracted
                            # during preview creation below, so it is skipped here.
                            if (!$handled_by_unoconv)
                                {
                                extract_text($rref,$extension);
                                }
                            }

                        # Store original filename in field, if set
                        if (isset($filename_field))
                            {
                            update_field($rref,$filename_field,$file);
                            }

                        create_previews($rref, false, $rd["file_extension"], false, false, -1, false, $staticsync_ingest);
                        sql_query("UPDATE resource SET file_modified=NOW() WHERE ref='$rref'");
                        }
                    }
                }
            }
        }

    closedir($dh);
    }
Esempio n. 15
0
/**
 * Fetch one URL, queue the links it contains and add or refresh its entry in the index.
 *
 * Steps: query the URL status; for a relocation, queue the new target in the temp table and
 * mark the URL as redirected; for an "ok" status, download the page (honouring $min_delay
 * between requests), convert non-text content, skip unchanged or duplicate content by MD5,
 * queue the outgoing links and finally insert or update the row in the links table together
 * with its keywords.
 *
 * NOTE(review): every query is built by string interpolation with the legacy mysql_* API.
 * Whether $url, the harvested links and the fields returned by clean_file() are already
 * escaped cannot be told from this function - confirm before touching the SQL.
 *
 * @param string     $url              Address to index.
 * @param int        $level            Depth stored with links queued from this page; the page
 *                                     itself is stored with $level - 1.
 * @param int|string $site_id          Site the page belongs to.
 * @param string     $md5sum           Checksum stored for this URL, '' if it is not indexed yet.
 * @param string     $domain           Not used by this function.
 * @param string     $indexdate        Not used by this function.
 * @param string     $sessid           Identifier of the crawl session in the temp table.
 * @param mixed      $can_leave_domain Passed through to url_purify() and get_links().
 * @param int        $reindex          1 when re-indexing; enables removal of dead pages.
 * @return void
 */
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $entities, $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $supdomain;
    global $mysql_table_prefix, $user_agent, $tmp_urls, $delay_time, $domain_arr;

    $deletable = 0;
    $all_links = 0;
    $numoflinks = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;

    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            // Queue the redirect target unless this session already knows it.
            $result = mysql_query("select link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'");
            echo mysql_error();
            $rows = mysql_numrows($result);
            if ($rows == 0) {
                mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')");
                echo mysql_error();
            }
        }
        // Assignment, not comparison: the original "==" statement had no effect.
        $url_status['state'] = "redirected";
    }

    ini_set("user_agent", $user_agent);

    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        $file = '';

        // Throttle: keep at least $min_delay seconds between two downloads.
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        $delay_time = time();

        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                // Compare against false so a chunk such as "0" does not end the read early.
                while (($buffer = @fgets($fl, 4096)) !== false) {
                    $file .= $buffer;
                }
                // Close only a handle that was actually opened.
                fclose($fl);
            } else {
                $file_read_error = 1;
            }
        }

        if ($file_read_error) {
            $contents = getFileContents($url);
            $file = $contents['file'];
        }

        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);

        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }

        printStandardReport('starting', $command_line);

        $newmd5sum = md5($file);
        if ($md5sum == $newmd5sum) {
            printStandardReport('md5notChanged', $command_line);
            $OKtoIndex = 0;
        } elseif (isDuplicateMD5($newmd5sum)) {
            $OKtoIndex = 0;
            printStandardReport('duplicate', $command_line);
        }

        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            $data = clean_file($file, $url, $url_status['content']);
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }

            $wordarray = unique_array(explode(" ", $data['content']));

            if ($data['nofollow'] != 1) {
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
                // Add new links to the temp table, but only those not seen before.
                if (is_array($links)) {
                    $all_links = count($links);
                    foreach ($links as $link) {
                        if (!isset($tmp_urls[$link]) || $tmp_urls[$link] != 1) {
                            $tmp_urls[$link] = 1;
                            $numoflinks++;
                            mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$link}', '{$level}', '{$sessid}')");
                            echo mysql_error();
                        }
                    }
                }
            } else {
                printStandardReport('noFollow', $command_line);
            }

            if ($OKtoIndex == 1) {
                $title = $data['title'];
                $host = $data['host'];
                $path = $data['path'];
                $fulltxt = $data['fulltext'];
                $desc = substr($data['description'], 0, 254);

                // Resolve the domain id, creating the domain row on first sight.
                $url_parts = parse_url($url);
                $domain_for_db = $url_parts['host'];
                if (isset($domain_arr[$domain_for_db])) {
                    $dom_id = $domain_arr[$domain_for_db];
                } else {
                    mysql_query("insert into " . $mysql_table_prefix . "domains (domain) values ('{$domain_for_db}')");
                    $dom_id = mysql_insert_id();
                    $domain_arr[$domain_for_db] = $dom_id;
                }

                $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords']);

                // Only pages with enough words are stored with their keyword relations.
                if (is_array($wordarray) && count($wordarray) > $min_words_per_page) {
                    if ($md5sum == '') {
                        // First time this URL is indexed.
                        mysql_query("insert into " . $mysql_table_prefix . "links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', '{$title}', '{$desc}', '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', {$thislevel})");
                        echo mysql_error();
                        $result = mysql_query("select link_id from " . $mysql_table_prefix . "links where url='{$url}'");
                        echo mysql_error();
                        $row = mysql_fetch_row($result);
                        $link_id = $row[0];
                        save_keywords($wordarray, $link_id, $dom_id);
                        printStandardReport('indexed', $command_line);
                    } elseif ($md5sum != $newmd5sum) {
                        // Page has changed: drop the old keyword relations and store the new ones.
                        $result = mysql_query("select link_id from " . $mysql_table_prefix . "links where url='{$url}'");
                        echo mysql_error();
                        $row = mysql_fetch_row($result);
                        $link_id = $row[0];
                        // Keyword relations live in 16 tables, link_keyword0 .. link_keywordf.
                        for ($i = 0; $i <= 15; $i++) {
                            $char = dechex($i);
                            mysql_query("delete from " . $mysql_table_prefix . "link_keyword{$char} where link_id={$link_id}");
                            echo mysql_error();
                        }
                        save_keywords($wordarray, $link_id, $dom_id);
                        $query = "update " . $mysql_table_prefix . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level={$thislevel} where link_id={$link_id}";
                        mysql_query($query);
                        echo mysql_error();
                        printStandardReport('re-indexed', $command_line);
                    }
                } else {
                    printStandardReport('minWords', $command_line);
                }
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }

    printLinksReport($numoflinks, $all_links, $command_line);
}
Esempio n. 16
0
# Positional arguments handed over by the caller.
$dest = $argv[2];
$nick = $argv[3];
$alias = $argv[4];
$cmd = $argv[5];

# Request settings. One draw in five fetches the front page, the rest the wordplay category.
$agent = ICEWEASEL_UA;
$host = "www.just-one-liners.com";
$port = 80;
$uri = (mt_rand(0, 4) == 0) ? "/" : "/category/confucius-say-wordplay";
$response = wget($host, $uri, $port, $agent);

# Cut the first post title out of the response.
$delim1 = "<h2 class=\"title\" id=\"post-";
$delim2 = "</h2>";
$text = extract_text($response, $delim1, $delim2);
if ($text === False) {
    return;
}

# Everything before the first "<" is the remainder of the id attribute; drop it.
$i = strpos($text, "<");
if ($i === False) {
    return;
}

# Clean-up, innermost call first: cut at the first tag, replace control characters with
# spaces, strip tags, trim, collapse double spaces once, then decode HTML entities twice.
$text = html_decode(
    html_decode(
        str_replace("  ", " ", trim(strip_tags(replace_ctrl_chars(substr($text, $i), " "))))
    )
);
$text_len = strlen($text);
$max_text_length = 300;
Esempio n. 17
0
    $result_hiragana = False;
    if ($result_hiragana_2 !== False and $result_hiragana_3 === False) {
        $result_hiragana = $result_hiragana_2;
    } elseif ($result_hiragana_2 === False and $result_hiragana_3 !== False) {
        $result_hiragana = $result_hiragana_3;
    } elseif ($result_hiragana_2 !== False and $result_hiragana_3 !== False) {
        $result_hiragana = $result_hiragana_2 . ", " . $result_hiragana_3;
    }
    # kanji
    $delim1 = "<span class=\"text\">";
    $delim2 = "      </span>";
    $result_kanji = extract_text($items[$i], $delim1, $delim2);
    # english
    $delim1 = "<span class=\"meaning-meaning\">";
    $delim2 = "</span>";
    $result_english = extract_text($items[$i], $delim1, $delim2);
    $result["hiragana"] = False;
    if ($result_hiragana !== False) {
        $result["hiragana"] = trim(strip_tags($result_hiragana));
    }
    $result["kanji"] = False;
    if ($result_kanji !== False) {
        $result["kanji"] = trim(strip_tags($result_kanji));
    }
    if ($result_english !== False) {
        $result["english"] = trim(strip_tags($result_english));
        $results[] = $result;
    }
}
# NOTE(review): this loop header iterates over $results, yet the body never reads $results or
# $i - it looks like two unrelated snippets were joined here. Confirm the intended loop.
# $n is set to zero and not changed inside the loop.
$n = 0;
for ($i = 0; $i < count($results); $i++) {
    # Run the converter command held in $unocommand (presumably unoconv - verify) to
    # produce a PDF from $file; the file name is shell-escaped.
    run_command($unocommand . " --format=pdf " . escapeshellarg($file));
    # The converted file is looked for next to the source: same folder, same base name, ".pdf".
    $path_parts = pathinfo($file);
    $basename_minus_extension = remove_extension($path_parts['basename']);
    $pdffile = $path_parts['dirname'] . "/" . $basename_minus_extension . ".pdf";
    if (file_exists($pdffile)) {
        # Attach this PDF file as an alternative download.
        # Rows flagged unoconv='1' for this resource are deleted first, then a fresh one is added.
        sql_query("delete from resource_alt_files where resource = '" . $ref . "' and unoconv='1'");
        $alt_ref = add_alternative_file($ref, "PDF version");
        $alt_path = get_resource_path($ref, true, "", false, "pdf", -1, 1, false, "", $alt_ref);
        # Copy the PDF to the alternative file's path, then delete the converter output.
        copy($pdffile, $alt_path);
        unlink($pdffile);
        # Fill in name, description, extension and size of the new alternative file row.
        sql_query("update resource_alt_files set file_name='{$ref}-converted.pdf',description='generated by Open Office',file_extension='pdf',file_size='" . filesize_unlimited($alt_path) . "',unoconv='1' where resource='{$ref}' and ref='{$alt_ref}'");
        # Set vars so we continue generating thumbs/previews as if this is a PDF file
        # ($file now points at the copied PDF, so a further iteration would convert that file).
        $extension = "pdf";
        $file = $alt_path;
        extract_text($ref, $extension, $alt_path);
    }
}
/* ----------------------------------------
	Calibre E-book processing
   ----------------------------------------
*/
global $calibre_extensions;
global $calibre_path;
if (in_array($extension, $calibre_extensions) && isset($calibre_path) && !isset($newfile)) {
    $calibrecommand = $calibre_path . "/ebook-convert";
    if (!file_exists($calibrecommand)) {
        exit("Calibre executable not found at '{$calibre_path}'");
    }
    $path_parts = pathinfo($file);
    $basename_minus_extension = remove_extension($path_parts['basename']);
         # Store original filename in field, if set
         if (isset($filename_field)) {
             $filename = $uploadfiles[$n];
             if ($use_local) {
                 $filename = mb_basename($filename);
             }
             update_field($ref, $filename_field, $filename);
         }
         # get file metadata
         if (getval("no_exif", "") == "") {
             extract_exif_comment($ref, $extension);
         }
         # extract text from documents (e.g. PDF, DOC).
         global $extracted_text_field;
         if (isset($extracted_text_field) && !$no_exif) {
             extract_text($ref, $extension);
         }
         $done++;
         # Add to collection?
         if ($collection != "") {
             $refs[] = $ref;
         }
         # Log this
         daily_stat("Resource upload", $ref);
         resource_log($ref, 'u', 0);
     }
 }
 if (!$use_local) {
     ftp_close($ftp);
 }
 switch ($done) {
Esempio n. 20
0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# the following code posts a submission to SoylentNews

#return;

if ($nick<>"crutchy")
{
  privmsg("exec's submit script is borken. blame crutchy");
  return;
} */
# Load the submission form over port 443 and pull the hidden "reskey" value out of it.
$host = "soylentnews.org";
$port = 443;
$uri = "/submit.pl";
$response = wget($host, $uri, $port, ICEWEASEL_UA);
$html = strip_headers($response);
$reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
if ($reskey === False) {
    privmsg("error: unable to extract reskey");
    return;
}

# Pause for 25 seconds before assembling the form fields.
sleep(25);

# Form fields. The submitter name is the bot's own nick; an earlier variant used the first
# 50 characters of $nick instead. The subject is the first 100 characters of the source
# title, trimmed.
$params = array(
    "reskey" => $reskey,
    "name" => get_bot_nick(),
    "email" => "",
    "subj" => trim(substr($source_title, 0, 100)),
    "primaryskid" => "1",
    "tid" => "6",
    "sub_type" => "plain",
    "story" => $source_body . "\n\n" . $url . "\n\n-- submitted from IRC",
);
Esempio n. 21
0
    if (preg_match("#</?{$tag}([^/]*)>(([^/]+)</{$tag}>)?#i", $line)) {
        return true;
    }
    return false;
}
/**
 * Tell whether a line contains a "<? ... ?>" construct with no "/" between the markers.
 *
 * @param string $line Line to inspect.
 * @return bool true when the pattern matches, false otherwise.
 */
function isComment($line)
{
    return preg_match("#<\\?([^/]*)\\?>#i", $line) ? true : false;
}
# Print the body lines of two note files one after the other. Lines that match one of the
# tags below, or that isComment() recognises, are left out. The title line of the first
# note is re-emitted with a " [Compilation]" suffix; title lines of the second note are dropped.
$compil_skip_tags = array(
    'note', 'meta_note', 'statut', 'date_creation', 'date_modification', 'auteur',
    'contributeurs', 'contributeur', 'relecteur', 'nom', 'prenom',
);

$file1 = fopen('xml/note.xml', 'r');
# Only read when the file could be opened; looping on feof() with a failed handle never ends.
if ($file1 !== false) {
    while (($line1 = fgets($file1)) !== false) {
        if (match_tag('titre', $line1)) {
            echo "<titre>" . extract_text('titre', $line1) . " [Compilation]</titre>\n";
            continue;
        }
        $compil_keep = !isComment($line1);
        foreach ($compil_skip_tags as $compil_tag) {
            if (!$compil_keep) {
                break;
            }
            $compil_keep = !match_tag($compil_tag, $line1);
        }
        if ($compil_keep) {
            echo $line1;
        }
    }
    fclose($file1);
}

$file2 = fopen('xml/note2.xml', 'r');
echo "<?Note suivante?>\n";
if ($file2 !== false) {
    # For the second note the title tag is excluded as well.
    $compil_skip_tags2 = array_merge($compil_skip_tags, array('titre'));
    while (($line2 = fgets($file2)) !== false) {
        $compil_keep = !isComment($line2);
        foreach ($compil_skip_tags2 as $compil_tag) {
            if (!$compil_keep) {
                break;
            }
            $compil_keep = !match_tag($compil_tag, $line2);
        }
        if ($compil_keep) {
            echo $line2;
        }
    }
    fclose($file2);
}
Esempio n. 22
0
/**
 * Prepares the content of an archived file for indexing.
 *
 * Depending on the file suffix the raw content is first run through extract_text();
 * optionally framesets are replaced by a <body> built from get_frames(), and feeds
 * are handed to get_arch_feeds().
 *
 * Reads the globals $index_framesets and $index_rss (each enabled when equal to '1'),
 * plus $command_line, $no_log and $can_leave_domain, which are passed through to helpers.
 *
 * @param string $buf    Raw content of the file.
 * @param string $name   File name; the part after the last dot selects the converter.
 * @param string $url    URL of the file, passed to get_frames() / get_arch_feeds().
 * @param string $chrSet Charset handed to extract_text().
 * @return mixed The processed content (whatever the last applied helper returned).
 */
function get_arch_content($buf, $name, $url, $chrSet)
{
    global $index_framesets, $command_line, $no_log, $can_leave_domain, $index_rss;
    // A name without any dot has no suffix. (Previously strrpos() === false was added to 1,
    // so e.g. "xpdf" was mistaken for a PDF.)
    $dot = strrpos($name, ".");
    $suffix = $dot === false ? '' : strtolower(substr($name, $dot + 1));
    // NOTE(review): $file0 was never defined in this function, so extract_text() has always
    // received null here; made explicit to avoid the undefined-variable warning. Confirm
    // whether a global $file0 was intended.
    $file0 = null;
    //  if special converter is required
    // NOTE(review): 'ptt' looks like a typo for 'ppt' - kept as-is; verify what extract_text() supports.
    $converted_suffixes = array('pdf', 'doc', 'rtf', 'xls', 'ptt', 'docx', 'xlsx');
    if (in_array($suffix, $converted_suffixes, true)) {
        // The PDF branch used to pass the undefined $charSet instead of $chrSet.
        $buf = extract_text($buf, $file0, $suffix, 0, $chrSet);
    }
    //  for extracting framesets of this file enter here. Iframes will be extracted later on for the complete $file
    if ($index_framesets == '1') {
        if (preg_match("@<frameset[^>]*>(.*?)<\\/frameset>@si", $buf, $regs)) {
            printStandardReport('newFrameset', $command_line, $no_log);
            //  separate the <frameset> ....</frameset> part of this file
            $frame = $regs[1];
            //  create the body tags for $buf
            $replace = "<body>" . get_frames($frame, $url, $can_leave_domain) . "</body>";
            //  include all replacements instead of the frameset tag into the actual file. This will become the body.
            //  A callback is used so that "$1" or "\1" inside the frame content is inserted
            //  literally instead of being interpreted as a back-reference.
            $buf = preg_replace_callback("@<frameset.*?</frameset>@si", function () use ($replace) {
                return $replace;
            }, $buf);
        }
    }
    // for extracting archived feeds enter here (only the first 400 bytes are inspected)
    if (preg_match("/<rss|atom|<feed|<rdf|<rsd/si", substr($buf, 0, 400)) && $index_rss == '1') {
        $buf = get_arch_feeds($buf, $url);
    }
    return $buf;
}
Esempio n. 23
0
    return;
}
# Fetch the dictionary page for the requested term from HOST.
$uri = "/dictionary/meaning-of-" . urlencode($trailing) . ".html";
$response = wget(HOST, $uri);
$html = strip_headers($response);
if ($html === False) {
    privmsg("error downloading");
    return;
}
# Everything before the first search item is page chrome; drop it.
$items = explode("<div class=\"search_items\">", $html);
array_shift($items);
# Collect the text between <ruby> and </ruby> for at most MAX_ITEMS items.
$limit = min(MAX_ITEMS, count($items));
$results = array();
foreach ($items as $index => $item) {
    if ($index >= $limit) {
        break;
    }
    $ruby = extract_text($item, "<ruby>", "</ruby>");
    if ($ruby === False) {
        continue;
    }
    # Put a space before each "<rp>(" so the bracketed part stays separated once tags are stripped.
    $results[] = strip_tags(str_replace("<rp>(", " <rp>(", $ruby));
}
# Report every hit followed by the page address, or say that nothing was found.
if (count($results) == 0) {
    privmsg("no results");
} else {
    foreach ($results as $line) {
        privmsg($line);
    }
    privmsg(HOST . $uri);
}
#####################################################################################################
 /**
  * Processes the file belonging to resource $ref.
  *
  * In normal mode the posted file ($_FILES['userfile'] / $_FILES['Filedata'], or a chunked
  * upload already reassembled at $plupload_upload_location / $jupload_alternative_upload_location)
  * replaces the resource's existing file. In revert mode no new file is accepted: the
  * resource's metadata is wiped and the file already stored is processed again.
  *
  * Afterwards the extension is stored, metadata / document text is extracted (unless
  * $no_exif), the filename field is updated, old previews are removed and new ones created.
  *
  * @param mixed $ref        Reference of the resource the file belongs to.
  * @param bool  $no_exif    When true, skip embedded-metadata and text extraction; optionally
  *                          merge the filename into the title instead.
  * @param bool  $revert     When true, reset metadata and re-process the stored file.
  * @param bool  $autorotate When true (and $camera_autorotation is on), auto-rotate the uploaded file.
  * @return string|false Status message ("Your file has been uploaded." or, when no file was
  *                      moved, "Please provide a file name."); false when the extension cannot
  *                      be determined, is banned, or moving the file fails.
  */
 function upload_file($ref, $no_exif = false, $revert = false, $autorotate = false)
 {
     hook("beforeuploadfile", "", array($ref));
     # Optional: clear alternative files before uploading the new resource.
     hook("clearaltfiles", "", array($ref));
     hook("removeannotations", "", array($ref));
     $exiftool_fullpath = get_utility_path("exiftool");
     global $filename_field;
     # Defaults so later reads never hit an undefined variable or index.
     $processfile = array('name' => '', 'tmp_name' => '');
     $original_filename = '';
     if ($revert == true) {
         # Revert is mainly for metadata reversion: remove all metadata and simulate a
         # re-upload of the file from scratch.
         $original_filename = get_data_by_field($ref, $filename_field);
         # Field 8 is used in a special way for staticsync, don't overwrite.
         $test_for_staticsync = get_resource_data($ref);
         if ($test_for_staticsync['file_path'] != "") {
             $staticsync_mod = " and resource_type_field != 8";
         } else {
             $staticsync_mod = "";
         }
         sql_query("delete from resource_data where resource={$ref} {$staticsync_mod}");
         sql_query("delete from resource_keyword where resource={$ref} {$staticsync_mod}");
         # Clear 'joined' display fields which are based on metadata that is being deleted
         # in a revert (the original filename is reinserted later).
         $display_fields = get_resource_table_joins();
         if ($staticsync_mod != "") {
             $display_fields_new = array();
             for ($n = 0; $n < count($display_fields); $n++) {
                 if ($display_fields[$n] != 8) {
                     $display_fields_new[] = $display_fields[$n];
                 }
             }
             $display_fields = $display_fields_new;
         }
         $clear_fields = "";
         for ($x = 0; $x < count($display_fields); $x++) {
             $clear_fields .= "field" . $display_fields[$x] . "=''";
             if ($x < count($display_fields) - 1) {
                 $clear_fields .= ",";
             }
         }
         # With no joined fields the statement would be "update resource set  where ...", which is invalid SQL.
         if ($clear_fields != "") {
             sql_query("update resource set " . $clear_fields . " where ref={$ref}");
         }
         # Also add the ref back into keywords.
         add_keyword_mappings($ref, $ref, -1);
         $extension = sql_value("select file_extension value from resource where ref={$ref}", "");
         $filename = get_resource_path($ref, true, "", false, $extension);
         $processfile['tmp_name'] = $filename;
     } else {
         # Work out which file has been posted.
         if (isset($_FILES['userfile'])) {
             $processfile = $_FILES['userfile'];
         } elseif (isset($_FILES['Filedata'])) {
             $processfile = $_FILES['Filedata'];
         }
         # Java upload (at least) and Plupload send the name separately.
         if (isset($_REQUEST['name'])) {
             $filename = $_REQUEST['name'];
         } else {
             $filename = $processfile['name'];
         }
         if ($no_exif && isset($filename_field)) {
             $user_set_filename = get_data_by_field($ref, $filename_field);
             if (trim($user_set_filename) != '') {
                 # Remember the uploaded file's extension in case the user didn't provide one.
                 $path_parts = pathinfo($filename);
                 $original_extension = isset($path_parts['extension']) ? $path_parts['extension'] : '';
                 $filename = $user_set_filename;
                 # If the user filename has no extension, add the original one (when there is one).
                 $path_parts = pathinfo($filename);
                 if (!isset($path_parts['extension']) && $original_extension != '') {
                     $filename .= '.' . $original_extension;
                 }
             }
         }
     }
     # Work out extension (already known in revert mode).
     if (!isset($extension)) {
         # First try to get it from the filename.
         $filename_parts = explode(".", $filename);
         if (count($filename_parts) > 1) {
             $extension = escape_check(trim(strtolower($filename_parts[count($filename_parts) - 1])));
         } else {
             # No extension in the name: ask exiftool for the file type, give up without it.
             if ($exiftool_fullpath == false) {
                 return false;
             }
             $file_type_by_exiftool = run_command($exiftool_fullpath . " -filetype -s -s -s " . escapeshellarg($processfile['tmp_name']));
             if (strlen($file_type_by_exiftool) == 0) {
                 return false;
             }
             # Escaped like the filename-derived extension, since it is interpolated into SQL below.
             $extension = escape_check(str_replace(" ", "_", trim(strtolower($file_type_by_exiftool))));
         }
     }
     # Banned extension?
     global $banned_extensions;
     if (in_array($extension, $banned_extensions)) {
         return false;
     }
     $status = "Please provide a file name.";
     $filepath = get_resource_path($ref, true, "", true, $extension);
     if (!$revert) {
         # Remove existing file, if present.
         hook("beforeremoveexistingfile", "", array("resourceId" => $ref));
         $old_extension = sql_value("select file_extension value from resource where ref='{$ref}'", "");
         if ($old_extension != "") {
             $old_path = get_resource_path($ref, true, "", true, $old_extension);
             if (file_exists($old_path)) {
                 unlink($old_path);
             }
         }
         # Also remove any existing extracted ICC profiles.
         $icc_path = get_resource_path($ref, true, "", true, $extension . '.icc');
         if (file_exists($icc_path)) {
             unlink($icc_path);
         }
         # If there is a -0.icc page, run through and delete as many consecutive ones as exist.
         $iccx = 0;
         $badicc_path = str_replace(".icc", "-{$iccx}.icc", $icc_path);
         while (file_exists($badicc_path)) {
             unlink($badicc_path);
             $iccx++;
             $badicc_path = str_replace(".icc", "-{$iccx}.icc", $icc_path);
         }
         if ($filename != "") {
             global $jupload_alternative_upload_location, $plupload_upload_location;
             if (isset($plupload_upload_location)) {
                 # PLUpload - file was sent chunked and reassembled - use the reassembled file location.
                 $result = rename($plupload_upload_location, $filepath);
             } elseif (isset($jupload_alternative_upload_location)) {
                 # JUpload - file was sent chunked and reassembled - use the reassembled file location.
                 $result = rename($jupload_alternative_upload_location, $filepath);
             } else {
                 # Standard upload.
                 $result = move_uploaded_file($processfile['tmp_name'], $filepath);
             }
             if ($result == false) {
                 # The file could not be moved into place (e.g. it exceeded the upload size limit).
                 return false;
             }
             global $camera_autorotation;
             global $ffmpeg_audio_extensions;
             if ($camera_autorotation) {
                 if ($autorotate && !in_array($extension, $ffmpeg_audio_extensions)) {
                     AutoRotateImage($filepath);
                 }
             }
             # NOTE(review): 0777 makes the stored file world-writable; confirm this is required.
             chmod($filepath, 0777);
             global $icc_extraction;
             global $ffmpeg_supported_extensions;
             if ($icc_extraction && $extension != "pdf" && !in_array($extension, $ffmpeg_supported_extensions)) {
                 extract_icc_profile($ref, $extension);
             }
             $status = "Your file has been uploaded.";
         }
     }
     # Store extension in the database and update file modified time.
     # A new upload also resets has_image; a revert leaves it untouched.
     if ($revert) {
         $has_image = "";
     } else {
         $has_image = ",has_image=0";
     }
     sql_query("update resource set file_extension='{$extension}',preview_extension='jpg',file_modified=now() {$has_image} where ref='{$ref}'");
     # Delete existing resource_dimensions.
     sql_query("delete from resource_dimensions where resource='{$ref}'");
     # Get file metadata.
     if (!$no_exif) {
         extract_exif_comment($ref, $extension);
     } else {
         global $merge_filename_with_title, $lang;
         if ($merge_filename_with_title) {
             $merge_filename_with_title_option = urlencode(getval('merge_filename_with_title_option', ''));
             $merge_filename_with_title_include_extensions = urlencode(getval('merge_filename_with_title_include_extensions', ''));
             $merge_filename_with_title_spacer = urlencode(getval('merge_filename_with_title_spacer', ''));
             # Kept apart from $original_filename, which a revert needs further down to
             # restore the stored filename.
             if (isset($_REQUEST['name'])) {
                 $uploaded_filename = $_REQUEST['name'];
             } else {
                 $uploaded_filename = $processfile['name'];
             }
             if ($merge_filename_with_title_include_extensions == 'yes') {
                 $merged_filename = $uploaded_filename;
             } else {
                 $merged_filename = strip_extension($uploaded_filename);
             }
             # Get title field.
             $resource = get_resource_data($ref);
             $read_from = get_exiftool_fields($resource['resource_type']);
             for ($i = 0; $i < count($read_from); $i++) {
                 if ($read_from[$i]['name'] == 'title') {
                     $oldval = get_data_by_field($ref, $read_from[$i]['ref']);
                     # Title already contains the filename: nothing to merge.
                     if (strpos($oldval, $merged_filename) !== FALSE) {
                         continue;
                     }
                     # Stays null when the chosen option asks for no change, so the title is
                     # not overwritten with an undefined or stale value.
                     $newval = null;
                     switch ($merge_filename_with_title_option) {
                         case $lang['merge_filename_title_do_not_use']:
                             # Do nothing since the user doesn't want to use this feature.
                             break;
                         case $lang['merge_filename_title_replace']:
                             $newval = $merged_filename;
                             break;
                         case $lang['merge_filename_title_prefix']:
                             $newval = $merged_filename . $merge_filename_with_title_spacer . $oldval;
                             if ($oldval == '') {
                                 $newval = $merged_filename;
                             }
                             break;
                         case $lang['merge_filename_title_suffix']:
                             $newval = $oldval . $merge_filename_with_title_spacer . $merged_filename;
                             if ($oldval == '') {
                                 $newval = $merged_filename;
                             }
                             break;
                         default:
                             # Unknown option: do nothing.
                             break;
                     }
                     if ($newval !== null) {
                         update_field($ref, $read_from[$i]['ref'], $newval);
                     }
                 }
             }
         }
     }
     # Extract text from documents (e.g. PDF, DOC).
     # $unoconv_path / $unoconv_extensions must be imported as globals, otherwise the
     # isset() below can never be true inside this function.
     global $extracted_text_field, $unoconv_path, $unoconv_extensions;
     if (isset($extracted_text_field) && !$no_exif) {
         if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
             # Omit, since the unoconv process will do it during preview creation below.
         } else {
             extract_text($ref, $extension);
         }
     }
     # Store original filename in field, if set.
     global $amended_filename;
     if (isset($filename_field)) {
         if (isset($amended_filename)) {
             $filename = $amended_filename;
         }
         if (!$revert) {
             update_field($ref, $filename_field, $filename);
         } else {
             update_field($ref, $filename_field, $original_filename);
         }
     }
     if (!$revert) {
         # Clear any existing FLV file or multi-page previews.
         global $pdf_pages;
         for ($n = 2; $n <= $pdf_pages; $n++) {
             # Remove preview page.
             $path = get_resource_path($ref, true, "scr", false, "jpg", -1, $n, false);
             if (file_exists($path)) {
                 unlink($path);
             }
             # Also try the watermarked version.
             $path = get_resource_path($ref, true, "scr", false, "jpg", -1, $n, true);
             if (file_exists($path)) {
                 unlink($path);
             }
         }
         # Remove any FLV video preview (except if the actual resource is an FLV file).
         global $ffmpeg_preview_extension;
         if ($extension != $ffmpeg_preview_extension) {
             $path = get_resource_path($ref, true, "", false, $ffmpeg_preview_extension);
             if (file_exists($path)) {
                 unlink($path);
             }
         }
         # Remove any FLV preview-only file.
         $path = get_resource_path($ref, true, "pre", false, $ffmpeg_preview_extension);
         if (file_exists($path)) {
             unlink($path);
         }
         # Remove any MP3 (except if the actual resource is an MP3 file).
         if ($extension != "mp3") {
             $path = get_resource_path($ref, true, "", false, "mp3");
             if (file_exists($path)) {
                 unlink($path);
             }
         }
         # Create previews.
         global $enable_thumbnail_creation_on_upload;
         if ($enable_thumbnail_creation_on_upload) {
             create_previews($ref, false, $extension);
         } else {
             # Offline thumbnail generation is being used. Set 'has_image' to zero so the
             # offline create_previews.php script picks this up.
             sql_query("update resource set has_image=0 where ref='{$ref}'");
         }
     }
     # Update file dimensions.
     get_original_imagesize($ref, $filepath, $extension);
     hook("Uploadfilesuccess", "", array("resourceId" => $ref));
     # Update disk usage.
     update_disk_usage($ref);
     # Log this activity.
     $log_ref = resource_log($ref, "u", 0);
     hook("upload_image_after_log_write", "", array($ref, $log_ref));
     return $status;
 }
Esempio n. 25
0
/**
 * Created by PhpStorm.
 * User: kosmos
 * Date: 6/25/14
 * Time: 8:34 PM
 */
/**
 * Crawls a site starting at $base_url and collects the extracted text of every page reached.
 *
 * Links come from extract_urls(); a link is followed only when it contains $base_url,
 * has not been visited yet and passes checkContentType(). Pages that cannot be fetched
 * (or are empty) are skipped silently.
 *
 * @param string $base_url URL of the start page.
 * @return array Map of page URL => result of extract_text() for that page; an empty
 *               array when the start page cannot be fetched.
 */
function get_site_texts($base_url)
{
    // Defined up front so an unreachable start page yields an empty array rather than
    // an undefined variable.
    $text = array();
    $mainURL = $base_url;
    // Fall back to the start URL when getMainUrl() cannot provide a root.
    $detected_root = getMainUrl($mainURL);
    $rootURL = $detected_root ? $detected_root : $base_url;
    $visited_pages = array();
    // Fetch failures are deliberately suppressed: the crawl is best-effort.
    $html = @file_get_contents($base_url);
    if (!$html) {
        return $text;
    }
    array_push($visited_pages, $base_url);
    $html = removeJSCSS($html);
    $text[$mainURL] = extract_text($html);
    // Queue of URLs still to be looked at, seeded from the start page.
    $urls = extract_urls($html, $mainURL, $rootURL, $visited_pages);
    while ($urls) {
        $url = array_shift($urls);
        if (strpos($url, $mainURL) === false || in_array($url, $visited_pages) || !checkContentType($url)) {
            continue;
        }
        $html = @file_get_contents($url);
        if (!$html) {
            continue;
        }
        array_push($visited_pages, $url);
        $html = removeJSCSS($html);
        $text[$url] = extract_text($html);
        // Enqueue links that are neither queued nor visited yet.
        $extracted_urls = extract_urls($html, $url, $rootURL, $visited_pages);
        foreach ($extracted_urls as $candidate) {
            if (!in_array($candidate, $urls) && !in_array($candidate, $visited_pages)) {
                $urls[] = $candidate;
            }
        }
    }
    return $text;
}
Esempio n. 26
0
    echo '<div class="alert alert-error"> ERROR' . mysql_error() . '</div>';
}
while ($row = @mysql_fetch_array($result)) {
    ?>

<center><h1>Hello <?php 
    echo $steampersona;
    ?>
.</h1></center>
<div class="container-fluid">
  <div class="row-fluid">
    <div class="span4">
      <!--Whitelists content-->
      <?php 
    $data = $row['_Data'];
    $whitelist = extract_text($data, '"Whitelisted":["', '"],"');
    $your_array = explode('","', $whitelist);
    $arrlength = count($your_array);
    echo "<div class='well'>";
    echo "<h2>Your Whitelists</h2>";
    for ($x = 0; $x < $arrlength; $x++) {
        echo $your_array[$x];
        echo "<br>";
    }
    echo "</div>";
    ?>
    </div>
    <div class="span8">
      <!--Other content-->
      <div class="container-fluid">
  <div class="row-fluid">
Esempio n. 27
0
function ProcessFolder($folder, $version_dir, &$resource_array, &$resource_error)
{
    global $lang, $syncdir, $nogo, $staticsync_max_files, $count, $done, $modtimes, $lastsync, $ffmpeg_preview_extension, $staticsync_autotheme, $staticsync_folder_structure, $staticsync_extension_mapping_default, $staticsync_extension_mapping, $staticsync_mapped_category_tree, $staticsync_title_includes_path, $staticsync_ingest, $staticsync_mapfolders, $staticsync_alternatives_suffix, $theme_category_levels, $staticsync_defaultstate, $additional_archive_states, $staticsync_extension_mapping_append_values, $image_alternatives, $exclude_resize, $post_host, $media_endpoint, $image_required_height, $sync_bucket, $aws_key, $aws_secret_key;
    $collection = 0;
    echo "Processing Folder: {$folder}" . PHP_EOL;
    #$alt_path = get_resource_path(59, TRUE, '', FALSE, 'png', -1, 1, FALSE, '', 4);
    # List all files in this folder.
    $dh = opendir($folder);
    while (($file = readdir($dh)) !== false) {
        if ($file == '.' || $file == '..') {
            continue;
        }
        $filetype = filetype($folder . "/" . $file);
        $fullpath = $folder . "/" . $file;
        $shortpath = str_replace($syncdir . "/", '', $fullpath);
        # Work out extension
        $extension = explode(".", $file);
        if (count($extension) > 1) {
            $extension = trim(strtolower($extension[count($extension) - 1]));
        } else {
            //No extension
            $extension = "";
        }
        if (strpos($fullpath, $nogo)) {
            echo "This directory is to be ignored." . PHP_EOL;
            continue;
        }
        if ($staticsync_mapped_category_tree) {
            $path_parts = explode("/", $shortpath);
            array_pop($path_parts);
            touch_category_tree_level($path_parts);
        }
        # -----FOLDERS-------------
        if (($filetype == "dir" || $filetype == "link") && strpos($nogo, "[{$file}]") === false && strpos($file, $staticsync_alternatives_suffix) === false) {
            # Get current version direcotries.
            if (preg_match("/[0-9]{2}-[0-9]{2}-[0-9]{4}\$/", $file)) {
                if (!in_array($file, $version_dir)) {
                    array_push($version_dir, $file);
                }
                if (preg_match('/in_progress*/', $file)) {
                    echo "The Barcode is still being processed." . PHP_EOL;
                    continue;
                }
            }
            # Recurse
            ProcessFolder($folder . "/" . $file, $version_dir, $resource_array, $resource_error);
        }
        $psd_files = array();
        if (preg_match('/images/', $fullpath)) {
            $path_array = explode('/', $fullpath);
            $psd_array = array_splice($path_array, 0, array_search('images', $path_array));
            $psd_path = implode('/', $psd_array) . '/psd/';
            $psd_files = array_diff(scandir($psd_path), array('..', '.'));
            foreach ($psd_files as $index => $psd_file) {
                $psd_files[$index] = pathinfo($psd_file, PATHINFO_FILENAME);
            }
        }
        # -------FILES---------------
        if ($filetype == "file" && substr($file, 0, 1) != "." && strtolower($file) != "thumbs.db") {
            /* Below Code Adapted  from CMay's bug report */
            global $banned_extensions;
            # Check to see if extension is banned, do not add if it is banned
            if (array_search($extension, $banned_extensions)) {
                continue;
            }
            /* Above Code Adapted from CMay's bug report */
            $count++;
            if ($count > $staticsync_max_files) {
                return true;
            }
            $last_sync_date = sql_value("select value from sysvars where name = 'last_sync'", "");
            $file_creation_date = date("Y-m-d H:i:s", filectime($fullpath));
            if (isset($last_sync_date) && $last_sync_date > $file_creation_date) {
                echo "No new file found.." . PHP_EOL;
                continue;
            }
            # Already exists?
            if (!isset($done[$shortpath])) {
                echo "Processing file: {$fullpath}" . PHP_EOL;
                if ($collection == 0 && $staticsync_autotheme) {
                    # Make a new collection for this folder.
                    $e = explode("/", $shortpath);
                    $theme = ucwords($e[0]);
                    $themesql = "theme='" . ucwords(escape_check($e[0])) . "'";
                    $themecolumns = "theme";
                    $themevalues = "'" . ucwords(escape_check($e[0])) . "'";
                    if ($staticsync_folder_structure) {
                        for ($x = 0; $x < count($e) - 1; $x++) {
                            if ($x != 0) {
                                $themeindex = $x + 1;
                                if ($themeindex > $theme_category_levels) {
                                    $theme_category_levels = $themeindex;
                                    if ($x == count($e) - 2) {
                                        echo PHP_EOL . PHP_EOL . "UPDATE THEME_CATEGORY_LEVELS TO {$themeindex} IN CONFIG!!!!" . PHP_EOL . PHP_EOL;
                                    }
                                }
                                $th_name = ucwords(escape_check($e[$x]));
                                $themesql .= " AND theme{$themeindex} = '{$th_name}'";
                                $themevalues .= ",'{$th_name}'";
                                $themecolumns .= ",theme{$themeindex}";
                            }
                        }
                    }
                    $name = count($e) == 1 ? '' : $e[count($e) - 2];
                    echo "Collection {$name}, theme={$theme}" . PHP_EOL;
                    $ul_username = $theme;
                    $escaped_name = escape_check($name);
                    $collection = sql_value("SELECT ref value FROM collection WHERE name='{$escaped_name}' AND {$themesql}", 0);
                    if ($collection == 0) {
                        sql_query("INSERT INTO collection (name,created,public,{$themecolumns},allow_changes)\n                                                   VALUES ('{$escaped_name}', NOW(), 1, {$themevalues}, 0)");
                        $collection = sql_insert_id();
                    }
                }
                # Work out a resource type based on the extension.
                $type = $staticsync_extension_mapping_default;
                reset($staticsync_extension_mapping);
                foreach ($staticsync_extension_mapping as $rt => $extensions) {
                    if (in_array($extension, $extensions)) {
                        $type = $rt;
                    }
                }
                $modified_type = hook('modify_type', 'staticsync', array($type));
                if (is_numeric($modified_type)) {
                    $type = $modified_type;
                }
                # Formulate a title
                if ($staticsync_title_includes_path) {
                    $title_find = array('/', '_', ".{$extension}");
                    $title_repl = array(' - ', ' ', '');
                    $title = ucfirst(str_ireplace($title_find, $title_repl, $shortpath));
                } else {
                    $title = str_ireplace(".{$extension}", '', $file);
                }
                $modified_title = hook('modify_title', 'staticsync', array($title));
                if ($modified_title !== false) {
                    $title = $modified_title;
                }
                # Import this file
                #$r = import_resource($shortpath, $type, $title, $staticsync_ingest);
                #Check for file name containing the psd.
                if (!empty($psd_files)) {
                    $image_file_array = explode('/', $fullpath);
                    $image_file = $image_file_array[count($image_file_array) - 1];
                    $image_psd_name = explode('_', $image_file)[0];
                    if (array_search($image_psd_name, $psd_files)) {
                        #Image name is in right format.
                        if (!validate_image_size($fullpath, $image_required_height)) {
                            $resource_error['size'][$file] = $fullpath;
                        }
                        $r = import_resource($fullpath, $type, $title, $staticsync_ingest);
                        sql_query("INSERT INTO resource_data (resource,resource_type_field,value)\n                               VALUES ('{$r}', (SELECT ref FROM resource_type_field WHERE name = 'logical_id'), '{$image_psd_name}')");
                        $original_filepath = sql_query("SELECT value FROM resource_data WHERE resource = '{$r}' AND\n                                                     resource_type_field = (SELECT ref FROM resource_type_field where name = 'original_filepath')");
                        if (isset($original_filepath)) {
                            sql_query("INSERT INTO resource_data (resource,resource_type_field,value)\n                                 VALUES ('{$r}',(SELECT ref FROM resource_type_field WHERE name = 'original_filepath'), '{$fullpath}')");
                        }
                    } else {
                        echo "Filename '{$fullpath}' is not in right format.." . PHP_EOL;
                        $resource_error['name'][$file] = $fullpath;
                        continue;
                    }
                } elseif (word_in_string($exclude_resize, explode('/', $fullpath))) {
                    $r = import_resource($fullpath, $type, $title, $staticsync_ingest);
                }
                if ($r !== false) {
                    array_push($resource_array, $r);
                    # Create current version for resource.
                    #print_r($version_dir);
                    if (count($version_dir) == 1) {
                        sql_query("INSERT into resource_data (resource,resource_type_field,value)\n                                    VALUES ('{$r}',(SELECT ref FROM resource_type_field WHERE name = 'current'), 'TRUE')");
                    }
                    $sync_status = sync_to_s3($syncdir, $sync_bucket, $aws_key, $aws_secret_key);
                    if (!$sync_status) {
                        echo "Failed to sync";
                    }
                    # Add to mapped category tree (if configured)
                    if (isset($staticsync_mapped_category_tree)) {
                        $basepath = '';
                        # Save tree position to category tree field
                        # For each node level, expand it back to the root so the full path is stored.
                        for ($n = 0; $n < count($path_parts); $n++) {
                            if ($basepath != '') {
                                $basepath .= "~";
                            }
                            $basepath .= $path_parts[$n];
                            $path_parts[$n] = $basepath;
                        }
                        update_field($r, $staticsync_mapped_category_tree, "," . join(",", $path_parts));
                    }
                    #This is an override to add user data to the resouces
                    if (!isset($userref)) {
                        $ul_username = ucfirst(strtolower($ul_username));
                        $current_user_ref = sql_query("Select ref from user where username = '******' ");
                        if (!empty($current_user_ref)) {
                            $current_user_ref = $current_user_ref[0]['ref'];
                            sql_query("UPDATE resource SET created_by='{$current_user_ref}' where ref = {$r}");
                        }
                    }
                    # default access level. This may be overridden by metadata mapping.
                    $accessval = 0;
                    # StaticSync path / metadata mapping
                    # Extract metadata from the file path as per $staticsync_mapfolders in config.php
                    if (isset($staticsync_mapfolders)) {
                        foreach ($staticsync_mapfolders as $mapfolder) {
                            $match = $mapfolder["match"];
                            $field = $mapfolder["field"];
                            $level = $mapfolder["level"];
                            if (strpos("/" . $shortpath, $match) !== false) {
                                # Match. Extract metadata.
                                $path_parts = explode("/", $shortpath);
                                if ($level < count($path_parts)) {
                                    // special cases first.
                                    if ($field == 'access') {
                                        # access level is a special case
                                        # first determine if the value matches a defined access level
                                        $value = $path_parts[$level - 1];
                                        for ($n = 0; $n < 3; $n++) {
                                            # if we get an exact match or a match except for case
                                            if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n])) {
                                                $accessval = $n;
                                                echo "Will set access level to " . $lang['access' . $n] . " ({$n})" . PHP_EOL;
                                            }
                                        }
                                    } else {
                                        if ($field == 'archive') {
                                            # archive level is a special case
                                            # first determin if the value matches a defined archive level
                                            $value = $mapfolder["archive"];
                                            $archive_array = array_merge(array(-2, -1, 0, 1, 2, 3), $additional_archive_states);
                                            if (in_array($value, $archive_array)) {
                                                $archiveval = $value;
                                                echo "Will set archive level to " . $lang['status' . $value] . " ({$archiveval})" . PHP_EOL;
                                            }
                                        } else {
                                            # Save the value
                                            #print_r($path_parts);
                                            $value = $path_parts[$level - 1];
                                            if ($staticsync_extension_mapping_append_values) {
                                                $given_value = $value;
                                                // append the values if possible...not used on dropdown, date, categroy tree, datetime, or radio buttons
                                                $field_info = get_resource_type_field($field);
                                                if (in_array($field['type'], array(0, 1, 2, 4, 5, 6, 7, 8))) {
                                                    $old_value = sql_value("select value value from resource_data where resource={$r} and resource_type_field={$field}", "");
                                                    $value = append_field_value($field_info, $value, $old_value);
                                                }
                                            }
                                            update_field($r, $field, trim($value));
                                            if (strtotime(trim($value))) {
                                                add_keyword_mappings($r, trim($value), $field, false, true);
                                            } else {
                                                add_keyword_mappings($r, trim($value), $field);
                                            }
                                            if ($staticsync_extension_mapping_append_values) {
                                                $value = $given_value;
                                            }
                                            echo " - Extracted metadata from path: {$value}" . PHP_EOL;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    #Resize only original images.
                    if (!word_in_string($exclude_resize, explode('/', $fullpath))) {
                        echo "Creating preview..";
                        create_previews($r, false, $extension, false, false, -1, false, $staticsync_ingest);
                    }
                    # update access level
                    sql_query("UPDATE resource SET access = '{$accessval}',archive='{$staticsync_defaultstate}' WHERE ref = '{$r}'");
                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && file_exists($altpath)) {
                        $adh = opendir($altpath);
                        while (($altfile = readdir($adh)) !== false) {
                            $filetype = filetype($altpath . "/" . $altfile);
                            if ($filetype == "file" && substr($file, 0, 1) != "." && strtolower($file) != "thumbs.db") {
                                # Create alternative file
                                # Find extension
                                $ext = explode(".", $altfile);
                                $ext = $ext[count($ext) - 1];
                                $description = str_replace("?", strtoupper($ext), $lang["originalfileoftype"]);
                                $file_size = filesize_unlimited($altpath . "/" . $altfile);
                                $aref = add_alternative_file($r, $altfile, $description, $altfile, $ext, $file_size);
                                $path = get_resource_path($r, true, '', true, $ext, -1, 1, false, '', $aref);
                                rename($altpath . "/" . $altfile, $path);
                                # Move alternative file
                            }
                        }
                    }
                    # Add to collection
                    if ($staticsync_autotheme) {
                        $test = '';
                        $test = sql_query("SELECT * FROM collection_resource WHERE collection='{$collection}' AND resource='{$r}'");
                        if (count($test) == 0) {
                            sql_query("INSERT INTO collection_resource (collection, resource, date_added)\n                                            VALUES ('{$collection}', '{$r}', NOW())");
                        }
                    }
                } else {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?)" . PHP_EOL;
                }
            } else {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath, $modtimes) && $filemod > strtotime($modtimes[$shortpath])) {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("SELECT ref, has_image, file_modified, file_extension FROM resource\n                                        WHERE file_path='" . escape_check($shortpath) . "'");
                    if (count($rd) > 0) {
                        $rd = $rd[0];
                        $rref = $rd["ref"];
                        echo "Resource {$rref} has changed, regenerating previews: {$fullpath}" . PHP_EOL;
                        extract_exif_comment($rref, $rd["file_extension"]);
                        # extract text from documents (e.g. PDF, DOC).
                        global $extracted_text_field;
                        if (isset($extracted_text_field)) {
                            if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
                                // omit, since the unoconv process will do it during preview creation below
                            } else {
                                extract_text($rref, $extension);
                            }
                        }
                        # Store original filename in field, if set
                        global $filename_field;
                        if (isset($filename_field)) {
                            update_field($rref, $filename_field, $file);
                        }
                        create_previews($rref, false, $rd["file_extension"], false, false, -1, false, $staticsync_ingest);
                        sql_query("UPDATE resource SET file_modified=NOW() WHERE ref='{$rref}'");
                    }
                }
            }
        }
    }
}
Esempio n. 28
0
/**
 * Recursively scan a StaticSync folder, importing files that have not been
 * seen before and refreshing files that changed since they were last processed.
 *
 * For every regular file (dot-files and Thumbs.db are skipped):
 *  - if its path relative to $syncdir is not in $done, it is imported with
 *    import_resource() and then optionally placed in a theme collection,
 *    tagged from its path ($staticsync_mapfolders / mapped category tree) and
 *    given any alternative files found in "<file><alternatives suffix>";
 *  - otherwise, if $modtimes holds an older timestamp for it, metadata is
 *    re-extracted and previews are regenerated.
 * Sub-folders (and links) are processed recursively unless listed in $nogo or
 * their name contains the alternatives suffix.
 *
 * All configuration and progress state is read from the globals declared at
 * the top of the body; $count is incremented for every new file.
 *
 * @param string $folder Path of the folder to scan, without trailing slash.
 * @return bool|null true when more than $max new files have been counted,
 *                   false when the folder cannot be opened, null otherwise.
 */
function ProcessFolder($folder)
	{
	#echo "<br>processing folder $folder";
	global $syncdir,$nogo,$max,$count,$done,$modtimes,$lastsync, $ffmpeg_preview_extension, $staticsync_autotheme, $staticsync_folder_structure,$staticsync_extension_mapping_default, $staticsync_extension_mapping, $staticsync_mapped_category_tree,$staticsync_title_includes_path, $staticsync_ingest, $staticsync_mapfolders,$staticsync_alternatives_suffix;
	global $lang, $theme_category_levels, $extracted_text_field, $filename_field;
	# These two were previously read without being declared global, so the
	# "let unoconv extract the text" branch below could never be taken.
	global $unoconv_path, $unoconv_extensions;
	
	$collection=0;
	
	echo "Processing Folder: $folder\n";
	
	# List all files in this folder.
	$dh=opendir($folder);
	if ($dh===false)
		{
		# readdir() on a failed handle never yields the false that ends the
		# loop below (older PHP) or throws (PHP 8), so bail out here instead.
		echo " *** Unable to open folder: $folder\n";
		return(false);
		}

	while (($file = readdir($dh)) !== false)
		{
		$filetype=filetype($folder . "/" . $file);
		$fullpath=$folder . "/" . $file;
		$shortpath=str_replace($syncdir . "/","",$fullpath);
		# Work out extension
		$extension=explode(".",$file);$extension=trim(strtolower($extension[count($extension)-1]));
		
		if ($staticsync_mapped_category_tree)
			{
			# Folder components of the path (file name removed).
			$path_parts=explode("/",$shortpath);
			array_pop($path_parts);
			touch_category_tree_level($path_parts);
			}	
		
		# -----FOLDERS-------------
		if ((($filetype=="dir") || $filetype=="link") && ($file!=".") && ($file!="..") && (strpos($nogo,"[" . $file . "]")===false) && strpos($file,$staticsync_alternatives_suffix)===false)
			{
			# Recurse
			#echo "\n$file : " . filemtime($folder . "/" . $file) . " > " . $lastsync;
			# NOTE: the leading "true ||" makes this unconditional; the
			# $lastsync comparison is effectively disabled.
			if (true || (strlen($lastsync)=="") || (filemtime($folder . "/" . $file)>($lastsync-26000)))
				{
				ProcessFolder($folder . "/" . $file);
				}
			}
			
		# -------FILES---------------
		if (($filetype=="file") && (substr($file,0,1)!=".") && (strtolower($file)!="thumbs.db"))
			{
			# Already exists?
			if (!in_array($shortpath,$done))
				{
				$count++;
				if ($count>$max)
					{
					closedir($dh);
					return(true);
					}

				echo "Processing file: $fullpath\n";
				
				if ($collection==0 && $staticsync_autotheme)
					{
					# Make a new collection for this folder.
					$e=explode("/",$shortpath);
					$theme=ucwords($e[0]);
					$themesql="theme='".ucwords(escape_check($e[0]))."'";
					$themecolumns="theme";
					$themevalues="'".ucwords(escape_check($e[0]))."'";
					
					if ($staticsync_folder_structure)
						{
						# Each folder level below the first maps to theme2, theme3, ...
						for ($x=1;$x<count($e)-1;$x++)
							{
							$themeindex=$x+1;
							if ($themeindex>$theme_category_levels)
								{
								$theme_category_levels=$themeindex;
								if ($x==count($e)-2){echo "\n\nUPDATE THEME_CATEGORY_LEVELS TO $themeindex IN CONFIG!!!!\n\n";}
								}
							$themesql.=" and theme".$themeindex."='".ucwords(escape_check($e[$x]))."'";
							$themevalues.=",'".ucwords(escape_check($e[$x]))."'";
							$themecolumns.=",theme".$themeindex;
							}
						}
					
					# Collection is named after the folder directly containing the file.
					$name=(count($e)==1?"":$e[count($e)-2]);
					echo "\nCollection $name, theme=$theme";
					$collection=sql_value("select ref value from collection where name='" . escape_check($name) . "' and " . $themesql ,0);
					if ($collection==0)
						{
						sql_query("insert into collection (name,created,public,$themecolumns,allow_changes) values ('" . escape_check($name) . "',now(),1,".$themevalues.",0)");
						$collection=sql_insert_id();
						}
					}

				# Work out a resource type based on the extension.
				$type=$staticsync_extension_mapping_default;
				reset ($staticsync_extension_mapping);
				foreach ($staticsync_extension_mapping as $rt=>$extensions)
					{
					if (in_array($extension,$extensions)) {$type=$rt;}
					}
				
				# Formulate a title
				if ($staticsync_title_includes_path)
					{
					$title=str_ireplace("." . $extension,"",str_replace("/"," - ",$shortpath));
					$title=ucfirst(str_replace("_"," ",$title));
					}
				else
					{
					$title=str_ireplace("." . $extension,"",$file);
					}
				
				# Import this file
				$r=import_resource($shortpath,$type,$title,$staticsync_ingest);
				if ($r!==false)
					{
					# Add to mapped category tree (if configured)
					if (isset($staticsync_mapped_category_tree))
						{
						$basepath="";
						# Save tree position to category tree field
				
						# For each node level, expand it back to the root so the full path is stored.
						for ($n=0;$n<count($path_parts);$n++)
							{
							if ($basepath!="") {$basepath.="~";}
							$basepath.=$path_parts[$n];
							$path_parts[$n]=$basepath;
							}
						
						update_field ($r,$staticsync_mapped_category_tree,"," . join(",",$path_parts));
						#echo "update_field($r,$staticsync_mapped_category_tree," . "," . join(",",$path_parts) . ");\n";
						}			

					// default access level. This may be overridden by metadata mapping.
					$accessval = 0;

					# StaticSync path / metadata mapping
					# Extract metadata from the file path as per $staticsync_mapfolders in config.php
					if (isset($staticsync_mapfolders))
						{
						foreach ($staticsync_mapfolders as $mapfolder)
							{
							$match=$mapfolder["match"];
							$field=$mapfolder["field"];
							$level=$mapfolder["level"];

							if (strpos("/" . $shortpath,$match)!==false)
								{
								# Match. Extract metadata.
								$path_parts=explode("/",$shortpath);
								if ($level<count($path_parts))
									{
									// special cases first.
									if ($field == 'access')
										{
										// access level is a special case
										// first determine if the value matches a defined access level
										$value = $path_parts[$level-1];

										for ($n=0; $n<3; $n++)
											{
											// if we get an exact match or a match except for case
											if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n]))
												{
												$accessval = $n;
												echo "Will set access level to " . $lang['access' . $n] . " ($n)\n";
												}
											}
										}
									else
										{
										# Save the value
										print_r($path_parts);
										$value=$path_parts[$level-1];
										update_field ($r,$field,$value);
										echo " - Extracted metadata from path: $value\n";
										}
									}
								}
							}
						}
					
					// update access level
					sql_query("update resource set access = '$accessval' where ref = '$r'");
					
					# Add any alternative files
					$altpath=$fullpath . $staticsync_alternatives_suffix;
					if ($staticsync_ingest && file_exists($altpath))
						{
						$adh=opendir($altpath);
						if ($adh!==false)
							{
							while (($altfile = readdir($adh)) !== false)
								{
								# Separate variable so the outer $filetype is not overwritten.
								$altfiletype=filetype($altpath . "/" . $altfile);
								# Filter on the alternative file's own name (the outer $file
								# was tested here before, which let dot-files and Thumbs.db through).
								if (($altfiletype=="file") && (substr($altfile,0,1)!=".") && (strtolower($altfile)!="thumbs.db"))
									{
									# Create alternative file
									
									# Find extension
									$ext=explode(".",$altfile);$ext=$ext[count($ext)-1];
									
									$aref = add_alternative_file($r, $altfile, str_replace("?",strtoupper($ext),$lang["originalfileoftype"]), $altfile, $ext, filesize_unlimited($altpath . "/" . $altfile));
									$path=get_resource_path($r, true, "", true, $ext, -1, 1, false, "", $aref);
									rename ($altpath . "/" . $altfile,$path); # Move alternative file
									}
								}
							closedir($adh);
							}
						}
					
					# Add to collection
					if ($staticsync_autotheme)
						{
						$test=sql_query("select * from collection_resource where collection='$collection' and resource='$r'");
						if (count($test)==0)
							{
							sql_query("insert into collection_resource(collection,resource,date_added) values ('$collection','$r',now())");
							}
						}
					}
				else
					{
					# Import failed - file still being uploaded?
					echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?) \n";
					}
				}
			else
				{
				# check modified date and update previews if necessary
				$filemod=filemtime($fullpath);
				if (array_key_exists($shortpath,$modtimes) && ($filemod>strtotime($modtimes[$shortpath])))
					{
					# File has been modified since we last created previews. Create again.
					$rd=sql_query("select ref,has_image,file_modified,file_extension from resource where file_path='" . (escape_check($shortpath)) . "'");
					if (count($rd)>0)
						{
						$rd=$rd[0];
						$rref=$rd["ref"];
						
						echo "Resource $rref has changed, regenerating previews: $fullpath\n";
						extract_exif_comment($rref,$rd["file_extension"]);
						
						# extract text from documents (e.g. PDF, DOC).
						if (isset($extracted_text_field))
							{
							if (isset($unoconv_path) && is_array($unoconv_extensions) && in_array($extension,$unoconv_extensions))
								{
								// omit, since the unoconv process will do it during preview creation below
								}
							else
								{
								extract_text($rref,$extension);
								}
							}

						# Store original filename in field, if set
						if (isset($filename_field))
							{
							update_field($rref,$filename_field,$file);	
							}
						
						create_previews($rref,false,$rd["file_extension"]);
						sql_query("update resource set file_modified=now() where ref='$rref'");
						}
					}
				}
			}	
		}	

	closedir($dh);
	}
Esempio n. 29
0
/**
 * Fetch current conditions and a four-day forecast for a location by scraping
 * a Google "weather <location>" search result page.
 *
 * @param string $location In/out. The requested location; it is replaced first
 *                         by the value returned from get_location() (when that
 *                         succeeds) and then by the heading text scraped from
 *                         the result page.
 * @param string $nick     Nick passed to get_location() and get_prefs().
 * @param bool   $getdata  When not False, return an array of the current
 *                         conditions instead of the formatted line; the
 *                         user's unit preference is ignored in that case.
 * @return string|array|bool Formatted forecast line (prefixed with an IRC
 *                         colour code), or the data array when $getdata is
 *                         set, or False when the location is empty or the page
 *                         could not be parsed into the expected shape.
 */
function process_weather(&$location, $nick, $getdata = False)
{
    $loc = get_location($location, $nick);
    term_echo("*** WEATHER LOCATION LOOKUP: {$loc}");
    if ($loc === False) {
        if ($location == "") {
            return False;
        }
        $loc = $location;
    }
    $location = $loc;
    $loc_query = filter($loc, VALID_UPPERCASE . VALID_LOWERCASE . VALID_NUMERIC . " ");
    $prefs = get_prefs($nick);
    # fheit=1 requests Fahrenheit from the search page; a "metric" preference switches it off.
    $fheit = "1";
    $use_unit_pref = False;
    if (isset($prefs["unit"]) == True and $getdata == False) {
        if ($prefs["unit"] == "metric") {
            $use_unit_pref = True;
            $fheit = "0";
        }
        if ($prefs["unit"] == "imperial") {
            $use_unit_pref = True;
        }
    }
    # https://www.google.com/search?gbv=1&q=weather+traralgon
    # Build the request path once so the logged URL and the fetched URL cannot drift apart.
    $request_uri = "/search?gbv=1&fheit={$fheit}&q=weather+" . urlencode($loc_query);
    term_echo("http://www.google.com.au" . $request_uri);
    $response = wget("www.google.com.au", $request_uri, 80, ICEWEASEL_UA, "", 60);
    $html = strip_headers($response);
    $delim1 = "<div class=\"e\">";
    $delim2 = "</table>";
    $html = extract_text($html, $delim1, $delim2);
    if ($html === False) {
        return False;
    }
    $html = replace_ctrl_chars($html, " ");
    $html = str_replace("  ", " ", $html);
    $html = html_decode($html);
    $html = html_decode($html);
    $location = trim(strip_tags(extract_raw_tag($html, "h3")));
    if (substr($location, 0, 12) == "Weather for ") {
        $location = substr($location, 12);
    }
    $wind = trim(strip_tags(extract_text_nofalse($html, "style=\"white-space:nowrap;padding-right:15px;color:#666\">Wind: ", "</span>")));
    $humidity = extract_text($html, "style=\"white-space:nowrap;padding-right:0px;vertical-align:top;color:#666\">Humidity: ", "</td>");
    # Walk the table cells, collecting condition text, temperatures and day labels.
    $parts = explode("<td", $html);
    $temps = array();
    $tempsC = array();
    $conds = array();
    $days = array();
    for ($i = 1; $i < count($parts); $i++) {
        $cond = extract_text($parts[$i], "alt=\"", "\"");
        $temp = extract_text($parts[$i], "<span class=\"wob_t\" style=\"display:inline\">", "</span>");
        $day = extract_text($parts[$i], "colspan=\"2\" style=\"vertical-align:top;text-align:center\">", "</td>");
        if ($cond !== False) {
            $conds[] = strtolower($cond);
        }
        if ($temp !== False) {
            $temps[] = $temp;
            # Take the leading number of the scraped value. Chopping a fixed two
            # bytes off the end leaves a stray byte behind when the degree sign is
            # multi-byte, which makes the arithmetic operand non-numeric.
            $tempsC[] = sprintf("%.0f", ((float) $temp - 32) * 5 / 9) . "°C";
        }
        if ($day !== False) {
            $days[] = $day;
        }
    }
    # When no wind figure is present one fewer temperature is expected, so the
    # forecast temperature indexes below shift down by one.
    $offset = 0;
    $wind_caption = ", wind " . $wind;
    if ($wind == "") {
        $offset = 1;
        $wind_caption = "";
    }
    # Expect current + 4 forecast conditions, the matching temperatures and 4 day labels.
    if (count($conds) != 5 or count($temps) != 10 - $offset or count($tempsC) != 10 - $offset or count($days) != 4) {
        return False;
    }
    if ($use_unit_pref == False) {
        $result = $location . " - currently " . $temps[0] . " / " . $tempsC[0] . ", " . $conds[0] . $wind_caption . ", humidity " . $humidity . " - ";
    } else {
        $result = $location . " - currently " . $temps[0] . ", " . $conds[0] . $wind_caption . ", humidity " . $humidity . " - ";
    }
    $fulldays = array("Sun." => "Sunday", "Mon." => "Monday", "Tue." => "Tuesday", "Wed." => "Wednesday", "Thu." => "Thursday", "Fri." => "Friday", "Sat." => "Saturday");
    for ($i = 1; $i <= 4; $i++) {
        $day = trim($days[$i - 1]);
        # Fall back to the scraped label when it is not one of the known
        # abbreviations, rather than reading an undefined index.
        if (isset($fulldays[$day]) == True) {
            $day = $fulldays[$day];
        }
        $hi = $i * 2 + 1 - $offset;
        $lo = $i * 2 - $offset;
        if ($use_unit_pref == False) {
            $result = $result . $day . " " . $conds[$i] . " (" . $temps[$hi] . ":" . $temps[$lo] . " / " . $tempsC[$hi] . ":" . $tempsC[$lo] . ")";
        } else {
            $result = $result . $day . " " . $conds[$i] . " (" . $temps[$hi] . ":" . $temps[$lo] . ")";
        }
        if ($i < 4) {
            $result = $result . ", ";
        }
    }
    $color = "10";
    if (isset($prefs["color"]) == True) {
        $color = $prefs["color"];
    }
    # chr(3) followed by a colour number is the IRC colour control sequence.
    $result = chr(3) . $color . $result;
    if ($getdata != False) {
        $data = array();
        $data["tempF"] = $temps[0];
        $data["tempC"] = $tempsC[0];
        $data["cond"] = $conds[0];
        $data["wind"] = $wind_caption;
        $data["humidity"] = $humidity;
        $data["location"] = $location;
        return $data;
    }
    return $result;
}
Esempio n. 30
-6
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $tmp_urls, $delay_time, $domain_arr, $charSet, $url_status, $whitelist, $blacklist, $supdomain, $smp, $realnum, $dup_url, $entities, $command_line;
    if (DEBUG == '0') {
        error_reporting(0);
    } else {
        error_reporting(E_ERROR);
        //  otherwise  a non existing siemap.xml  would always cause a warning message
    }
    $needsReindex = 1;
    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;
    if ($smp != 1 && Configure::read('follow_sitemap') == 1) {
        //  enter here if we don't already know a valid sitemap and if admin settings allowed us to do so
        $tmp_urls = get_temp_urls($sessid);
        //  reload previous temp
        $url2 = remove_sessid(convert_url($url));
        // get folder where sitemap should be and if exists, cut existing filename, suffix and subfolder
        //                Configure::read('local') = "http://localhost/publizieren/";   //  your base adress for your local server
        $sitemap_name = "sitemap.xml";
        //  could be individualized
        $host = parse_url($url2);
        $hostname = $host[host];
        if ($hostname == 'localhost') {
            $host1 = str_replace(Configure::read('local'), '', $url2);
        }
        $pos = strpos($host1, "/");
        //      on local server delete all behind the /
        if ($pos) {
            $host1 = substr($host1, 0, $pos);
        }
        //      build full adress again, now only until host
        if ($hostname == 'localhost') {
            $url2 = Configure::read('local') . $host1;
        } else {
            $url2 = "{$host['scheme']}://{$hostname}";
        }
        $input_file = "{$url2}/{$sitemap_name}";
        // create path to sitemap
        if ($handle = fopen($input_file, "r")) {
            // happy times, we found a new sitemap
            $links = get_sitemap($input_file, TABLE_PREFIX);
            // now extract links from sitemap.xml
            if ($links != '') {
                //  if links were extracted from sitemap.xml
                reset($links);
                while ($thislink = each($links)) {
                    //  check if we already know this link as a site url
                    $result = mysql_query("select url from " . TABLE_PREFIX . "sites where url like '{$thislink['1']}%'");
                    if (DEBUG > '0') {
                        echo mysql_error();
                    }
                    $rows = mysql_num_rows($result);
                    if ($rows == '0') {
                        // for all new links: save in temp table
                        mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$thislink['1']}', '{$level}', '{$sessid}')");
                        if (DEBUG > '0') {
                            echo mysql_error();
                        }
                    }
                }
                clean_resource($result);
                $smp = '1';
                //     there was a valid sitemap and we stored the new links
            }
            unset($links, $input_file);
            fclose($handle);
        }
    }
    if (strstr($url_status['state'], "Relocation")) {
        $url = eregi_replace(" ", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = mysql_query("select link from " . TABLE_PREFIX . "temp where link='{$url}' && id = '{$sessid}'");
            if (DEBUG > '0') {
                echo mysql_error();
            }
            $rows = mysql_num_rows($result);
            if ($rows == 0) {
                mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')");
                if (DEBUG > '0') {
                    echo mysql_error();
                }
            }
            clean_resource($result);
        }
        $url_status['state'] == "redirected";
    }
    ini_set("user_agent", Configure::read('user_agent'));
    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        if (time() - $delay_time < Configure::read('min_delay')) {
            sleep(Configure::read('min_delay') - (time() - $delay_time));
        }
        $delay_time = time();
        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                while ($buffer = @fgets($fl, 4096)) {
                    $file .= $buffer;
                }
                unset($buffer);
            } else {
                $file_read_error = 1;
            }
            fclose($fl);
        }
        if ($file_read_error || Configure::read('utf8') == 1) {
            unset($file);
            $contents = getFileContents($url);
            // parse_url to get charset
            $file = $contents['file'];
        }
        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);
        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
            //for DOCs, PDFs etc we need special converter
            if ($file == 'ERROR') {
                //      if error, suppress further indexing
                $OKtoIndex = 0;
                $file_read_error = 1;
            }
        }
        if (Configure::read('utf8') == 1) {
            //   enter here if file should be translated into utf-8
            $charSet = $contents['charset'];
            if ($charSet == '') {
                // if we did not find any charset, we will use our own
                $charSet = Configure::read('home_charset');
            }
            $charSet = strtoupper(trim($charSet));
            if (strpos($charSet, '8859')) {
                $conv_file = html_entity_decode($file);
            } else {
                $conv_file = $file;
                //  pure code
            }
            if ($charSet != "UTF-8") {
                //  enter here only, if site / file is not jet UTF-8 coded
                $iconv_file = iconv($charSet, "UTF-8", $conv_file);
                //      if installed, first try to use PHP function iconv
                if (trim($iconv_file) == "") {
                    // iconv is not installed or input charSet not available. We need to use class ConvertCharset
                    $charSet = str_ireplace('iso-', '', $charSet);
                    $charSet = str_ireplace('iso', '', $charSet);
                    $NewEncoding = new ConvertCharset($charSet, "utf-8");
                    $NewFileOutput = $NewEncoding->Convert($conv_file);
                    $file = $NewFileOutput;
                } else {
                    $file = $iconv_file;
                }
                unset($conv_file, $iconv_file, $NewEncoding, $NewFileOutput);
            }
        }
        $data = clean_file($file, $url, $url_status['content']);
        $newmd5sum = md5($data['content']);
        if ($md5sum == $newmd5sum) {
            printStandardReport('md5notChanged', $command_line);
            $OKtoIndex = 0;
            $realnum--;
        } else {
            if (Configure::read('use_white') == '1') {
                $found = '0';
                //  check if content of page matches any word in whitelist
                foreach ($whitelist as $key => $value) {
                    $met = stripos($file, $value);
                    if ($met) {
                        $found = '1';
                    }
                }
                if ($found == '0') {
                    printStandardReport('noWhitelist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            if (Configure::read('use_black') == '1') {
                $found = '0';
                //  check if content of page matches any word in blacklist
                foreach ($blacklist as $key => $value) {
                    $met = stripos($file, $value);
                    if ($met) {
                        $found = '1';
                    }
                }
                if ($found == '1') {
                    printStandardReport('matchBlacklist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            //     check for duplicate page content
            $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where md5sum='{$newmd5sum}'");
            if (DEBUG > '0') {
                echo mysql_error();
            }
            if (mysql_num_rows($result) > 0) {
                //  display warning message and urls with duplicate content
                printStandardReport('duplicate', $command_line);
                $num_rows = mysql_num_rows($result);
                for ($i = 0; $i < $num_rows; $i++) {
                    $link_id = mysql_result($result, $i, "link_id");
                    $num = $i + 1;
                    $res = mysql_query("select url from " . TABLE_PREFIX . "links where link_id like '{$link_id}'");
                    if (DEBUG > '0') {
                        echo mysql_error();
                    }
                    $row = mysql_fetch_row($res);
                    $dup_url = $row[0];
                    clean_resource($res);
                    printDupReport($dup_url, $command_line);
                }
                if (Configure::read('dup_content') == '0') {
                    //  enter here, if pages with duplicate content should not be indexed/re-indexed
                    $OKtoIndex = 0;
                    $realnum--;
                } else {
                    $OKtoIndex = 1;
                }
            }
        }
        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            $urlparts = parse_url($url);
            $newdomain = $urlparts['host'];
            $type = 0;
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }
            if (Configure::read('use_white') == '1') {
                $found = '0';
                //  check if content of page matches any word in whitelist
                foreach ($whitelist as $key => $value) {
                    $met = stripos($data[fulltext], $value);
                    if ($met) {
                        $found = '1';
                    }
                }
                if ($found == '0') {
                    printStandardReport('noWhitelist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            if (Configure::read('use_black') == '1') {
                $found = '0';
                //  check if content of page matches any word in blacklist
                foreach ($blacklist as $key => $value) {
                    $met = stripos($data[fulltext], $value);
                    if ($met) {
                        $found = '1';
                    }
                }
                if ($found == '1') {
                    printStandardReport('matchBlacklist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            $wordarray = unique_array(explode(" ", $data['content']));
            if ($smp != 1) {
                if ($data['nofollow'] != 1) {
                    $links = get_links($file, $url, $can_leave_domain, $data['base']);
                    $links = distinct_array($links);
                    $all_links = count($links);
                    if ($all_links > Configure::read('max_links')) {
                        $all_links = Configure::read('max_links');
                    }
                    $links = array_slice($links, 0, Configure::read('max_links'));
                    if ($realnum < Configure::read('max_links')) {
                        $numoflinks = 0;
                        // if there are any links, add them to the temp table, but only if the URL isn't already there
                        if (is_array($links)) {
                            reset($links);
                            if (DEBUG == '2') {
                                //  if debug mode, show details
                                printStandardReport('newLinks', $command_line);
                            }
                            while ($thislink = each($links)) {
                                if ($tmp_urls[$thislink[1]] != 1) {
                                    $tmp_urls[$thislink[1]] = 1;
                                    $numoflinks++;
                                    if (DEBUG == '2') {
                                        $act_link = $thislink[1];
                                        printNewLinks($act_link);
                                    }
                                    if ($numoflinks <= Configure::read('max_links')) {
                                        mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$thislink['1']}', '{$level}', '{$sessid}')");
                                    }
                                    if (DEBUG > '0') {
                                        echo mysql_error();
                                    }
                                }
                            }
                        }
                    }
                } else {
                    printStandardReport('noFollow', $command_line);
                }
                unset($file);
            }
            if ($OKtoIndex == 1) {
                if (Configure::read('link_check') == 0) {
                    $title = $data['title'];
                    $host = $data['host'];
                    $path = $data['path'];
                    $fulltxt = $data['fulltext'];
                    $desc = substr($data['description'], 0, 254);
                    $url_parts = parse_url($url);
                    $domain_for_db = $url_parts['host'];
                    if (isset($domain_arr[$domain_for_db])) {
                        $dom_id = $domain_arr[$domain_for_db];
                    } else {
                        mysql_query("insert into " . TABLE_PREFIX . "domains (domain) values ('{$domain_for_db}')");
                        $dom_id = mysql_insert_id();
                        $domain_arr[$domain_for_db] = $dom_id;
                    }
                    $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords'], $url_parts);
                    // if there are enough words to index, add the link to the database, get its id, and save the words and their relation to the link
                    if (is_array($wordarray) && count($wordarray) > Configure::read('min_words_per_page')) {
                        if ($md5sum == '') {
                            mysql_query("insert into " . TABLE_PREFIX . "links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', '{$title}', '{$desc}', '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', {$thislevel})");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where url='{$url}'");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            $row = mysql_fetch_row($result);
                            $link_id = $row[0];
                            clean_resource($result);
                            if (DEBUG == '2') {
                                //  if debug mode, show details
                                printStandardReport('newKeywords', $command_line);
                            }
                            save_keywords($wordarray, $link_id, $dom_id);
                            if (DEBUG == '2') {
                                printStandardReport('indexed1', $command_line);
                            } else {
                                printStandardReport('indexed', $command_line);
                            }
                        } else {
                            if ($md5sum != '' && $md5sum != $newmd5sum) {
                                //if page has changed, start updating
                                $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where url='{$url}'");
                                if (DEBUG > '0') {
                                    echo mysql_error();
                                }
                                $row = mysql_fetch_row($result);
                                $link_id = $row[0];
                                for ($i = 0; $i <= 15; $i++) {
                                    $char = dechex($i);
                                    mysql_query("delete from " . TABLE_PREFIX . "link_keyword{$char} where link_id={$link_id}");
                                    if (DEBUG > '0') {
                                        echo mysql_error();
                                    }
                                }
                                clean_resource($result);
                                if (DEBUG == '2') {
                                    //  if debug mode, show details
                                    printStandardReport('newKeywords', $command_line);
                                }
                                save_keywords($wordarray, $link_id, $dom_id);
                                $query = "update " . TABLE_PREFIX . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level={$thislevel} where link_id={$link_id}";
                                mysql_query($query);
                                if (DEBUG > '0') {
                                    echo mysql_error();
                                }
                                if (DEBUG == '2') {
                                    printStandardReport('re-indexed1', $command_line);
                                } else {
                                    printStandardReport('re-indexed', $command_line);
                                }
                            }
                        }
                    } else {
                        printStandardReport('minWords', $command_line);
                        $realnum--;
                    }
                } else {
                    printStandardReport('link_okay', $command_line);
                }
                unset($wordarray, $title, $fulltxt, $desc);
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }
    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    } else {
        if ($reindex == 1) {
        }
    }
    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    if ($smp != 1) {
        //      if valid sitemap found, no LinkReport
        printLinksReport($numoflinks, $all_links, $command_line);
    }
}