/**
 * Downloads a page and returns the text found between two delimiters.
 *
 * $trailing has the form "<url> <delim1><><delim2>": the first space-separated
 * token is the URL; the remainder is split on "<>" into the start and end
 * delimiters that are handed to extract_text().
 *
 * @param string $trailing command text as described above
 * @return string|bool the extracted text with tags stripped, html_decode()
 *                     applied twice and surrounding whitespace trimmed, or
 *                     False when parsing, URL resolution or extraction fails
 *                     or the final text is empty
 */
function quick_wget($trailing) {
    # first token is the url
    $tokens = explode(" ", $trailing);
    delete_empty_elements($tokens);
    if (count($tokens) < 2) {
        return False;
    }
    $url = $tokens[0];
    array_shift($tokens);

    # everything after the url holds the two delimiters, separated by "<>"
    $delims = explode("<>", implode(" ", $tokens));
    delete_empty_elements($delims);
    if (count($delims) < 2) {
        return False;
    }
    $start_delim = trim($delims[0]);
    $end_delim = trim($delims[1]);

    # get_host_and_uri() is handed the three variables it is expected to fill in
    $host = "";
    $uri = "";
    $port = "";
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        return False;
    }

    $text = extract_text(wget_ssl($host, $uri, $port), $start_delim, $end_delim);
    if ($text === False) {
        return False;
    }

    $text = trim(html_decode(html_decode(strip_tags($text))));
    return ($text == "") ? False : $text;
}
# Store the original filename in the configured field (base name only when
# $use_local is set).
if (isset($filename_field)) {
    $filename = $use_local ? mb_basename($uploadfiles[$n]) : $uploadfiles[$n];
    update_field($ref, $filename_field, $filename);
}

# Get file metadata, unless getval("no_exif") yields a non-empty value.
if (getval("no_exif", "") == "") {
    extract_exif_comment($ref, $extension);
}

# Extract text from documents (e.g. PDF, DOC).
# NOTE(review): $no_exif is not assigned anywhere in this fragment; presumably
# it is set earlier in the file - confirm.
global $extracted_text_field;
if (isset($extracted_text_field) && !$no_exif) {
    extract_text($ref, $extension);
}

$done++;

# Add to collection?
if ($collection != "") {
    $refs[] = $ref;
}

# Log this
daily_stat("Resource upload", $ref);
resource_log($ref, 'u', 0);
}
//Save the data
save_resource_data($ref, false);

//Update creation times, extension and title
sql_query("insert into resource_data (resource, resource_type_field, value) values ($ref,12,now()),($ref,148,now())");
//The extension is escaped the same way as the title before it goes into the statement.
sql_query("update resource set file_extension = '" . escape_check($file_extension) . "', field12 = now(), title ='" . escape_check($filename) . "' WHERE ref = '" . $ref . "'");

$data['success'] = true;
$data['status'] = "success";
$data['error'] = false;
$data['textStatus'] = $file . " - Successfully Added";
$data['ref'] = $ref;
} else {
    $data['error'] = true;
    $data['textStatus'] = "could not move file to tmp";
}

//NOTE(review): everything below runs whether or not the branch above succeeded;
//confirm that $ref is always defined when the failure branch was taken.

//Resource type 2 gets its text extracted.
if ($resource_type == 2) {
    extract_text($ref, $file_extension);
}

if ($resource_type != 2 && $resource_type != 3 && $resource_type != 4) {
    //create preview files in that directory
    create_previews_using_im($ref, false, $file_extension);
    $nothumb = false;
} else {
    $nothumb = true;
    sql_query("UPDATE resource SET is_transcoding = 1 WHERE ref = $ref");

    //Process previews in the background and continue.
    //Each argument is shell-quoted: this keeps a crafted extension from
    //injecting commands, and keeps an empty extension from shifting the
    //position of the arguments that follow it.
    $attempts = 1;
    $command = "/usr/bin/php -q -f /var/www/plugins/mia_upload/pages/background_previews.php "
        . escapeshellarg((string) $resource_type) . " "
        . escapeshellarg((string) $ref) . " "
        . escapeshellarg((string) $file_extension) . " "
        . escapeshellarg((string) $attempts);
    exec("$command > /dev/null &", $arrOutput);
}

savetoelastic($ref);
echo(json_encode($data));
/**
 * Relays messages between this server and freenode through the "sylnt" minion.
 *
 * Three things happen here:
 *  - relays older than ten minutes are dropped from the relay bucket;
 *  - for allow-listed accounts, ".relays" lists the active relays and
 *    "#channel > message" sends a message to that freenode channel; when the
 *    message has the form "nick, text" a ten minute relay is registered so
 *    that nick's lines in that channel are copied back to $channel;
 *  - lines seen in "#freenode" are forwarded to the channel registered for the
 *    speaking nick/channel pair, if there is one.
 *
 * @param string $nick     nick issuing the command ("" skips command handling)
 * @param string $channel  channel the line was seen in
 * @param string $trailing message text
 * @return void
 */
function minion_talk($nick, $channel, $trailing) {
    $relays_bucket = "activity.php/minion_talk/relays";
    $relay_lifetime = 10 * 60; # seconds
    $relays = get_array_bucket($relays_bucket);
    if (is_array($relays) == False) {
        $relays = array();
    }

    # flush all outdated relays, and drop nicks that have no relays left so
    # the bucket does not accumulate empty entries
    $save_bucket = False;
    foreach ($relays as $freenode_nick => $freenode_channels) {
        foreach ($relays[$freenode_nick] as $freenode_channel => $data) {
            if (microtime(True) - $data["timestamp"] > $relay_lifetime) {
                unset($relays[$freenode_nick][$freenode_channel]);
                $save_bucket = True;
            }
        }
        if (count($relays[$freenode_nick]) == 0) {
            unset($relays[$freenode_nick]);
            $save_bucket = True;
        }
    }

    if ($nick != "") {
        $account = users_get_account($nick);
        $allowed = array("crutchy", "chromas", "mrcoolbp", "NCommander", "juggs", "TheMightyBuzzard");
        # strict comparison: a non-string account value (e.g. True or 0) must
        # never loosely match an entry of the allow-list
        if (in_array($account, $allowed, True) == True) {
            if ($trailing == ".relays") {
                $n = 0;
                foreach ($relays as $freenode_nick => $freenode_channels) {
                    foreach ($relays[$freenode_nick] as $freenode_channel => $data) {
                        $rem = round(($data["timestamp"] + $relay_lifetime - microtime(True)) / 60, 0);
                        pm($channel, chr(3) . "13 {$freenode_nick}: {$freenode_channel} => " . $data["channel"] . " (unset in {$rem} minutes)");
                        $n++;
                    }
                }
                if ($n == 0) {
                    pm($channel, chr(3) . "13 no channel relays currently active");
                }
                # persist the flush above before leaving early
                if ($save_bucket == True) {
                    set_array_bucket($relays, $relays_bucket);
                }
                return;
            }
            # "#channel > message"
            $params = explode(">", $trailing);
            if (count($params) >= 2) {
                $freenode_channel = strtolower(trim($params[0]));
                if (substr($freenode_channel, 0, 1) == "#") {
                    array_shift($params);
                    # re-join so that ">" characters inside the message survive
                    $msg = trim(implode(">", $params));
                    if (strlen($msg) > 0) {
                        $commands = array("~minion raw sylnt :sylnt PRIVMSG {$freenode_channel} :<{$nick}> {$msg}");
                        internal_macro($commands);
                        # "nick, text" addresses somebody: relay their replies back
                        $parts = explode(",", $msg);
                        $freenode_nick = strtolower(trim($parts[0]));
                        if (count($parts) > 1 and strpos($freenode_nick, " ") === False) {
                            $relays[$freenode_nick][$freenode_channel] = array("channel" => $channel, "timestamp" => microtime(True));
                            pm($channel, chr(3) . "13 ten minute relay set for \"{$freenode_nick}\" in \"{$freenode_channel}\" on freenode to \"{$channel}\" on this server");
                            $save_bucket = True;
                        }
                    }
                }
            }
        }
    }

    if ($channel == "#freenode") {
        # pick the nick, channel and text out of the colour-coded relay line
        $freenode_nick = extract_text($trailing, chr(3) . "03", chr(3) . " [", False);
        $freenode_channel = extract_text($trailing, chr(3) . " [" . chr(3) . "02", chr(3) . "] " . chr(3) . "05", False);
        if (isset($relays[strtolower($freenode_nick)][$freenode_channel]) == True) {
            $freenode_trailing = extract_text($trailing, chr(3) . "] " . chr(3) . "05", chr(3), True);
            pm($relays[strtolower($freenode_nick)][$freenode_channel]["channel"], chr(3) . "03" . $freenode_nick . chr(3) . " [" . chr(3) . "02" . $freenode_channel . chr(3) . "] " . chr(3) . "05" . $freenode_trailing);
        }
    }

    if ($save_bucket == True) {
        set_array_bucket($relays, $relays_bucket);
    }
}
/**
 * Looks up $term on one definition source and announces the result.
 *
 * The request URI is built by substituting the url-encoded term for
 * $params["template"] in $params["uri"]. The definition is the text between
 * $params["delim_start"] and $params["delim_end"] in the response body, with
 * tags stripped and cut to MAX_DEF_LENGTH. When no definition is found and the
 * response carries a "location" header, the term named by that header is
 * looked up instead.
 *
 * @param string $host      host to query
 * @param string $term      term to define
 * @param array  $params    source settings; keys read here: space_delim,
 *                          template, uri, port, delim_start, delim_end,
 *                          get_param, ignore, name
 * @param int    $redirects number of redirects already followed (internal;
 *                          callers normally omit it)
 * @return bool True when a definition was sent with privmsg(), False otherwise
 */
function source_define($host, $term, $params, $redirects = 0) {
    global $debug;
    $max_redirects = 5;
    $sterm = $term;
    if ($params["space_delim"] != "") {
        $sterm = str_replace(" ", $params["space_delim"], $sterm);
    }
    $uri = str_replace($params["template"], urlencode($sterm), $params["uri"]);
    term_echo("*** DEFINE: trying {$host}{$uri} on port " . $params["port"]);
    $response = wget($host, $uri, $params["port"], ICEWEASEL_UA, "", 20);
    $html = strip_headers($response);
    $html = replace_ctrl_chars($html, " ");
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    if ($debug == "ON") {
        privmsg("debug [{$host}]: uri = \"{$uri}\"");
        privmsg("debug [{$host}]: html length = \"" . strlen($html) . "\"");
        privmsg("debug [{$host}]: delim_start = \"" . $params["delim_start"] . "\"");
        # the opening quote was missing from this message
        privmsg("debug [{$host}]: delim_end = \"" . $params["delim_end"] . "\"");
    }
    $def = "";
    $i = strpos($html, $params["delim_start"]);
    if ($i !== False) {
        if ($debug == "ON") {
            privmsg("debug [{$host}]: delim_start pos = \"{$i}\"");
        }
        # continue searching after the start delimiter
        $html = substr($html, $i + strlen($params["delim_start"]));
        $i = strpos($html, $params["delim_end"]);
        if ($i !== False) {
            if ($debug == "ON") {
                privmsg("debug [{$host}]: delim_end pos = \"{$i}\"");
            }
            $def = trim(strip_tags(substr($html, 0, $i)));
            $def = str_replace(array("\n", "\r"), " ", $def);
            # NOTE(review): as written this replaces a single space with a
            # single space (a no-op); the search string was presumably a
            # double space - confirm against the original file.
            $def = str_replace(" ", " ", $def);
            if (strlen($def) > MAX_DEF_LENGTH) {
                $def = trim(substr($def, 0, MAX_DEF_LENGTH)) . "...";
            }
        }
    }
    if ($def == "") {
        # no definition on this page: follow a redirect to another term, if any
        $location = exec_get_header($response, "location");
        if ($location == "") {
            return False;
        }
        $new_term = extract_text($location, $params["get_param"], "&", True);
        # stop when the header names no usable term, names the same term
        # again, or too many redirects were followed (guards against loops)
        if ($new_term === False or $new_term == "" or $new_term == $term or $redirects >= $max_redirects) {
            return False;
        }
        term_echo("redirecting to \"{$location}\"");
        if ($debug == "ON") {
            privmsg("debug [{$host}]: redirecting to \"{$location}\"");
        }
        return source_define($host, $new_term, $params, $redirects + 1);
    }
    if ($params["ignore"] != "" and strpos($def, $params["ignore"]) !== False) {
        return False;
    }
    if (strpos($def, "There aren't any definitions") !== False) {
        return False;
    }
    privmsg("[" . $params["name"] . "] " . chr(3) . "03{$term}" . chr(3) . ": " . html_decode($def));
    return True;
}
# Fetch the journal list page and walk its table rows.
$host = "soylentnews.org";
$list_uri = "/journal.pl?op=top";
$port = 80;

$msg = chr(3) . "08" . "********** " . chr(3) . "03" . chr(2) . "SOYLENTNEWS JOURNAL FEED" . chr(2) . chr(3) . "08" . " **********";
output($msg);

# id of the last journal handled; 878 is the fallback when no id file exists
$last_id = 878;
if (file_exists(JOURNALS_ID_FILE) == True) {
    $last_id = file_get_contents(JOURNALS_ID_FILE);
}
$msg = "last journal = {$last_id}";
output($msg);

$response = wget($host, $list_uri, $port, ICEWEASEL_UA, "", 60);
$html = strip_headers($response);
$delim1 = "<!-- start template: ID 60, journaltop;journal;default -->";
$delim2 = "<!-- end template: ID 60, journaltop;journal;default -->";
$html = extract_text($html, $delim1, $delim2);
if ($html === False) {
    output("error: journal list not found");
    return;
}

# the first two pieces produced by the split are discarded
$rows = explode("<tr>", $html);
array_shift($rows);
array_shift($rows);

# Look at no more than $item_count rows, and never past the end of $rows.
# (The previous bound used max(), which indexed beyond the array whenever
# fewer than $item_count rows were present and otherwise applied no limit.)
$item_count = 20;
$row_limit = min($item_count, count($rows));
for ($i = 0; $i < $row_limit; $i++) {
    $cells = explode("<td valign=\"top\">", $rows[$i]);
    if (count($cells) != 4) {
        term_echo("*** SN JOURNAL FEED: invalid number of cells for row {$i}");
        continue;
    }
    # TODO: DEBUG HERE
$faction = mysql_real_escape_string($_POST['faction']);
$flags = mysql_real_escape_string($_POST['flags']);
// $model is placed inside the SQL statement below, so it has to be SQL-escaped
// in addition to being HTML-encoded.
$model = mysql_real_escape_string(htmlspecialchars($_POST['model']));
// NOTE(review): $name, $cash and $key are assigned before this fragment;
// confirm they are escaped there ($key is used unquoted in the WHERE clause).
$sql = "UPDATE `characters` SET `_Name` = '{$name}',\n`_Cash` = '{$cash}',\n`_Model` = '{$model}',\n`_Flags` = '{$flags}',\n`_Faction` = '{$faction}' WHERE `characters`.`_Key` = {$key}";
// HTML-encode the statement so submitted markup is shown, not rendered.
echo "<div class='well'>\n<h2>Admin Edit Mode</h2>\n" . htmlspecialchars($sql) . "\n</div>\n";
} else {
    $newphysicaldesc = mysql_real_escape_string($_POST['physdesc']);
    $result = mysql_query("SELECT * FROM `characters` WHERE `_Schema` = '" . $gamemodecode . "' AND `_Key` LIKE '" . $key . "'");
    if (mysql_error() != "") {
        echo '<div class="alert alert-error"> ERROR' . mysql_error() . '</div>';
    }

    // Take the current description out of the stored character data.
    while ($row = @mysql_fetch_array($result)) {
        $data = $row['_Data'];
        $name = $row['_Name'];
        $oldphysdesc = extract_text($data, '"PhysDesc":"', '","');
    }
    // No matching row: make sure the variables used below exist.
    if (!isset($oldphysdesc)) {
        $oldphysdesc = "";
    }
    if (!isset($name)) {
        $name = "";
    }

    echo "<div class='well'><h2>Replacing</h2> <code>" . htmlspecialchars($oldphysdesc) . "</code> <h2>with</h2> <code>" . htmlspecialchars($newphysicaldesc) . "</code> <h2>on " . htmlspecialchars($name) . "</h2></div>";

    // The old description comes from the database and may contain quotes or
    // backslashes, so it is escaped before being embedded in the statement.
    $oldphysdesc_sql = mysql_real_escape_string($oldphysdesc);
    $sql = "UPDATE characters set _Data= replace(_Data, \"{$oldphysdesc_sql}\", \"{$newphysicaldesc}\") WHERE `characters`.`_Key` ={$key}";
}

mysql_query($sql);
if (mysql_error() == "") {
    echo '<div class="alert alert-success">Character Updated!</div><br> <a href="index.php"class="btn btn-large btn-block btn-success">Return to dashboard</a> ';
} else {
    echo '<div class="alert alert-error">ERROR - ' . mysql_error() . '<br>If this keeps happening you should contact the Owner about this!</div><br> <input type="button" class="btn btn-large btn-block btn-error" value="Go Back" onclick="goBack()"> ';
}
?>
/**
 * Downloads one URL, decides whether it has to be (re-)indexed and, if so,
 * stores the page, its keywords and its outgoing links.
 *
 * Outline:
 *  - a "Relocation" status queues the redirect target in the temp table
 *    (unless it is already queued for this session) and stops there;
 *  - otherwise the page is fetched (honouring $min_delay between fetches),
 *    converted to text when needed and hashed;
 *  - links found on the page are queued in the temp table;
 *  - a page not seen before ($md5sum == '') is inserted into the links table,
 *    a changed page has its keywords and link row replaced.
 *
 * @param string $url              address to index
 * @param int    $level            crawl level stored with queued links; the
 *                                 page itself is stored with $level - 1
 * @param mixed  $site_id          site the page belongs to
 * @param string $md5sum           hash stored for the page, '' if not yet indexed
 * @param mixed  $domain           not used in this function
 * @param mixed  $indexdate        not used in this function
 * @param mixed  $sessid           crawl session id used for the temp table
 * @param mixed  $can_leave_domain passed through to url_purify()/get_links()
 * @param int    $reindex          1 when running a re-index
 * @return void
 */
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex) {
    global $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $supdomain, $index_vpaths;
    global $user_agent, $tmp_urls, $delay_time, $domain_arr;
    global $db;

    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;

    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = $db->query("SELECT link FROM " . TABLE_PREFIX . "temp WHERE link=" . $db->quote($url) . " AND id=" . $db->quote($sessid));
            echo sql_errorstring(__FILE__, __LINE__);
            if ($result) {
                $already_queued = ($result->fetch() !== false);
                $result->closeCursor();
                // Queue the redirect target only when it is NOT queued yet
                // (the check was inverted and inserted duplicates instead).
                if (!$already_queued) {
                    $db->exec("INSERT INTO " . TABLE_PREFIX . "temp (link, level, id) VALUES (" . $db->quote($url) . ", " . $db->quote($level) . ", " . $db->quote($sessid) . ")");
                    echo sql_errorstring(__FILE__, __LINE__);
                }
            }
        }
        // was "==", a comparison whose result was thrown away
        $url_status['state'] = "redirected";
    }

    if (!$index_vpaths && $url_status['state'] == 'ok') {
        // without virtual-path indexing, a last path segment with no dot is
        // not treated as a page
        $url_parts = parse_url($url);
        $base = isset($url_parts['path']) ? basename($url_parts['path']) : '';
        if (strstr($base, '.') === false) {
            $url_status['state'] = "directory listing or default redirect";
        }
    }

    ini_set("user_agent", $user_agent);

    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;

        // keep at least $min_delay seconds between two fetches; the wait is
        // computed once so it cannot turn negative between two time() calls
        $wait = $min_delay - (time() - $delay_time);
        if ($wait > 0) {
            sleep($wait);
        }
        $delay_time = time();

        $file = '';
        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                while ($buffer = @fgets($fl, 4096)) {
                    $file .= $buffer;
                }
                fclose($fl);
            } else {
                $file_read_error = 1;
            }
        }
        if ($file_read_error) {
            // fall back to the alternative downloader
            $contents = getFileContents($url);
            $file = $contents['file'];
        }

        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);

        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }

        printStandardReport('starting', $command_line);

        $newmd5sum = md5($file);
        // compare the hashes as strings: "==" treats hex strings that look
        // like numbers (e.g. "0e12...") as equal numbers
        $unchanged = ((string) $md5sum === $newmd5sum);

        if ($reindex == 0) {
            if ($unchanged) {
                printStandardReport('md5notChanged', $command_line);
                $OKtoIndex = 0;
            } elseif (isDuplicateMD5($newmd5sum)) {
                $OKtoIndex = 0;
                printStandardReport('duplicate', $command_line);
            }
        }

        if ((!$unchanged || $reindex == 1) && $OKtoIndex == 1) {
            $data = clean_file($file, $url, $url_status['content']);

            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }

            $wordarray = unique_array(explode(" ", $data['content']));

            if ($data['nofollow'] != 1) {
                //get all links from file
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
                $all_links = is_array($links) ? count($links) : 0;
                $numoflinks = 0;
                //if there are any, add to the temp table, but only if there isnt such url already
                if (is_array($links)) {
                    foreach ($links as $link) {
                        if (!isset($tmp_urls[$link]) || $tmp_urls[$link] != 1) {
                            $tmp_urls[$link] = 1;
                            $numoflinks++;
                            $db->exec("INSERT INTO " . TABLE_PREFIX . "temp (link, level, id) VALUES (" . $db->quote($link) . ", " . $db->quote($level) . ", " . $db->quote($sessid) . ")");
                            echo sql_errorstring(__FILE__, __LINE__);
                        }
                    }
                }
            } else {
                printStandardReport('noFollow', $command_line);
            }

            if ($OKtoIndex == 1) {
                $title = $data['title'];
                $host = $data['host'];
                $path = $data['path'];
                // NOTE(review): the replacement literal was the invalid token
                // sequence """ (a parse error); it is restored here as the
                // &quot; entity, which is presumably what it was before being
                // entity-decoded - confirm.
                $fulltxt = str_replace("\\'", "&quot;", $data['fulltext']);
                $desc = substr($data['description'], 0, 254);
                $language = substr($data['language'], 0, 2);

                // look up (or create) the domain row for this page
                $url_parts = parse_url($url);
                $domain_for_db = $url_parts['host'];
                if (isset($domain_arr[$domain_for_db])) {
                    $dom_id = $domain_arr[$domain_for_db];
                } else {
                    $db->exec("INSERT INTO " . TABLE_PREFIX . "domains (domain) VALUES (" . $db->quote($domain_for_db) . ")");
                    $dom_id = $db->lastInsertId();
                    $domain_arr[$domain_for_db] = $dom_id;
                }

                $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords']);
                $tstamp = "'" . date("Y-m-d") . "'";

                //if there are words to index, add the link to the database, get its id, and add the word + their relation
                if (is_array($wordarray) && count($wordarray) > $min_words_per_page) {
                    // quoted copies are kept in separate variables so $url
                    // stays usable as a plain address further down
                    $q_site_id = $db->quote($site_id);
                    $q_url = $db->quote($url);
                    $q_title = $db->quote($title);
                    $q_desc = $db->quote($desc);
                    $q_language = $db->quote($language);
                    $q_fulltxt = $db->quote($fulltxt);
                    $q_pageSize = $db->quote($pageSize);
                    $q_md5sum = $db->quote($newmd5sum);

                    if ($md5sum == '') {
                        // page not indexed before: create its link row
                        $db->exec("INSERT INTO " . TABLE_PREFIX . "links (site_id, url, title, description, language, fulltxt, indexdate, size, md5sum, level) VALUES ({$q_site_id}, {$q_url}, {$q_title}, {$q_desc}, {$q_language}, {$q_fulltxt}, {$tstamp}, {$q_pageSize}, {$q_md5sum}, {$thislevel})");
                        $error = sql_errorstring(__FILE__, __LINE__);
                        if ($error) {
                            echo $error;
                            printStandardReport('skipped', $command_line);
                        } else {
                            $result = $db->query("SELECT link_id FROM " . TABLE_PREFIX . "links WHERE url={$q_url}");
                            echo sql_errorstring(__FILE__, __LINE__);
                            $link_id = false;
                            if ($result) {
                                $link_id = $result->fetchColumn();
                                $result->closeCursor();
                            }
                            if ($link_id === false) {
                                printStandardReport('skipped', $command_line);
                            } else {
                                save_keywords($wordarray, $link_id, $dom_id);
                                printStandardReport('indexed', $command_line);
                            }
                        }
                    } elseif (!$unchanged) {
                        //if page has changed, start updating
                        $result = $db->query("SELECT link_id FROM " . TABLE_PREFIX . "links WHERE url={$q_url}");
                        echo sql_errorstring(__FILE__, __LINE__);
                        $link_id = false;
                        if ($result) {
                            $link_id = $result->fetchColumn();
                            $result->closeCursor();
                        }
                        if ($link_id === false) {
                            // no existing row to update; running the
                            // statements below would only produce SQL errors
                            printStandardReport('skipped', $command_line);
                        } else {
                            // keyword relations are spread over 16 tables
                            // suffixed with one hex digit each
                            for ($i = 0; $i <= 15; $i++) {
                                $char = dechex($i);
                                $db->exec("DELETE FROM " . TABLE_PREFIX . "link_keyword{$char} WHERE link_id={$link_id}");
                                echo sql_errorstring(__FILE__, __LINE__);
                            }
                            save_keywords($wordarray, $link_id, $dom_id);
                            $db->exec("UPDATE " . TABLE_PREFIX . "links SET title={$q_title}, description={$q_desc}, language={$q_language}, fulltxt={$q_fulltxt}, indexdate={$tstamp}, size={$q_pageSize}, md5sum={$q_md5sum}, level={$thislevel} WHERE link_id={$link_id}");
                            echo sql_errorstring(__FILE__, __LINE__);
                            printStandardReport('re-indexed', $command_line);
                        }
                    }
                } else {
                    printStandardReport('minWords', $command_line);
                }
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }

    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    printLinksReport($numoflinks, $all_links, $command_line);
}
/**
 * Builds a story submission from the page at $url and posts it to
 * dev.soylentnews.org.
 *
 * The title comes from the page's <title> tag (cut at the first site-name
 * style delimiter). The story text is assembled from the page's <p>
 * paragraphs, keeping only those that pass the filters in the loop below.
 * The page must also expose a description meta tag ("description" or
 * "og:description"); that value is only used as a precondition, not as text.
 *
 * @param string $url address of the source article
 * @return bool True when the site confirms the submission, False otherwise
 *              (an error is sent with privmsg() in most failure cases)
 */
function sn_submit($url) {
    if ($url == "") {
        return False;
    }
    $url = get_redirected_url($url);
    if ($url === False) {
        privmsg("error: unable to download source (get_redirected_url)");
        return False;
    }
    $host = "";
    $uri = "";
    $port = 80;
    if (get_host_and_uri($url, $host, $uri, $port) == False) {
        privmsg("error: unable to download source (get_host_and_uri)");
        return False;
    }
    $response = wget($host, $uri, $port);
    # This check used to repeat get_host_and_uri(), so a failed download was
    # never detected.
    # NOTE(review): assumes wget() yields False/null/"" on failure - confirm.
    if ($response === False or $response === null or $response === "") {
        privmsg("error: unable to download source (wget)");
        return False;
    }
    $source_html = strip_headers($response);

    # title: keep the part before the first separator of each kind
    $source_title = extract_raw_tag($source_html, "title");
    if ($source_title !== False) {
        $delimiters = array("--", "|", " - ", " : ", " — ", " • ");
        for ($i = 0; $i < count($delimiters); $i++) {
            $j = strpos($source_title, $delimiters[$i]);
            if ($j !== False) {
                $source_title = trim(substr($source_title, 0, $j));
            }
        }
    }
    if ($source_title === False or $source_title == "") {
        privmsg("error: title not found or empty");
        return False;
    }
    $source_title = html_decode($source_title);
    $source_title = html_decode($source_title);

    # a description meta tag must exist; its value is replaced further down
    $source_body = extract_meta_content($source_html, "description");
    if ($source_body === False or $source_body == "") {
        $source_body = extract_meta_content($source_html, "og:description", "property");
        if ($source_body === False or $source_body == "") {
            privmsg("error: description meta content not found or empty");
            return False;
        }
    }

    # reduce the page (or its <article>, when present) to bare <p> paragraphs
    $html = $source_html;
    $article = extract_raw_tag($html, "article");
    if ($article !== False) {
        $html = $article;
    }
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    #strip_all_tag($html,"a");
    strip_all_tag($html, "strong");
    $html = strip_tags($html, "<p>");
    $html = lowercase_tags($html);

    $host_parts = explode(".", $host);
    $source_body = array();
    foreach (explode("<p", $html) as $paragraph) {
        # drop what is left of the opening tag (everything up to the first ">")
        $parts = explode(">", $paragraph);
        if (count($parts) >= 2) {
            array_shift($parts);
            $paragraph = implode(">", $parts);
        }
        $paragraph = clean_text(strip_tags($paragraph));

        # skip paragraphs that mention any part of the host name longer than 3 chars
        $lower = strtolower($paragraph);
        foreach ($host_parts as $host_part) {
            if (strlen($host_part) > 3 and strpos($lower, strtolower($host_part)) !== False) {
                continue 2;
            }
        }
        # skip paragraphs for which filter() returns anything for the digit set
        if (filter($paragraph, "0123456789") != "") {
            continue;
        }
        # skip paragraphs that do not end with a full stop
        $length = strlen($paragraph);
        if ($length > 1 and $paragraph[$length - 1] != ".") {
            continue;
        }
        # only substantial paragraphs are kept
        if ($length > 100) {
            $source_body[] = $paragraph;
        }
    }
    $source_body = implode("\n\n", $source_body);
    $source_body = html_decode($source_body);
    $source_body = html_decode($source_body);

    # fetch the submission form to obtain its reskey
    $host = "dev.soylentnews.org";
    $port = 443;
    $uri = "/submit.pl";
    $response = wget($host, $uri, $port, ICEWEASEL_UA);
    $html = strip_headers($response);
    $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">");
    if ($reskey === False) {
        privmsg("error: unable to extract reskey");
        return False;
    }

    # wait between fetching the form and posting it
    sleep(25);

    $params = array();
    $params["reskey"] = $reskey;
    #$params["name"]=trim(substr($nick,0,50));
    $params["name"] = get_bot_nick();
    $params["email"] = "";
    $params["subj"] = trim(substr($source_title, 0, 100));
    $params["primaryskid"] = "1";
    $params["tid"] = "6";
    $params["sub_type"] = "plain";
    $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
    $params["op"] = "SubmitStory";
    $response = wpost($host, $uri, $port, ICEWEASEL_UA, $params);

    # reduce the reply to plain text and look for the confirmation sentence
    $html = strip_headers($response);
    strip_all_tag($html, "head");
    strip_all_tag($html, "script");
    strip_all_tag($html, "style");
    strip_all_tag($html, "a");
    $html = strip_tags($html);
    $html = clean_text($html);
    if (strpos($html, "Perhaps you would like to enter an email address or a URL next time. Thanks for the submission.") !== False) {
        privmsg("submission successful - https://{$host}/submit.pl?op=list");
        return True;
    }
    privmsg("error: something went wrong with your submission");
    return False;
}
# parent comment id: present only when the "Parent" link is found
$pid_delim2 = "\">Parent";
$pid_test = extract_text($pid_html, $pid_delim1, $pid_delim2);
if ($pid_test !== False) {
    $pid = $pid_test;
    $parent_url = "http://soylentnews.org/comments.pl?sid={$sid}&cid={$pid}";
} else {
    $pid = "";
    $parent_url = "";
}

# subject: text of the comment's anchor, tags removed, entities decoded twice
# NOTE(review): the str_replace(" ", " ", ...) calls below replace a single
# space with a single space (a no-op as written); the search string was
# presumably something else, such as a double space - confirm.
$subject_delim1 = "<h4><a name=\"{$cid}\">";
$subject_delim2 = "</a>";
$subject = trim(strip_tags(extract_text($parts[$j], $subject_delim1, $subject_delim2)));
$subject = html_decode(html_decode(str_replace(" ", " ", $subject)));

# body: control characters and paragraph/line-break tags become spaces before
# the remaining tags are stripped
$comment_body = extract_text($parts[$j], "<div id=\"comment_body_{$cid}\">", "</div>");
$comment_body = replace_ctrl_chars($comment_body, " ");
$comment_body = str_replace(array("</p>", "<p>", "<br>"), " ", $comment_body);
$comment_body = trim(strip_tags($comment_body));
$comment_body = html_decode(html_decode(str_replace(" ", " ", $comment_body)));

$record = array();
$record["user"] = $user;
$record["uid"] = $uid;
$record["score"] = $score;
$record["score_num"] = $score_num;
$record["subject"] = $subject;
$record["title"] = $title;
/**
 * Splits "key=value key=value ..." style data into an associative array.
 *
 * The keys must be given in the order in which they appear in $data: the
 * value of each key is the text between "<key><suffix>" and
 * "<next key><suffix>", and the value of the last key is taken with an empty
 * end delimiter and extract_text()'s fourth argument set to True.
 *
 * @param array  $keys   ordered list of key names
 * @param string $data   text to parse
 * @param string $suffix text that follows every key name (default "=")
 * @return array|bool map of key => value, or False when $keys is empty or
 *                    any value cannot be extracted
 */
function parse_data($keys, $data, $suffix = "=") {
    $last = count($keys) - 1;
    if ($last < 0) {
        return False;
    }
    $result = array();
    for ($index = 0; $index <= $last; $index++) {
        $key = $keys[$index];
        if ($index < $last) {
            # bounded by the next key
            $value = extract_text($data, $key . $suffix, $keys[$index + 1] . $suffix);
        } else {
            # last key: no following key to stop at
            $value = extract_text($data, $key . $suffix, "", True);
        }
        if ($value === False) {
            return False;
        }
        $result[$key] = $value;
    }
    return $result;
}
/**
 * Returns the content attribute of the first <meta> tag whose $key attribute
 * equals $name (compared case-insensitively), in the document's original case.
 *
 * Example tag:
 * <meta name="description" content="Researchers have made a breakthrough in blah blah blah." id="metasummary" />
 *
 * Matching is done on a lowercased copy of the document; the result is then
 * cut out of the original $html at the same position. The position search
 * starts at the matching tag, so identical text that happens to occur earlier
 * in the document (for instance in the title) cannot be returned by mistake.
 *
 * @param string $html document to search
 * @param string $name value of the identifying attribute, e.g. "description"
 * @param string $key  name of the identifying attribute ("name", "property", ...)
 * @return string|bool the content value, or False when there is no matching
 *                     tag or its content is missing or empty
 */
function extract_meta_content($html, $name, $key = "name") {
    $marker = "<meta ";
    $lhtml = strtolower($html);
    $lname = strtolower($name);
    # the document is searched in lowercase, so the attribute name must be too
    $lkey = strtolower($key);

    $parts = explode($marker, $lhtml);
    # $offset is the position in $lhtml at which the current part begins
    $offset = strlen(array_shift($parts)) + strlen($marker);
    if (count($parts) == 0) {
        return False;
    }

    foreach ($parts as $part) {
        $n = extract_text($part, "{$lkey}=\"", "\"");
        if ($n !== False and $n === $lname) {
            # first tag with the requested name decides the outcome
            $content = extract_text($part, "content=\"", "\"");
            if ($content === False or $content == "") {
                return False;
            }
            $i = strpos($lhtml, $content, $offset);
            if ($i === False) {
                return False;
            }
            # same position and length in the original-case document
            return substr($html, $i, strlen($content));
        }
        $offset += strlen($part) + strlen($marker);
    }
    return False;
}
// Number of note files copied so far; the first one supplies the title.
$cpt = 0;
fputs($fdc, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\r\n\t\t\t<?xml-stylesheet type=\"text/css\" href=\"style.css\" media=\"all\"?>\n\r\n\t\t\t<note uri=\"uri_note_" . $titre . "\">\n");

// Lines matching one of these tags are never copied verbatim. 'titre' is in
// the list as well: in the first note it is handled separately before the
// list is consulted, in every later note it is dropped.
$skipped_tags = array('note', 'meta_note', 'statut', 'date_creation', 'date_modification', 'auteur', 'contributeurs', 'contributeur', 'relecteur', 'nom', 'prenom', 'titre');

foreach ($parties as $partie) {
    fputs($fdc, "<partie>");
    fputs($fdc, "<titrepartie>" . $partie['titre'] . "</titrepartie>");
    // echo "<pre>";
    // print_r($partie);
    // echo "</pre>";
    if ($partie['notes']) {
        foreach ($partie['notes'] as $file) {
            $fd = fopen('xml/note/' . $file . '.note', 'r');
            if ($fd === false) {
                // unreadable note: skip it instead of looping on an invalid handle
                continue;
            }
            while (!feof($fd)) {
                $line1 = fgets($fd);
                if ($line1 === false) {
                    break;
                }
                if ($cpt == 0 && match_tag('titre', $line1)) {
                    // title of the first note becomes the compilation title
                    fputs($fdc, "<titre>" . extract_text('titre', $line1) . " [Compilation]</titre>\n");
                    continue;
                }
                $skip_line = false;
                foreach ($skipped_tags as $skipped_tag) {
                    if (match_tag($skipped_tag, $line1)) {
                        $skip_line = true;
                        break;
                    }
                }
                if (!$skip_line && !isComment($line1)) {
                    fputs($fdc, $line1);
                }
            }
            // release the handle; it was previously left open for every note
            fclose($fd);
            $cpt++;
        }
    }
    fputs($fdc, '</partie>');
/**
 * Synchronises one folder of the static sync directory, recursing into
 * sub-folders.
 *
 * For every file not yet recorded in $done a resource is imported (optionally
 * creating a themed collection for the folder, mapping path segments to
 * metadata fields and ingesting alternative files). For files already known,
 * previews and metadata are regenerated when the file's modification time is
 * newer than the time recorded in $modtimes.
 *
 * @param string $folder absolute path of the folder to process
 * @return bool|null true when $staticsync_max_files was exceeded and
 *                   processing stopped early; nothing otherwise
 */
function ProcessFolder($folder) {
    global $lang, $syncdir, $nogo, $staticsync_max_files, $count, $done, $modtimes, $lastsync, $ffmpeg_preview_extension, $staticsync_autotheme, $staticsync_folder_structure, $staticsync_extension_mapping_default, $staticsync_extension_mapping, $staticsync_mapped_category_tree, $staticsync_title_includes_path, $staticsync_ingest, $staticsync_mapfolders, $staticsync_alternatives_suffix, $theme_category_levels, $staticsync_defaultstate;
    global $banned_extensions, $extracted_text_field, $filename_field;

    $collection = 0;
    echo "Processing Folder: $folder" . PHP_EOL;

    # List all files in this folder.
    $dh = opendir($folder);
    if ($dh === false) {
        # Unreadable folder: nothing to do (reading from a failed handle would not terminate cleanly).
        echo "Unable to open folder: $folder" . PHP_EOL;
        return;
    }

    while (($file = readdir($dh)) !== false) {
        if ($file == '.' || $file == '..') {
            continue;
        }

        $filetype = filetype($folder . "/" . $file);
        $fullpath = $folder . "/" . $file;
        $shortpath = str_replace($syncdir . "/", '', $fullpath);

        # Work out extension
        $extension = explode(".", $file);
        if (count($extension) > 1) {
            $extension = trim(strtolower($extension[count($extension) - 1]));
        } else {
            # No extension
            $extension = "";
        }

        if ($staticsync_mapped_category_tree) {
            $path_parts = explode("/", $shortpath);
            array_pop($path_parts);
            touch_category_tree_level($path_parts);
        }

        # -----FOLDERS-------------
        if ((($filetype == "dir") || $filetype == "link") && (strpos($nogo, "[$file]") === false) && (strpos($file, $staticsync_alternatives_suffix) === false)) {
            # Recurse
            ProcessFolder($folder . "/" . $file);
        }

        # -------FILES---------------
        if (($filetype == "file") && (substr($file, 0, 1) != ".") && (strtolower($file) != "thumbs.db")) {
            # Check to see if extension is banned, do not add if it is banned.
            # in_array() is used because array_search() returns the matching
            # index, and index 0 (the first banned extension) is falsy.
            if (in_array($extension, $banned_extensions)) {
                continue;
            }

            $count++;
            if ($count > $staticsync_max_files) {
                closedir($dh);
                return(true);
            }

            # Already exists?
            if (!isset($done[$shortpath])) {
                echo "Processing file: $fullpath" . PHP_EOL;

                if ($collection == 0 && $staticsync_autotheme) {
                    # Make a new collection for this folder.
                    $e = explode("/", $shortpath);
                    $theme = ucwords($e[0]);
                    $themesql = "theme='" . ucwords(escape_check($e[0])) . "'";
                    $themecolumns = "theme";
                    $themevalues = "'" . ucwords(escape_check($e[0])) . "'";

                    if ($staticsync_folder_structure) {
                        # Every folder level below the first maps to theme2, theme3, ...
                        for ($x = 1; $x < count($e) - 1; $x++) {
                            $themeindex = $x + 1;
                            if ($themeindex > $theme_category_levels) {
                                $theme_category_levels = $themeindex;
                                if ($x == count($e) - 2) {
                                    echo PHP_EOL . PHP_EOL . "UPDATE THEME_CATEGORY_LEVELS TO $themeindex IN CONFIG!!!!" . PHP_EOL . PHP_EOL;
                                }
                            }
                            $th_name = ucwords(escape_check($e[$x]));
                            $themesql .= " AND theme{$themeindex} = '$th_name'";
                            $themevalues .= ",'$th_name'";
                            $themecolumns .= ",theme{$themeindex}";
                        }
                    }

                    # The collection is named after the folder that directly contains the file.
                    $name = (count($e) == 1) ? '' : $e[count($e) - 2];
                    echo "Collection $name, theme=$theme" . PHP_EOL;
                    $escaped_name = escape_check($name);
                    $collection = sql_value("SELECT ref value FROM collection WHERE name='$escaped_name' AND $themesql", 0);
                    if ($collection == 0) {
                        sql_query("INSERT INTO collection (name,created,public,$themecolumns,allow_changes) VALUES ('$escaped_name', NOW(), 1, $themevalues, 0)");
                        $collection = sql_insert_id();
                    }
                }

                # Work out a resource type based on the extension.
                $type = $staticsync_extension_mapping_default;
                foreach ($staticsync_extension_mapping as $rt => $extensions) {
                    if (in_array($extension, $extensions)) {
                        $type = $rt;
                    }
                }
                $modified_type = hook('modify_type', 'staticsync', array($type));
                if (is_numeric($modified_type)) {
                    $type = $modified_type;
                }

                # Formulate a title
                if ($staticsync_title_includes_path) {
                    $title_find = array('/', '_', ".$extension");
                    $title_repl = array(' - ', ' ', '');
                    $title = ucfirst(str_ireplace($title_find, $title_repl, $shortpath));
                } else {
                    $title = str_ireplace(".$extension", '', $file);
                }
                $modified_title = hook('modify_title', 'staticsync', array($title));
                if ($modified_title !== false) {
                    $title = $modified_title;
                }

                # Import this file
                $r = import_resource($shortpath, $type, $title, $staticsync_ingest);
                if ($r !== false) {
                    # Add to mapped category tree (if configured).
                    # Same truthiness test as the one that builds $path_parts
                    # above; isset() was also true for a setting of false/0.
                    if ($staticsync_mapped_category_tree) {
                        $basepath = '';
                        # Save tree position to category tree field
                        # For each node level, expand it back to the root so the full path is stored.
                        for ($n = 0; $n < count($path_parts); $n++) {
                            if ($basepath != '') {
                                $basepath .= "~";
                            }
                            $basepath .= $path_parts[$n];
                            $path_parts[$n] = $basepath;
                        }
                        update_field($r, $staticsync_mapped_category_tree, "," . join(",", $path_parts));
                    }

                    # default access level. This may be overridden by metadata mapping.
                    $accessval = 0;

                    # StaticSync path / metadata mapping
                    # Extract metadata from the file path as per $staticsync_mapfolders in config.php
                    if (isset($staticsync_mapfolders)) {
                        foreach ($staticsync_mapfolders as $mapfolder) {
                            $match = $mapfolder["match"];
                            $field = $mapfolder["field"];
                            $level = $mapfolder["level"];
                            if (strpos("/" . $shortpath, $match) !== false) {
                                # Match. Extract metadata.
                                $path_parts = explode("/", $shortpath);
                                if ($level < count($path_parts)) {
                                    // special cases first.
                                    if ($field == 'access') {
                                        # access level is a special case
                                        # first determine if the value matches a defined access level
                                        $value = $path_parts[$level - 1];
                                        for ($n = 0; $n < 3; $n++) {
                                            # if we get an exact match or a match except for case
                                            if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n])) {
                                                $accessval = $n;
                                                echo "Will set access level to " . $lang['access' . $n] . " ($n)" . PHP_EOL;
                                            }
                                        }
                                    } else {
                                        # Save the value
                                        print_r($path_parts);
                                        $value = $path_parts[$level - 1];
                                        update_field($r, $field, $value);
                                        echo " - Extracted metadata from path: $value" . PHP_EOL;
                                    }
                                }
                            }
                        }
                    }

                    # update access level
                    sql_query("UPDATE resource SET access = '$accessval',archive='$staticsync_defaultstate' WHERE ref = '$r'");

                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && file_exists($altpath)) {
                        $adh = opendir($altpath);
                        if ($adh !== false) {
                            while (($altfile = readdir($adh)) !== false) {
                                $alt_filetype = filetype($altpath . "/" . $altfile);
                                # The hidden-file / thumbs.db test must look at the
                                # alternative file itself (it used to test $file).
                                if (($alt_filetype == "file") && (substr($altfile, 0, 1) != ".") && (strtolower($altfile) != "thumbs.db")) {
                                    # Create alternative file
                                    # Find extension
                                    $ext = explode(".", $altfile);
                                    $ext = $ext[count($ext) - 1];
                                    $description = str_replace("?", strtoupper($ext), $lang["originalfileoftype"]);
                                    $file_size = filesize_unlimited($altpath . "/" . $altfile);
                                    $aref = add_alternative_file($r, $altfile, $description, $altfile, $ext, $file_size);
                                    $path = get_resource_path($r, true, '', true, $ext, -1, 1, false, '', $aref);
                                    rename($altpath . "/" . $altfile, $path); # Move alternative file
                                }
                            }
                            closedir($adh);
                        }
                    }

                    # Add to collection
                    if ($staticsync_autotheme) {
                        $test = sql_query("SELECT * FROM collection_resource WHERE collection='$collection' AND resource='$r'");
                        if (count($test) == 0) {
                            sql_query("INSERT INTO collection_resource (collection, resource, date_added) VALUES ('$collection', '$r', NOW())");
                        }
                    }
                } else {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?)" . PHP_EOL;
                }
            } else {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath, $modtimes) && ($filemod > strtotime($modtimes[$shortpath]))) {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("SELECT ref, has_image, file_modified, file_extension FROM resource WHERE file_path='" . escape_check($shortpath) . "'");
                    if (count($rd) > 0) {
                        $rd = $rd[0];
                        $rref = $rd["ref"];
                        echo "Resource $rref has changed, regenerating previews: $fullpath" . PHP_EOL;
                        extract_exif_comment($rref, $rd["file_extension"]);

                        # extract text from documents (e.g. PDF, DOC).
                        if (isset($extracted_text_field)) {
                            # NOTE(review): $unoconv_path and $unoconv_extensions are not declared
                            # global in this function, so isset() is always false here and
                            # extract_text() always runs - confirm whether they should be global.
                            if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
                                // omit, since the unoconv process will do it during preview creation below
                            } else {
                                extract_text($rref, $extension);
                            }
                        }

                        # Store original filename in field, if set
                        if (isset($filename_field)) {
                            update_field($rref, $filename_field, $file);
                        }

                        create_previews($rref, false, $rd["file_extension"], false, false, -1, false, $staticsync_ingest);
                        sql_query("UPDATE resource SET file_modified=NOW() WHERE ref='$rref'");
                    }
                }
            }
        }
    }

    closedir($dh);
}
/**
 * Fetch a single URL and, when its content is new or has changed, store it in the index.
 *
 * Steps performed, in order:
 *  - ask url_status() for the state of the URL; on a "Relocation" state the resolved
 *    target is queued in the temp table for this session;
 *  - when the state is 'ok', download the page (throttled by $min_delay), convert
 *    non-text content through extract_text(), and compare its MD5 with $md5sum;
 *  - queue the links found on the page in the temp table (unless the page is nofollow);
 *  - insert or update the row in the links table and save its keywords;
 *  - report progress through the print*Report() helpers.
 *
 * @param string $url              URL to fetch and index.
 * @param int    $level            Crawl level written with links queued from this page;
 *                                 the page itself is stored with $level - 1.
 * @param mixed  $site_id          Site identifier stored with a newly inserted link.
 * @param string $md5sum           MD5 recorded at the previous indexing, '' for a new page.
 * @param mixed  $domain           Not used by the current body.
 * @param mixed  $indexdate        Not used by the current body.
 * @param mixed  $sessid           Session id used as the `id` column of the temp table.
 * @param mixed  $can_leave_domain Passed through to url_purify() and get_links().
 * @param int    $reindex          1 when re-indexing; enables removal of dead/noindex pages.
 *
 * @return void
 */
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $min_delay;
    global $command_line;
    global $min_words_per_page;
    global $mysql_table_prefix, $user_agent, $tmp_urls, $delay_time, $domain_arr;

    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;

    if (strstr($url_status['state'], "Relocation")) {
        $url = preg_replace("/ /", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = mysql_query("select link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'");
            echo mysql_error();
            $rows = mysql_numrows($result);
            if ($rows == 0) {
                mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')");
                echo mysql_error();
            }
        }
        // NOTE(review): the original had the statement `$url_status['state'] == "redirected";`
        // here - a comparison whose result was discarded, so it never changed the state.
        // It was presumably meant to be an assignment; it is left out rather than "fixed"
        // so that the status printed below stays exactly what it has always been.
    }

    ini_set("user_agent", $user_agent);

    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;
        $file = '';

        // Throttle: wait until at least $min_delay seconds have passed since the last fetch.
        if (time() - $delay_time < $min_delay) {
            sleep($min_delay - (time() - $delay_time));
        }
        $delay_time = time();

        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            $fl = @fopen($url, "r");
            if ($fl) {
                // Compare against false explicitly so a line containing only "0" does not end the read.
                while (($buffer = @fgets($fl, 4096)) !== false) {
                    $file .= $buffer;
                }
                fclose($fl);
            } else {
                $file_read_error = 1;
            }
        }

        // Fall back to the project's own downloader when the plain read failed.
        if ($file_read_error) {
            $contents = getFileContents($url);
            $file = $contents['file'];
        }

        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);

        if ($url_status['content'] != 'text') {
            $file = extract_text($file, $url_status['content']);
        }

        printStandardReport('starting', $command_line);

        $newmd5sum = md5($file);
        if ($md5sum == $newmd5sum) {
            printStandardReport('md5notChanged', $command_line);
            $OKtoIndex = 0;
        } elseif (isDuplicateMD5($newmd5sum)) {
            $OKtoIndex = 0;
            printStandardReport('duplicate', $command_line);
        }

        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            $data = clean_file($file, $url, $url_status['content']);

            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }

            $wordarray = unique_array(explode(" ", $data['content']));

            if ($data['nofollow'] != 1) {
                $links = get_links($file, $url, $can_leave_domain, $data['base']);
                $links = distinct_array($links);
                $all_links = is_array($links) ? count($links) : 0;
                $numoflinks = 0;

                // Queue every link not yet seen in this run.
                // NOTE(review): $link comes from a fetched page and is interpolated into SQL;
                // confirm that get_links() returns values that are already escaped.
                if (is_array($links)) {
                    foreach ($links as $link) {
                        if (!isset($tmp_urls[$link]) || $tmp_urls[$link] != 1) {
                            $tmp_urls[$link] = 1;
                            $numoflinks++;
                            mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$link}', '{$level}', '{$sessid}')");
                            echo mysql_error();
                        }
                    }
                }
            } else {
                printStandardReport('noFollow', $command_line);
            }

            if ($OKtoIndex == 1) {
                $title = $data['title'];
                $host = $data['host'];
                $path = $data['path'];
                $fulltxt = $data['fulltext'];
                $desc = substr($data['description'], 0, 254);

                // Look up (or create) the id of the page's domain, cached in $domain_arr.
                $url_parts = parse_url($url);
                $domain_for_db = $url_parts['host'];
                if (isset($domain_arr[$domain_for_db])) {
                    $dom_id = $domain_arr[$domain_for_db];
                } else {
                    mysql_query("insert into " . $mysql_table_prefix . "domains (domain) values ('{$domain_for_db}')");
                    $dom_id = mysql_insert_id();
                    $domain_arr[$domain_for_db] = $dom_id;
                }

                $wordarray = calc_weights($wordarray, $title, $host, $path, $data['keywords']);

                // Only pages with more than $min_words_per_page distinct words are stored.
                if (is_array($wordarray) && count($wordarray) > $min_words_per_page) {
                    if ($md5sum == '') {
                        // New page: insert the link, then read back its id for the keyword rows.
                        mysql_query("insert into " . $mysql_table_prefix . "links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', '{$title}', '{$desc}', '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', {$thislevel})");
                        echo mysql_error();
                        $result = mysql_query("select link_id from " . $mysql_table_prefix . "links where url='{$url}'");
                        echo mysql_error();
                        $row = mysql_fetch_row($result);
                        $link_id = $row[0];
                        save_keywords($wordarray, $link_id, $dom_id);
                        printStandardReport('indexed', $command_line);
                    } elseif ($md5sum != $newmd5sum) {
                        // Known page whose content changed: drop its keyword rows and re-save.
                        $result = mysql_query("select link_id from " . $mysql_table_prefix . "links where url='{$url}'");
                        echo mysql_error();
                        $row = mysql_fetch_row($result);
                        $link_id = $row[0];
                        // Keyword rows are spread over 16 tables, link_keyword0 .. link_keywordf.
                        for ($i = 0; $i <= 15; $i++) {
                            $char = dechex($i);
                            mysql_query("delete from " . $mysql_table_prefix . "link_keyword{$char} where link_id={$link_id}");
                            echo mysql_error();
                        }
                        save_keywords($wordarray, $link_id, $dom_id);
                        $query = "update " . $mysql_table_prefix . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level={$thislevel} where link_id={$link_id}";
                        mysql_query($query);
                        echo mysql_error();
                        printStandardReport('re-indexed', $command_line);
                    }
                } else {
                    printStandardReport('minWords', $command_line);
                }
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }

    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    printLinksReport($numoflinks, $all_links, $command_line);
}
# Read the positional command-line arguments supplied by the caller.
$dest = $argv[2];
$nick = $argv[3];
$alias = $argv[4];
$cmd = $argv[5];

# Request settings for the one-liners site.
$agent = ICEWEASEL_UA;
$host = "www.just-one-liners.com";
$port = 80;

# Roughly one run in five uses the front page; the rest use the category page.
if (mt_rand(0, 4) == 0) {
    $uri = "/";
} else {
    $uri = "/category/confucius-say-wordplay";
}

$response = wget($host, $uri, $port, $agent);

# Pull out the text between the post heading's opening tag and its closing </h2>.
$delim1 = "<h2 class=\"title\" id=\"post-";
$delim2 = "</h2>";
$text = extract_text($response, $delim1, $delim2);
if ($text === False) {
    return;
}

# Drop everything before the first "<" (the remainder of the opening tag's id attribute).
$i = strpos($text, "<");
if ($i === False) {
    return;
}
$text = substr($text, $i);

# Reduce the markup to plain text.
$text = replace_ctrl_chars($text, " ");
$text = trim(strip_tags($text));
# NOTE(review): search and replacement look identical here - possibly whitespace that was
# normalised somewhere upstream; confirm the intended characters.
$text = str_replace(" ", " ", $text);
# html_decode is applied twice - presumably to unwrap double-encoded entities; confirm.
$text = html_decode($text);
$text = html_decode($text);

$text_len = strlen($text);
$max_text_length = 300;
$result_hiragana = False; if ($result_hiragana_2 !== False and $result_hiragana_3 === False) { $result_hiragana = $result_hiragana_2; } elseif ($result_hiragana_2 === False and $result_hiragana_3 !== False) { $result_hiragana = $result_hiragana_3; } elseif ($result_hiragana_2 !== False and $result_hiragana_3 !== False) { $result_hiragana = $result_hiragana_2 . ", " . $result_hiragana_3; } # kanji $delim1 = "<span class=\"text\">"; $delim2 = " </span>"; $result_kanji = extract_text($items[$i], $delim1, $delim2); # english $delim1 = "<span class=\"meaning-meaning\">"; $delim2 = "</span>"; $result_english = extract_text($items[$i], $delim1, $delim2); $result["hiragana"] = False; if ($result_hiragana !== False) { $result["hiragana"] = trim(strip_tags($result_hiragana)); } $result["kanji"] = False; if ($result_kanji !== False) { $result["kanji"] = trim(strip_tags($result_kanji)); } if ($result_english !== False) { $result["english"] = trim(strip_tags($result_english)); $results[] = $result; } } $n = 0; for ($i = 0; $i < count($results); $i++) {
run_command($unocommand . " --format=pdf " . escapeshellarg($file)); $path_parts = pathinfo($file); $basename_minus_extension = remove_extension($path_parts['basename']); $pdffile = $path_parts['dirname'] . "/" . $basename_minus_extension . ".pdf"; if (file_exists($pdffile)) { # Attach this PDF file as an alternative download. sql_query("delete from resource_alt_files where resource = '" . $ref . "' and unoconv='1'"); $alt_ref = add_alternative_file($ref, "PDF version"); $alt_path = get_resource_path($ref, true, "", false, "pdf", -1, 1, false, "", $alt_ref); copy($pdffile, $alt_path); unlink($pdffile); sql_query("update resource_alt_files set file_name='{$ref}-converted.pdf',description='generated by Open Office',file_extension='pdf',file_size='" . filesize_unlimited($alt_path) . "',unoconv='1' where resource='{$ref}' and ref='{$alt_ref}'"); # Set vars so we continue generating thumbs/previews as if this is a PDF file $extension = "pdf"; $file = $alt_path; extract_text($ref, $extension, $alt_path); } } /* ---------------------------------------- Calibre E-book processing ---------------------------------------- */ global $calibre_extensions; global $calibre_path; if (in_array($extension, $calibre_extensions) && isset($calibre_path) && !isset($newfile)) { $calibrecommand = $calibre_path . "/ebook-convert"; if (!file_exists($calibrecommand)) { exit("Calibre executable not found at '{$calibre_path}'"); } $path_parts = pathinfo($file); $basename_minus_extension = remove_extension($path_parts['basename']);
# Store original filename in field, if set if (isset($filename_field)) { $filename = $uploadfiles[$n]; if ($use_local) { $filename = mb_basename($filename); } update_field($ref, $filename_field, $filename); } # get file metadata if (getval("no_exif", "") == "") { extract_exif_comment($ref, $extension); } # extract text from documents (e.g. PDF, DOC). global $extracted_text_field; if (isset($extracted_text_field) && !$no_exif) { extract_text($ref, $extension); } $done++; # Add to collection? if ($collection != "") { $refs[] = $ref; } # Log this daily_stat("Resource upload", $ref); resource_log($ref, 'u', 0); } } if (!$use_local) { ftp_close($ftp); } switch ($done) {
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # the following code posts a submission to SoylentNews #return; if ($nick<>"crutchy") { privmsg("exec's submit script is borken. blame crutchy"); return; } */ $host = "soylentnews.org"; $port = 443; $uri = "/submit.pl"; $response = wget($host, $uri, $port, ICEWEASEL_UA); $html = strip_headers($response); $reskey = extract_text($html, "<input type=\"hidden\" id=\"reskey\" name=\"reskey\" value=\"", "\">"); if ($reskey === False) { privmsg("error: unable to extract reskey"); return; } sleep(25); $params = array(); $params["reskey"] = $reskey; #$params["name"]=trim(substr($nick,0,50)); $params["name"] = get_bot_nick(); $params["email"] = ""; $params["subj"] = trim(substr($source_title, 0, 100)); $params["primaryskid"] = "1"; $params["tid"] = "6"; $params["sub_type"] = "plain"; $params["story"] = $source_body . "\n\n" . $url . "\n\n-- submitted from IRC";
if (preg_match("#</?{$tag}([^/]*)>(([^/]+)</{$tag}>)?#i", $line)) { return true; } return false; } function isComment($line) { if (preg_match("#<\\?([^/]*)\\?>#i", $line)) { return true; } return false; } $file1 = fopen('xml/note.xml', 'r'); while (!feof($file1)) { $line1 = fgets($file1); if (match_tag('titre', $line1)) { echo "<titre>" . extract_text('titre', $line1) . " [Compilation]</titre>\n"; } else { if (!match_tag('note', $line1) and !match_tag('meta_note', $line1) and !match_tag('statut', $line1) and !match_tag('date_creation', $line1) and !match_tag('date_modification', $line1) and !match_tag('auteur', $line1) and !match_tag('contributeurs', $line1) and !match_tag('contributeur', $line1) and !match_tag('relecteur', $line1) and !match_tag('nom', $line1) and !match_tag('prenom', $line1) and !isComment($line1)) { echo $line1; } } } $file2 = fopen('xml/note2.xml', 'r'); echo "<?Note suivante?>\n"; while (!feof($file2)) { $line2 = fgets($file2); if (!match_tag('note', $line2) and !match_tag('meta_note', $line2) and !match_tag('statut', $line2) and !match_tag('date_creation', $line2) and !match_tag('date_modification', $line2) and !match_tag('auteur', $line2) and !match_tag('contributeurs', $line2) and !match_tag('contributeur', $line2) and !match_tag('relecteur', $line2) and !match_tag('nom', $line2) and !match_tag('prenom', $line2) and !match_tag('titre', $line2) and !isComment($line2)) { echo $line2; } }
/**
 * Prepare the content of one archived file for indexing.
 *
 * Three independent steps are applied to $buf:
 *  1. when the file name ends in a known document suffix, the content is passed
 *     through extract_text() with that suffix as the source type;
 *  2. when $index_framesets is '1', the first <frameset>...</frameset> block is
 *     replaced by a <body> built from get_frames();
 *  3. when $index_rss is '1' and the first 400 bytes look like a feed, the content
 *     is passed through get_arch_feeds().
 *
 * @param string $buf    Raw content of the file.
 * @param string $name   File name; only the part after its last dot is inspected.
 * @param string $url    URL of the file, handed to get_frames() and get_arch_feeds().
 * @param string $chrSet Charset handed to extract_text().
 *
 * @return mixed The processed content.
 */
function get_arch_content($buf, $name, $url, $chrSet)
{
    global $index_framesets, $command_line, $no_log, $can_leave_domain, $index_rss;

    // Suffix = lower-cased text after the last dot. A name without any dot has no suffix
    // (previously strrpos()'s false was used as offset 0, so e.g. "xpdf" looked like "pdf").
    $dot_pos = strrpos($name, ".");
    $suffix = ($dot_pos === false) ? '' : strtolower(substr($name, $dot_pos + 1));

    // Suffixes that need a converter.
    // NOTE(review): 'ptt' is kept exactly as found - it may be a typo for 'ppt'; confirm
    // against the source types extract_text() actually understands.
    $convertible = array('pdf', 'doc', 'rtf', 'xls', 'ptt', 'docx', 'xlsx');

    if (in_array($suffix, $convertible, true)) {
        // No file handle/name is available at this point; the original passed an
        // undefined variable here, which is the same as passing null.
        $file0 = null;
        // The pdf branch used to pass the undefined $charSet instead of $chrSet.
        $buf = extract_text($buf, $file0, $suffix, 0, $chrSet);
    }

    // Framesets of this file are handled here; iframes are extracted later for the complete file.
    if ($index_framesets == '1') {
        if (preg_match("@<frameset[^>]*>(.*?)<\\/frameset>@si", $buf, $regs)) {
            printStandardReport('newFrameset', $command_line, $no_log);
            // $regs[1] is the inner part of <frameset> ... </frameset>.
            $frame = $regs[1];
            $replace = get_frames($frame, $url, $can_leave_domain);
            $replace = "<body>" . $replace . "</body>";
            // Substitute the frameset with the collected replacement; this becomes the body.
            $buf = preg_replace("@<frameset.*?</frameset>@si", "{$replace}", $buf);
        }
    }

    // Archived feeds.
    // NOTE(review): the "atom" alternative has no leading "<", so any occurrence of the
    // word in the first 400 bytes matches - left unchanged; confirm whether intended.
    if (preg_match("/<rss|atom|<feed|<rdf|<rsd/si", substr($buf, 0, 400)) && $index_rss == '1') {
        $buf = get_arch_feeds($buf, $url);
    }

    return $buf;
}
return; } $uri = "/dictionary/meaning-of-" . urlencode($trailing) . ".html"; $response = wget(HOST, $uri); $html = strip_headers($response); if ($html === False) { privmsg("error downloading"); return; } $items = explode("<div class=\"search_items\">", $html); array_shift($items); $n = min(MAX_ITEMS, count($items)); $results = array(); for ($i = 0; $i < $n; $i++) { $delim1 = "<ruby>"; $delim2 = "</ruby>"; $result = extract_text($items[$i], $delim1, $delim2); if ($result !== False) { $result = str_replace("<rp>(", " <rp>(", $result); $results[] = strip_tags($result); } } if (count($results) > 0) { for ($i = 0; $i < count($results); $i++) { privmsg($results[$i]); } privmsg(HOST . $uri); } else { privmsg("no results"); } #####################################################################################################
/**
 * Process an uploaded file for resource $ref, or "revert" the resource by
 * re-processing the file it already has.
 *
 * In revert mode the resource's metadata and keywords are deleted and the stored
 * file is treated as if it had just been uploaded. Otherwise the posted file is
 * moved to the resource path, replacing any previous file, ICC profiles and
 * derived previews. In both modes the extension is stored, metadata and text are
 * extracted, and the upload is logged.
 *
 * @param mixed $ref        Resource reference.
 * @param bool  $no_exif    When true, skip embedded-metadata and text extraction and
 *                          optionally merge the file name into the title field.
 * @param bool  $revert     When true, re-process the existing file and do not move an upload.
 * @param bool  $autorotate When true (and $camera_autorotation is on), rotate the file.
 *
 * @return string|false Status message, or false when the extension cannot be
 *                      determined, the extension is banned, or the file move fails.
 */
function upload_file($ref, $no_exif = false, $revert = false, $autorotate = false)
{
    hook("beforeuploadfile", "", array($ref));
    hook("clearaltfiles", "", array($ref)); // optional: clear alternative files before uploading new resource

    # revert is mainly for metadata reversion, removing all metadata and simulating a reupload of the file from scratch.
    hook("removeannotations", "", array($ref));

    $exiftool_fullpath = get_utility_path("exiftool");

    # Process file upload for resource $ref
    if ($revert == true) {
        global $filename_field;
        $original_filename = get_data_by_field($ref, $filename_field);

        # Field 8 is used in a special way for staticsync, don't overwrite.
        $test_for_staticsync = get_resource_data($ref);
        if ($test_for_staticsync['file_path'] != "") {
            $staticsync_mod = " and resource_type_field != 8";
        } else {
            $staticsync_mod = "";
        }

        sql_query("delete from resource_data where resource={$ref} {$staticsync_mod}");
        sql_query("delete from resource_keyword where resource={$ref} {$staticsync_mod}");

        # clear 'joined' display fields which are based on metadata that is being deleted
        # in a revert (original filename is reinserted later)
        $display_fields = get_resource_table_joins();
        $clear_assignments = array();
        for ($n = 0; $n < count($display_fields); $n++) {
            # For staticsync resources field 8 is preserved.
            if ($staticsync_mod != "" && $display_fields[$n] == 8) {
                continue;
            }
            $clear_assignments[] = "field" . $display_fields[$n] . "=''";
        }
        # With no joined fields there is nothing to clear; running the query anyway
        # would produce an invalid "update resource set  where ..." statement.
        if (count($clear_assignments) > 0) {
            $clear_fields = implode(",", $clear_assignments);
            sql_query("update resource set " . $clear_fields . " where ref={$ref}");
        }

        # also add the ref back into keywords:
        add_keyword_mappings($ref, $ref, -1);

        $extension = sql_value("select file_extension value from resource where ref={$ref}", "");
        $filename = get_resource_path($ref, true, "", false, $extension);
        $processfile['tmp_name'] = $filename;
    } else {
        # Work out which file has been posted
        if (isset($_FILES['userfile'])) {
            $processfile = $_FILES['userfile'];
        } elseif (isset($_FILES['Filedata'])) {
            $processfile = $_FILES['Filedata']; # Java upload (at least) needs this
        }

        # Plupload needs this
        if (isset($_REQUEST['name'])) {
            $filename = $_REQUEST['name'];
        } else {
            $filename = $processfile['name'];
        }

        global $filename_field;
        if ($no_exif && isset($filename_field)) {
            $user_set_filename = get_data_by_field($ref, $filename_field);
            if (trim($user_set_filename) != '') {
                // Get extension of file just in case the user didn't provide one.
                // pathinfo() omits the 'extension' key when the name has none.
                $path_parts = pathinfo($filename);
                $original_extension = isset($path_parts['extension']) ? $path_parts['extension'] : '';

                $filename = $user_set_filename;

                // If the user filename doesn't have an extension add the original one
                $path_parts = pathinfo($filename);
                if (!isset($path_parts['extension']) && $original_extension != '') {
                    $filename .= '.' . $original_extension;
                }
            }
        }
    }

    # Work out extension
    if (!isset($extension)) {
        # first try to get it from the filename
        $filename_parts = explode(".", $filename);
        if (count($filename_parts) > 1) {
            $extension = escape_check(trim(strtolower($filename_parts[count($filename_parts) - 1])));
        } else {
            # No dot in the name: ask exiftool for the file type, give up if that is not possible.
            if ($exiftool_fullpath == false) {
                return false;
            }
            $file_type_by_exiftool = run_command($exiftool_fullpath . " -filetype -s -s -s " . escapeshellarg($processfile['tmp_name']));
            if (strlen($file_type_by_exiftool) == 0) {
                return false;
            }
            $extension = str_replace(" ", "_", trim(strtolower($file_type_by_exiftool)));
        }
    }

    # Banned extension?
    global $banned_extensions;
    if (in_array($extension, $banned_extensions)) {
        return false;
    }

    $status = "Please provide a file name.";
    $filepath = get_resource_path($ref, true, "", true, $extension);

    if (!$revert) {
        # Remove existing file, if present
        hook("beforeremoveexistingfile", "", array("resourceId" => $ref));

        $old_extension = sql_value("select file_extension value from resource where ref='{$ref}'", "");
        if ($old_extension != "") {
            $old_path = get_resource_path($ref, true, "", true, $old_extension);
            if (file_exists($old_path)) {
                unlink($old_path);
            }
        }

        // also remove any existing extracted icc profiles
        $icc_path = get_resource_path($ref, true, "", true, $extension . '.icc');
        if (file_exists($icc_path)) {
            unlink($icc_path);
        }

        global $pdf_pages;

        // if there is a -0.icc page, run through and delete as many as necessary.
        $iccx = 0;
        $badicc_path = str_replace(".icc", "-{$iccx}.icc", $icc_path);
        while (file_exists($badicc_path)) {
            unlink($badicc_path);
            $iccx++;
            $badicc_path = str_replace(".icc", "-{$iccx}.icc", $icc_path);
        }
    }

    if (!$revert) {
        if ($filename != "") {
            global $jupload_alternative_upload_location, $plupload_upload_location;

            if (isset($plupload_upload_location)) {
                # PLUpload - file was sent chunked and reassembled - use the reassembled file location
                $result = rename($plupload_upload_location, $filepath);
            } elseif (isset($jupload_alternative_upload_location)) {
                # JUpload - file was sent chunked and reassembled - use the reassembled file location
                $result = rename($jupload_alternative_upload_location, $filepath);
            } else {
                # Standard upload.
                $result = move_uploaded_file($processfile['tmp_name'], $filepath);
            }

            if ($result == false) {
                # The file could not be moved into place (e.g. it exceeded the upload size limit).
                return false;
            }

            global $camera_autorotation;
            global $ffmpeg_audio_extensions;
            if ($camera_autorotation) {
                if ($autorotate && !in_array($extension, $ffmpeg_audio_extensions)) {
                    AutoRotateImage($filepath);
                }
            }

            chmod($filepath, 0777);

            global $icc_extraction;
            global $ffmpeg_supported_extensions;
            if ($icc_extraction && $extension != "pdf" && !in_array($extension, $ffmpeg_supported_extensions)) {
                extract_icc_profile($ref, $extension);
            }

            $status = "Your file has been uploaded.";
        }
    }

    # Store extension in the database and update file modified time.
    if ($revert) {
        $has_image = "";
    } else {
        $has_image = ",has_image=0";
    }
    sql_query("update resource set file_extension='{$extension}',preview_extension='jpg',file_modified=now() {$has_image} where ref='{$ref}'");

    # delete existing resource_dimensions
    sql_query("delete from resource_dimensions where resource='{$ref}'");

    # get file metadata
    if (!$no_exif) {
        extract_exif_comment($ref, $extension);
    } else {
        global $merge_filename_with_title, $lang;

        if ($merge_filename_with_title) {
            $merge_filename_with_title_option = urlencode(getval('merge_filename_with_title_option', ''));
            $merge_filename_with_title_include_extensions = urlencode(getval('merge_filename_with_title_include_extensions', ''));
            $merge_filename_with_title_spacer = urlencode(getval('merge_filename_with_title_spacer', ''));

            if (isset($_REQUEST['name'])) {
                $original_filename = $_REQUEST['name'];
            } else {
                $original_filename = $processfile['name'];
            }

            if ($merge_filename_with_title_include_extensions == 'yes') {
                $merged_filename = $original_filename;
            } else {
                $merged_filename = strip_extension($original_filename);
            }

            // Get title field:
            $resource = get_resource_data($ref);
            $read_from = get_exiftool_fields($resource['resource_type']);

            for ($i = 0; $i < count($read_from); $i++) {
                if ($read_from[$i]['name'] != 'title') {
                    continue;
                }

                $oldval = get_data_by_field($ref, $read_from[$i]['ref']);

                // Title already contains the file name: nothing to merge.
                if (strpos($oldval, $merged_filename) !== FALSE) {
                    continue;
                }

                // $newval stays null unless the chosen option produces a new title, so that
                // "do not use" (or an unknown option) leaves the field untouched instead of
                // writing an undefined or stale value.
                $newval = null;
                switch ($merge_filename_with_title_option) {
                    case $lang['merge_filename_title_do_not_use']:
                        // Do nothing since the user doesn't want to use this feature
                        break;

                    case $lang['merge_filename_title_replace']:
                        $newval = $merged_filename;
                        break;

                    case $lang['merge_filename_title_prefix']:
                        $newval = $merged_filename . $merge_filename_with_title_spacer . $oldval;
                        if ($oldval == '') {
                            $newval = $merged_filename;
                        }
                        break;

                    case $lang['merge_filename_title_suffix']:
                        $newval = $oldval . $merge_filename_with_title_spacer . $merged_filename;
                        if ($oldval == '') {
                            $newval = $merged_filename;
                        }
                        break;

                    default:
                        // Do nothing
                        break;
                }

                if ($newval !== null) {
                    update_field($ref, $read_from[$i]['ref'], $newval);
                }
            }
        }
    }

    # extract text from documents (e.g. PDF, DOC).
    global $extracted_text_field;
    if (isset($extracted_text_field) && !$no_exif) {
        # NOTE(review): $unoconv_path and $unoconv_extensions are not imported with `global`
        # in this function, so isset($unoconv_path) is always false here and extract_text()
        # always runs. Left unchanged; confirm the intended behaviour before importing them.
        if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
            // omit, since the unoconv process will do it during preview creation below
        } else {
            extract_text($ref, $extension);
        }
    }

    # Store original filename in field, if set
    global $filename_field, $amended_filename;
    if (isset($filename_field)) {
        if (isset($amended_filename)) {
            $filename = $amended_filename;
        }

        if (!$revert) {
            update_field($ref, $filename_field, $filename);
        } else {
            update_field($ref, $filename_field, $original_filename);
        }
    }

    if (!$revert) {
        # Clear any existing FLV file or multi-page previews.
        global $pdf_pages;
        for ($n = 2; $n <= $pdf_pages; $n++) {
            # Remove preview page.
            $path = get_resource_path($ref, true, "scr", false, "jpg", -1, $n, false);
            if (file_exists($path)) {
                unlink($path);
            }

            # Also try the watermarked version.
            $path = get_resource_path($ref, true, "scr", false, "jpg", -1, $n, true);
            if (file_exists($path)) {
                unlink($path);
            }
        }

        # Remove any FLV video preview (except if the actual resource is an FLV file).
        global $ffmpeg_preview_extension;
        if ($extension != $ffmpeg_preview_extension) {
            $path = get_resource_path($ref, true, "", false, $ffmpeg_preview_extension);
            if (file_exists($path)) {
                unlink($path);
            }
        }

        # Remove any FLV preview-only file
        $path = get_resource_path($ref, true, "pre", false, $ffmpeg_preview_extension);
        if (file_exists($path)) {
            unlink($path);
        }

        # Remove any MP3 (except if the actual resource is an MP3 file).
        if ($extension != "mp3") {
            $path = get_resource_path($ref, true, "", false, "mp3");
            if (file_exists($path)) {
                unlink($path);
            }
        }

        # Create previews
        global $enable_thumbnail_creation_on_upload;
        if ($enable_thumbnail_creation_on_upload) {
            create_previews($ref, false, $extension);
        } else {
            # Offline thumbnail generation is being used. Set 'has_image' to zero so the
            # offline create_previews.php script picks this up.
            sql_query("update resource set has_image=0 where ref='{$ref}'");
        }
    }

    # Update file dimensions
    get_original_imagesize($ref, $filepath, $extension);

    hook("Uploadfilesuccess", "", array("resourceId" => $ref));

    # Update disk usage
    update_disk_usage($ref);

    # Log this activity.
    $log_ref = resource_log($ref, "u", 0);
    hook("upload_image_after_log_write", "", array($ref, $log_ref));

    return $status;
}
/**
 * Created by PhpStorm.
 * User: kosmos
 * Date: 6/25/14
 * Time: 8:34 PM
 */

/**
 * Crawl a site starting at $base_url and collect the extracted text of every page reached.
 *
 * The start page is fetched first; links found by extract_urls() are then visited
 * breadth-first. A link is followed only when it contains the start URL, has not been
 * visited yet, and passes checkContentType().
 *
 * @param string $base_url URL of the page to start from.
 *
 * @return array Map of page URL => result of extract_text() for that page; an empty
 *               array when the start page cannot be fetched.
 */
function get_site_texts($base_url)
{
    // Initialised up front so a failed first fetch returns an empty array, not an undefined variable.
    $text = array();

    $mainURL = $base_url;
    $rootURL = getMainUrl($mainURL);
    if (!$rootURL) {
        $rootURL = $base_url;
    }

    $visited_pages = array();

    // Fetching is best-effort: an unreachable page is skipped silently.
    $html = @file_get_contents($base_url);
    if (!$html) {
        return $text;
    }

    array_push($visited_pages, $base_url);
    $html = removeJSCSS($html);
    $text[$mainURL] = extract_text($html);

    // Queue of URLs still to visit.
    $urls = extract_urls($html, $mainURL, $rootURL, $visited_pages);

    while ($urls) {
        $url = array_shift($urls);

        if (strpos($url, $mainURL) === false || in_array($url, $visited_pages) || !checkContentType($url)) {
            continue;
        }

        $html = @file_get_contents($url);
        if (!$html) {
            continue;
        }

        array_push($visited_pages, $url);
        $html = removeJSCSS($html);
        $text[$url] = extract_text($html);

        // Enqueue newly discovered links that are neither queued nor visited yet.
        $extracted_urls = extract_urls($html, $url, $rootURL, $visited_pages);
        foreach ($extracted_urls as $candidate) {
            if (!in_array($candidate, $urls) && !in_array($candidate, $visited_pages)) {
                $urls[] = $candidate;
            }
        }
    }

    return $text;
}
echo '<div class="alert alert-error"> ERROR' . mysql_error() . '</div>'; } while ($row = @mysql_fetch_array($result)) { ?> <center><h1>Hello <?php echo $steampersona; ?> .</h1></center> <div class="container-fluid"> <div class="row-fluid"> <div class="span4"> <!--Whitelists content--> <?php $data = $row['_Data']; $whitelist = extract_text($data, '"Whitelisted":["', '"],"'); $your_array = explode('","', $whitelist); $arrlength = count($your_array); echo "<div class='well'>"; echo "<h2>Your Whitelists</h2>"; for ($x = 0; $x < $arrlength; $x++) { echo $your_array[$x]; echo "<br>"; } echo "</div>"; ?> </div> <div class="span8"> <!--Other content--> <div class="container-fluid"> <div class="row-fluid">
/**
 * Recursively walk a StaticSync folder and import or refresh the files found in it.
 *
 * For every directory entry the function either recurses (folders / links) or,
 * for regular files, imports the file as a resource, attaches metadata derived
 * from the path, creates previews, ingests alternative files and adds the
 * resource to the auto-theme collection. Files already listed in $done only
 * get their previews regenerated when their modification time is newer than
 * the one recorded in $modtimes.
 *
 * Configuration and bookkeeping are read from (and partly written to) globals;
 * see the global statement below.
 *
 * @param string $folder         Folder to scan.
 * @param array  $version_dir    Names of version directories (dd-dd-dddd suffix) seen so far.
 *                               Passed by value, so additions are visible to deeper
 *                               recursion levels only.
 * @param array  $resource_array Receives the ref of every successfully imported resource.
 * @param array  $resource_error Receives ['size'][$file] / ['name'][$file] => full path
 *                               for files failing the size or naming checks.
 *
 * @return bool|null true when $count exceeded $staticsync_max_files, false when the
 *                   folder could not be opened, null otherwise.
 */
function ProcessFolder($folder, $version_dir, &$resource_array, &$resource_error)
{
    global $lang, $syncdir, $nogo, $staticsync_max_files, $count, $done, $modtimes, $lastsync,
           $ffmpeg_preview_extension, $staticsync_autotheme, $staticsync_folder_structure,
           $staticsync_extension_mapping_default, $staticsync_extension_mapping,
           $staticsync_mapped_category_tree, $staticsync_title_includes_path, $staticsync_ingest,
           $staticsync_mapfolders, $staticsync_alternatives_suffix, $theme_category_levels,
           $staticsync_defaultstate, $additional_archive_states,
           $staticsync_extension_mapping_append_values, $image_alternatives, $exclude_resize,
           $post_host, $media_endpoint, $image_required_height, $sync_bucket, $aws_key,
           $aws_secret_key, $banned_extensions, $extracted_text_field, $filename_field;

    $collection = 0;
    echo "Processing Folder: {$folder}" . PHP_EOL;

    # List all files in this folder.
    $dh = is_dir($folder) ? opendir($folder) : false;
    if ($dh === false) {
        # Without this guard readdir() would be called on an invalid handle.
        echo "Unable to open folder: {$folder}" . PHP_EOL;
        return false;
    }

    while (($file = readdir($dh)) !== false) {
        if ($file == '.' || $file == '..') {
            continue;
        }

        $filetype = filetype($folder . "/" . $file);
        $fullpath = $folder . "/" . $file;
        $shortpath = str_replace($syncdir . "/", '', $fullpath);

        # Work out extension
        $extension = explode(".", $file);
        if (count($extension) > 1) {
            $extension = trim(strtolower($extension[count($extension) - 1]));
        } else {
            // No extension
            $extension = "";
        }

        # strpos() returns 0 for a match at the very start, so compare against false
        # explicitly; an empty $nogo must never match.
        if ((string) $nogo !== '' && strpos($fullpath, (string) $nogo) !== false) {
            echo "This directory is to be ignored." . PHP_EOL;
            continue;
        }

        if ($staticsync_mapped_category_tree) {
            $path_parts = explode("/", $shortpath);
            array_pop($path_parts);
            touch_category_tree_level($path_parts);
        }

        # -----FOLDERS-------------
        if (($filetype == "dir" || $filetype == "link")
            && strpos($nogo, "[{$file}]") === false
            && strpos($file, $staticsync_alternatives_suffix) === false
        ) {
            # Get current version directories.
            if (preg_match("/[0-9]{2}-[0-9]{2}-[0-9]{4}\$/", $file)) {
                if (!in_array($file, $version_dir)) {
                    array_push($version_dir, $file);
                }
                if (preg_match('/in_progress*/', $file)) {
                    echo "The Barcode is still being processed." . PHP_EOL;
                    continue;
                }
            }

            # Recurse
            ProcessFolder($folder . "/" . $file, $version_dir, $resource_array, $resource_error);
        }

        # Collect the base names of the PSD files that sit next to the "images" folder.
        $psd_files = array();
        if (preg_match('/images/', $fullpath)) {
            $path_array = explode('/', $fullpath);
            $images_index = array_search('images', $path_array, true);
            if ($images_index !== false) {
                $psd_path = implode('/', array_slice($path_array, 0, $images_index)) . '/psd/';
                # scandir() fails on a missing directory; treat that as "no PSD files".
                $psd_listing = is_dir($psd_path) ? scandir($psd_path) : false;
                if ($psd_listing !== false) {
                    foreach (array_diff($psd_listing, array('..', '.')) as $psd_file) {
                        $psd_files[] = pathinfo($psd_file, PATHINFO_FILENAME);
                    }
                }
            }
        }

        # -------FILES---------------
        if ($filetype == "file" && substr($file, 0, 1) != "." && strtolower($file) != "thumbs.db") {
            /* Below Code Adapted from CMay's bug report */
            # Check to see if extension is banned, do not add if it is banned.
            # in_array() is used because array_search() returns key 0 (falsy) for the
            # first banned extension.
            if (is_array($banned_extensions) && in_array($extension, $banned_extensions, true)) {
                continue;
            }
            /* Above Code Adapted from CMay's bug report */

            $count++;
            if ($count > $staticsync_max_files) {
                closedir($dh);
                return true;
            }

            $last_sync_date = sql_value("select value from sysvars where name = 'last_sync'", "");
            $file_creation_date = date("Y-m-d H:i:s", filectime($fullpath));
            if (isset($last_sync_date) && $last_sync_date > $file_creation_date) {
                echo "No new file found.." . PHP_EOL;
                continue;
            }

            # Already exists?
            if (!isset($done[$shortpath])) {
                echo "Processing file: {$fullpath}" . PHP_EOL;

                if ($collection == 0 && $staticsync_autotheme) {
                    # Make a new collection for this folder.
                    $e = explode("/", $shortpath);
                    $theme = ucwords($e[0]);
                    $themesql = "theme='" . ucwords(escape_check($e[0])) . "'";
                    $themecolumns = "theme";
                    $themevalues = "'" . ucwords(escape_check($e[0])) . "'";

                    if ($staticsync_folder_structure) {
                        for ($x = 0; $x < count($e) - 1; $x++) {
                            if ($x != 0) {
                                $themeindex = $x + 1;
                                if ($themeindex > $theme_category_levels) {
                                    $theme_category_levels = $themeindex;
                                    if ($x == count($e) - 2) {
                                        echo PHP_EOL . PHP_EOL . "UPDATE THEME_CATEGORY_LEVELS TO {$themeindex} IN CONFIG!!!!" . PHP_EOL . PHP_EOL;
                                    }
                                }
                                $th_name = ucwords(escape_check($e[$x]));
                                $themesql .= " AND theme{$themeindex} = '{$th_name}'";
                                $themevalues .= ",'{$th_name}'";
                                $themecolumns .= ",theme{$themeindex}";
                            }
                        }
                    }

                    $name = count($e) == 1 ? '' : $e[count($e) - 2];
                    echo "Collection {$name}, theme={$theme}" . PHP_EOL;
                    $ul_username = $theme;
                    $escaped_name = escape_check($name);
                    $collection = sql_value("SELECT ref value FROM collection WHERE name='{$escaped_name}' AND {$themesql}", 0);
                    if ($collection == 0) {
                        sql_query("INSERT INTO collection (name,created,public,{$themecolumns},allow_changes)\n VALUES ('{$escaped_name}', NOW(), 1, {$themevalues}, 0)");
                        $collection = sql_insert_id();
                    }
                }

                # Work out a resource type based on the extension.
                $type = $staticsync_extension_mapping_default;
                reset($staticsync_extension_mapping);
                foreach ($staticsync_extension_mapping as $rt => $extensions) {
                    if (in_array($extension, $extensions)) {
                        $type = $rt;
                    }
                }
                $modified_type = hook('modify_type', 'staticsync', array($type));
                if (is_numeric($modified_type)) {
                    $type = $modified_type;
                }

                # Formulate a title. The extension is only stripped when there is one;
                # an empty extension would otherwise remove every "." from the title.
                if ($staticsync_title_includes_path) {
                    $title_find = array('/', '_');
                    $title_repl = array(' - ', ' ');
                    if ($extension !== '') {
                        $title_find[] = ".{$extension}";
                        $title_repl[] = '';
                    }
                    $title = ucfirst(str_ireplace($title_find, $title_repl, $shortpath));
                } else {
                    $title = $extension !== '' ? str_ireplace(".{$extension}", '', $file) : $file;
                }
                $modified_title = hook('modify_title', 'staticsync', array($title));
                if ($modified_title !== false) {
                    $title = $modified_title;
                }

                # Import this file.
                # $r is reset for every file: otherwise a file that matches neither branch
                # below would reuse the resource ref of the previously imported file.
                $r = false;
                if (!empty($psd_files)) {
                    # Check for file name containing the psd.
                    $image_psd_name = explode('_', $file)[0];
                    if (!in_array($image_psd_name, $psd_files, true)) {
                        echo "Filename '{$fullpath}' is not in right format.." . PHP_EOL;
                        $resource_error['name'][$file] = $fullpath;
                        continue;
                    }

                    # Image name is in right format.
                    if (!validate_image_size($fullpath, $image_required_height)) {
                        $resource_error['size'][$file] = $fullpath;
                    }
                    $r = import_resource($fullpath, $type, $title, $staticsync_ingest);

                    # Only record metadata when the import actually produced a resource.
                    if ($r !== false) {
                        $escaped_psd_name = escape_check($image_psd_name);
                        $escaped_fullpath = escape_check($fullpath);
                        sql_query("INSERT INTO resource_data (resource,resource_type_field,value)\n VALUES ('{$r}', (SELECT ref FROM resource_type_field WHERE name = 'logical_id'), '{$escaped_psd_name}')");
                        $original_filepath = sql_query("SELECT value FROM resource_data WHERE resource = '{$r}' AND\n resource_type_field = (SELECT ref FROM resource_type_field where name = 'original_filepath')");
                        # NOTE(review): isset() is true for any non-null query result, so this
                        # insert appears to run unconditionally - confirm whether "only when
                        # no row exists yet" was intended.
                        if (isset($original_filepath)) {
                            sql_query("INSERT INTO resource_data (resource,resource_type_field,value)\n VALUES ('{$r}',(SELECT ref FROM resource_type_field WHERE name = 'original_filepath'), '{$escaped_fullpath}')");
                        }
                    }
                } elseif (word_in_string($exclude_resize, explode('/', $fullpath))) {
                    $r = import_resource($fullpath, $type, $title, $staticsync_ingest);
                }

                if ($r !== false) {
                    array_push($resource_array, $r);

                    # Create current version for resource.
                    if (count($version_dir) == 1) {
                        sql_query("INSERT into resource_data (resource,resource_type_field,value)\n VALUES ('{$r}',(SELECT ref FROM resource_type_field WHERE name = 'current'), 'TRUE')");
                    }

                    $sync_status = sync_to_s3($syncdir, $sync_bucket, $aws_key, $aws_secret_key);
                    if (!$sync_status) {
                        echo "Failed to sync";
                    }

                    # Add to mapped category tree (if configured).
                    # Same truthiness test as above, so $path_parts is guaranteed to be set.
                    if ($staticsync_mapped_category_tree) {
                        $basepath = '';
                        # Save tree position to category tree field
                        # For each node level, expand it back to the root so the full path is stored.
                        for ($n = 0; $n < count($path_parts); $n++) {
                            if ($basepath != '') {
                                $basepath .= "~";
                            }
                            $basepath .= $path_parts[$n];
                            $path_parts[$n] = $basepath;
                        }
                        update_field($r, $staticsync_mapped_category_tree, "," . join(",", $path_parts));
                    }

                    # This is an override to add user data to the resources
                    if (!isset($userref)) {
                        # $ul_username is only assigned when an auto-theme collection was
                        # created in this call, so guard against it being undefined.
                        $ul_username = ucfirst(strtolower(isset($ul_username) ? $ul_username : ''));
                        # NOTE(review): the username literal below does not use $ul_username -
                        # confirm the intended lookup value.
                        $current_user_ref = sql_query("Select ref from user where username = '******' ");
                        if (!empty($current_user_ref)) {
                            $current_user_ref = $current_user_ref[0]['ref'];
                            sql_query("UPDATE resource SET created_by='{$current_user_ref}' where ref = {$r}");
                        }
                    }

                    # default access level. This may be overridden by metadata mapping.
                    $accessval = 0;

                    # StaticSync path / metadata mapping
                    # Extract metadata from the file path as per $staticsync_mapfolders in config.php
                    if (isset($staticsync_mapfolders)) {
                        foreach ($staticsync_mapfolders as $mapfolder) {
                            $match = $mapfolder["match"];
                            $field = $mapfolder["field"];
                            $level = $mapfolder["level"];

                            if (strpos("/" . $shortpath, $match) === false) {
                                continue;
                            }

                            # Match. Extract metadata.
                            $path_parts = explode("/", $shortpath);
                            if ($level >= count($path_parts)) {
                                continue;
                            }

                            // special cases first.
                            if ($field === 'access') {
                                # access level is a special case
                                # first determine if the value matches a defined access level
                                $value = $path_parts[$level - 1];
                                for ($n = 0; $n < 3; $n++) {
                                    # if we get an exact match or a match except for case
                                    if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n])) {
                                        $accessval = $n;
                                        echo "Will set access level to " . $lang['access' . $n] . " ({$n})" . PHP_EOL;
                                    }
                                }
                            } elseif ($field === 'archive') {
                                # archive level is a special case
                                # first determine if the value matches a defined archive level
                                $value = $mapfolder["archive"];
                                $archive_array = array_merge(array(-2, -1, 0, 1, 2, 3), $additional_archive_states);
                                if (in_array($value, $archive_array)) {
                                    # NOTE(review): $archiveval is only echoed; the UPDATE below
                                    # writes $staticsync_defaultstate - confirm this is intended.
                                    $archiveval = $value;
                                    echo "Will set archive level to " . $lang['status' . $value] . " ({$archiveval})" . PHP_EOL;
                                }
                            } else {
                                # Save the value
                                $value = $path_parts[$level - 1];
                                if ($staticsync_extension_mapping_append_values) {
                                    $given_value = $value;
                                    // append the values if possible...not used on dropdown, date, category tree, datetime, or radio buttons
                                    $field_info = get_resource_type_field($field);
                                    # The type lives on the looked-up field record, not on the field ref.
                                    if (in_array($field_info['type'], array(0, 1, 2, 4, 5, 6, 7, 8))) {
                                        $old_value = sql_value("select value value from resource_data where resource={$r} and resource_type_field={$field}", "");
                                        $value = append_field_value($field_info, $value, $old_value);
                                    }
                                }
                                update_field($r, $field, trim($value));
                                if (strtotime(trim($value))) {
                                    add_keyword_mappings($r, trim($value), $field, false, true);
                                } else {
                                    add_keyword_mappings($r, trim($value), $field);
                                }
                                if ($staticsync_extension_mapping_append_values) {
                                    $value = $given_value;
                                }
                                echo " - Extracted metadata from path: {$value}" . PHP_EOL;
                            }
                        }
                    }

                    # Resize only original images.
                    if (!word_in_string($exclude_resize, explode('/', $fullpath))) {
                        echo "Creating preview..";
                        create_previews($r, false, $extension, false, false, -1, false, $staticsync_ingest);
                    }

                    # update access level
                    sql_query("UPDATE resource SET access = '{$accessval}',archive='{$staticsync_defaultstate}' WHERE ref = '{$r}'");

                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && is_dir($altpath)) {
                        $adh = opendir($altpath);
                        if ($adh !== false) {
                            while (($altfile = readdir($adh)) !== false) {
                                $altfiletype = filetype($altpath . "/" . $altfile);
                                # The filter must look at the alternative file, not at the main file.
                                if ($altfiletype == "file" && substr($altfile, 0, 1) != "." && strtolower($altfile) != "thumbs.db") {
                                    # Create alternative file
                                    # Find extension
                                    $ext = explode(".", $altfile);
                                    $ext = $ext[count($ext) - 1];
                                    $description = str_replace("?", strtoupper($ext), $lang["originalfileoftype"]);
                                    $file_size = filesize_unlimited($altpath . "/" . $altfile);
                                    $aref = add_alternative_file($r, $altfile, $description, $altfile, $ext, $file_size);
                                    $path = get_resource_path($r, true, '', true, $ext, -1, 1, false, '', $aref);
                                    rename($altpath . "/" . $altfile, $path); # Move alternative file
                                }
                            }
                            closedir($adh);
                        }
                    }

                    # Add to collection
                    if ($staticsync_autotheme) {
                        $test = sql_query("SELECT * FROM collection_resource WHERE collection='{$collection}' AND resource='{$r}'");
                        if (count($test) == 0) {
                            sql_query("INSERT INTO collection_resource (collection, resource, date_added)\n VALUES ('{$collection}', '{$r}', NOW())");
                        }
                    }
                } else {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?)" . PHP_EOL;
                }
            } else {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath, $modtimes) && $filemod > strtotime($modtimes[$shortpath])) {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("SELECT ref, has_image, file_modified, file_extension FROM resource\n WHERE file_path='" . escape_check($shortpath) . "'");
                    if (count($rd) > 0) {
                        $rd = $rd[0];
                        $rref = $rd["ref"];
                        echo "Resource {$rref} has changed, regenerating previews: {$fullpath}" . PHP_EOL;
                        extract_exif_comment($rref, $rd["file_extension"]);

                        # extract text from documents (e.g. PDF, DOC).
                        if (isset($extracted_text_field)) {
                            # NOTE(review): $unoconv_path / $unoconv_extensions are not declared
                            # global in this function, so this branch is never taken and
                            # extract_text() always runs - confirm before importing them.
                            if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
                                // omit, since the unoconv process will do it during preview creation below
                            } else {
                                extract_text($rref, $extension);
                            }
                        }

                        # Store original filename in field, if set
                        if (isset($filename_field)) {
                            update_field($rref, $filename_field, $file);
                        }

                        create_previews($rref, false, $rd["file_extension"], false, false, -1, false, $staticsync_ingest);
                        sql_query("UPDATE resource SET file_modified=NOW() WHERE ref='{$rref}'");
                    }
                }
            }
        }
    }

    closedir($dh);
}
/**
 * Recursively walk a StaticSync folder and import or refresh the files found in it.
 *
 * Folders and links are processed recursively. Regular files that are not yet
 * listed in $done are imported as resources, tagged with metadata derived from
 * their path, given their alternative files and added to the auto-theme
 * collection. Files already in $done get their previews regenerated when their
 * modification time is newer than the one recorded in $modtimes.
 *
 * Configuration and bookkeeping are read from (and partly written to) globals;
 * see the global statement below.
 *
 * @param string $folder Folder to scan.
 *
 * @return bool|null true when $count exceeded $max, false when the folder could
 *                   not be opened, null otherwise.
 */
function ProcessFolder($folder)
{
    global $syncdir, $nogo, $max, $count, $done, $modtimes, $lastsync, $ffmpeg_preview_extension,
           $staticsync_autotheme, $staticsync_folder_structure, $staticsync_extension_mapping_default,
           $staticsync_extension_mapping, $staticsync_mapped_category_tree,
           $staticsync_title_includes_path, $staticsync_ingest, $staticsync_mapfolders,
           $staticsync_alternatives_suffix, $theme_category_levels, $lang,
           $extracted_text_field, $filename_field;

    $collection = 0;
    echo "Processing Folder: $folder\n";

    # List all files in this folder.
    $dh = is_dir($folder) ? opendir($folder) : false;
    if ($dh === false) {
        # Without this guard readdir() would be called on an invalid handle.
        echo "Unable to open folder: $folder\n";
        return false;
    }

    while (($file = readdir($dh)) !== false) {
        $filetype = filetype($folder . "/" . $file);
        $fullpath = $folder . "/" . $file;
        $shortpath = str_replace($syncdir . "/", "", $fullpath);

        # Work out extension. A name without a "." has no extension at all.
        $name_parts = explode(".", $file);
        $extension = count($name_parts) > 1 ? trim(strtolower($name_parts[count($name_parts) - 1])) : "";

        if ($staticsync_mapped_category_tree) {
            $path_parts = explode("/", $shortpath);
            array_pop($path_parts);
            touch_category_tree_level($path_parts);
        }

        # -----FOLDERS-------------
        if (($filetype == "dir" || $filetype == "link")
            && $file != "."
            && $file != ".."
            && strpos($nogo, "[" . $file . "]") === false
            && strpos($file, $staticsync_alternatives_suffix) === false
        ) {
            # Recurse. (The former $lastsync based filter was permanently disabled.)
            ProcessFolder($folder . "/" . $file);
        }

        # -------FILES---------------
        if ($filetype == "file" && substr($file, 0, 1) != "." && strtolower($file) != "thumbs.db") {
            # Already exists?
            if (!in_array($shortpath, $done)) {
                $count++;
                if ($count > $max) {
                    closedir($dh);
                    return true;
                }

                echo "Processing file: $fullpath\n";

                if ($collection == 0 && $staticsync_autotheme) {
                    # Make a new collection for this folder.
                    $e = explode("/", $shortpath);
                    $theme = ucwords($e[0]);
                    $themesql = "theme='" . ucwords(escape_check($e[0])) . "'";
                    $themecolumns = "theme";
                    $themevalues = "'" . ucwords(escape_check($e[0])) . "'";

                    if ($staticsync_folder_structure) {
                        # Level 0 is the theme itself; deeper levels map to theme2, theme3, ...
                        for ($x = 1; $x < count($e) - 1; $x++) {
                            $themeindex = $x + 1;
                            if ($themeindex > $theme_category_levels) {
                                $theme_category_levels = $themeindex;
                                if ($x == count($e) - 2) {
                                    echo "\n\nUPDATE THEME_CATEGORY_LEVELS TO $themeindex IN CONFIG!!!!\n\n";
                                }
                            }
                            $themesql .= " and theme" . $themeindex . "='" . ucwords(escape_check($e[$x])) . "'";
                            $themevalues .= ",'" . ucwords(escape_check($e[$x])) . "'";
                            $themecolumns .= ",theme" . $themeindex;
                        }
                    }

                    $name = (count($e) == 1 ? "" : $e[count($e) - 2]);
                    echo "\nCollection $name, theme=$theme";
                    $collection = sql_value("select ref value from collection where name='" . escape_check($name) . "' and " . $themesql, 0);
                    if ($collection == 0) {
                        sql_query("insert into collection (name,created,public,$themecolumns,allow_changes) values ('" . escape_check($name) . "',now(),1," . $themevalues . ",0)");
                        $collection = sql_insert_id();
                    }
                }

                # Work out a resource type based on the extension.
                $type = $staticsync_extension_mapping_default;
                reset($staticsync_extension_mapping);
                foreach ($staticsync_extension_mapping as $rt => $extensions) {
                    if (in_array($extension, $extensions)) {
                        $type = $rt;
                    }
                }

                # Formulate a title. The extension is only stripped when there is one;
                # an empty extension would otherwise remove every "." from the title.
                if ($staticsync_title_includes_path) {
                    $title = str_replace("/", " - ", $shortpath);
                    if ($extension !== "") {
                        $title = str_ireplace("." . $extension, "", $title);
                    }
                    $title = ucfirst(str_replace("_", " ", $title));
                } else {
                    $title = $extension !== "" ? str_ireplace("." . $extension, "", $file) : $file;
                }

                # Import this file
                $r = import_resource($shortpath, $type, $title, $staticsync_ingest);
                if ($r !== false) {
                    # Add to mapped category tree (if configured).
                    # Same truthiness test as above, so $path_parts is guaranteed to be set.
                    if ($staticsync_mapped_category_tree) {
                        $basepath = "";
                        # Save tree position to category tree field
                        # For each node level, expand it back to the root so the full path is stored.
                        for ($n = 0; $n < count($path_parts); $n++) {
                            if ($basepath != "") {
                                $basepath .= "~";
                            }
                            $basepath .= $path_parts[$n];
                            $path_parts[$n] = $basepath;
                        }
                        update_field($r, $staticsync_mapped_category_tree, "," . join(",", $path_parts));
                    }

                    // default access level. This may be overridden by metadata mapping.
                    $accessval = 0;

                    # StaticSync path / metadata mapping
                    # Extract metadata from the file path as per $staticsync_mapfolders in config.php
                    if (isset($staticsync_mapfolders)) {
                        foreach ($staticsync_mapfolders as $mapfolder) {
                            $match = $mapfolder["match"];
                            $field = $mapfolder["field"];
                            $level = $mapfolder["level"];

                            if (strpos("/" . $shortpath, $match) === false) {
                                continue;
                            }

                            # Match. Extract metadata.
                            $path_parts = explode("/", $shortpath);
                            if ($level >= count($path_parts)) {
                                continue;
                            }

                            // special cases first.
                            if ($field == 'access') {
                                // access level is a special case
                                // first determine if the value matches a defined access level
                                $value = $path_parts[$level - 1];
                                for ($n = 0; $n < 3; $n++) {
                                    // if we get an exact match or a match except for case
                                    if ($value == $lang["access" . $n] || strtoupper($value) == strtoupper($lang['access' . $n])) {
                                        $accessval = $n;
                                        echo "Will set access level to " . $lang['access' . $n] . " ($n)\n";
                                    }
                                }
                            } else {
                                # Save the value
                                $value = $path_parts[$level - 1];
                                update_field($r, $field, $value);
                                echo " - Extracted metadata from path: $value\n";
                            }
                        }
                    }

                    // update access level
                    sql_query("update resource set access = '$accessval' where ref = '$r'");

                    # Add any alternative files
                    $altpath = $fullpath . $staticsync_alternatives_suffix;
                    if ($staticsync_ingest && is_dir($altpath)) {
                        $adh = opendir($altpath);
                        if ($adh !== false) {
                            while (($altfile = readdir($adh)) !== false) {
                                $altfiletype = filetype($altpath . "/" . $altfile);
                                # The filter must look at the alternative file, not at the main file.
                                if ($altfiletype == "file" && substr($altfile, 0, 1) != "." && strtolower($altfile) != "thumbs.db") {
                                    # Create alternative file
                                    # Find extension
                                    $ext = explode(".", $altfile);
                                    $ext = $ext[count($ext) - 1];
                                    $aref = add_alternative_file($r, $altfile, str_replace("?", strtoupper($ext), $lang["originalfileoftype"]), $altfile, $ext, filesize_unlimited($altpath . "/" . $altfile));
                                    $path = get_resource_path($r, true, "", true, $ext, -1, 1, false, "", $aref);
                                    rename($altpath . "/" . $altfile, $path); # Move alternative file
                                }
                            }
                            closedir($adh);
                        }
                    }

                    # Add to collection
                    if ($staticsync_autotheme) {
                        $test = sql_query("select * from collection_resource where collection='$collection' and resource='$r'");
                        if (count($test) == 0) {
                            sql_query("insert into collection_resource(collection,resource,date_added) values ('$collection','$r',now())");
                        }
                    }
                } else {
                    # Import failed - file still being uploaded?
                    echo " *** Skipping file - it was not possible to move the file (still being imported/uploaded?) \n";
                }
            } else {
                # check modified date and update previews if necessary
                $filemod = filemtime($fullpath);
                if (array_key_exists($shortpath, $modtimes) && ($filemod > strtotime($modtimes[$shortpath]))) {
                    # File has been modified since we last created previews. Create again.
                    $rd = sql_query("select ref,has_image,file_modified,file_extension from resource where file_path='" . (escape_check($shortpath)) . "'");
                    if (count($rd) > 0) {
                        $rd = $rd[0];
                        $rref = $rd["ref"];
                        echo "Resource $rref has changed, regenerating previews: $fullpath\n";
                        extract_exif_comment($rref, $rd["file_extension"]);

                        # extract text from documents (e.g. PDF, DOC).
                        if (isset($extracted_text_field)) {
                            # NOTE(review): $unoconv_path / $unoconv_extensions are not declared
                            # global in this function, so this branch is never taken and
                            # extract_text() always runs - confirm before importing them.
                            if (isset($unoconv_path) && in_array($extension, $unoconv_extensions)) {
                                // omit, since the unoconv process will do it during preview creation below
                            } else {
                                extract_text($rref, $extension);
                            }
                        }

                        # Store original filename in field, if set
                        if (isset($filename_field)) {
                            update_field($rref, $filename_field, $file);
                        }

                        create_previews($rref, false, $rd["file_extension"]);
                        sql_query("update resource set file_modified=now() where ref='$rref'");
                    }
                }
            }
        }
    }

    closedir($dh);
}
/**
 * Look up the weather for a location via a Google search page and format it.
 *
 * The location is first resolved through get_location(); when that fails the raw
 * $location is used (an empty one aborts). The result block of the search page is
 * cut out, cleaned up and parsed for location name, wind, humidity, conditions,
 * temperatures and day labels.
 *
 * @param string $location Location to look up. Passed by reference: it is replaced
 *                         by the resolved location and, once the page is parsed,
 *                         by the location name shown on the page.
 * @param string $nick     Nick whose stored location and preferences are consulted.
 * @param bool   $getdata  When not False, an array with the current conditions is
 *                         returned instead of the formatted line, and the nick's
 *                         unit preference is ignored.
 *
 * @return string|array|false Formatted, colour-prefixed forecast line; or an array with
 *                            keys tempF, tempC, cond, wind, humidity, location when
 *                            $getdata is set; or False when the lookup or parsing fails.
 */
function process_weather(&$location, $nick, $getdata = False)
{
    $resolved = get_location($location, $nick);
    term_echo("*** WEATHER LOCATION LOOKUP: {$resolved}");
    if ($resolved === False) {
        if ($location == "") {
            return False;
        }
        $resolved = $location;
    }
    $location = $resolved;

    $search_terms = filter($resolved, VALID_UPPERCASE . VALID_LOWERCASE . VALID_NUMERIC . " ");
    $prefs = get_prefs($nick);

    # Fahrenheit is requested unless the nick explicitly prefers metric units.
    $fheit = "1";
    $use_unit_pref = False;
    if ($getdata == False && isset($prefs["unit"])) {
        if ($prefs["unit"] == "metric") {
            $use_unit_pref = True;
            $fheit = "0";
        } elseif ($prefs["unit"] == "imperial") {
            $use_unit_pref = True;
        }
    }

    # https://www.google.com/search?gbv=1&q=weather+traralgon
    $request_uri = "/search?gbv=1&fheit={$fheit}&q=weather+" . urlencode($search_terms);
    term_echo("http://www.google.com.au" . $request_uri);
    $response = wget("www.google.com.au", $request_uri, 80, ICEWEASEL_UA, "", 60);

    # Cut the weather block out of the page.
    $html = extract_text(strip_headers($response), "<div class=\"e\">", "</table>");
    if ($html === False) {
        return False;
    }
    $html = replace_ctrl_chars($html, " ");
    $html = str_replace(" ", " ", $html);
    $html = html_decode(html_decode($html));

    $location = trim(strip_tags(extract_raw_tag($html, "h3")));
    if (strncmp($location, "Weather for ", 12) === 0) {
        $location = substr($location, 12);
    }

    $wind = trim(strip_tags(extract_text_nofalse($html, "style=\"white-space:nowrap;padding-right:15px;color:#666\">Wind: ", "</span>")));
    $humidity = extract_text($html, "style=\"white-space:nowrap;padding-right:0px;vertical-align:top;color:#666\">Humidity: ", "</td>");

    # Walk the table cells (everything after the first "<td") and collect what each one offers.
    $temps = array();
    $tempsC = array();
    $conds = array();
    $days = array();
    foreach (array_slice(explode("<td", $html), 1) as $cell) {
        $cond = extract_text($cell, "alt=\"", "\"");
        $temp = extract_text($cell, "<span class=\"wob_t\" style=\"display:inline\">", "</span>");
        $day = extract_text($cell, "colspan=\"2\" style=\"vertical-align:top;text-align:center\">", "</td>");
        if ($cond !== False) {
            $conds[] = strtolower($cond);
        }
        if ($temp !== False) {
            $temps[] = $temp;
            # Drop the two trailing unit bytes before converting to Celsius.
            $fahrenheit = substr($temp, 0, strlen($temp) - 2);
            $tempsC[] = sprintf("%.0f", ($fahrenheit - 32) * 5 / 9) . "°C";
        }
        if ($day !== False) {
            $days[] = $day;
        }
    }

    # Without wind data one temperature entry fewer is expected, shifting the indexes by one.
    if ($wind == "") {
        $offset = 1;
        $wind_caption = "";
    } else {
        $offset = 0;
        $wind_caption = ", wind " . $wind;
    }

    $expected_temps = 10 - $offset;
    $complete = count($conds) == 5
        && count($temps) == $expected_temps
        && count($tempsC) == $expected_temps
        && count($days) == 4;
    if (!$complete) {
        return False;
    }

    $show_both_units = ($use_unit_pref == False);

    $current_temp = $temps[0];
    if ($show_both_units) {
        $current_temp .= " / " . $tempsC[0];
    }
    $result = "{$location} - currently {$current_temp}, {$conds[0]}{$wind_caption}, humidity {$humidity} - ";

    $fulldays = array(
        "Sun." => "Sunday",
        "Mon." => "Monday",
        "Tue." => "Tuesday",
        "Wed." => "Wednesday",
        "Thu." => "Thursday",
        "Fri." => "Friday",
        "Sat." => "Saturday",
    );

    $forecast = array();
    for ($slot = 1; $slot <= 4; $slot++) {
        $day_name = $fulldays[$days[$slot - 1]];
        $first = $slot * 2 + 1 - $offset;
        $second = $slot * 2 - $offset;
        $range = $temps[$first] . ":" . $temps[$second];
        if ($show_both_units) {
            $range .= " / " . $tempsC[$first] . ":" . $tempsC[$second];
        }
        $forecast[] = $day_name . " " . $conds[$slot] . " (" . $range . ")";
    }
    $result .= implode(", ", $forecast);

    # Prefix with the IRC colour control character and the preferred colour code.
    $color = isset($prefs["color"]) ? $prefs["color"] : "10";
    $result = chr(3) . $color . $result;

    if ($getdata != False) {
        return array(
            "tempF" => $temps[0],
            "tempC" => $tempsC[0],
            "cond" => $conds[0],
            "wind" => $wind_caption,
            "humidity" => $humidity,
            "location" => $location,
        );
    }

    return $result;
}
/**
 * Reports whether any entry of a word list occurs (case-insensitively) in a text.
 *
 * Private helper of index_url(). A match at offset 0 counts as a match, and
 * empty or non-scalar entries are skipped.
 *
 * @param mixed $text  Text to search; cast to string.
 * @param mixed $words List of words; anything that is not an array matches nothing.
 *
 * @return bool True when at least one word is contained in $text.
 */
function index_url_contains_listed_word($text, $words)
{
    if (!is_array($words)) {
        return false;
    }
    $text = (string) $text;
    foreach ($words as $word) {
        if (!is_scalar($word) || (string) $word === '') {
            continue;
        }
        if (stripos($text, (string) $word) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Fetches one URL, decides whether it may be indexed and stores the outcome.
 *
 * Steps, in order: optionally harvest links from the host's sitemap.xml into
 * the temp table; queue the target of a relocation; download the page and
 * (when configured) convert it to UTF-8; compare its md5 with the stored one;
 * apply white-/blacklist and duplicate-content rules; queue newly found links;
 * finally insert or update the row in the links table together with its
 * keywords. Progress is reported through the print*Report() helpers.
 *
 * NOTE(review): every SQL statement below is assembled by string
 * interpolation from crawled data (URLs, title, description, full text).
 * Whether those values are already escaped upstream (e.g. by clean_file() or
 * url_purify()) is not visible here, so no escaping was added to avoid
 * double-escaping — verify, and preferably move to parameterised queries.
 *
 * @param string $url              URL to index (replaced by the redirect target on relocation).
 * @param int    $level            Level stored with links queued in the temp table;
 *                                 the page itself is stored with $level - 1.
 * @param mixed  $site_id          Id written to links.site_id for new pages.
 * @param string $md5sum           Previously stored md5 of the page content, '' for a new page.
 * @param mixed  $domain           Not used by this function.
 * @param mixed  $indexdate        Not used by this function.
 * @param mixed  $sessid           Session id tagging rows of the temp table.
 * @param mixed  $can_leave_domain Passed through to url_purify() / get_links().
 * @param mixed  $reindex          1 when re-indexing; enables the removal check.
 *
 * @return void
 */
function index_url($url, $level, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex)
{
    global $tmp_urls, $delay_time, $domain_arr, $charSet, $url_status, $whitelist, $blacklist, $supdomain, $smp, $realnum, $dup_url, $entities, $command_line;

    if (DEBUG == '0') {
        error_reporting(0);
    } else {
        // Fatal errors only: otherwise a missing sitemap.xml would always raise a warning.
        error_reporting(E_ERROR);
    }

    $deletable = 0;
    $url_status = url_status($url);
    $thislevel = $level - 1;

    // Look for a sitemap only if none is known yet and the admin settings allow it.
    if ($smp != 1 && Configure::read('follow_sitemap') == 1) {
        $tmp_urls = get_temp_urls($sessid); // reload previous temp
        $url2 = remove_sessid(convert_url($url));
        $sitemap_name = "sitemap.xml"; // could be individualized

        // Reduce the URL to the folder where the sitemap should live.
        $sitemap_host = parse_url($url2);
        $hostname = isset($sitemap_host['host']) ? $sitemap_host['host'] : '';
        if ($hostname == 'localhost') {
            // On a local server keep only the first path segment below the
            // configured base address, e.g. "http://localhost/publizieren/".
            $host1 = str_replace(Configure::read('local'), '', $url2);
            $pos = strpos($host1, "/");
            if ($pos) {
                $host1 = substr($host1, 0, $pos);
            }
            $url2 = Configure::read('local') . $host1;
        } else {
            $scheme = isset($sitemap_host['scheme']) ? $sitemap_host['scheme'] : '';
            $url2 = "{$scheme}://{$hostname}";
        }

        $input_file = "{$url2}/{$sitemap_name}";
        if ($handle = fopen($input_file, "r")) {
            // A sitemap exists: extract its links.
            $links = get_sitemap($input_file, TABLE_PREFIX);
            if ($links != '') {
                if (is_array($links)) {
                    foreach ($links as $sitemap_link) {
                        // Is this link already known as a site url?
                        $result = mysql_query("select url from " . TABLE_PREFIX . "sites where url like '{$sitemap_link}%'");
                        if (DEBUG > '0') {
                            echo mysql_error();
                        }
                        $rows = mysql_num_rows($result);
                        if ($rows == '0') {
                            // New link: save it in the temp table.
                            mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$sitemap_link}', '{$level}', '{$sessid}')");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                        }
                        // Free every result set, not just the last one.
                        clean_resource($result);
                    }
                }
                $smp = '1'; // there was a valid sitemap and its new links are stored
            }
            unset($links, $input_file);
            fclose($handle);
        }
    }

    if (strstr($url_status['state'], "Relocation")) {
        // Queue the redirect target (with blanks removed) unless it is already queued.
        $url = str_replace(" ", "", url_purify($url_status['path'], $url, $can_leave_domain));
        if ($url != '') {
            $result = mysql_query("select link from " . TABLE_PREFIX . "temp where link='{$url}' && id = '{$sessid}'");
            if (DEBUG > '0') {
                echo mysql_error();
            }
            $rows = mysql_num_rows($result);
            if ($rows == 0) {
                mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')");
                if (DEBUG > '0') {
                    echo mysql_error();
                }
            }
            clean_resource($result);
        }
        // NOTE(review): the original ended this branch with the no-op comparison
        // `$url_status['state'] == "redirected";`. It was probably meant as an
        // assignment, but the state is deliberately left untouched so the status
        // reported further down does not change.
    }

    ini_set("user_agent", Configure::read('user_agent'));

    if ($url_status['state'] == 'ok') {
        $OKtoIndex = 1;
        $file_read_error = 0;

        // Respect the configured minimum delay between two fetches.
        if (time() - $delay_time < Configure::read('min_delay')) {
            sleep(Configure::read('min_delay') - (time() - $delay_time));
        }
        $delay_time = time();

        if (!fst_lt_snd(phpversion(), "4.3.0")) {
            $file = file_get_contents($url);
            if ($file === FALSE) {
                $file_read_error = 1;
            }
        } else {
            // Legacy path for PHP older than 4.3.0.
            $file = '';
            $fl = @fopen($url, "r");
            if ($fl) {
                while ($buffer = @fgets($fl, 4096)) {
                    $file .= $buffer;
                }
                unset($buffer);
                fclose($fl); // only close a handle that was actually opened
            } else {
                $file_read_error = 1;
            }
        }

        if ($file_read_error || Configure::read('utf8') == 1) {
            unset($file);
            $contents = getFileContents($url); // also delivers the charset
            $file = $contents['file'];
        }

        $pageSize = number_format(strlen($file) / 1024, 2, ".", "");
        printPageSizeReport($pageSize);

        if ($url_status['content'] != 'text') {
            // DOCs, PDFs etc. need a special converter.
            $file = extract_text($file, $url_status['content']);
            if ($file == 'ERROR') {
                // Conversion failed: suppress further indexing.
                $OKtoIndex = 0;
                $file_read_error = 1;
            }
        }

        if (Configure::read('utf8') == 1) {
            // The file should be translated into UTF-8.
            $charSet = $contents['charset'];
            if ($charSet == '') {
                // No charset detected: use the configured default.
                $charSet = Configure::read('home_charset');
            }
            $charSet = strtoupper(trim($charSet));
            if (strpos($charSet, '8859')) {
                $conv_file = html_entity_decode($file);
            } else {
                $conv_file = $file;
            }
            if ($charSet != "UTF-8") {
                // Try iconv first; an empty result means it is unavailable or
                // does not know the charset, so fall back to ConvertCharset.
                $iconv_file = iconv($charSet, "UTF-8", $conv_file);
                if (trim($iconv_file) == "") {
                    $charSet = str_ireplace('iso-', '', $charSet);
                    $charSet = str_ireplace('iso', '', $charSet);
                    $NewEncoding = new ConvertCharset($charSet, "utf-8");
                    $NewFileOutput = $NewEncoding->Convert($conv_file);
                    $file = $NewFileOutput;
                } else {
                    $file = $iconv_file;
                }
                unset($conv_file, $iconv_file, $NewEncoding, $NewFileOutput);
            }
        }

        $data = clean_file($file, $url, $url_status['content']);
        $newmd5sum = md5($data['content']);

        if ($md5sum == $newmd5sum) {
            printStandardReport('md5notChanged', $command_line);
            $OKtoIndex = 0;
            $realnum--;
        } else {
            // White-/blacklist check against the raw page.
            if (Configure::read('use_white') == '1') {
                if (!index_url_contains_listed_word($file, $whitelist)) {
                    printStandardReport('noWhitelist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            if (Configure::read('use_black') == '1') {
                if (index_url_contains_listed_word($file, $blacklist)) {
                    printStandardReport('matchBlacklist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }

            // Check for duplicate page content.
            $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where md5sum='{$newmd5sum}'");
            if (DEBUG > '0') {
                echo mysql_error();
            }
            if (mysql_num_rows($result) > 0) {
                // Show a warning and the urls that carry the same content.
                printStandardReport('duplicate', $command_line);
                $num_rows = mysql_num_rows($result);
                for ($i = 0; $i < $num_rows; $i++) {
                    $link_id = mysql_result($result, $i, "link_id");
                    $res = mysql_query("select url from " . TABLE_PREFIX . "links where link_id like '{$link_id}'");
                    if (DEBUG > '0') {
                        echo mysql_error();
                    }
                    $row = mysql_fetch_row($res);
                    $dup_url = $row[0];
                    clean_resource($res);
                    printDupReport($dup_url, $command_line);
                }
                if (Configure::read('dup_content') == '0') {
                    // Pages with duplicate content must not be (re-)indexed.
                    $OKtoIndex = 0;
                    $realnum--;
                } else {
                    $OKtoIndex = 1;
                }
            }
            clean_resource($result);
        }

        if (($md5sum != $newmd5sum || $reindex == 1) && $OKtoIndex == 1) {
            if ($data['noindex'] == 1) {
                $OKtoIndex = 0;
                $deletable = 1;
                printStandardReport('metaNoindex', $command_line);
            }

            // White-/blacklist check against the cleaned full text.
            if (Configure::read('use_white') == '1') {
                if (!index_url_contains_listed_word($data['fulltext'], $whitelist)) {
                    printStandardReport('noWhitelist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }
            if (Configure::read('use_black') == '1') {
                if (index_url_contains_listed_word($data['fulltext'], $blacklist)) {
                    printStandardReport('matchBlacklist', $command_line);
                    $OKtoIndex = 0;
                    $realnum--;
                }
            }

            $wordarray = unique_array(explode(" ", $data['content']));

            // Links are only followed when no valid sitemap supplied them.
            if ($smp != 1) {
                if ($data['nofollow'] != 1) {
                    $links = get_links($file, $url, $can_leave_domain, $data['base']);
                    $links = distinct_array($links);
                    $all_links = count($links);
                    if ($all_links > Configure::read('max_links')) {
                        $all_links = Configure::read('max_links');
                    }
                    $links = array_slice($links, 0, Configure::read('max_links'));
                    if ($realnum < Configure::read('max_links')) {
                        $numoflinks = 0;
                        // Add the links to the temp table unless the url is already known.
                        if (is_array($links)) {
                            if (DEBUG == '2') {
                                printStandardReport('newLinks', $command_line);
                            }
                            foreach ($links as $found_link) {
                                if (!isset($tmp_urls[$found_link]) || $tmp_urls[$found_link] != 1) {
                                    $tmp_urls[$found_link] = 1;
                                    $numoflinks++;
                                    if (DEBUG == '2') {
                                        $act_link = $found_link;
                                        printNewLinks($act_link);
                                    }
                                    if ($numoflinks <= Configure::read('max_links')) {
                                        mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$found_link}', '{$level}', '{$sessid}')");
                                    }
                                    if (DEBUG > '0') {
                                        echo mysql_error();
                                    }
                                }
                            }
                        }
                    }
                } else {
                    printStandardReport('noFollow', $command_line);
                }
                unset($file);
            }

            if ($OKtoIndex == 1) {
                if (Configure::read('link_check') == 0) {
                    $title = $data['title'];
                    $page_host = $data['host'];
                    $path = $data['path'];
                    $fulltxt = $data['fulltext'];
                    $desc = substr($data['description'], 0, 254);
                    $url_parts = parse_url($url);
                    $domain_for_db = $url_parts['host'];

                    // Resolve the domain id, creating the domain row on first sight.
                    if (isset($domain_arr[$domain_for_db])) {
                        $dom_id = $domain_arr[$domain_for_db];
                    } else {
                        mysql_query("insert into " . TABLE_PREFIX . "domains (domain) values ('{$domain_for_db}')");
                        $dom_id = mysql_insert_id();
                        $domain_arr[$domain_for_db] = $dom_id;
                    }

                    $wordarray = calc_weights($wordarray, $title, $page_host, $path, $data['keywords'], $url_parts);

                    // Only pages with enough words are stored together with their keywords.
                    if (is_array($wordarray) && count($wordarray) > Configure::read('min_words_per_page')) {
                        if ($md5sum == '') {
                            // New page: insert it, then read back its id.
                            mysql_query("insert into " . TABLE_PREFIX . "links (site_id, url, title, description, fulltxt, indexdate, size, md5sum, level) values ('{$site_id}', '{$url}', '{$title}', '{$desc}', '{$fulltxt}', curdate(), '{$pageSize}', '{$newmd5sum}', {$thislevel})");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where url='{$url}'");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            $row = mysql_fetch_row($result);
                            $link_id = $row[0];
                            clean_resource($result);
                            if (DEBUG == '2') {
                                printStandardReport('newKeywords', $command_line);
                            }
                            save_keywords($wordarray, $link_id, $dom_id);
                            if (DEBUG == '2') {
                                printStandardReport('indexed1', $command_line);
                            } else {
                                printStandardReport('indexed', $command_line);
                            }
                        } elseif ($md5sum != '' && $md5sum != $newmd5sum) {
                            // Page has changed: drop its old keywords, then update it.
                            $result = mysql_query("select link_id from " . TABLE_PREFIX . "links where url='{$url}'");
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            $row = mysql_fetch_row($result);
                            $link_id = $row[0];
                            // Keyword relations are spread over 16 tables suffixed 0..f.
                            for ($i = 0; $i <= 15; $i++) {
                                $char = dechex($i);
                                mysql_query("delete from " . TABLE_PREFIX . "link_keyword{$char} where link_id={$link_id}");
                                if (DEBUG > '0') {
                                    echo mysql_error();
                                }
                            }
                            clean_resource($result);
                            if (DEBUG == '2') {
                                printStandardReport('newKeywords', $command_line);
                            }
                            save_keywords($wordarray, $link_id, $dom_id);
                            $query = "update " . TABLE_PREFIX . "links set title='{$title}', description ='{$desc}', fulltxt = '{$fulltxt}', indexdate=now(), size = '{$pageSize}', md5sum='{$newmd5sum}', level={$thislevel} where link_id={$link_id}";
                            mysql_query($query);
                            if (DEBUG > '0') {
                                echo mysql_error();
                            }
                            if (DEBUG == '2') {
                                printStandardReport('re-indexed1', $command_line);
                            } else {
                                printStandardReport('re-indexed', $command_line);
                            }
                        }
                    } else {
                        printStandardReport('minWords', $command_line);
                        $realnum--;
                    }
                } else {
                    printStandardReport('link_okay', $command_line);
                }
                unset($wordarray, $title, $fulltxt, $desc);
            }
        }
    } else {
        $deletable = 1;
        printUrlStatus($url_status['state'], $command_line);
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }

    if (!isset($all_links)) {
        $all_links = 0;
    }
    if (!isset($numoflinks)) {
        $numoflinks = 0;
    }
    if ($smp != 1) {
        // No link report when a valid sitemap was used.
        printLinksReport($numoflinks, $all_links, $command_line);
    }
}