/**
 * Unpacks an uploaded application archive and returns its localisation
 * resources as a JavaScript object literal wrapped in parentheses.
 *
 * The archive is written to a temporary file in the current working directory,
 * extracted with the `unzip` command, and every `*.lproj`-style directory (as
 * decided by isLProj()) below "<app>/Contents/Resources" is scanned for string
 * files (as decided by isStrings()). Each string file is converted with
 * utf16_to_utf8() and transformStringsToJson(). The extracted tree is removed
 * again before returning.
 *
 * The application name is derived from $_FILES['file']['name'] with ".zip"
 * removed. Because that value is supplied by the client, it is reduced to its
 * base name and shell-escaped before it is used in a path or a command.
 *
 * NOTE(review): the returned text is not strict JSON (unquoted keys, wrapping
 * parentheses) and names are not escaped; the format is kept as-is because
 * callers presumably eval/parse it in this exact shape — confirm before changing.
 *
 * @param string   $zipFileContents Raw bytes of the zip archive.
 * @param resource $log             Unused; kept for interface compatibility.
 * @return string  `({"fileType": "zip", "fileName": "...", "resourcebundles": [...]})`;
 *                 the bundle list is empty when the archive could not be
 *                 written or the resources directory does not exist.
 */
function archiveToJson($zipFileContents, $log)
{
    $uploadName = isset($_FILES['file']['name']) ? $_FILES['file']['name'] : '';
    // basename() prevents a crafted upload name from escaping the working directory.
    $appName = basename(str_replace(".zip", "", $uploadName));

    $header = "({\"fileType\": \"zip\", \"fileName\": \"{$appName}\", \"resourcebundles\": [";
    $footer = "]})";

    // uniqid() keeps two requests arriving within the same second from
    // overwriting each other's archive.
    $aFileName = uniqid("tmpArchive" . time(), true) . ".zip";
    if (file_put_contents($aFileName, $zipFileContents) === false) {
        return $header . $footer;
    }

    exec("unzip " . escapeshellarg($aFileName));
    unlink($aFileName);
    exec("rm -rf __MACOSX");

    // An empty or dot name would make the paths below point outside the
    // extracted application (e.g. "/Contents/Resources").
    if ($appName === '' || $appName === '.' || $appName === '..') {
        return $header . $footer;
    }

    $resourcesDir = "{$appName}/Contents/Resources";
    $bundleEntries = array();

    // opendir() returns false on failure; passing that to readdir() would
    // never yield `false` on old PHP versions and loop forever.
    $englishBundle = is_dir($resourcesDir) ? opendir($resourcesDir) : false;
    if ($englishBundle !== false) {
        while (false !== ($file = readdir($englishBundle))) {
            if (!isLProj($file)) {
                continue;
            }
            $bundle = opendir("{$resourcesDir}/{$file}");
            if ($bundle === false) {
                continue;
            }
            $resources = array();
            while (false !== ($anotherFile = readdir($bundle))) {
                if (isStrings($anotherFile)) {
                    $fileLocation = "{$resourcesDir}/{$file}/" . $anotherFile;
                    $resources[] = transformStringsToJson(utf16_to_utf8(file_get_contents($fileLocation)), $fileLocation);
                }
            }
            closedir($bundle);
            $bundleEntries[] = "{name: \"{$file}\", resources: [" . implode(",", $resources) . "]}";
        }
        closedir($englishBundle);
    }

    // "--" stops option parsing so a name starting with "-" is treated as a path.
    exec("rm -rf -- " . escapeshellarg($appName));

    return $header . implode(",", $bundleEntries) . $footer;
}
예제 #2
0
/**
 * Strips markup and noise from a fetched page and returns the parts needed for indexing.
 *
 * The behaviour is driven almost entirely by the global admin settings imported
 * below. In order, the function: optionally converts UTF-16 input, removes
 * head/script/style/comment sections and selected tags, optionally restricts
 * or excludes content by div / element lists, removes remaining tags,
 * normalises whitespace and repeated punctuation, optionally appends host,
 * path and meta data to the text, decodes entities, normalises quotes, and
 * finally escapes the results with $db_con->real_escape_string().
 *
 * @param string $file             Raw page content.
 * @param string $url              URL of the page; parsed with parse_addr() for host and path.
 * @param mixed  $type             Passed through to get_head_data().
 * @param string $charSet          Only tested for "8859" or "utf" to decide whether
 *                                 runs of whitespace/commas are collapsed.
 * @param mixed  $use_nofollow     '1' removes <!--sphider_noindex--> sections; also passed to get_head_data().
 * @param mixed  $use_robot        Passed through to get_head_data().
 * @param mixed  $can_leave_domain Passed through to get_head_data().
 * @return array Keys: fulltext, content, title, description, keywords, host, path,
 *               nofollow, noindex, base, cano_link, count, refresh, wait.
 */
function clean_file($file, $url, $type, $charSet, $use_nofollow, $use_robot, $can_leave_domain)
{
    global $db_con, $entities, $index_host, $index_meta_keywords, $index_meta_description, $case_sensitive, $utf_16;
    global $home_charset, $chrSet, $del_secchars, $index_rss, $converter_dir, $div_all, $div_hyphen, $del_dups;
    global $bb_decode, $ent_decode, $cn_seg, $quotes, $dup_quotes, $clear, $only_links, $text_length, $strict_high;
    global $use_divs, $not_divs, $not_divlist, $use_divlist, $ignore_fulltxt, $index_meta_title, $js_reloc;
    global $use_elems, $not_elems, $use_elementslist, $not_elementslist, $del_elems, $conv_puny, $include_dir;
    $new = array();
    $data = array();
    $string = '';
    $home_charset = strtoupper($home_charset);
    if ($utf_16) {
        //$file = mb_ereg_replace("\\0", "", $file);
        $file = utf16_to_utf8($file);
    }
    //      kill useless blanks, under scores and line feeds
    $file = preg_replace("/[  |\r\n|\\_]+/i", " ", $file);
    $urlparts = parse_addr($url);
    $host = $urlparts['host'];
    //remove filename from path and all tags which should be ignored
    $path = preg_replace('/([^\\/]+)$/i', "", $urlparts['path']);
    if ($use_nofollow == '1') {
        $file = preg_replace("@<!--sphider_noindex-->.*?<!--\\/sphider_noindex-->@si", " ", $file);
    }
    //  parse the HTML head
    $headdata = get_head_data($file, $url, $use_nofollow, $use_robot, $can_leave_domain, $type);
    $title = $headdata['title'];
    $description = $headdata['description'];
    $keywords = $headdata['keywords'];
    //  remove HTML head from file
    $file = preg_replace("@<head>.*?</head>@si", " ", $file);
    $file = preg_replace("@<!--.*?-->@si", " ", $file);
    $file = preg_replace("@<script[^>]*?>.*?<\\/script>@si", " ", $file);
    //$file = str_replace("window.location.replace", " ", $file);
    $file = preg_replace("@<style[^>]*>.*?<\\/style>@si", " ", $file);
    $file = preg_replace("/<link rel[^<>]*>/i", " ", $file);
    $file = preg_replace("@<div style=(\"|')display\\:none(\"|').*?<\\/div>@si", " ", $file);
    $file = preg_replace("@<a.*?>@si", " ", $file);
    $file = preg_replace("@<(object|img|audio|video).*?>@si", " ", $file);
    $file = preg_replace("@<(align|alt|data|body|form|height|input|id|name|span|src|table|td|type|width|layer|span).*?>@si", " ", $file);
    $file = preg_replace("@\\{document\\..*?\\}@si", " ", $file);
    //  if activated in Admin settings, ignore the full text
    if ($ignore_fulltxt == '1') {
        $file = '';
    }
    // if activated in Admin settings, remove all div contents as defined in common 'divs_not' list
    if ($not_divs == '1') {
        // JFIELD parse the doc into a DOM tree so we can
        // do cool stuff like exclude certain divs.
        // recurseNodes() reports its result through the global $myFile.
        global $myFile;
        $myFile = "";
        $dom = new DOMDocument();
        // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8),
        // which happens whenever the full text is ignored above.
        if ((string) $file !== '') {
            $dom->loadHTML($file);
        }
        // probably a better way to get the doc
        // than skipping over the non-doc like this
        foreach ($dom->childNodes as $item) {
            // only element nodes carry a tag name (the doctype node does not)
            if (!$item instanceof DOMElement) {
                continue;
            }
            recurseNodes($item);
        }
        $file = $myFile;
        // wikipedia - don't index content of image pages
        if (preg_match("/\\/images\\//", $url) && preg_match("/^File\\:/", $title)) {
            // image description stopwords
            $mystopwords = explode(" ", "wikipedia schools english featured article sos children file");
            $mydesc = str_replace($mystopwords, " ", strtolower($description));
            $description = '';
            // otherwise the unstripped version gets used
            $title = str_replace("File:", "", $title);
            // remove this noise
            $file = "{$title} {$mydesc} picture image";
        }
        // END JFIELD
    }
    // if activated in Admin settings, fetch all div contents as defined in common 'divs_use' list
    if ($use_divs == '1') {
        //  start from an empty string so that "no div matched" yields empty content instead of null
        $divfile = '';
        foreach ($use_divlist as $thisid) {
            //    try to find divs with id as specified in common 'divs' list
            //  regexp ?
            if (strpos($thisid, "/") == "1" && strrpos($thisid, "/") == strlen($thisid) - 1) {
                $thisid = substr($thisid, 2, strlen($thisid) - 3);
                //  remove the regex capsules
            } else {
                //  for string input only
                if (strrpos($thisid, "*") == strlen($thisid) - 1) {
                    $thisid = str_replace("*", "(.*?)", $thisid);
                    //  replace wildcards at the end of string input
                }
            }
            if (preg_match_all("@(<div class|<div id)=(\"|')" . $thisid . "(\"|').*?(</div>)@si", $file, $found_divs, PREG_OFFSET_CAPTURE)) {
                //  collect per id; otherwise divs found for earlier ids would be appended again
                $all_divs = array();
                foreach ($found_divs[0] as $another_div) {
                    //  walk through all found divs. Usually W3C does not allow more than one div with this id. But who knows . . . .
                    $this_divstart = $another_div[1];
                    //  get actual startpos from div-array
                    $i = "end";
                    //  if required $i will become the loop counter for nested divs
                    $nextstart = strpos($file, "<div", $this_divstart + 4);
                    //  find start pos of next div
                    $nextend = strpos($file, "</div", $this_divstart + 4);
                    //  find end pos of next div
                    //check for nested divs
                    $start1 = strpos($file, "<div", $nextstart + 4);
                    // find start pos of next div
                    if ($start1 && $start1 < $nextend) {
                        $i = "0";
                        //  yes, nested
                    }
                    while ($i != "end") {
                        //  loop for (multiple) 'nested divs'
                        $i = '0';
                        while ($nextstart && $nextstart < $nextend) {
                            // next div is a nested div?
                            $nextend1 = strpos($file, "</div", $nextstart + 4);
                            //  this is only the endpos of current div
                            $nextend = strpos($file, "</div", $nextend1 + 6);
                            //  find end pos of next div
                            $nextstart = strpos($file, "<div", $nextstart + 4);
                            // find start pos of next div
                            if ($nextstart && $nextstart < $nextend1) {
                                //  again nested in next layer?
                                $i++;
                                //  counter for next level nested divs
                            }
                        }
                        //  if nested divs were found, correct end pos of div to be deleted
                        while ($i > '1') {
                            $nextend = strpos($file, "</div", $nextend + 6);
                            $i--;
                        }
                        $nextend1 = strpos($file, "</div", $nextend + 6);
                        //  $nextend from former div (might have been nested)
                        if ($nextend1) {
                            $nextend = $nextend1;
                            //  defines next endpos
                        }
                        if (!$nextstart || $nextend < $nextstart) {
                            $i = 'end';
                            //  no longer nested divs
                        }
                    }
                    //  collect all divs to be indexed
                    $all_divs[] = substr($file, $this_divstart, $nextend + 6 - $this_divstart);
                }
                //  add content of all found divs to full text
                foreach ($all_divs as $use_thisdiv) {
                    $divfile .= " " . $use_thisdiv;
                }
            }
        }
        $file = $divfile;
        //  now this will be used as the body part of the page content
    }
    // if activated in Admin settings, fetch the content of all elements as defined in common 'elements_use' list and use the content of these elements as page content
    if ($use_elems == '1') {
        $all_elements = array();
        foreach ($use_elementslist as $this_element) {
            //    try to find elements with id as specified in common 'elements_use' list
            //  regexp ?
            if (strpos($this_element, "/") == "1" && strrpos($this_element, "/") == strlen($this_element) - 1) {
                $this_element = substr($this_element, 2, strlen($this_element) - 3);
                //  remove the regex capsules
            }
            if (preg_match_all("@<{$this_element}.*?>.*?<\\/{$this_element}>@si", $file, $found_elements, PREG_OFFSET_CAPTURE)) {
                foreach ($found_elements as $new_element) {
                    //  walk through all found elements.
                    foreach ($new_element as $new) {
                        //  build substring without content tags
                        $string = $new[0];
                        $string = substr($string, strpos($string, ">") + 1);
                        $string = substr($string, 0, strrpos($string, "<"));
                        //  collect all elements to be indexed
                        $all_elements[] = $string;
                    }
                }
            }
        }
        $file = '';
        //  add content of all found elements to full text
        foreach ($all_elements as $use_thiselem) {
            $file .= " " . $use_thiselem;
            //  now all this will be used as the body part of the page content
        }
    }
    // if activated in Admin settings, fetch the content of all elements as defined in common 'elements_not' list and delete that part of the page
    if ($not_elems == '1') {
        //  start with a fresh list: entries collected by the 'elements_use' pass above
        //  must not be removed from the text again
        $all_elements = array();
        foreach ($not_elementslist as $this_element) {
            //    try to find elements with id as specified in common 'elements_not' list
            //  regexp ?
            if (strpos($this_element, "/") == "1" && strrpos($this_element, "/") == strlen($this_element) - 1) {
                $this_element = substr($this_element, 2, strlen($this_element) - 3);
                //  remove the regex capsules
            }
            if (preg_match_all("@<{$this_element}.*?>.*?<\\/{$this_element}>@si", $file, $found_elements, PREG_OFFSET_CAPTURE)) {
                foreach ($found_elements as $new_element) {
                    //  walk through all found elements.
                    foreach ($new_element as $new) {
                        //  collect all elements to be ignored
                        $all_elements[] = $new[0];
                    }
                }
            }
        }
        //  remove the content of all found elements from full text
        foreach ($all_elements as $use_thiselem) {
            $file = str_replace($use_thiselem, " ", $file);
        }
    }
    //  parse bbcode
    if ($bb_decode == '1') {
        $file = bbcode($file);
    }
    $file = preg_replace("@<div.*?>@si", " ", $file);
    $file = preg_replace("@<\\/.*?>@si", " ", $file);
    //create spaces between tags, so that removing tags doesnt concatenate strings
    $file = preg_replace("/<[\\w ]+>/", "\\0 ", $file);
    $file = preg_replace("/<\\/[\\w ]+>/", "\\0 ", $file);
    $file = preg_replace("@<\\/a>@si", " ", $file);
    //  remove lost end tag
    //$file = strip_tags($file);  //  remove the content of HTML tags from $file (does not work for invalid written and unclosed tags)
    //  replaced since Sphider-plus version 2.7
    //  remove the content of HTML tags from $file
    $found_tags = array();
    $another_tag = array();
    if (preg_match_all("@<.*?>@s", $file, $found_tags, PREG_OFFSET_CAPTURE)) {
        foreach ($found_tags[0] as $another_tag) {
            //  walk through all found tags.
            if (strlen($another_tag[0]) < "500") {
                //  delete this tag from full text if not too long (unclosed)
                $file = str_replace($another_tag[0], " ", $file);
            }
        }
    }
    if ($del_elems) {
        //  if activated in Admin backend, delete  &lt; element /&gt; from full text
        $found_tags = array();
        $another_tag = array();
        if (preg_match_all("@\\&lt;.*?\\&gt;@s", $file, $found_tags, PREG_OFFSET_CAPTURE)) {
            foreach ($found_tags[0] as $another_tag) {
                //  walk through all found tags.
                $file = str_replace($another_tag[0], " ", $file);
            }
        }
    }
    if ($conv_puny) {
        //  make punycode readable
        require_once "{$include_dir}/idna_converter.php";
        // Initialize the converter class
        $IDN = new idna_convert(array('idn_version' => 2008));
        $found_tags = array();
        $another_tag = array();
        $this_tag = '';
        $file = str_replace("http", " http", $file);
        //place a blank in front of all http's
        if (preg_match_all("@http.*? @s", $file, $found_tags, PREG_OFFSET_CAPTURE)) {
            foreach ($found_tags[0] as $another_tag) {
                //  walk through all found tags.
                // Decode the URL to readable format
                $this_tag = $IDN->decode(rawurldecode($another_tag[0]));
                $this_tag = rawurldecode($this_tag);
                $file = str_replace($another_tag[0], $this_tag, $file);
            }
        }
    }
    $file = str_replace(" ", " ", $file);
    //  replace special (long) blanks with standard blank
    $file = str_replace("—", "'", $file);
    //  replace  invalid coded quotations
    $file = str_replace("©", "&#151;", $file);
    //  replace  invalid coded long dash with correct long dash
    $file = preg_replace("/   +/", " ", $file);
    //  replace TABs with a standard blank
    $file = preg_replace("/  +/", " ", $file);
    //  kill duplicate blanks
    $file = preg_replace("/__+/", " ", $file);
    //  kill duplicate underscore
    $file = preg_replace("/--+/", " ", $file);
    //  kill duplicate hyphens
    $file = preg_replace("/\\*\\*+/", " ", $file);
    //  kill duplicate stars
    $file = preg_replace("/\\#\\#+/", " ", $file);
    //  kill duplicate hash tags
    $file = str_replace(" &nbsp;", " ", $file);
    $file = str_replace("&nbsp;&nbsp;", " ", $file);
    //  kill duplicate &nbsp; blanks
    $file = str_replace("&shy;", "", $file);
    //  kill  break character
    $file = preg_replace("/\\☨\\☨+/", " ", $file);
    //  kill duplicates. . .  Yes, I've met something
    $file = preg_replace("/\\(\\(+/", " ", $file);
    //  kill duplicates.  . .  no comment
    $file = preg_replace("/\\<\\<+/", " ", $file);
    //  kill duplicates
    $file = preg_replace("/\\>\\>+/", " ", $file);
    //  kill duplicates
    $file = preg_replace("/\\*\\~+/", " ", $file);
    //  kill duplicates
    $file = preg_replace("/\\+\\++/", " ", $file);
    //  kill duplicates
    $file = preg_replace("/\\=\\=+/", " ", $file);
    //  kill duplicates
    $file = preg_replace("/\\~\\~+/", " ", $file);
    //  kill duplicates
    //  kill some other duplicates, already met on the Internet
    if ($del_dups) {
        $file = preg_replace("/\\(\\(+/", " ", $file);
        $file = preg_replace("/\\)\\)+/", " ", $file);
        $file = preg_replace("/\\~\\~+/", " ", $file);
        $file = preg_replace("/\\=\\=+/", " ", $file);
        $file = preg_replace("/\\?\\?+/", " ", $file);
        $file = preg_replace("/\\!\\!+/", " ", $file);
        $file = preg_replace("/\\.\\.+/", " ", $file);
        $file = preg_replace("/\\<\\<+/", " ", $file);
        $file = preg_replace("/\\>\\>+/", " ", $file);
        $file = preg_replace("/\\:\\:+/", " ", $file);
        $file = preg_replace("/\\+\\++/", " ", $file);
        $file = preg_replace("/\\-\\-+/", " ", $file);
        $file = preg_replace("/\\*\\*+/", " ", $file);
    }
    $file = str_replace(" &nbsp;", " ", $file);
    $file = str_replace("&nbsp;&nbsp;", " ", $file);
    //  kill duplicate &nbsp; blanks
    $file = str_replace("&shy;", "", $file);
    //  kill  break character
    //  kill some special cases
    $file = str_replace("&quot;", "\"", $file);
    $file = str_replace("…", " ", $file);
    if ($text_length != "0") {
        //  build substring of full text until last space in front of $text_length
        $file = substr($file, 0, strrpos(substr($file, 0, $text_length), " "));
    }
    if ($index_host == 1) {
        //  separate words in host and path
        $host_sep = preg_replace("/\\.|\\/|\\\\/", " ", $host);
        $path_sep = preg_replace("/\\.|\\/|\\\\/", " ", $path);
        $file = $file . " " . $host . " " . $host_sep;
        $file = $file . " " . $path . " " . $path_sep;
    }
    if ($headdata['title'] && $index_meta_title) {
        $file = $file . " " . $title;
    }
    if ($index_meta_description == 1) {
        $file = $file . " " . $description;
    }
    if ($index_meta_keywords == 1) {
        $file = $file . " " . $keywords;
    }
    if ($ent_decode == '1') {
        //  as it seems, the PHP function html_entity_decode() has some problems.
        //  In case that 2 entities are placed directly together like: &mdash;&nbsp;
        //  we are obliged to be helpful by eliminating one of them
        $file = str_replace("&nbsp;", " ", $file);
        //  now PHP does not get confused
        $file = html_entity_decode($file, ENT_QUOTES, 'UTF-8');
        $title = str_replace("&nbsp;", " ", $title);
        $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
    }
    //  correct some other trash found on the Internet
    $file = str_replace("�", "fi", $file);
    $file = str_replace("fl", "fl", $file);
    //  for URLs use entities, so that links become readable in full text
    $file = str_replace("<a href=\"http://www.", "&lt;a href=&quot;http://www.", $file);
    //  replace .. with a standard blank
    $file = str_replace("...", " ", $file);
    //  kill duplicate blanks  " ", \r, \t, \n and \f
    if (preg_match("@8859|utf@", $charSet)) {
        $file = preg_replace("/[\\s,]+/", " ", $file);
    }
    if ($index_rss == '1') {
        $file = preg_replace('/0b/si', '.', $file);
        // try to correct bad charset interpretation
        $file = preg_replace('//si', '\'', $file);
        $trash = array("\r\n", "\n", "\r", "0E", "0C", "0I");
        // kill 'LF' and the others
    } else {
        $trash = array("\r\n", "\f", "\n", "\r", "\t");
    }
    $replace = ' ';
    $file = str_replace($trash, $replace, $file);
    $fulltext = $file;
    //  required for result listing as extract around the keywords and for PHRASE search
    if ($del_secchars) {
        $file = del_secchars($file);
    }
    //  use the cleaned $file to just highlight the pure query term in result listing
    if ($strict_high) {
        $fulltext = $file;
    }
    //  convert all single quotes into standard quote
    //  (foreach instead of each(): each() was removed in PHP 8)
    if ($quotes == '1') {
        $all_quotes = array("&#8216;" => "'", "&lsquo;" => "'", "&#8217;" => "'", "&rsquo;" => "'", "&#8242;" => "'", "&prime;" => "'", "‘" => "'", "‘" => "'", "´" => "'", "`" => "'", "’" => "'", "‘" => "'", "’" => "'", "’" => "'");
        foreach ($all_quotes as $search => $replacement) {
            $file = preg_replace("/" . $search . "/si", $replacement, $file);
            $title = preg_replace("/" . $search . "/si", $replacement, $title);
            $description = preg_replace("/" . $search . "/si", $replacement, $description);
            $keywords = preg_replace("/" . $search . "/si", $replacement, $keywords);
        }
    }
    //  convert all double quotes into standard quotations
    if ($dup_quotes == '1') {
        $all_quotes = array("“" => "\"", "�" => "\"", "„" => "\"");
        foreach ($all_quotes as $search => $replacement) {
            $file = preg_replace("/" . $search . "/i", $replacement, $file);
            $title = preg_replace("/" . $search . "/i", $replacement, $title);
            $description = preg_replace("/" . $search . "/i", $replacement, $description);
            $keywords = preg_replace("/" . $search . "/i", $replacement, $keywords);
        }
    }
    //  split words at hyphen, single quote, dot and comma into their basics
    if ($div_all || $div_hyphen) {
        $file = split_words($file);
        // jfield: yes, we want to index words split on hyphens,
        // but what does that have to do with changing the appearance
        // of the title? sphider_plus, you so crazy
        // $title          = split_words($title);
        $description = split_words($description);
        $keywords = split_words($keywords);
    }
    //  apply the global entity map (pattern => replacement) to all text fields
    foreach ($entities as $search => $replacement) {
        $file = preg_replace("/" . $search . "/i", $replacement, $file);
        $title = preg_replace("/" . $search . "/i", $replacement, $title);
        $description = preg_replace("/" . $search . "/i", $replacement, $description);
        $keywords = preg_replace("/" . $search . "/i", $replacement, $keywords);
    }
    //  replace special (long) blanks in title
    $title = str_replace(" ", " ", $title);
    //remove all the fancy jokes some webmasters add
    $title = preg_replace("@<(.*?)>@si", "", $title);
    $title = preg_replace("@ +@si", " ", $title);
    //  replace TABs with a standard blank
    $fulltext = preg_replace("/   +/", " ", $fulltext);
    $count = count(preg_split("/[\\s,]+/", $fulltext));
    $data['fulltext'] = $db_con->real_escape_string($fulltext);
    $data['content'] = $file;
    $data['title'] = $db_con->real_escape_string($title);
    $data['description'] = $db_con->real_escape_string($description);
    $data['keywords'] = $db_con->real_escape_string($keywords);
    $data['host'] = $host;
    $data['path'] = $path;
    $data['nofollow'] = $headdata['nofollow'];
    $data['noindex'] = $headdata['noindex'];
    $data['base'] = $headdata['base'];
    $data['cano_link'] = $headdata['cano_link'];
    $data['count'] = $count;
    $data['refresh'] = $headdata['refresh'];
    $data['wait'] = $headdata['wait'];
    if ($clear == 1) {
        unset($search, $replacement, $file, $fulltext, $path_sep, $headdata, $regs, $urlparts, $host);
    }
    return $data;
}
 // Reject anything that did not arrive through a real HTTP upload
 // (guards against a forged tmp_name pointing at an arbitrary file).
 if (!is_uploaded_file($_FILES['userfile']['tmp_name'])) {
     die("Cos nie tak z wysylaniem pliku, sprobuj ponownie...");
 }
 // read the whole file into memory
 $some_file = $_FILES['userfile']['tmp_name'];
 $filesize = filesize($some_file);
 $fp = fopen($some_file, "r");
 // fread() does not accept a length of 0, so an empty upload is read as ''
 $filecontent = $filesize > 0 ? fread($fp, $filesize) : '';
 fclose($fp);
 // delete the temporary upload file
 unlink($_FILES['userfile']['tmp_name']);
 unset($_FILES['userfile']);
 // check whether the content is UTF-16 and convert it if so.
 // The bytes must be compared through ord(): comparing a one-character string
 // with an integer follows PHP's loose string/number rules and does not test
 // the byte value.
 if (strlen($filecontent) >= 2) {
     $firstByte = ord($filecontent[0]);
     $secondByte = ord($filecontent[1]);
     // a NUL in either of the first two bytes suggests BOM-less UTF-16 text
     $hasNulByte = $firstByte === 0x00 || $secondByte === 0x00;
     // FF FE = little-endian BOM, FE FF = big-endian BOM
     $hasUtf16Bom = ($firstByte === 0xff && $secondByte === 0xfe) || ($firstByte === 0xfe && $secondByte === 0xff);
     if ($hasNulByte || $hasUtf16Bom) {
         $filecontent = utf16_to_utf8($filecontent);
     }
 }
 if (strlen($filecontent) >= 3 && ord($filecontent[0]) == 0xef && ord($filecontent[1]) == 0xbb && ord($filecontent[2]) == 0xbf) {
     // cut UTF-8 BOM
     $filecontent = substr($filecontent, 3);
 }
 $filecontent = explode("\n", $filecontent);
 $dane_i = -1;
 // parse the file
 $listaKodowOP = '';
 foreach ($filecontent as $line) {
     $rec = preg_split('[,]', trim($line), 4);
     if (count($rec) >= 4) {
         // wyglada na skrzynke
         if (substr($rec[0], 0, 2) == $oc_waypoint) {
             $dane_i++;
예제 #4
0
/** 
 * Initiates pre-processing according to directives given.
 * Checks presence of all necessary fields.
 * Empties Stage table and inserts new lines - with formatted fields
 * 
 * pre-processing can be:
 * <UL>
 * <LI> utf16_to_utf8
 * <LI> utf8_decode
 * <LI> CSVSetSemicolonDelimiter
 * <LI> CSVCutThousandDelimiter
 * <LI> CSVCutDecimals
 * </UL>
 * Last version of pre-processed file can be found in _ADJUSTED.. for debugging
 *
 * @param string $FPath File with complete path
 * @param array $fieldMap Array with directives and field mapping for the given file type
 * @return boolean true if finished successfully - else false
 */	
function FileRead($FPath,$fieldMap) 
{
	$specialElementName=array("TEMP_STAGE_TABLE","NOTAUTOID","FILTERFIELD_NAME","FILTERFIELD_VALUE"
							 ,"UTF16DECODE","SETSEMICOLONDELIMITER","CUTTHOUSANDDELIMITER","1000SCALE"
							 ,"APPEND_DATA_TO_TABLE","CHECKSUMFIELD","AUTOFILL"); 	 	 	 	 	 

	
	$extra=0;
	foreach($specialElementName as $v)
	if (isset($fieldMap[$v])) $extra++;
	
	$requiredColumnNum= count($fieldMap) - $extra ;  //(- other elements (E.g. TEMP_STAGE_TABLE element)				
	//-------------------------------------------------------------------------------------------------------------
	
	
	//////////////////// SPECIAL DIRECTIVES ////////////////////////////////////////////////////////////
	$handle = fopen($FPath, "rb");	
	$contents = fread($handle, filesize($FPath));
	
	
	if (isset($fieldMap['UTF16DECODE']))
	{		
		//$contents = mb_convert_encoding($contents, "ISO-8859-1","UTF-16LE"); //need PHP5
		//$contents = iconv("UTF-16LE","ISO-8859-1",$contents); //need PHP5??! //DON'T WORK !!!				
		$contents=utf16_to_utf8($contents);
		$contents=utf8_decode($contents); //now data is stored with ISO-8859-1 charset				
	}		


		

	//this would change the actual delimiter to semicolon
	//Values inside quotes (like ,) will not be  replaced by CSVSetSemicolonDelimiter()	
	if (isset($fieldMap['SETSEMICOLONDELIMITER']) || isset($_GET['SETSEMICOLONDELIMITER']) )
	{
		if (isset($fieldMap['SETSEMICOLONDELIMITER']))
			$delimiter=$fieldMap['SETSEMICOLONDELIMITER'];
		else 
			$delimiter=$_GET['SETSEMICOLONDELIMITER']; //this allow to use this option manually....
		
		
		$contents=CSVSetSemicolonDelimiter($contents,$delimiter);				
	}
	
	
	if (isset($fieldMap['CUTTHOUSANDDELIMITER']))
	{	
		$contents=CSVCutThousandDelimiter($contents);				
	}

	
	
	
	if ( ! isset($fieldMap['1000SCALE'])) //<< otherwise leave the decimal cutting to cutrevenue() function!
	{	
		///////////////////////////////////////////////////////cutRevenue() do again a  similar work//////
		$contents=CSVCutDecimals($contents); //cut all decimals with . separator 		
		////////////////////////////////////////////////	
	}	
	

	
	
	//never delete this files => the version of last import operation remain for debug issues!  
	$hw = fopen($FPath."_ADJUSTED", 'w'); 
	fwrite($hw, $contents); 			
	//////////////////////////////////////////////////////////////////////////////////////////////////////////////

	//fwrite($hw, substr($contents,0,40000)); 			
	//return true;	
		
	$handle = fopen($FPath."_ADJUSTED", "rb");	
	
	$num_inserts=0;
	$length=4024;
	$field_delimiter=";";

	////////////////////////////////////////all CSV files must have semicolon(;) delimiter at this point!////////////
	$test_data=fgetcsv($handle, $length, $field_delimiter);
	if (count($test_data)<$requiredColumnNum) 
	{
	      echo "Something is wrong with the number of field of adjusted file(".count($test_data)." instead of ".$requiredColumnNum.")
	      		see file with _ADJUSTED extension on /Msgs/pto/";
	      return false;	
	}
	rewind($handle);
	////////////////////////////////////////////////////////////////
	

	
	
	$idxMap=array();
	$idxFilter=-1;
	$numFiltered=0;
	
	
	//########################### SpreadSheet FIELD POSITION recognition ##########################
	$spreadsheet_field_name=fgetcsv($handle, $length, $field_delimiter); // these are 1st lines
	for ($i=0;$i<count($spreadsheet_field_name);$i++)
	{
		$spreadsheet_field_name[$i]=trim(str_replace("?","",$spreadsheet_field_name[$i])); //there could be ?? before the column name string..

		//echo  $spreadsheet_field_name[$i]."---";
		if ( array_key_exists( $spreadsheet_field_name[$i] , $fieldMap  )) 
		{
			$idxMap[$spreadsheet_field_name[$i]]=$i; //add a new element to $idxMap that associate the field position inside the spreadsheet 
			//echo  $spreadsheet_field_name[$i]."---".$i."<BR>";
		}

		//detect the idx of the spreadsheet filter (that filter column values could not be inserted into the db)
		if (isset($fieldMap['FILTERFIELD_NAME']) && $spreadsheet_field_name[$i]==$fieldMap['FILTERFIELD_NAME']) 
			$idxFilter=$i;											
			
			
		//echo "<br />";
	}
	

	
	if (count($idxMap)!= $requiredColumnNum )
	{
		//print_r($idxMap);
		//print_r($fieldMap);
		
		echo ("Error: not all required field founded inside the spreadsheet 
				<br />only ".count($idxMap)."/".( $requiredColumnNum )." founded
				<br />Missing are:<br /> ");
		
		foreach ($fieldMap as $k=>$v)
		{
			if (in_array($k,$specialElementName)) continue;
			
			if (!isset($idxMap[$k])) echo "<b>".$k."</b><br />";
		}
		
		return false;
	}	
	//////////////////////////////////////////////////////////////////####################à

	
	if (!isset($fieldMap['APPEND_DATA_TO_TABLE'])) //empty the table only when needed
	{	
		$SQL0="DELETE FROM ".$fieldMap['TEMP_STAGE_TABLE']." WHERE 1=1";
		//echo "DEBUG: ".$SQL0."<BR>\n";
		if (!mysql_query($SQL0))
		{
			echo "Error in ".$SQL0."<BR>".mysql_error();
			return false;
		}
	}
	
	$checksumTotal=false;
	
	while ( ($data = fgetcsv($handle, $length, $field_delimiter)) !==false ) 
	{

		if ($data[0]==null) continue; //empty lines - where Task Field is empty

		if ($data[0]=="Total" && $data[1]==null && $data[2]==null && $data[3]==null && $data[4]==null) //total - drop
		{
			//$checksumTotal=str_replace(",","",$data[3]); //drop thousand commas
			continue; 
		}
		
		//echo $data    [  $idxFilter  ]."==".$fieldMap['FILTERFIELD_VALUE']."?";
		if (isset($fieldMap['FILTERFIELD_NAME']) && $data[ $idxFilter ]!= $fieldMap['FILTERFIELD_VALUE']) 
		{ 
			$numFiltered++;
			continue;			
		}
		
	    $num_inserts++;  
	    //echo "Processing#".$num_inserts."........ at time ".time()."<BR>".var_dump($data)."<BR>\n";
		  
	    
		$SQLFieldValues="";
		$SQLFieldNames="";
	  	
		foreach ($idxMap as $spreadsheetFieldName => $spreadsheetFieldIdx) 
		{

			$fieldValue=$data    [  $spreadsheetFieldIdx   ]; //spreadsheet value
			$fieldName =$fieldMap[  $spreadsheetFieldName  ]; //mysql table field name
			

			
			
			//echo "---".$fieldName."-------------------".$fieldValue."<br />";
			
			if ((	strpos(strtolower($spreadsheetFieldName),'revenue')!==false ||
			        strpos(strtolower($spreadsheetFieldName),'amount')!==false    ) &&
			        strpos(strtolower($spreadsheetFieldName),'date') ===false)			        
			{
				//echo "----".$fieldName."-------------------".$fieldValue."<br />";
				//if ($spreadsheetFieldName=="Bookings Amount - CD")
				//echo $spreadsheetFieldName.":::::::".$fieldValue.">>>>".CutRevenue($fieldValue);
				$fieldValue=CutRevenue($fieldValue,$fieldMap['1000SCALE']); // remove number fractions
			}
			
			
			if ( strpos(strtolower($spreadsheetFieldName),'date') !==false || strpos($spreadsheetFieldName,'FORECAST_DT') !==false )
			{
				//echo "-----".$fieldName."-------------------".$fieldValue."<br />";
				$fieldValue=ConvertDate($fieldValue,$spreadsheetFieldName,$num_inserts); // dates - format conversion
			}
			
			
			if (  trim($spreadsheetFieldName)=="Fiscal Quarter"  )
			{
				//ES: "2012 FY Q 1"
				
				$fieldValue=str_replace(" ","",$fieldValue);
				
				$exp=explode("FY",$fieldValue);
				$fieldValue=$exp[1]."FY".substr($exp[0],2);
				
				//echo $fieldValue."<br />";
			}
			if (isset($fieldMap['AUTOFILL']) && $fieldMap['AUTOFILL']!=='false' && $fieldName!="ChildName") {
				if ($fieldValue!='') {
					$LastVal[$fieldName]=$fieldValue;
				} else {
					$fieldValue=$LastVal[$fieldName];
				}
			}
			

			$SQLFieldNames .=", `".$fieldName."`" ;	
	  		$SQLFieldValues.=",'".addslashes(trim(str_replace("'","`",$fieldValue)))."'";
	  		
		 }  
		
		 if (!isset($fieldMap['NOTAUTOID']))
		 {
		 	$SQLFieldNames="id".$SQLFieldNames;
		 	$SQLFieldValues="''".$SQLFieldValues;
		 }
		 else 
		 {
		 	//remove comma
		 	$SQLFieldNames=substr($SQLFieldNames,1);
		 	$SQLFieldValues=substr($SQLFieldValues,1);
		 }


		 
		 $SQL1="INSERT INTO ".$fieldMap['TEMP_STAGE_TABLE']." (".$SQLFieldNames.") VALUES (".$SQLFieldValues.")";
		  
		  mysql_query($SQL1) or die("Error in ".$SQL1."<BR>".mysql_error());
		  //echo "DEBUG: ".$num_inserts.":".$SQL1."<BR>\n";			

	}

	
	if (isset($fieldMap['CHECKSUMFIELD']) && $checksumTotal!==false) //go on with checksum
	{
		 $SQL1="SELECT SUM(".$fieldMap['CHECKSUMFIELD'].") FROM ".$fieldMap['TEMP_STAGE_TABLE'];
		  
		 $result=mysql_query($SQL1) or die("Error in ".$SQL1."<BR>".mysql_error());
		 $row=mysql_fetch_array($result);
		 
		 
		 echo "<br /><br />JITENDER CHECKSUM: <b>".$checksumTotal."</b><br />IMPORT CHECKSUM: <b>".$row[0]."</b><br /><br />";
		 
		 if ($checksumTotal!=$row[0])
		 	die ("CHECKSUM ERROR");
		
	}
	
	
	if (isset($fieldMap['FILTERFIELD_NAME']))
	{
		echo "Filtered ".$numFiltered." rows. Inserted ".$num_inserts." rows";
	}
	else
		echo "Inserted ".$num_inserts." rows";
		
	echo "FileRead Done<BR>\n";
	
	
	return true;
}
        $success = TRUE;
    } else {
        $success = FALSE;
    }
}
if ($success) {
    $short_filename = substr($filename, 0, -4) . "gz";
    $unzip_filename = substr($filename, 0, -5);
    exec("mv " . $filename . " " . $short_filename);
    exec("gunzip -d " . $short_filename);
    $fh = fopen($unzip_filename, "r");
    while (!feof($fh)) {
        $filedata .= fgets($fh, 4096);
    }
    fclose($fh);
    $filedata = utf16_to_utf8($filedata);
    $filedata = str_replace("PROJECT-TITLE>", "PROJECTTITLE>", $filedata);
    $filedata = str_replace("LAST-INDEX>", "LASTINDEX>", $filedata);
    $filedata = str_replace("COLOR-SCHEME>", "COLORSCHEME>", $filedata);
    //echo($filedata);
    $dom = new domDocument();
    $dom->loadXML($filedata);
    $achz = simplexml_import_dom($dom);
    //print_r($achz);
    $this_project = new Project();
    $this_project->title = $achz->PROJECTTITLE;
    if (strtolower($achz->CLASSIFICATION) == "u") {
        $this_project->classification = "U";
    }
    if (strtolower($achz->CLASSIFICATION) == "fouo") {
        $this_project->classification = "U";
예제 #6
0
파일: sendmail.php 프로젝트: Roffo/ptsignup
                $dec .= chr(0x80 | $c >> 6 & 0x3f);
                $dec .= chr(0x80 | $c >> 0 & 0x3f);
            } else {
                $dec .= chr(0xc0 | $c >> 6 & 0x1f);
                $dec .= chr(0x80 | $c >> 0 & 0x3f);
            }
        }
    }
    return $dec;
}
// Sign-up relay: takes the posted e-mail address and password, builds an
// activation link from the address, forwards everything to the remote
// mail-sending endpoint and echoes that endpoint's response to the client.

// Read the inputs defensively: a missing or non-string field becomes '' instead
// of raising an "undefined index" notice and relaying a half-empty request.
$email = (isset($_POST['email']) && is_string($_POST['email'])) ? $_POST['email'] : '';
$pw = (isset($_POST['pw']) && is_string($_POST['pw'])) ? $_POST['pw'] : '';

if ($email === '' || $pw === '') {
    // Nothing meaningful can be mailed without both fields.
    http_response_code(400);
    echo 'Missing email or password.';
} else {
    $today = date("Y-m-d");

    // NOTE(review): this endpoint is plain HTTP, so the password travels
    // unencrypted. Confirm whether the host offers HTTPS before switching.
    $url = "http://archive.ptengine.com/templets/miapex/php//regSesSendEmail.php";

    // NOTE(review): base64 output can contain '+', '/' and '=', which are not
    // query-string safe. Confirm how the activation form decodes the
    // `ptengine` parameter before URL-encoding it here.
    $link = "https://report.ptengine.com/activation/activation_form.htm?ptengine="
        . base64_encode(utf16_to_utf8($email));

    $data = array('email' => $email, 'password' => $pw, 'link' => $link, 'date' => $today);

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_POST, 1);
    // Send a url-encoded string rather than an array: with the array form,
    // legacy PHP versions treat a value that starts with "@" as a local file
    // path to upload, and these values come straight from the client.
    // The explicit '&' separator guards against a custom arg_separator.output.
    curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($data, '', '&'));

    $response = curl_exec($curl);
    if ($response === false) {
        // curl_exec() returns false on transport failure; echoing it would
        // silently print nothing. Log the cause and report a gateway error.
        error_log('sendmail relay failed: ' . curl_error($curl));
        http_response_code(502);
        echo 'Unable to send the activation email.';
    } else {
        echo $response;
    }
    curl_close($curl);
}
예제 #7
0
    ?>
<code class="prettyprint" style="white-space:pre"><?php 
    $fp = fopen($recv_filepath, "r");
    $is_utf16 = false;
    if (!feof($fp)) {
        $line = fread($fp, 128);
        $is_utf16 = is_utf16($line);
        rewind($fp);
    }
    $stat = fstat($fp);
    $modified_year = date('Y', $stat['mtime']);
    $copyright_years = date('Y', $stat['ctime']);
    while ($fp && !feof($fp)) {
        $line = fread($fp, 32768);
        if ($is_utf16) {
            $line = utf16_to_utf8($line);
        }
        echo htmlentities($line, ENT_NOQUOTES, 'UTF-8', FALSE);
    }
    fclose($fp);
    ?>
</code>
<?php 
}
if ($modified_year != $copyright_years) {
    $copyright_years .= '-' . $modified_year;
}
?>
    <div class="watermark">
    <?php 
if ($show_copyright) {