<?php

// Pull in the CreateDocx class from the library's classes directory.
require_once '../../../classes/CreateDocx.inc';

// Settings array passed as the third argument of CreateDocx::DOCX2TXT().
// NOTE(review): the keys look like per-element switches of the text export
// (paragraphs, lists, tables, notes, charts) - confirm against the library docs.
$options = array(
    'paragraph' => true,
    'list' => true,
    'table' => true,
    'footnote' => true,
    'endnote' => true,
    'chart' => 0,
);

// Runs whenever this file is loaded: converts the sample document to
// 'document_1.txt' (both paths are relative to the working directory).
CreateDocx::DOCX2TXT('../../files/Text.docx', 'document_1.txt', $options);
/**
 * Converts the raw content of a document into plain text.
 *
 * The content is written to a scratch file below $tmp_dir, handed to the
 * converter that matches $source_type, and the scratch file is removed again
 * before returning. Problems are reported through printStandardReport() /
 * printDocReport() and signalled to the caller with the string 'ERROR'.
 *
 * NOTE: the scratch file name is fixed ('tmp_file' plus an optional
 * extension), so two calls sharing the same $tmp_dir at the same time would
 * overwrite each other's file.
 *
 * @param string $file        Raw content of the document to convert.
 * @param mixed  $file0       Not used by this function.
 * @param string $source_type Document type: 'pdf', 'doc', 'rtf', 'ppt', 'xls',
 *                            'ods', 'odt', 'docx', 'xlsx' or 'js'. 'ppt' and any
 *                            other value produce no text and therefore 'ERROR'.
 * @param mixed  $url         Not used by this function.
 * @param mixed  $chrSet      Not used by this function.
 *
 * @return string Extracted text, or 'ERROR' when conversion failed or no text
 *                was found. For 'js' the value comes from extract_js()
 *                (joined with blanks if that returns an array).
 */
function extract_text($file, $file0, $source_type, $url, $chrSet)
{
    global $db_con, $tmp_dir, $pdftotext_path, $catdoc_path, $xls2csv_path, $op_system, $mb, $debug;
    global $catppt_path, $home_charset, $command_line, $no_log, $clear, $converter_dir, $cl, $index_xmeta;

    $result = array();

    // Strip the 'iso-' / 'iso' prefix from the home charset (e.g. 'ISO-8859-1'
    // becomes '8859-1'); this is the value passed to catdoc's -s switch below.
    $charset_int = str_ireplace('iso', '', str_ireplace('iso-', '', $home_charset));

    // These types are stored with their extension appended, all others under
    // the bare scratch name.
    // NOTE(review): presumably the respective readers inspect the extension - confirm.
    $filename = $tmp_dir . "/tmp_file";
    if (in_array($source_type, array('ods', 'doc', 'docx', 'xlsx'), true)) {
        $filename .= "." . $source_type;
    }

    if (!($handle = fopen($filename, 'w'))) {
        die("Cannot open file {$filename} in temp folder");
    }
    mysqltest();
    if (fwrite($handle, $file) === FALSE) {
        die("Cannot write to file {$filename} in temp folder");
    }
    fclose($handle);
    mysqltest();

    if ($source_type == 'pdf') {
        //  PDF documents: external pdftotext converter
        //  Opening the binary only checks that it exists and is readable.
        $probe = fopen($pdftotext_path, 'rb');
        if (!$probe) {
            printStandardReport('errorNoPDFConv', $command_line);
            $result = 'ERROR';
        } else {
            fclose($probe);

            //  The trailing "-" makes pdftotext write to stdout so that exec()
            //  can capture the text; without it the text ends up in a .txt
            //  file next to the input and nothing is captured.
            if ($op_system != 'win') {
                $command = $pdftotext_path . " -enc UTF-8 " . $filename . " -";
            } else {
                $command = $pdftotext_path . " -cfg xpdfrc " . $filename . " -";
            }
            exec($command, $result, $retval);

            if ($retval != '0') {
                //  Map the converter's exit code to the matching report.
                $reports = array(1 => 'errorOpenPDF', 3 => 'permissionError', 127 => 'noConverter');
                $report = isset($reports[$retval]) ? $reports[$retval] : 'ufoError';
                printStandardReport($report, $command_line);
                //  Discard partial output so the failure cannot pass as text.
                $result = 'ERROR';
            } else {
                $result = implode(' ', $result);
            }
        }
    } elseif ($source_type == 'doc' || $source_type == 'rtf') {
        //  DOC and RTF files: external catdoc converter (Windows only)
        if ($op_system == 'win') {
            $command = $catdoc_path . " -s " . $charset_int . " -d utf-8 -x " . $filename;
            exec($command, $result, $retval);
            //  A first output line naming catdoc.exe is forwarded as a report.
            //  NOTE(review): presumably this is catdoc's own error message - confirm.
            if (isset($result[0]) && stristr($result[0], "catdoc.exe")) {
                printDocReport($result[0], $cl);
            }
        } else {
            $message = "&nbsp;&nbsp;&nbsp;&nbsp;Indexing of .doc and .rtf documents is currently not supported on LINUX OS.";
            printDocReport($message, $cl);
        }
    } elseif ($source_type == 'ppt') {
        //  PPT files: currently unsupported, as a failure was encountered for
        //  large PowerPoint presentations. $result stays empty, which is
        //  reported as 'nothingFound' below.
    } elseif ($source_type == 'xls') {
        //  XLS spreadsheets
        require_once "" . $converter_dir . "/xls_reader.php";
        $data = new Spreadsheet_Excel_Reader();
        if ($mb == '1') {
            //  if the extension exists, use mb_convert_encoding instead of iconv
            $data->setUTFEncoder('mb');
        }
        $data->setOutputEncoding('UTF-8');
        $data->read($filename);

        if ($data->_ole->error == '1') {
            printStandardReport('xlsError', $command_line, $no_log);
            $result = 'ERROR';
        } else {
            $result = '';
            $boundsheets = $data->boundsheets;
            $sheets = $data->sheets;
            if ($boundsheets) {
                //  collect all table names in this file
                foreach ($boundsheets as $bs) {
                    $result .= $bs['name'] . ", ";
                }
                if ($sheets) {
                    foreach ($sheets as $sheet) {
                        //  ignore sheets without any cells
                        if (empty($sheet['cells'])) {
                            continue;
                        }
                        //  collect content of all cells
                        foreach ($sheet['cells'] as $row) {
                            foreach ($row as $content) {
                                $result .= $content . ", ";
                            }
                        }
                    }
                }
                //  NOTE(review): the reader was asked for UTF-8 output above, so
                //  this looks like a second encoding pass - confirm it is intended.
                if (strtoupper($home_charset) == 'ISO-8859-1') {
                    $result = utf8_encode($result);
                }
            }
        }
    } elseif ($source_type == 'ods') {
        //  ODS spreadsheets
        require_once "" . $converter_dir . "/ods_reader.php";
        $reader = ods_reader::reader($filename);
        $sheets = $reader->read($filename);
        if ($sheets) {
            $result = '';
            foreach ($sheets as $sheet) {
                if (!$sheet) {
                    continue;
                }
                foreach ($sheet as $row) {
                    //  ignore all empty cells
                    if (!$row) {
                        continue;
                    }
                    //  collect content of all cells
                    foreach ($row as $content) {
                        $result .= $content . " ";
                    }
                }
            }
        } else {
            $result = 'ERROR';
        }
    } elseif ($source_type == 'odt') {
        //  ODT documents
        require_once "" . $converter_dir . "/odt_reader.php";
        $x = new odt_reader();
        //  unzip the document, then read it
        $u = $x->odt_unzip($filename, false);
        $result = $x->odt_read($u[0], 2);
        //  create some blanks around the tags
        $result = str_replace("<", " <", $result);
        $result = str_replace(">", "> ", $result);
    } elseif ($source_type == 'docx') {
        //  DOCX files: converter class supplied by http://www.phpdocx.com
        $options = array('paragraph' => false, 'list' => false, 'table' => false, 'footnote' => false, 'endnote' => false, 'chart' => 0);
        $docx_file = "docx.txt";
        $result = '';
        require_once "" . $converter_dir . "/docx/CreateDocx.inc";
        CreateDocx::DOCX2TXT($filename, $tmp_dir . "/" . $docx_file, $options);

        //  Best effort: the text file may be missing if the conversion failed.
        //  Compare against false/'' explicitly so a document whose whole text
        //  is "0" is not mistaken for an empty one.
        $text = @file_get_contents($tmp_dir . "/" . $docx_file);
        if ($text !== false && $text !== '') {
            $result = "{$text} ";
        }

        if ($index_xmeta) {
            require_once "" . $converter_dir . "/xmeta_converter.php";
            $docxmeta = new x_metadata();
            $docxmeta->setDocument($filename);
            $meta_fields = array(
                $docxmeta->getTitle(),
                $docxmeta->getSubject(),
                $docxmeta->getCreator(),
                $docxmeta->getKeywords(),
                $docxmeta->getDescription(),
                $docxmeta->getLastModifiedBy(),
                $docxmeta->getRevision(),
                $docxmeta->getDateCreated(),
                $docxmeta->getDateModified(),
            );
            //  Keep the fields separated by blanks so their words do not fuse;
            //  empty fields add nothing, so an empty document is still detected.
            foreach ($meta_fields as $field) {
                if ((string) $field !== '') {
                    $result .= " " . $field;
                }
            }
        }
        @unlink($tmp_dir . "/" . $docx_file);
    } elseif ($source_type == 'xlsx') {
        //  XLSX spreadsheets
        $result = '';
        require_once "" . $converter_dir . "/xlsx_reader.php";
        $xlsx = new SimpleXLSX($filename);

        //  sheet names first
        $names = $xlsx->sheetNames();
        if ($names) {
            if ($debug == 2) {
                printXLSXreport(count($names), $cl);
            }
            foreach ($names as $my_name) {
                $result .= $my_name . " ";
                if ($debug == 2) {
                    printActKeyword($my_name);
                }
            }
        }

        //  Sheets are numbered from 1; stop at the first number that yields no rows.
        for ($i = 1; $rows = $xlsx->rows($i); $i++) {
            foreach ($rows as $row) {
                foreach ($row as $val) {
                    //  add value of each non-empty cell
                    if ($val) {
                        $result .= " " . $val;
                    }
                }
            }
        }

        if ($index_xmeta) {
            require_once "" . $converter_dir . "/xmeta_converter.php";
            $xlscxmeta = new x_metadata();
            $xlscxmeta->setDocument($filename);
            $meta_fields = array(
                $xlscxmeta->getTitle(),
                $xlscxmeta->getSubject(),
                $xlscxmeta->getCreator(),
                $xlscxmeta->getKeywords(),
                $xlscxmeta->getDescription(),
                $xlscxmeta->getLastModifiedBy(),
                $xlscxmeta->getRevision(),
                $xlscxmeta->getDateCreated(),
                $xlscxmeta->getDateModified(),
            );
            //  Keep the fields separated by blanks so their words do not fuse;
            //  empty fields add nothing, so an empty document is still detected.
            foreach ($meta_fields as $field) {
                if ((string) $field !== '') {
                    $result .= " " . $field;
                }
            }
        }
    } elseif ($source_type == 'js') {
        //  JavaScript
        $result = extract_js($file);
    }

    if ($result !== 'ERROR') {
        if (is_array($result)) {
            $result = implode(" ", $result);
        }
        if (strlen((string) $result) === 0) {
            //  if there was not one word found, print warning message
            if ($source_type == 'js') {
                printStandardReport('jsEmpty', $command_line, $no_log);
            } else {
                printStandardReport('nothingFound', $command_line, $no_log);
            }
            $result = 'ERROR';
        }
    }

    unlink($filename);
    mysqltest();
    if ($clear == 1) {
        unset($command, $retval, $file);
    }
    return $result;
}