/**
 * Converts one membership .docx file to XHTML with LibreOffice, parses the
 * generated HTML and hands the result to the data manager.
 *
 * The three photo payloads (member one, member two, vehicle) are read from the
 * parsed object, removed from it, and saved separately via
 * savePhotoAsBase64(); the object itself is stored via putFile() under its
 * document_name.
 *
 * @param object $file     Source file; must provide getRealPath() and
 *                         getBasename() (e.g. an SplFileInfo).
 * @param string $html_dir Directory that receives the converted .html file.
 * @param string $json_dir Not used by this method; kept so existing callers
 *                         keep working.
 */
function convertOneFile($file, $html_dir, $json_dir)
{
    $infile = $file->getRealPath();
    $outfile = $file->getBasename(".docx");

    // Each argument is escaped individually so that paths containing spaces,
    // quotes, "$" or backticks cannot break or alter the command line.
    $cmd = "/Applications/LibreOffice.app/Contents/MacOS/soffice --headless"
        . " --convert-to " . escapeshellarg("html:XHTML Writer File:UTF8")
        . " --outdir " . escapeshellarg($html_dir)
        . " " . escapeshellarg($infile);

    // Discard the converter's chatter. The redirect target has to be /dev/null:
    // a bare "null" creates a file called "null" in the working directory.
    system($cmd . " > /dev/null 2> /dev/null");

    print "converted $infile to html outfile : {$outfile}.html\n";
    $convertedFilePath = "{$html_dir}/{$outfile}.html";
    print "converted file path : $convertedFilePath \n";

    $memDoc = new MembershipHtmlDoc($convertedFilePath);
    $memDoc->parse();
    $membership = $memDoc->getMembershipObject();

    $documentName = $membership->document_name;
    $photoSlots = ["member_one", "member_two", "vehicle"];

    // Collect all photo payloads first, then strip them, so the object that
    // gets stored no longer carries the image data.
    $photos = [];
    foreach ($photoSlots as $slot) {
        $photos[$slot] = $membership->$slot->photo_src_data;
    }
    foreach ($photoSlots as $slot) {
        unset($membership->$slot->photo_src_data);
    }

    $this->dataManager->putFile($documentName, $membership);
    foreach ($photos as $slot => $photoData) {
        $this->dataManager->savePhotoAsBase64($documentName, $slot, "jpg", $photoData);
    }
}
* Step 3 - save in a new json format by calling MembershipHtml::toJson()
*
*
* Along the way change the file name of the membership document from something like
*
*      "SRMN Membership Application Form 5.20a huntercarl .docx"
*
* to
*
*      "huntercarl.json"
*/

var_dump($argv);

// The document to convert is the first command-line argument; stop with a
// usage message instead of passing an undefined value to the converter.
if (!isset($argv[1])) {
    fwrite(STDERR, "usage: php {$argv[0]} <membership-document>\n");
    exit(1);
}

// Convert the document to html and echo the path of the converted file.
$converter = new DocxToHtmlConverter();
$convertedFilePath = $converter->convert($argv[1]);
print $convertedFilePath . "\n";

// Parse the converted html and dump the resulting object for inspection.
$memDoc = new MembershipHtmlDoc($convertedFilePath);
$memDoc->parse();
$membership = $memDoc->getMembershipObject();
print_r($membership);

// Encode before touching the disk so that an encoding failure cannot leave an
// empty or bogus .json file behind.
$json = json_encode($membership, JSON_PRETTY_PRINT);
if ($json === false) {
    fwrite(STDERR, "could not encode membership object as JSON: " . json_last_error_msg() . "\n");
    exit(1);
}

// The output file is named after the parsed document_name and is written next
// to this script.
$jsonPath = __DIR__ . "/" . $membership->document_name . ".json";
if (file_put_contents($jsonPath, $json) === false) {
    fwrite(STDERR, "could not write {$jsonPath}\n");
    exit(1);
}
?>
$this->parseJourneys(); // tail of a method that begins above the visible code
$this->parseImages();
}

/**
 * Returns whatever is currently stored in $this->object.
 *
 * NOTE(review): presumably this is the membership record filled in by the
 * parse steps - confirm against the rest of the class, which is not visible
 * here.
 *
 * @return mixed The value of $this->object.
 */
function getMembershipObject() {
    return $this->object;
}
} // closes the enclosing class body (its declaration is not visible here)

// Ad-hoc command-line driver: dumps $argv, loads a fixed HTML file into a
// DOMDocument, parses it with MembershipHtmlDoc and prints the parsed object
// as JSON.
var_dump($argv);
// NOTE(review): $fn is only referenced by the commented-out loadHTMLFile()
// call further down, yet reading $argv[1] still requires an argument.
$fn = $argv[1];
//
// First convert to html
//
// $converter = new DocxToHtmlConverter();
// print ($convertedFilePath = $converter->convert($argv[1]))."\n";
// The conversion step above is disabled; a fixed path is parsed instead.
// NOTE(review): the path is relative, so it presumably depends on the
// directory the script is started from - confirm.
$convertedFilePath = "./html_files/huntercarl.html";
$dom = new DOMDocument();
$dom->loadHTMLFile($convertedFilePath);
//$dom->loadHTMLFile(__DIR__."/".$fn);
// Here MembershipHtmlDoc is given the loaded DOMDocument itself.
$memDoc = new MembershipHtmlDoc($dom);
$memDoc->parse();
print_r(json_encode($memDoc->getMembershipObject()));
?>