/**
 * Initialises the extractor and the two predicates it emits.
 *
 * The constructor takes no arguments. It runs the parent constructor and
 * then stores:
 *  - $this->shortPredicate: built with RDFtriple::URI() from the
 *    RDFS_COMMENT constant;
 *  - $this->longPredicate: built with RDFtriple::predicate("abstract").
 */
public function __construct()
{
    parent::__construct();

    // The original inline note says "no validation required"; presumably
    // that is what the `false` flag requests - confirm against RDFtriple::URI().
    $commentPredicate = RDFtriple::URI(RDFS_COMMENT, false);
    $this->shortPredicate = $commentPredicate;

    $abstractPredicate = RDFtriple::predicate("abstract");
    $this->longPredicate = $abstractPredicate;
}
/**
 * Answers XHR requests with a JSON payload; otherwise freezes this object.
 *
 * When the request carries the self::XHR_HEADER header, the posted "lang"
 * is applied to the translator, the posted "action" is handled ("extract"
 * runs the extractor, "save" picks up the posted "data", "get" does
 * nothing extra), a JSON response is sent and the script terminates with
 * exit status 255. For every other request only freeze() is called.
 *
 * @param \Nette\DI\IContainer $container container providing httpRequest and httpResponse
 */
protected function processRequests(\Nette\DI\IContainer $container)
{
    $this->updating();

    $request = $container->httpRequest;
    $reply = $container->httpResponse;

    // Regular (non-XHR) request: nothing to answer, just freeze.
    if (!$request->getHeader(self::XHR_HEADER, FALSE)) {
        $this->freeze();
        return;
    }

    $payload = FALSE;
    $this->translator->setLang($request->getPost('lang'));

    // Loose comparison on purpose: it mirrors the semantics of a switch.
    $action = $request->getPost('action');
    if ($action == 'get') {
        // Nothing to do besides returning the dictionaries below.
    } elseif ($action == 'extract') {
        $this->extractor->run();
    } elseif ($action == 'save') {
        $payload = $request->getPost('data', "");
    }

    $dictionaries = $this->getDictionaries($payload);

    $json = new \Nette\Application\Responses\JsonResponse(array(
        'status' => "OK",
        'lang' => $this->translator->dictionaries,
        'data' => $dictionaries,
    ));
    $json->send($request, $reply);

    // The XHR path never returns to the caller.
    exit(255);
}
/**
 * Builds the extractor for a file and immediately extracts it.
 *
 * The file extension (as returned by getExtension()) must be a key of
 * $this->formats; otherwise an error line is printed and the script exits.
 *
 * @param string $filename file to extract; forwarded to the parent constructor
 */
public function __construct($filename)
{
    parent::__construct($filename);

    $extension = $this->getExtension($filename);

    if (isset($this->formats[$extension])) {
        $this->extract($extension);
        return;
    }

    // Unsupported format: report it and stop the whole script.
    echo "error: Unknown file format [{$extension}]\n";
    exit;
}
/**
 * Processes the message lines and strips a trailing "Sent from ..." signature.
 *
 * Runs the parent processing first, then inspects the chunk returned by
 * getLinesFromEnd(1). If that text starts with "sent from"
 * (case-insensitive), the body is truncated to its first $cut_line
 * entries. stripSignature() is always called at the end.
 */
protected function processLines()
{
    parent::processLines();

    // Default signature pattern, anchored at the start of the inspected text.
    $match_string = '^sent from(.*?)';

    // getLinesFromEnd(1) yields a pair: the trailing line(s) and a cut index
    // (presumably the body offset where those lines begin - confirm in the parent class).
    list($default_signature, $cut_line) = self::getLinesFromEnd(1);

    // Use an explicit empty-string separator: passing null is deprecated since PHP 8.1.
    $default_signature = implode('', $default_signature);

    if (preg_match('/' . $match_string . '/is', $default_signature)) {
        // Keep only the part of the body that precedes the signature.
        $this->body = array_slice($this->body, 0, $cut_line);
    }

    $this->stripSignature();
}
/**
 * {@inheritDoc}
 *
 * Returns null for unsupported fields. A supported field that has not been
 * loaded yet is populated from getLoaderData(); when $this->normalizersMap
 * has an entry for the field, the data is first passed through the method
 * named "<entry>Normalizer" on this object.
 *
 * @param string $field
 */
public function getField($field)
{
    if (!$this->isFieldSupported($field)) {
        return null;
    }

    // Already loaded: the parent getter can serve it directly.
    if ($this->hasLoadedField($field)) {
        return parent::getField($field);
    }

    $rawValue = $this->getLoaderData($field);

    $this->fields[$field] = isset($this->normalizersMap[$field])
        ? $this->{sprintf('%sNormalizer', $this->normalizersMap[$field])}($rawValue)
        : $rawValue;

    return parent::getField($field);
}
/**
 * Creates the object under test ($this->sut) from a fixed HTML fixture.
 *
 * The fixture passed to Extractor::fromHtml() contains two elements with
 * class "s1" (a <span> directly under <body> and a <span> nested inside
 * div.c1) alongside unclassed <div> and <span> siblings.
 */
public function setUp() { $this->sut = Extractor::fromHtml(<<<HTML <html> \t<body> \t\t<span class="s1">foo</span> \t\t<div class="c1"> \t\t\t<div>xxx</div> \t\t\t<div> \t\t\t\t<span>yyy</span> \t\t\t\t<span>zzz</span> \t\t\t\t<span class="s1">baz</span> \t\t\t</div> \t\t</div> \t</body> </html> HTML ); }
/**
 * Extract reply from Android mail client.
 *
 * After the parent processing, two trailing artefacts are removed in turn,
 * each by inspecting the chunk returned by getLinesFromEnd(1):
 *  1. a "<first name> <last name> wrote:" attribution;
 *  2. a default signature starting with "sent from" (case-insensitive).
 * stripSignature() is always called at the end.
 */
public function processLines()
{
    parent::processLines();

    // getLinesFromEnd(1) yields a pair: the trailing line(s) and a cut index
    // (presumably the body offset where those lines begin - confirm in the parent class).
    list($unwanted_text, $cut_line) = self::getLinesFromEnd(1);

    // Use an explicit empty-string separator: passing null is deprecated since PHP 8.1.
    $unwanted_text = implode('', $unwanted_text);

    // strip 'first name last name wrote:'
    if (preg_match('/(.*?)wrote:/is', $unwanted_text)) {
        $this->body = array_slice($this->body, 0, $cut_line);
    }

    // Default signature pattern, anchored at the start of the inspected text.
    $match_string = '^sent from(.*?)';

    // Re-read the tail: the body may just have been shortened above.
    list($default_signature, $cut_line) = self::getLinesFromEnd(1);
    $default_signature = implode('', $default_signature);

    if (preg_match('/' . $match_string . '/is', $default_signature)) {
        // Keep only the part of the body that precedes the signature.
        $this->body = array_slice($this->body, 0, $cut_line);
    }

    $this->stripSignature();
}
foreach ($this->nodeList as $node) { echo "<tr><td>"; echo $node->id . "</td><td>"; echo $node->author . "</td><td>"; echo $node->authorEmail . "</td><td>N/A</td><td>"; foreach ($node->parentList as $p) { echo "[" . $p . "]\n "; } echo "</td><td>"; echo $node->commitDate . "</td><td>"; echo $node->relativeDate . "</td><td>"; foreach ($node->subsystem as $s) { echo "[" . $s . "]\n "; } echo "</td><td>"; echo $node->modificationSum . "</td><td>"; } echo "</table>"; } } $extractor = new Extractor(); // echo "Time ~ Position: ".$extractor->timePositionRatio."<br />"; $extractor->recCommitExtractor("e99cc29"); echo "<pre>"; $extractor->printJSON(); echo "</pre>"; ?> </div> </body> </html>
/**
 * Converts HTML to plain text after two client-specific clean-ups.
 *
 * 1. <span>/</span> tags are renamed to <p>/</p>. Only the tag name is
 *    rewritten, so the word "span" in visible text or in attributes such
 *    as "colspan" is left untouched.
 * 2. A <div class="signature"...>...</div> block is removed (greedy match
 *    within a single line).
 *
 * If a regular expression fails (preg_replace() returns null), that step
 * is skipped and the input is passed on unchanged.
 *
 * @param string $html
 *
 * @return string
 */
public static function toPlainText($html)
{
    // Rename span tags only; a plain str_replace('span', 'p') would also
    // corrupt ordinary text containing "span".
    $renamed = preg_replace('/<(\\/?)span\\b/i', '<$1p', $html);
    if ($renamed !== null) {
        $html = $renamed;
    }

    $withoutSignature = preg_replace('/<div class="signature".+<\\/div>/', '', $html);
    if ($withoutSignature !== null) {
        $html = $withoutSignature;
    }

    return parent::toPlainText($html);
}
/**
 * Removes the default Outlook for Mac signature, then converts to plain text.
 *
 * Everything from a <div id="MAC_OUTLOOK_SIGNATURE"...> up to the last
 * </div> on the same line is dropped before the parent conversion runs.
 *
 * @param string $html
 *
 * @return string
 */
public static function toPlainText($html)
{
    $signaturePattern = '/<div id="MAC_OUTLOOK_SIGNATURE".+<\\/div>/';
    $withoutSignature = preg_replace($signaturePattern, '', $html);

    return parent::toPlainText($withoutSignature);
}
/**
 * Constructs a new GeoExtractor.
 *
 * The constructor takes no arguments; $this->batchExtraction is read from
 * the 'Geo.batchextraction' option. Per the original documentation, when
 * that flag is <code>true</code> the extraction result table is cleared for
 * the respective language on <code>start()</code> and entries are verified
 * for duplicates, and it should be <code>false</code> for extraction
 * previews (that behaviour lives outside this constructor).
 */
public function __construct()
{
    parent::__construct();

    $batchFlag = Options::getOption('Geo.batchextraction');
    $this->batchExtraction = $batchFlag;
}
// Ad-hoc driver: builds an Extractor for a single URL, collects the e-mail
// addresses and phone numbers it finds into $emailList / $phoneList, and dumps both.

// Alternative sample URLs kept for manual testing:
// $url2 = "http://we.keepitsimple.co.il/user/38";
// $url = "http://www.hasut.co.il/24683";
// $url = "http://we.keepitsimple.co.il/user/149/";
// $url = 'http://www.b144.co.il/BusinessResults.aspx?_business=%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA&_page_no=1';
// $url = "https://www.t.co.il/531-%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA.html";
// $url = 'http://www.b144.co.il/BusinessResults.aspx?TS01568688_id=3&_business=%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA&_page_no=1';
// $url = "http://www.imakeup.co.il/Suppliers/2/%D7%90%D7%99%D7%A4%D7%95%D7%A8+%D7%9C%D7%90%D7%99%D7%A8%D7%95%D7%A2%D7%99%D7%9D.html";
$url = 'http://www.lawyerinfo.co.il/lawyer/5332-%D7%A2%D7%99%D7%9C%D7%99-%D7%90%D7%91%D7%99%D7%90%D7%9C';

$emailList = array();
$phoneList = array();
$urlList = array($url);

// test
// $extractor = new Extractor($url);
// $urlList = array_merge($urlList, $extractor -> getAllURLFromHTML());
// print var_dump($urlList);

$extractor = new Extractor($url);
$extractor->getEmailFromHTML($emailList);
$extractor->getNumbersFromHTML($phoneList);

// var_dump() writes its own output and returns nothing, so wrapping it in
// `print` only printed an empty string; call it directly.
var_dump($emailList);
var_dump($phoneList);

// $extractor = new Extractor($url2);
// $extractor -> getEmailFromHTML($emailList);
// $extractor -> getNumbersFromHTML($phoneList);
// print var_dump($emailList);
// print var_dump($phoneList);

// ---- main script
// if ($option == 'deep') {
// $extractor = new Extractor($url);
// $urlList = array_merge($urlList, $extractor -> getAllURLFromHTML());
// }
// for ($i = 0; $i < count($urlList); $i++) {
// Extraction driver: connects to the MySQL extraction database, prepares a
// clean output directory and logger, then drops and re-creates the temporary
// Noark 4 database.
$extractor = null;
$uttrekkDirectory = "./uttrekksfiler";
$noarkIHoutputDir = "./";

// Main program starts here
$uttrekkMySQLBase = null;
try {
    $uttrekkMySQLBase = new UtrekkMySQLBase($uttrekk_db_host, $uttrekk_db_user, $uttrekk_db_pswd, $uttrekk_db_database);
} catch (Exception $e) {
    // The message is only printed here; the null check below stops the script.
    echo $e->getMessage();
}
if ($uttrekkMySQLBase == null) {
    echo "Problem med kobling til Uttrekksbasen\n";
    return;
}

$databaseParameters = new MySQLDBParameters($uttrekk_db_host, 3306, $uttrekk_db_database, $uttrekk_db_user, $uttrekk_db_pswd);
$extractor = new Extractor("mysql", $databaseParameters, $uttrekkDirectory);

// Start from an empty extraction directory.
$extractor->deleteDirectoryAndContents();
$extractor->createDirectory();

$logDir = $extractor->getLogDir();
$logger = new Logger($logDir, false, false, true);
echo "\nSlettet gamle filer (hvis de eksisterte) og oppretter mappe for uttrekk ({$uttrekkDirectory}) \n";

$noark4DatabaseStruktur = new Noark4DatabaseStruktur();

// Some administrative work first: delete the database if it exists, create a new empty one and create all the tables
echo "Sletter MySQL midlertidig Noark 4 base. Resultatet er (";
$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database));
echo ($val == true ? 'OK' : 'Feil') . ");\n";

echo "Oppretter MySQL midlertidig Noark 4 base. Resultatet er (";
$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->createDatabaseStatement($uttrekk_db_database));
echo ($val == true ? 'OK' : 'Feil') . ");\n";

$uttrekkMySQLBase->setDefaultDatabase();
// NOTE(review): the delete-database statement is executed again right after the
// database was created and selected - confirm this is intended.
$uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database));
} else { $this->visited[] = "{$nodeId}"; if ($cNode->authorDate < $since | $this->dummyCount > 100) { $cNode->parentList = array(); $this->nodeList[] = $cNode; //echo "too old".$nodeId; } else { $this->nodeList[] = $cNode; foreach ($cNode->parentList as $p) { $this->recCommitExtractor($p); } } } } } $extractor = new Extractor(); // echo "Time ~ Position: ".$extractor->timePositionRatio."<br />"; $extractor->recCommitExtractor("e99cc29"); //List the node info in a table echo "<table width='90%'><tr>"; echo "<tr><th>SHAW</th><th>Author</th><th>Author Email</th><th>Sign offs</th><th>Parents</th><th>Commit Date</th><th>Relative Date</th><th>Subsystem</th><th>Modification Sum</th>"; foreach ($extractor->nodeList as $node) { echo "<tr><td>"; echo $node->id . "</td><td>"; echo $node->author . "</td><td>"; echo $node->authorEmail . "</td><td>N/A</td><td>"; foreach ($node->parentList as $p) { echo "[" . $p . "]\n "; } echo "</td><td>"; echo $node->commitDate . "</td><td>";
<?php
require_once "MySQLDBParameters.php";
require_once "Extractor.php";
require_once "Constants.php";

// Example run: exports every row of the "mappe" table to arkiv.xml using the
// tag mapping below.

$sqlQuery = "SELECT * FROM mappe";

// XML tag => source column, wrapped in ARKIV rows under an ARKIV.TAB root.
$mapping = [
    'idColumn' => 'tj_peid',
    'rootTag' => 'ARKIV.TAB',
    'rowTag' => 'ARKIV',
    'encoder' => 'utf8_decode',
    'elements' => [
        'SYSTEM' => 'systemId',
        'TITTEL' => 'tittel',
        'OFFTITT' => 'offentligTittel',
        'MEDIUM' => 'dokumentmedium',
        'OPPDAT' => 'opprettetDato',
        'AVDATO' => 'avsluttetDato',
    ],
];

// NOTE(review): connection credentials are hard-coded in this script.
$dbParams = new MySQLDBParameters("localhost", 3306, "utmo_noark5", "root", "1234haha");
$extractor = new Extractor('mysql', $dbParams);

$extractor->extract($sqlQuery, $mapping, "arkiv.xml", "file");
try { $srcBase = new SrcBase($src_db_host, $src_db_port, $src_db_name, $src_db_user, $src_db_pswd, $src_db_sid); $uttrekkMySQLBase = new UtrekkMySQLBase($uttrekk_db_host, $uttrekk_db_user, $uttrekk_db_pswd, $uttrekk_db_database); } catch (Exception $e) { echo $e->getMessage(); } if ($srcBase == null) { echo "Problem med kobling til kildebasen\n"; return; } if ($uttrekkMySQLBase == null) { echo "Problem med kobling til Uttrekksbasen\n"; return; } $databaseParameters = new MySQLDBParameters($uttrekk_db_host, 3306, $uttrekk_db_database, $uttrekk_db_user, $uttrekk_db_pswd); $extractor = new Extractor("mysql", $databaseParameters, $uttrekkDirectory); $extractor->deleteDirectoryAndContents(); $extractor->createDirectory(); $logDir = $extractor->getLogDir(); $logger = new Logger($logDir, false, false, true); echo "\nSlettet gamle filer (hvis de eksisterte) og oppretter mappe for uttrekk ({$uttrekkDirectory}) \n"; $noark4DatabaseStruktur = new Noark4DatabaseStruktur(); // Temporary commented out as it takes to long to rebuild everything /* // Noe administrativt arbeid først, slett databasen om den eksisterer, lag en ny tom en og lag alle tabellene echo "Sletter MySQL midlertidig Noark 4 base. Resultatet er ("; $val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database)); echo ($val == true ? 'OK' : 'Feil' ) . ");\n"; echo "Oppretter MySQL midlertidig Noark 4 base. Resultatet er ("; $val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->createDatabaseStatement($uttrekk_db_database)); echo ($val == true ? 'OK' : 'Feil' ) . ");\n";
} closedir($handle); } else { echo "cannot open dir {$path}"; } } } if (!isset($argv[1])) { echo "Language not specified! \nUsage: " . $argv[0] . " language\n\n"; exit(-1); } $language = $argv[1]; $withComments = (bool) $argv[2]; $skin = $argv[3]; $jsAppPath = realpath(dirname(__FILE__) . '/../../'); echo "Extracting translateable string for [{$language}]\n"; $e = new Extractor(); $e->setLanguage($language); if ($skin) { $e->extractFromTpl($jsAppPath . '/skins/' . $skin . '/templates/'); $counts = $e->save($withComments, $skin); die; } $e->extractFromTpl("{$jsAppPath}/templates/"); $e->extractFromJsFilterOrServiceUsage("{$jsAppPath}/js/"); $e->extractFromJsVar("{$jsAppPath}/js/modules/vbet5/filters/convertsetname.js", 'replacements'); //$e->extractFromTranslationsFile("$jsAppPath/js/modules/vbet5/translations.js"); $e->extractFromJsonFiles("{$jsAppPath}/languages"); $counts = $e->save($withComments); echo "Done. \n" . ($counts['translated'] + $counts['untranslated']) . " strings written to {$language}.po\n"; echo "(" . $counts['translated'] . " translated and " . $counts['untranslated'] . " not translated)\n";
/**
 * getParameterType() must yield false for the 'noType' parameter of the
 * helper class.
 */
public function test_getParameterType_MissingType_ReturnFalse()
{
    $extractor = Extractor::instance();

    $parameterType = $extractor->getParameterType(test_ExtractorTest_HelperClassA::class, 'noType');

    $this->assertFalse($parameterType);
}
//set tickness $temp = $b->totalModified; for ($k = 0; $k < sizeof($b->node); $k++) { $temp -= $b->node[k]->modificationSum; //TODO check accuracy of this if (log($temp, 2) > $this->minThickness) { $b->node[k]->thickness = log($temp, 2); } else { $b->node[k]->thickness = $this->minThickness; } } $this->branches[] = $b; //add this branch to the list } } $extractor = new Extractor(); // echo "Time ~ Position: ".$extractor->timePositionRatio."<br />"; $extractor->recBranch("e99cc29"); //echo "Size of branch is: ".sizeof($extractor->branches)."<br />"; //print_r($extractor->branches); //Traverse the branch data and draw it echo '<script type="text/javascript" charset="utf-8"> window.onload = function () {'; echo "var paper = Raphael(\"chart\", {$extractor->canvasW}, {$extractor->canvasH});\n"; echo "var color0 = Raphael.getColor();"; echo "paper.path(\"M0,450L 2000,450\").attr({stroke: color0, \"stroke-width\": 20});"; echo "var color1 = Raphael.getColor();\n"; echo 'function curve(x, y, ax, ay, bx, by, zx, zy, thickness, color) { paper.path("M" x, y "C", ax, ay, bx, by, zx, zy").attr({stroke: color, "stroke-width": thickness}); }'; for ($i = 0; $i < sizeof($extractor->branches); $i++) {
/**
 * Returns the parent's message splitters plus one extra pattern that
 * matches a literal run of hyphens.
 *
 * @return array
 */
protected function getAllMessageSplitters()
{
    $inherited = parent::getAllMessageSplitters();
    $dashedLine = ['/\\-------------------------/is'];

    return array_merge($inherited, $dashedLine);
}
echo "<tr><td>"; echo $node->id . "</td><td>"; echo $node->author . "</td><td>"; echo $node->authorEmail . "</td><td>N/A</td><td>"; foreach ($node->parentList as $p) { echo "[" . $p . "]\n "; } echo "</td><td>"; echo $node->commitDate . "</td><td>"; echo $node->relativeDate . "</td><td>"; foreach ($node->subsystem as $s) { echo "[" . $s . "]\n "; } echo "</td><td>"; echo $node->modificationSum . "</td><td>"; } echo "</table>"; } } $extractor = new Extractor(); // echo "Time ~ Position: ".$extractor->timePositionRatio."<br />"; // $extractor->recCommitExtractor("e99cc29"); $extractor->startFromLatest(); echo "<pre>"; $extractor->printJSON(); echo "</pre>"; ?> </div> </body> </html>