Exemplo n.º 1
0
 /**
  * Initialises the predicates used for short and long abstracts.
  *
  * The constructor takes no arguments. The short predicate is built from
  * the RDFS_COMMENT constant and the long predicate from the name "abstract".
  */
 public function __construct()
 {
     parent::__construct();

     // Neither predicate needs validation.
     $shortAbstractPredicate = RDFtriple::URI(RDFS_COMMENT, false);
     $this->shortPredicate = $shortAbstractPredicate;

     $longAbstractPredicate = RDFtriple::predicate("abstract");
     $this->longPredicate = $longAbstractPredicate;
 }
Exemplo n.º 2
0
	/**
	 * Handles an AJAX request, if the current request is one.
	 *
	 * When the XHR header is present, the posted language is applied, the
	 * posted action ('get', 'extract' or 'save') is carried out, and a JSON
	 * response with the dictionaries is sent before the script terminates.
	 * For any other request the object is only frozen.
	 *
	 * @param \Nette\DI\IContainer $container provides httpRequest and httpResponse
	 */
	protected function processRequests(\Nette\DI\IContainer $container)
	{
		$this->updating();

		$request = $container->httpRequest;
		$reply = $container->httpResponse;

		// Not an AJAX call: nothing to answer, just freeze and leave.
		if (!$request->getHeader(self::XHR_HEADER, FALSE)) {
			$this->freeze();
			return;
		}

		$payload = FALSE;
		$this->translator->setLang($request->getPost('lang'));

		// Loose comparison on purpose, mirroring switch semantics.
		$action = $request->getPost('action');
		if ($action == 'get') {
			// Nothing to do besides returning the dictionaries below.
		} elseif ($action == 'extract') {
			$this->extractor->run();
		} elseif ($action == 'save') {
			$payload = $request->getPost('data', "");
		}

		$dictionaries = $this->getDictionaries($payload);

		$body = array(
			'status' => "OK",
			'lang' => $this->translator->dictionaries,
			'data' => $dictionaries,
		);
		$json = new \Nette\Application\Responses\JsonResponse($body);
		$json->send($request, $reply);

		// Stop here so nothing else is rendered after the JSON payload.
		exit(255);
	}
Exemplo n.º 3
0
 /**
  * Opens the given file and extracts it according to its extension.
  *
  * Prints an error and terminates the script when the extension is not
  * a key of $this->formats.
  *
  * @param string $filename file to extract
  */
 public function __construct($filename)
 {
     parent::__construct($filename);

     $extension = $this->getExtension($filename);

     // Known format: extract and finish.
     if (isset($this->formats[$extension])) {
         $this->extract($extension);
         return;
     }

     echo "error: Unknown file format [{$extension}]\n";
     exit;
 }
 /**
  * Processes the message lines and strips a trailing default signature.
  *
  * After the parent processing, the last line of the body is inspected;
  * when it starts with "sent from" (case-insensitive) the body is cut at
  * that line. Finally the regular signature stripping is applied.
  *
  * @return void
  */
 protected function processLines()
 {
     parent::processLines();

     // Pattern of the default signature line.
     $match_string = '^sent from(.*?)';

     // getLinesFromEnd(1) presumably yields the trailing line(s) and the
     // index at which they start -- TODO confirm against its definition.
     list($default_signature, $cut_line) = self::getLinesFromEnd(1);

     // Use an explicit empty separator: passing null to implode() is
     // deprecated since PHP 8.1.
     $default_signature = implode('', $default_signature);

     if (preg_match('/' . $match_string . '/is', $default_signature)) {
         // Keep only the lines that precede the signature.
         $this->body = array_slice($this->body, 0, $cut_line);
     }

     $this->stripSignature();
 }
Exemplo n.º 5
0
 /**
  * {@inheritDoc}
  *
  * Returns null for unsupported fields. A supported field that has not been
  * loaded yet is fetched through getLoaderData() and, when a normalizer is
  * registered for it in $this->normalizersMap, passed through the method
  * "<name>Normalizer" before being stored in $this->fields.
  *
  * @param string $field
  */
 public function getField($field)
 {
     if (!$this->isFieldSupported($field)) {
         return null;
     }

     // Already loaded: the parent can serve it directly.
     if ($this->hasLoadedField($field)) {
         return parent::getField($field);
     }

     $value = $this->getLoaderData($field);

     if (isset($this->normalizersMap[$field])) {
         $normalizerMethod = sprintf('%sNormalizer', $this->normalizersMap[$field]);
         $value = $this->{$normalizerMethod}($value);
     }

     $this->fields[$field] = $value;

     return parent::getField($field);
 }
Exemplo n.º 6
0
    /**
     * Builds the extractor under test from a small fixed HTML document.
     */
    public function setUp()
    {
        // Fixture markup; the \t sequences are expanded to tabs by the heredoc.
        $fixtureHtml = <<<HTML
<html>
\t<body>
\t\t<span class="s1">foo</span>
\t\t<div class="c1">
\t\t\t<div>xxx</div>
\t\t\t<div>
\t\t\t\t<span>yyy</span>
\t\t\t\t<span>zzz</span>
\t\t\t\t<span class="s1">baz</span>
\t\t\t</div>
\t\t</div>
\t</body>
</html>
HTML;

        $this->sut = Extractor::fromHtml($fixtureHtml);
    }
 /**
  * Extract reply from Android mail client.
  *
  * After the parent processing, two trailing artefacts are removed in turn:
  * a last line ending in "wrote:" (the quote attribution) and a last line
  * starting with "sent from" (the default signature). Finally the regular
  * signature stripping is applied.
  *
  * @return void
  */
 public function processLines()
 {
     parent::processLines();

     // getLinesFromEnd(1) presumably yields the trailing line(s) and the
     // index at which they start -- TODO confirm against its definition.
     list($unwanted_text, $cut_line) = self::getLinesFromEnd(1);

     // Use an explicit empty separator: passing null to implode() is
     // deprecated since PHP 8.1.
     $unwanted_text = implode('', $unwanted_text);

     // strip 'first name last name wrote:'
     if (preg_match('/(.*?)wrote:/is', $unwanted_text)) {
         $this->body = array_slice($this->body, 0, $cut_line);
     }

     // Pattern of the default signature line.
     $match_string = '^sent from(.*?)';

     // Re-read the last line: the body may have been shortened above.
     list($default_signature, $cut_line) = self::getLinesFromEnd(1);
     $default_signature = implode('', $default_signature);

     if (preg_match('/' . $match_string . '/is', $default_signature)) {
         // Keep only the lines that precede the signature.
         $this->body = array_slice($this->body, 0, $cut_line);
     }

     $this->stripSignature();
 }
Exemplo n.º 8
0
        foreach ($this->nodeList as $node) {
            echo "<tr><td>";
            echo $node->id . "</td><td>";
            echo $node->author . "</td><td>";
            echo $node->authorEmail . "</td><td>N/A</td><td>";
            foreach ($node->parentList as $p) {
                echo "[" . $p . "]\n ";
            }
            echo "</td><td>";
            echo $node->commitDate . "</td><td>";
            echo $node->relativeDate . "</td><td>";
            foreach ($node->subsystem as $s) {
                echo "[" . $s . "]\n ";
            }
            echo "</td><td>";
            echo $node->modificationSum . "</td><td>";
        }
        echo "</table>";
    }
}
// Walk the commits starting at a fixed hash, then print the result via
// printJSON() wrapped in a <pre> element.
$extractor = new Extractor();
//	echo "Time ~ Position: ".$extractor->timePositionRatio."<br />";
$extractor->recCommitExtractor('e99cc29');

echo '<pre>';
$extractor->printJSON();
echo '</pre>';
?>
</div>
</body>
</html>
 /**
  * Converts HTML to plain text after normalising the markup.
  *
  * <span> tags are turned into <p> tags and a signature block
  * (<div class="signature">...</div> on a single line) is removed before
  * the parent conversion runs.
  *
  * @param string $html
  *
  * @return string
  */
 public static function toPlainText($html)
 {
     // Rewrite only the tag names. A plain substring replacement of "span"
     // would also corrupt ordinary words such as "lifespan".
     $html = preg_replace('/<(\/?)span\b/i', '<${1}p', $html);
     $html = preg_replace('/<div class="signature".+<\\/div>/', '', $html);
     return parent::toPlainText($html);
 }
 /**
  * Removes the default Outlook for Mac signature block, then converts the
  * remaining HTML to plain text via the parent implementation.
  *
  * @param string $html
  *
  * @return string
  */
 public static function toPlainText($html)
 {
     // The signature lives in a div with the id MAC_OUTLOOK_SIGNATURE.
     $signaturePattern = '/<div id="MAC_OUTLOOK_SIGNATURE".+<\\/div>/';
     $withoutSignature = preg_replace($signaturePattern, '', $html);

     return parent::toPlainText($withoutSignature);
 }
Exemplo n.º 11
0
 /**
  * Constructs a new GeoExtractor.
  *
  * The constructor takes no arguments; the batch-extraction flag is read
  * from the 'Geo.batchextraction' option. According to the original notes,
  * a <code>true</code> value clears the extraction result table for the
  * respective language on <code>start()</code> and verifies entries for
  * duplicates, and <code>false</code> should be used for extraction previews.
  */
 public function __construct()
 {
     parent::__construct();

     $batchFlag = Options::getOption('Geo.batchextraction');
     $this->batchExtraction = $batchFlag;
 }
// $url2 = "http://we.keepitsimple.co.il/user/38";
// $url = "http://www.hasut.co.il/24683";
// $url = "http://we.keepitsimple.co.il/user/149/";
// $url = 'http://www.b144.co.il/BusinessResults.aspx?_business=%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA&_page_no=1';
// $url = "https://www.t.co.il/531-%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA.html";
// $url = 'http://www.b144.co.il/BusinessResults.aspx?TS01568688_id=3&_business=%D7%A7%D7%95%D7%A1%D7%9E%D7%98%D7%99%D7%A7%D7%90%D7%99%D7%95%D7%AA&_page_no=1';
// $url = "http://www.imakeup.co.il/Suppliers/2/%D7%90%D7%99%D7%A4%D7%95%D7%A8+%D7%9C%D7%90%D7%99%D7%A8%D7%95%D7%A2%D7%99%D7%9D.html";
// Page whose e-mail addresses and phone numbers are collected.
$url = 'http://www.lawyerinfo.co.il/lawyer/5332-%D7%A2%D7%99%D7%9C%D7%99-%D7%90%D7%91%D7%99%D7%90%D7%9C';

// Result containers and the list of URLs to visit.
$emailList = array();
$phoneList = array();
$urlList = array($url);

// test
// $extractor = new Extractor($url);
// $urlList = array_merge($urlList, $extractor -> getAllURLFromHTML());
// print var_dump($urlList);

$extractor = new Extractor($url);
// Both calls presumably fill the passed list by reference -- TODO confirm.
$extractor->getEmailFromHTML($emailList);
$extractor->getNumbersFromHTML($phoneList);

// var_dump() writes its output directly, so no surrounding print is needed.
var_dump($emailList);
var_dump($phoneList);
// $extractor = new Extractor($url2);
// $extractor -> getEmailFromHTML($emailList);
// $extractor -> getNumbersFromHTML($phoneList);
// print var_dump($emailList);
// print var_dump($phoneList);
// ---- main script
// if ($option == 'deep') {
// 	$extractor = new Extractor($url);
// 	$urlList = array_merge($urlList, $extractor -> getAllURLFromHTML());
// }
// for ($i = 0; $i < count($urlList); $i++) {
Exemplo n.º 13
0
// Extractor instance; assigned further down once the database connection exists.
$extractor = null;
// Directory paths used by this script.
$uttrekkDirectory = "./uttrekksfiler";
$noarkIHoutputDir = "./";
// Main program starts here
$uttrekkMySQLBase = null;
try {
    $uttrekkMySQLBase = new UtrekkMySQLBase($uttrekk_db_host, $uttrekk_db_user, $uttrekk_db_pswd, $uttrekk_db_database);
} catch (Exception $e) {
    // Report the failure; $uttrekkMySQLBase stays null and is handled below.
    echo $e->getMessage();
}
// Without a connection to the extraction database nothing else can run.
if ($uttrekkMySQLBase == null) {
    echo "Problem med kobling til Uttrekksbasen\n";
    return;
}
$databaseParameters = new MySQLDBParameters($uttrekk_db_host, 3306, $uttrekk_db_database, $uttrekk_db_user, $uttrekk_db_pswd);
$extractor = new Extractor("mysql", $databaseParameters, $uttrekkDirectory);
// Start from a clean output directory.
$extractor->deleteDirectoryAndContents();
$extractor->createDirectory();
$logDir = $extractor->getLogDir();
$logger = new Logger($logDir, false, false, true);
echo "\nSlettet gamle filer (hvis de eksisterte) og oppretter mappe for uttrekk ({$uttrekkDirectory}) \n";
$noark4DatabaseStruktur = new Noark4DatabaseStruktur();
// Some administrative work first: drop the database if it exists, create a new empty one and create all the tables
echo "Sletter MySQL midlertidig Noark 4 base. Resultatet er (";
$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database));
echo ($val == true ? 'OK' : 'Feil') . ");\n";
echo "Oppretter MySQL midlertidig Noark 4 base. Resultatet er (";
$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->createDatabaseStatement($uttrekk_db_database));
echo ($val == true ? 'OK' : 'Feil') . ");\n";
$uttrekkMySQLBase->setDefaultDatabase();
// NOTE(review): the delete-database statement is executed again right after
// the database was created and selected -- confirm this is intended.
$uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database));
Exemplo n.º 14
0
        } else {
            $this->visited[] = "{$nodeId}";
            if ($cNode->authorDate < $since | $this->dummyCount > 100) {
                $cNode->parentList = array();
                $this->nodeList[] = $cNode;
                //echo "too old".$nodeId;
            } else {
                $this->nodeList[] = $cNode;
                foreach ($cNode->parentList as $p) {
                    $this->recCommitExtractor($p);
                }
            }
        }
    }
}
// Walk the commits starting at a fixed hash; the collected nodes are
// rendered as table rows by the code that follows.
$extractor = new Extractor();
//	echo "Time ~ Position: ".$extractor->timePositionRatio."<br />";
$extractor->recCommitExtractor("e99cc29");
//List the node info in a table
// Open the table without a stray <tr>: the header row below opens its own
// row and is now closed, so the data rows that follow are not nested in it.
echo "<table width='90%'>";
echo "<tr><th>SHAW</th><th>Author</th><th>Author Email</th><th>Sign offs</th><th>Parents</th><th>Commit Date</th><th>Relative Date</th><th>Subsystem</th><th>Modification Sum</th></tr>";
foreach ($extractor->nodeList as $node) {
    echo "<tr><td>";
    echo $node->id . "</td><td>";
    echo $node->author . "</td><td>";
    echo $node->authorEmail . "</td><td>N/A</td><td>";
    foreach ($node->parentList as $p) {
        echo "[" . $p . "]\n ";
    }
    echo "</td><td>";
    echo $node->commitDate . "</td><td>";
<?php

require_once "MySQLDBParameters.php";
require_once "Extractor.php";
require_once "Constants.php";
// NOTE(review): the database credentials are hard-coded here; consider
// moving them to configuration.
$dbParams = new MySQLDBParameters("localhost", 3306, "utmo_noark5", "root", "1234haha");
$extractor = new Extractor('mysql', $dbParams);

// Every row of the "mappe" table is exported.
$sqlQuery = "SELECT * FROM mappe";

// Mapping passed to extract(): id column, XML tag names, encoder callback
// and the element-name => column-name pairs.
$mapping = array(
    'idColumn' => 'tj_peid',
    'rootTag' => 'ARKIV.TAB',
    'rowTag' => 'ARKIV',
    'encoder' => 'utf8_decode',
    'elements' => array(
        'SYSTEM' => 'systemId',
        'TITTEL' => 'tittel',
        'OFFTITT' => 'offentligTittel',
        'MEDIUM' => 'dokumentmedium',
        'OPPDAT' => 'opprettetDato',
        'AVDATO' => 'avsluttetDato',
    ),
);

$extractor->extract($sqlQuery, $mapping, "arkiv.xml", "file");
Exemplo n.º 16
0
// Open the source database and the extraction database.
// NOTE(review): if a constructor throws, the corresponding variable is only
// defined when it was initialised before this point -- confirm against the
// lines preceding this block.
try {
    $srcBase = new SrcBase($src_db_host, $src_db_port, $src_db_name, $src_db_user, $src_db_pswd, $src_db_sid);
    $uttrekkMySQLBase = new UtrekkMySQLBase($uttrekk_db_host, $uttrekk_db_user, $uttrekk_db_pswd, $uttrekk_db_database);
} catch (Exception $e) {
    // Report the failure; the null checks below stop the script.
    echo $e->getMessage();
}
// Both connections are required; stop when either is missing.
if ($srcBase == null) {
    echo "Problem med kobling til kildebasen\n";
    return;
}
if ($uttrekkMySQLBase == null) {
    echo "Problem med kobling til Uttrekksbasen\n";
    return;
}
$databaseParameters = new MySQLDBParameters($uttrekk_db_host, 3306, $uttrekk_db_database, $uttrekk_db_user, $uttrekk_db_pswd);
$extractor = new Extractor("mysql", $databaseParameters, $uttrekkDirectory);
// Start from a clean output directory.
$extractor->deleteDirectoryAndContents();
$extractor->createDirectory();
$logDir = $extractor->getLogDir();
$logger = new Logger($logDir, false, false, true);
echo "\nSlettet gamle filer (hvis de eksisterte) og oppretter mappe for uttrekk ({$uttrekkDirectory}) \n";
$noark4DatabaseStruktur = new Noark4DatabaseStruktur();
// Temporarily commented out because rebuilding everything takes too long
/*
	// Noe administrativt arbeid først, slett databasen om den eksisterer, lag en ny tom en og lag alle tabellene
	echo "Sletter MySQL midlertidig Noark 4 base. Resultatet er (";
	$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->deleteDatabaseStatement($uttrekk_db_database));
	echo ($val == true  ? 'OK' : 'Feil' ) . ");\n";
	echo "Oppretter MySQL midlertidig Noark 4 base. Resultatet er (";
	$val = $uttrekkMySQLBase->executeStatement($noark4DatabaseStruktur->createDatabaseStatement($uttrekk_db_database));
	echo ($val == true  ? 'OK' : 'Feil' ) . ");\n";
Exemplo n.º 17
0
            }
            closedir($handle);
        } else {
            echo "cannot open dir {$path}";
        }
    }
}
// Command-line entry point: argv[1] is the language (required), argv[2] an
// optional "with comments" flag and argv[3] an optional skin name.
if (!isset($argv[1])) {
    echo "Language not specified! \nUsage: " . $argv[0] . " language\n\n";
    exit(-1);
}
$language = $argv[1];
// The remaining arguments are optional: fall back to defaults instead of
// reading undefined offsets, which raises warnings.
$withComments = isset($argv[2]) ? (bool) $argv[2] : false;
$skin = isset($argv[3]) ? $argv[3] : null;
$jsAppPath = realpath(dirname(__FILE__) . '/../../');
echo "Extracting translateable string for [{$language}]\n";
$e = new Extractor();
$e->setLanguage($language);
// With a skin given, only that skin's templates are processed and saved.
if ($skin) {
    $e->extractFromTpl($jsAppPath . '/skins/' . $skin . '/templates/');
    $counts = $e->save($withComments, $skin);
    die;
}
$e->extractFromTpl("{$jsAppPath}/templates/");
$e->extractFromJsFilterOrServiceUsage("{$jsAppPath}/js/");
$e->extractFromJsVar("{$jsAppPath}/js/modules/vbet5/filters/convertsetname.js", 'replacements');
//$e->extractFromTranslationsFile("$jsAppPath/js/modules/vbet5/translations.js");
$e->extractFromJsonFiles("{$jsAppPath}/languages");
$counts = $e->save($withComments);
// Summary built from the 'translated' and 'untranslated' counts returned by save().
echo "Done. \n" . ($counts['translated'] + $counts['untranslated']) . " strings written to {$language}.po\n";
echo "(" . $counts['translated'] . " translated and " . $counts['untranslated'] . " not translated)\n";
Exemplo n.º 18
0
 /**
  * getParameterType() must yield false for the 'noType' parameter of the helper class.
  */
 public function test_getParameterType_MissingType_ReturnFalse()
 {
     $extractor = Extractor::instance();
     $resolvedType = $extractor->getParameterType(test_ExtractorTest_HelperClassA::class, 'noType');

     $this->assertFalse($resolvedType);
 }
Exemplo n.º 19
0
        //set tickness
        $temp = $b->totalModified;
        for ($k = 0; $k < sizeof($b->node); $k++) {
            $temp -= $b->node[k]->modificationSum;
            //TODO check accuracy of this
            if (log($temp, 2) > $this->minThickness) {
                $b->node[k]->thickness = log($temp, 2);
            } else {
                $b->node[k]->thickness = $this->minThickness;
            }
        }
        $this->branches[] = $b;
        //add this branch to the list
    }
}
// Collect the branches starting at a fixed commit, then emit the opening part
// of the JavaScript that draws them with Raphael.
$extractor = new Extractor();
//	echo "Time ~ Position: ".$extractor->timePositionRatio."<br />";
$extractor->recBranch("e99cc29");
//echo "Size of branch is: ".sizeof($extractor->branches)."<br />";
//print_r($extractor->branches);
//Traverse the branch data and draw it
echo '<script type="text/javascript" charset="utf-8">
		window.onload = function () {';
echo "var paper = Raphael(\"chart\", {$extractor->canvasW}, {$extractor->canvasH});\n";
echo "var color0 = Raphael.getColor();";
echo "paper.path(\"M0,450L 2000,450\").attr({stroke: color0, \"stroke-width\": 20});";
echo "var color1 = Raphael.getColor();\n";
// The path argument is assembled with join(): the previous literal lacked
// concatenation operators and had an unbalanced quote, which made the whole
// emitted script a JavaScript syntax error.
echo 'function curve(x, y, ax, ay, bx, by, zx, zy, thickness, color) {
                    paper.path(["M", x, y, "C", ax, ay, bx, by, zx, zy].join(" ")).attr({stroke: color, "stroke-width": thickness});
}';
for ($i = 0; $i < sizeof($extractor->branches); $i++) {
 /**
  * Returns the parent's message splitters plus a pattern matching a line
  * of dashes.
  *
  * @return array
  */
 protected function getAllMessageSplitters()
 {
     $inheritedSplitters = parent::getAllMessageSplitters();
     $dashedLineSplitter = '/\\-------------------------/is';

     return array_merge($inheritedSplitters, [$dashedLineSplitter]);
 }
Exemplo n.º 21
0
            echo "<tr><td>";
            echo $node->id . "</td><td>";
            echo $node->author . "</td><td>";
            echo $node->authorEmail . "</td><td>N/A</td><td>";
            foreach ($node->parentList as $p) {
                echo "[" . $p . "]\n ";
            }
            echo "</td><td>";
            echo $node->commitDate . "</td><td>";
            echo $node->relativeDate . "</td><td>";
            foreach ($node->subsystem as $s) {
                echo "[" . $s . "]\n ";
            }
            echo "</td><td>";
            echo $node->modificationSum . "</td><td>";
        }
        echo "</table>";
    }
}
// Start the extraction via startFromLatest(), then print the result via
// printJSON() wrapped in a <pre> element.
$extractor = new Extractor();
//	echo "Time ~ Position: ".$extractor->timePositionRatio."<br />";
//	$extractor->recCommitExtractor("e99cc29");
$extractor->startFromLatest();

echo '<pre>';
$extractor->printJSON();
echo '</pre>';
?>
</div>
</body>
</html>