/**
 * Appends one triple to the output file named by $GLOBALS['filename'].
 *
 * The file is opened in append mode and closed again on every call.
 *
 * Output format is selected by $GLOBALS['outputFormat']:
 *  - anything other than 'csv': one N-Triples style line
 *    (subject, predicate, object, terminated by " .").
 *  - 'csv': one tab separated row consisting of $GLOBALS['modelID'], subject,
 *    predicate, escaped object, language, datatype, subject kind ('b' or 'r'),
 *    $object_is and a trailing \N marker.
 *
 * Every 10000 successful writes a progress line with timing is echoed.
 *
 * @param string      $subject   Subject identifier; written as "_:..." when
 *                               isBlanknote() reports true, as "<...>" otherwise.
 * @param string      $predicate Predicate URI.
 * @param string      $object    Object value.
 * @param string      $object_is 'r' writes the object as "<...>", 'b' as "_:...";
 *                               any other value writes it as a quoted literal.
 * @param string|null $dtype     Datatype URI appended to literals as ^^<...>.
 * @param string|null $lang      Language tag appended to literals as @....
 *
 * @return void
 */
function writeTripel($subject, $predicate, $object, $object_is = 'r', $dtype = NULL, $lang = NULL)
{
    static $start = null, $last = null, $written = 0;

    if ($start === null) {
        $start = microtime(true);
        // Without this the first progress report would measure from time zero.
        $last = $start;
    }

    $handle = fopen($GLOBALS['filename'], 'a');
    if ($handle === false) {
        trigger_error('writeTripel: cannot open ' . $GLOBALS['filename'] . ' for appending', E_USER_WARNING);
        return;
    }

    if ($GLOBALS['outputFormat'] != 'csv') {
        $subjectTerm = isBlanknote($subject) ? "_:{$subject} " : "<{$subject}>\t";

        if ($object_is == 'r') {
            $objectTerm = '<' . $object . '>';
        } elseif ($object_is == 'b') {
            $objectTerm = '_:' . $object;
        } else {
            // strtr() with an array scans the input once and never re-examines
            // text it has inserted, so the backslash -> space rule can no longer
            // destroy the \n and \r escapes produced for line breaks.
            $escaped = strtr((string) $object, array(
                "\\" => ' ',
                "\n" => '\\n',
                "\r" => '\\r',
                "\"" => '\'\'',
            ));
            $objectTerm = "\"" . $escaped . "\""
                . ($lang ? "@{$lang}" : '')
                . ($dtype ? "^^<{$dtype}>" : '');
        }

        $line = $subjectTerm . "<{$predicate}>\t" . $objectTerm . " .\n";
    } else {
        $escaped = strtr((string) $object, array(
            '\\' => '\\\\',
            "\n" => '\\n',
            "\r" => '\\r',
        ));
        $line = $GLOBALS['modelID'] . "\t{$subject}\t{$predicate}\t" . $escaped
            . "\t{$lang}\t{$dtype}\t" . (isBlanknote($subject) ? 'b' : 'r')
            . "\t{$object_is}\t\\N\n";
    }

    fwrite($handle, $line);
    fclose($handle);

    $written++;
    if ($written % 10000 == 0) {
        $now = microtime(true);
        $elapsed = $now - $start;
        $rate = $elapsed > 0 ? round($written / $elapsed) : 0;
        echo "10000 tripel written in " . round($now - $last, 2) . "s (" . $rate . " tripel/s)\n";
        $last = microtime(true);
    }
}
/**
 * Classifies a raw attribute value and works out its datatype.
 *
 * The value is run through a fixed sequence of recognisers; the first one that
 * matches decides the object kind and datatype. The order of the checks is
 * significant and must not be changed. Several recognisers are presumably
 * allowed to rewrite $object by reference (their return values are only used
 * as match flags or datatypes) - confirm against their definitions.
 *
 * Some branches emit triples themselves through writeTripel() and return null:
 * link lists, external links and numbers with references.
 *
 * Emission of explicit type triples (printexplicitTyping) is disabled in this
 * method.
 *
 * @param string      $object    Raw value to classify.
 * @param string      $subject   Subject the value belongs to.
 * @param string      $predicate Predicate the value belongs to.
 * @param mixed       $extractor Passed on by reference to catchLinkList() and
 *                               catchNumberWithReference().
 * @param string|null $language  Language code; used for the Wikipedia image
 *                               description URL and returned unchanged.
 *
 * @return array|null array($object, $object_is, $dtype, $language), or null when
 *                    the value was handled internally or is empty after trimming.
 */
public function parseValue($object, $subject, $predicate, &$extractor, $language = NULL)
{
    $datatype = null;
    $objectKind = 'l';

    if (isBlanknote($object)) {
        $objectKind = 'b';
        $object = str_replace(";", "", $object);
    } elseif (isInt($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif (isIntwithComma($object)) {
        $object = str_replace(",", "", $object);
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif (isFloat($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#decimal';
    } elseif (catchPictureURI($object, $subject)) {
        $objectKind = 'r';
        // Everything after the last slash is taken as the image file name.
        $imageName = substr($object, strrpos($object, '/') + 1);
        $descriptionPage = 'http://' . $language . '.wikipedia.org/wiki/Image:' . $imageName;
        // NOTE(review): 'main' is passed as the 4th argument here; confirm this
        // matches the parameter order of the writeTripel() that is in scope.
        writeTripel($object, 'http://purl.org/dc/terms/rights', $descriptionPage, 'main', $objectKind);
    } elseif (catchMonthYear($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#gYearMonth';
    } elseif (catchDate($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#date';
    } elseif (catchYear($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#gYear';
    } elseif (catchRank($object)) {
        $datatype = $GLOBALS['W2RCFG']['w2ruri'] . 'Rank';
    } elseif (catchLargeNumber($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif ($datatype = catchLargeMoney($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif ($datatype = catchMoneyWoCent($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif ($datatype = catchMoney($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif (catchPercent($object)) {
        $datatype = $GLOBALS['W2RCFG']['w2ruri'] . 'Percent';
    } elseif ($datatype = catchUnited($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif (catchLink($object)) {
        $objectKind = 'r';
    } elseif (catchLinkList($object, $subject, $predicate, $datatype, $extractor)) {
        return null;
    } elseif ($externalLinks = catchExternalLink($object)) {
        foreach ($externalLinks[1] as $linkText) {
            if (strlen($linkText) <= 1) {
                continue;
            }
            // Only the part before the first space is used as the link target.
            $parts = explode(" ", $linkText);
            $target = $parts[0];
            $objectKind = 'r';
            // NOTE(review): 'main' is passed as the 4th argument here; confirm this
            // matches the parameter order of the writeTripel() that is in scope.
            writeTripel($subject, $predicate, $target, 'main', $objectKind);
        }
        return null;
    } elseif (catchNumberWithReference($object, $subject, $predicate, $extractor)) {
        return null;
    } else {
        // Return value is not used; presumably cleans $object by reference.
        removeWikiCode($object);
    }

    if (trim($object) === '') {
        return null;
    }

    return array($object, $objectKind, $datatype, $language);
}
/**
 * Main entry point for turning a raw template attribute value into a typed
 * object: resolves internal links, detects datatypes and classifies the value.
 *
 * Pre-processing, in order:
 *  - [[YYYY]] links (exactly four digits) are reduced to the bare digits,
 *  - remaining internal links are rewritten through convertDBpediaLinks(),
 *  - removeSemanticExtension() is applied (return value unused; presumably
 *    works on $object by reference),
 *  - the "green up" / "red down" image markers are stripped.
 *
 * The value is then run through a fixed sequence of recognisers; the first
 * match decides object kind and datatype. The order of the checks is
 * significant and must not be changed. Link lists, external links and numbers
 * with references are handled internally and make the function return null.
 *
 * Depending on $GLOBALS['addExplicitTypeTriples'] and
 * $GLOBALS['correctPropertyType'], printexplicitTyping() is called and may
 * replace the detected object kind.
 *
 * @param string $object    Raw attribute value.
 * @param string $subject   Subject the value belongs to.
 * @param string $predicate Predicate the value belongs to.
 *
 * @return array|null array($object, $object_is, $dtype, $lang) where $lang is
 *                    always null, or null when the value was handled internally
 *                    or is empty after trimming.
 */
function parseAttributeValue($object, $subject, $predicate)
{
    // Generate DBpedia links from internal links.
    $object = preg_replace('/\\[\\[([0-9]{4})\\]\\]/', '$1', $object);
    $object = preg_replace_callback('/\\[\\[([^\\:\\|\\]]+)(\\|[^\\]]+)?\\]\\]/', 'convertDBpediaLinks', $object);
    removeSemanticExtension($object);
    $object = str_replace(
        array("[[image:green up.png]]", "[[image:red down.png]]"),
        "",
        $object
    );

    $datatype = null;
    $language = null;
    $objectKind = 'l';

    if (isBlanknote($object)) {
        $objectKind = 'b';
        $object = str_replace(";", "", $object);
    } elseif (isInt($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif (isIntwithComma($object)) {
        $object = str_replace(",", "", $object);
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif (isFloat($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#decimal';
    } elseif (catchPictureURI($object)) {
        $objectKind = 'r';
    } elseif (catchMonthYear($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#gYearMonth';
    } elseif (catchDate($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#date';
    } elseif (catchYear($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#gYear';
    } elseif (catchRank($object)) {
        $datatype = $GLOBALS['W2RCFG']['w2ruri'] . 'Rank';
    } elseif (catchLargeNumber($object)) {
        $datatype = 'http://www.w3.org/2001/XMLSchema#integer';
    } elseif ($datatype = catchLargeMoney($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif ($datatype = catchMoneyWoCent($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif ($datatype = catchMoney($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif (catchPercent($object)) {
        $datatype = $GLOBALS['W2RCFG']['w2ruri'] . 'Percent';
    } elseif ($datatype = catchUnited($object)) {
        // The datatype is whatever the recogniser returned.
    } elseif (catchLink($object)) {
        $objectKind = 'r';
    } elseif (catchLinkList($object, $subject, $predicate, $datatype)) {
        return null;
    } elseif ($externalLinks = catchExternalLink($object)) {
        foreach ($externalLinks[1] as $linkText) {
            if (strlen($linkText) <= 1) {
                continue;
            }
            // Only the part before the first space is used as the link target.
            $parts = explode(" ", $linkText);
            $target = $parts[0];
            $objectKind = 'r';
            // NOTE(review): 'main' is passed as the 4th argument here; confirm this
            // matches the parameter order of the writeTripel() that is in scope.
            writeTripel($subject, $predicate, $target, 'main', $objectKind);
        }
        return null;
    } elseif (catchNumberWithReference($object, $subject, $predicate)) {
        return null;
    } else {
        // Return value is not used; presumably cleans $object by reference.
        removeWikiCode($object);
    }

    if ($GLOBALS['addExplicitTypeTriples']) {
        printexplicitTyping($predicate, $GLOBALS['filename'], 'p', $objectKind);
    }
    if ($GLOBALS['addExplicitTypeTriples'] && $GLOBALS['correctPropertyType']) {
        // The typing step may correct the object kind.
        $objectKind = printexplicitTyping($predicate, $GLOBALS['filename'], 'p', $objectKind);
    }

    if (trim($object) === '') {
        return null;
    }

    return array($object, $objectKind, $datatype, $language);
}