/**
 * Writes one sub-category triple for every category page / category link pair
 * found in the database (pages in namespace 14 joined with categorylinks).
 *
 * The rows can be restricted through two optional configuration entries:
 *  - $GLOBALS['W2RCFG']['categories']:        regex fragments; a row is kept when
 *    the decoded page title matches one of them as a whole (case-insensitive).
 *  - $GLOBALS['W2RCFG']['categoriesPattern']: shell wildcard patterns; a row is
 *    kept when the decoded page title matches at least one of them.
 * When both entries are empty every row is written.
 *
 * @return void
 */
function extractCategoryRelationships()
{
    $categories = empty($GLOBALS['W2RCFG']['categories']) ? array() : (array) $GLOBALS['W2RCFG']['categories'];
    $patterns = empty($GLOBALS['W2RCFG']['categoriesPattern']) ? array() : (array) $GLOBALS['W2RCFG']['categoriesPattern'];
    $unfiltered = !$categories && !$patterns;

    // Build the filter once instead of once per row. implode() is called with
    // the separator first (the only order PHP 8 accepts), and the alternation
    // is grouped so that ^ and $ apply to every alternative, not only to the
    // first and the last one.
    $categoryRegex = $categories ? '~^(?:' . implode('|', $categories) . ')$~i' : null;

    $res = mysql_query('SELECT page_title,cl_to FROM page INNER JOIN categorylinks ON(page_id=cl_from) WHERE page_namespace=14');
    if ($res === false) {
        // Query failed: there is nothing to iterate over.
        return;
    }

    $categoryBase = $GLOBALS['W2RCFG']['wikipediaBase'] . $GLOBALS['categoryLabel'] . ':';

    while ($row = mysql_fetch_array($res)) {
        if (!$unfiltered) {
            $localName = decodeLocalName($row[0]);
            $selected = $categoryRegex !== null && preg_match($categoryRegex, $localName);
            if (!$selected) {
                // fnmatch() has no alternation operator, so each configured
                // pattern is tried on its own.
                foreach ($patterns as $pattern) {
                    if (fnmatch($pattern, $localName)) {
                        $selected = true;
                        break;
                    }
                }
            }
            if (!$selected) {
                continue;
            }
        }
        writeTripel($categoryBase . $row[0], $GLOBALS['W2RCFG']['subCategoryProperty'], $categoryBase . $row[1]);
    }
}
Ejemplo n.º 2
0
 /**
  * Classifies a raw attribute value and works out its object kind and datatype.
  *
  * The detectors are tried in a fixed order and the first one that succeeds
  * decides the outcome. Some detectors write their own triples and make this
  * method return null; the remaining ones only set the kind marker
  * ('l', 'b' or 'r' - presumably literal, blank node and resource; confirm
  * against writeTripel()) and/or the datatype URI.
  *
  * @param mixed $object    Raw value to classify.
  * @param mixed $subject   Subject of the triple the value belongs to.
  * @param mixed $predicate Predicate of the triple the value belongs to.
  * @param mixed $extractor Forwarded to catchLinkList() and catchNumberWithReference().
  * @param mixed $language  Language code; used for the image description URL and returned unchanged.
  *
  * @return array|null array($object, $object_is, $dtype, $language), or null when a
  *                    detector already handled the value or the value is blank after trimming.
  */
 public function parseValue($object, $subject, $predicate, &$extractor, $language = NULL)
 {
     $dtype = null;
     $object_is = 'l';

     // One flat chain instead of nested else-blocks; the evaluation order of the
     // detectors is significant and must not be rearranged.
     if (isBlanknote($object)) {
         $object_is = 'b';
         $object = str_replace(";", "", $object);
     } elseif (isInt($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#integer';
     } elseif (isIntwithComma($object)) {
         $object = str_replace(",", "", $object);
         $dtype = 'http://www.w3.org/2001/XMLSchema#integer';
     } elseif (isFloat($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#decimal';
     } elseif (catchPictureURI($object, $subject)) {
         // Pictures additionally get a rights triple pointing at their description page.
         $object_is = 'r';
         $image = substr($object, strrpos($object, '/') + 1);
         $wikipediaImageDescription = 'http://' . $language . '.wikipedia.org/wiki/Image:' . $image;
         writeTripel($object, 'http://purl.org/dc/terms/rights', $wikipediaImageDescription, 'main', $object_is);
     } elseif (catchMonthYear($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#gYearMonth';
     } elseif (catchDate($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#date';
     } elseif (catchYear($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#gYear';
     } elseif (catchRank($object)) {
         $dtype = $GLOBALS['W2RCFG']['w2ruri'] . 'Rank';
     } elseif (catchLargeNumber($object)) {
         $dtype = 'http://www.w3.org/2001/XMLSchema#integer';
     } elseif ($dtype = catchLargeMoney($object)) {
         // The detector's return value is the datatype.
     } elseif ($dtype = catchMoneyWoCent($object)) {
         // The detector's return value is the datatype.
     } elseif ($dtype = catchMoney($object)) {
         // The detector's return value is the datatype.
     } elseif (catchPercent($object)) {
         $dtype = $GLOBALS['W2RCFG']['w2ruri'] . 'Percent';
     } elseif ($dtype = catchUnited($object)) {
         // The detector's return value is the datatype.
     } elseif (catchLink($object)) {
         $object_is = 'r';
     } elseif (catchLinkList($object, $subject, $predicate, $dtype, $extractor)) {
         return null;
     } elseif ($list = catchExternalLink($object)) {
         // Each captured external link is written as its own triple; only the
         // part before the first space is used as the object.
         foreach ($list[1] as $candidate) {
             if (strlen($candidate) > 1) {
                 $parts = explode(" ", $candidate);
                 $object = $parts[0];
                 $object_is = 'r';
                 writeTripel($subject, $predicate, $object, 'main', $object_is);
                 unset($object);
             }
         }
         return null;
     } elseif (catchNumberWithReference($object, $subject, $predicate, $extractor)) {
         return null;
     } else {
         removeWikiCode($object);
     }

     //if ($GLOBALS['addExplicitTypeTriples'])
     //	printexplicitTyping($predicate,$GLOBALS['filename'],'p',$object_is);
     //if ($GLOBALS['addExplicitTypeTriples']&&$GLOBALS['correctPropertyType'])
     //	$object_is=printexplicitTyping($predicate,$GLOBALS['filename'],'p',$object_is);

     // A value that is blank after trimming yields no result.
     return strlen(trim($object)) < 1 ? null : array($object, $object_is, $dtype, $language);
 }
Ejemplo n.º 3
0
/**
 * Emits the explicit type triple(s) for a category, template or predicate the
 * first time the name is seen for a given file, and remembers the object kind
 * recorded for it.
 *
 * @param string $name      Name of the category, template or predicate.
 * @param string $filename  Key under which already-seen names are tracked.
 * @param string $name_is   'c' (category), 't' (template) or 'p' (predicate).
 * @param string $object_is Object kind to remember for the name; 'n' means "none"
 *                          and is not stored.
 *
 * @return mixed null when the name is seen for the first time (or $name_is is not
 *               one of the supported markers); otherwise the object kind stored on
 *               the first call, or null when none was stored.
 */
function printexplicitTyping($name, $filename, $name_is, $object_is = 'n')
{
    static $namearray = array();
    static $predicatetypearray = array();

    $suffixes = array('c' => ':Cat', 't' => ':Temp', 'p' => ':Pred');
    if (!is_string($name_is) || !isset($suffixes[$name_is])) {
        // Unsupported marker: previously this fell through with an undefined
        // $save; there is nothing meaningful to record or emit.
        return null;
    }
    $save = $name . $suffixes[$name_is];

    // Make sure the per-file list exists before it is searched.
    if (!isset($namearray[$filename])) {
        $namearray[$filename] = array();
    }

    if (arrayMultiSearch($save, $namearray[$filename])) {
        // Already handled: report the stored kind, if one was recorded.
        return isset($predicatetypearray[$save][$filename]['is'])
            ? $predicatetypearray[$save][$filename]['is']
            : null;
    }

    $namearray[$filename][] = $save;
    if ($object_is != 'n') {
        $predicatetypearray[$save][$filename]['is'] = $object_is;
    }

    // 'type' when the configured property is the rdf:type property itself,
    // 'main' otherwise (the property then needs explicit typing of its own).
    $filedecisionTemplate = $GLOBALS['rdftypeProperty'] != $GLOBALS['W2RCFG']['templateProperty'] ? 'main' : 'type';
    $filedecisionCategory = $GLOBALS['rdftypeProperty'] != $GLOBALS['W2RCFG']['categoryProperty'] ? 'main' : 'type';

    if ($name_is === 'c') {
        if ($filedecisionCategory == 'type') {
            writeTripel($name, $GLOBALS['W2RCFG']['categoryProperty'], $GLOBALS['W2RCFG']['classBase'], 'type');
        } else {
            printexplicitTyping($GLOBALS['W2RCFG']['categoryProperty'], $filename, 'p', 'r');
        }
    } elseif ($name_is === 't') {
        if ($filedecisionTemplate == 'type') {
            writeTripel($GLOBALS['W2RCFG']['wikipediaBase'] . $GLOBALS['templateLabel'] . ':' . $name, $GLOBALS['rdftypeProperty'], $GLOBALS['W2RCFG']['classBase'], 'type');
        } else {
            printexplicitTyping($GLOBALS['W2RCFG']['templateProperty'], $filename, 'p', 'r');
        }
    } else {
        // Predicate: kind 'l' selects the datatype property base, any other
        // kind the object property base.
        writeTripel($name, $GLOBALS['rdftypeProperty'], $object_is == 'l' ? $GLOBALS['W2RCFG']['datatypePropertyBase'] : $GLOBALS['W2RCFG']['objectPropertyBase'], 'type');
    }

    return null;
}
Ejemplo n.º 4
0
 //$GLOBALS['W2RCFG']['allowedtags'] = $GLOBALS['W2RCFG']['allowedtags']."<ref> </ref>";
 $tpl = strip_tags($tpl, $GLOBALS['W2RCFG']['allowedtags']);
 //$GLOBALS['W2RCFG']['allowedtags'] = str_replace("<ref>","",$GLOBALS['W2RCFG']['allowedtags']);
 //$GLOBALS['W2RCFG']['allowedtags'] = str_replace("</ref>","",$GLOBALS['W2RCFG']['allowedtags']);
 if ($templateCount > 1 && strlen($tmpTemplateName) > 1) {
     if (!isset($knownTemplates[$tmpTemplateName])) {
         $knownTemplates[$tmpTemplateName] = 1;
     } else {
         $knownTemplates[$tmpTemplateName]++;
     }
     $subject = $GLOBALS['W2RCFG']['wikipediaBase'] . $page . '/' . $tmpTemplateName . $knownTemplates[$tmpTemplateName];
     //////////////////////////////////////
     // Call function parseTemplate
     //////////////////////////////////////
     if ($extracted = $this->parseTemplate($subject, $tpl, $language)) {
         writeTripel($GLOBALS['W2RCFG']['wikipediaBase'] . $page, $GLOBALS['W2RCFG']['propertyBase'] . 'relatedInstance', $subject, 'r');
         if (isset($tplCount[$tplName])) {
             $tplCount[$tplName]++;
         } else {
             $tplCount[$tplName] = 1;
         }
     }
 } else {
     $subject = $GLOBALS['W2RCFG']['wikipediaBase'] . $page;
     //////////////////////////////////////
     // Call function parseTemplate
     //////////////////////////////////////
     if ($extracted = $this->parseTemplate($subject, $tpl, $language)) {
         if (isset($tplCount[$tplName])) {
             $tplCount[$tplName]++;
         } else {
Ejemplo n.º 5
0
            printList($s, $p, $o);
        } else {
            list($o, $o_is, $dtype, $lang) = $this->parseAttributeValue($o, $s, $p, $language);
            // special newline handling
            $br = array('<br>', '<br/>', '<br />');
            if ($o_is == 'l') {
                $o = str_replace($br, "\n", $o);
            } else {
                if ($o_is == 'r') {
                    $o = str_replace($br, '', $o);
                }
            }
            if ($o !== NULL) {
                writeTripel($s, $p, $o, 'main', $o_is, $dtype, $lang);
            }
        }
        //if($GLOBALS['templateStatistics'] && $o!=NULL && $equal) {
        //  $GLOBALS['propertyStat'][$pred]['count']++;
        //  $GLOBALS['propertyStat'][$pred]['maxCountPerTemplate']=max($GLOBALS['propertyStat'][$pred]['maxCountPerTemplate'],++$pc[$pred]);
        //  if(!$GLOBALS['propertyStat'][$pred]['inTemplates'] || !in_array($templateName,$GLOBALS['propertyStat'][$pred]['inTemplates']))
        //      $GLOBALS['propertyStat'][$pred]['inTemplates'][]=$templateName;
        //}
        $extracted = true;
    }
}
if (isset($extracted) && $extracted) {
    //writeTripel($s,$GLOBALS['W2RCFG']['templateProperty'],$GLOBALS['W2RCFG']['wikipediaBase'].$GLOBALS['templateLabel'].':'.encodeLocalName($templateName),$GLOBALS['filedecisionTemplate']);
    writeTripel($s, $GLOBALS['W2RCFG']['templateProperty'], $GLOBALS['W2RCFG']['wikipediaBase'] . $GLOBALS['templateLabel'] . ':' . $templateName);
    //if ($GLOBALS['addExplicitTypeTriples'])
    //  printexplicitTyping($templateName,$GLOBALS['filename'],'t');
}
/**
 * Parses a number (optionally with currency symbols and a magnitude word) that
 * is followed by a reference, strips the reference and writes the remaining
 * value as a triple.
 *
 * Two kinds of trailing reference are recognised:
 *  - a four-digit year in parentheses, e.g. numEmployees = 12,380 (2006)
 *  - an external link in brackets,     e.g. revenue = 23 billion $ [http://moneyfacts.com]
 *
 * @param string $o Raw attribute value.
 * @param mixed  $s Subject of the triple.
 * @param mixed  $p Predicate of the triple.
 *
 * @return bool true when the value had the number-plus-reference shape and was
 *              handled here, false otherwise.
 */
function catchNumberWithReference($o, $s, $p)
{
    // The numeric part must start the value. Previously the "trillion" and
    // "quadrillion" alternatives were optional and unanchored, so any text that
    // merely contained " (2006)" or " [http://...]" matched with an empty
    // capture and was swallowed. The magnitude words now share one optional
    // group; trailing currency symbols are allowed so that the documented
    // "23 billion $ [...]" form is actually recognised.
    $number = '^([0-9,.$£€¥ ]+(?:(?:[bBmM]|[tT]r|[qQ]uadr)illion)?[$£€¥ ]*)';
    $references = array(
        '\s*\([0-9]{4}\)',          // year reference
        '\s*\[http:\/\/[^\]]+\]',   // external link reference
    );

    $value = null;
    foreach ($references as $reference) {
        if (preg_match('/' . $number . $reference . '/', $o, $found)) {
            $value = trim($found[1]);
            break;
        }
    }
    if ($value === null || $value === '') {
        // No leading number: leave the value to the caller.
        return false;
    }

    $parsed = parseAttributeValue($value, $s, $p);
    if (is_array($parsed)) {
        list($o, $o_is, $dtype, $lang) = $parsed;
        if ($o !== NULL) {
            writeTripel($s, $p, $o, 'main', $o_is, $dtype, $lang);
        }
    }
    return true;
}