Exemple #1
0
/**
 * Fetches the JeuxDeMots XML entry for a term and echoes it as HTML.
 *
 * Output order: an <h1> heading with the term, the definitions returned by
 * getDefinitions(), then one displayRelations() section per relation type in
 * the fixed order listed below.
 *
 * @param string $terme Term to look up; it is lower-cased before use.
 *
 * @return void
 */
function parseAndDisplay($terme)
{
    // NOTE(review): strtolower() is byte-wise; accented capitals in UTF-8 input
    // may not be lower-cased — confirm the expected input encoding.
    $terme = strtolower($terme);

    // The term is user-facing data: encode it so spaces, '&', '#', accents, etc.
    // cannot break or alter the query string.
    $url = 'http://www.jeuxdemots.org/rezo-xml.php?gotermsubmit=Chercher&gotermrel=' . urlencode($terme) . '&output=onlyxml';
    $html = file_get_html($url);

    // Escape the term before placing it in markup.
    echo '<h1>' . htmlspecialchars($terme, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</h1><hr>';

    if (!$html) {
        // Nothing could be fetched/parsed; the helpers below need a document.
        trigger_error('parseAndDisplay: unable to load ' . $url, E_USER_WARNING);
        return;
    }

    // Definitions
    echo getDefinitions($html);

    // Relation type => section title, in display order.
    // NOTE(review): titles carry the raw term, exactly as before; whether
    // displayRelations() escapes its title is not visible here — confirm.
    $sections = array(
        'r_raff_sem' => 'Raffinements sémantiques',
        'r_meaning' => 'Gloses',
        'r_inhib' => 'Inhibition',
        'r_associated' => 'Associations d\'idées',
        'r_aki' => 'Totaki',
        'r_wiki' => 'Wikipedia',
        'r_coocurrence' => 'Coocurrences',
        'r_domain' => 'Thèmes / Domaines',
        'r_domain_subst' => 'Substituts pour ' . $terme . ' comme domaine',
        'r_syn' => 'Synonymes',
        'r_anto' => 'Contraires',
        'r_isa' => 'Génériques',
        'r_hypo' => 'Spécifiques',
        'r_instance' => 'Instances de ' . $terme,
        'r_has_part' => 'Parties de ' . $terme,
        'r_holo' => $terme . ' fait partie de',
        'r_item>set' => 'Ensembles ayant ' . $terme . ' pour élément',
        'r_quantificateur' => 'Quantificateurs pour ' . $terme,
        'r_magn' => 'Plus intense que ' . $terme,
        'r_antimagn' => 'Moins intense que ' . $terme,
        'r_is_bigger_than' => 'Moins gros que ' . $terme,
        'r_family' => 'Termes étymologiquement apparentés',
        'r_locution' => 'Locutions / termes composés',
        'r_carac' => 'Caractéristiques de ' . $terme,
        'r_carac-1' => 'Ayant ' . $terme . ' pour caractéristique',
        'r_color' => 'Couleurs pour ' . $terme,
        'r_against' => 'A quoi ' . $terme . ' peut-il s\'opposer/combattre?',
        'r_against-1' => 'Qu\'est ce qui s\'oppose à ' . $terme . '?',
        'r_lieu' => 'Lieux où peut se trouver ' . $terme,
        'r_lieu-1' => 'Que peut-on trouver dans le lieu ' . $terme . '?',
        'r_agent-1' => 'Que peut faire ' . $terme . '?',
        'r_patient-1' => 'Que peut-on faire à/de ' . $terme . '?',
        'r_instr-1' => 'Que peut-on faire avec ' . $terme . '?',
        'r_conseq' => 'Conséquences associées à ' . $terme,
        'r_make' => 'Que peut produire/faire ' . $terme . '?',
        'r_sentiment' => 'Sentiments/émotions associés à ' . $terme,
        'r_chunk_sujet' => $terme . ' comme sujet',
        'r_chunk_objet' => $terme . ' comme objet',
        'r_chunk_head' => $terme . ' comme tête syntaxtique',
    );

    foreach ($sections as $relationType => $title) {
        displayRelations(getRelations($relationType, $html), $title);
    }
}
// Script fragment: compares the relations currently returned by getRelations($db)
// with a JSON snapshot stored at $filePath and indexes every entry that differs.
// NOTE(review): this fragment is cut off inside the innermost "else" branch in
// the visible source; $options and $script are defined outside the visible part.
$cli        = eZCLI::instance();
$db         = MMDB::instance();
// Snapshot location: the 'filepath' option when non-empty, else the default path.
$filePath   = isset( $options['filepath'] ) && $options['filepath'] != '' ? $options['filepath'] : 'var/merck/pfs.json';
$updateFile = false;

// Init mode: just write the current relations to the snapshot file.
if ( $options['init'] )
    writeFile( $filePath, getRelations( $db ) );
else
{
    // Read the previous snapshot; shut the script down with status 1 if it cannot be read.
    if ( ( $json = file_get_contents( $filePath ) ) === FALSE )
        $script->shutdown( 1, "An unexpected error occurred while trying to read diff file ($filePath)" );

    // Keyed by entry value => result of indexContent(), so each entry is indexed at most once.
    $alreadyIndexedPFs = array();
    $previousPFs = json_decode( $json, true );
    foreach ( getRelations( $db ) as $c => $pf )
    {
        // NOTE(review): this reset discards a "true" set by an earlier iteration,
        // so only the last key decides the final value — confirm that is intended.
        $updateFile = false;
        // Entries present (by key and value) on one side only, from both directions.
        // NOTE(review): $previousPFs[$c] is read without checking that the key exists.
        $diff = array_merge( array_diff_assoc( $pf, $previousPFs[$c] ), array_diff_assoc( $previousPFs[$c], $pf ) );
        if ( count( $diff ) > 0 )
        {
            $updateFile = true;
            foreach ( $diff as $_pf )
            {
                // Index only entries not handled earlier in this run.
                if ( !in_array( $_pf, array_keys( $alreadyIndexedPFs ) ) )
                {
                    $result = PublisherFolderTool::indexContent( $_pf, 10 );
                    $alreadyIndexedPFs[$_pf] = $result;
                }
                else
                {
/**
 * Builds an EAC-CPF XML document (returned as a string) for a DBpedia resource.
 *
 * Loads the resource's RDF from dbpedia.org and, when the RDF carries a VIAF
 * id, the matching RDF from viaf.org. From these it emits the control,
 * identity and description sections, then appends whatever getRelations()
 * returns. The resource URI is also appended to the global $processed list.
 *
 * @param string $resource DBpedia resource URI; the text after "resource/" is used as the record id.
 * @param mixed  $end      Not used here; forwarded unchanged to getRelations().
 * @param string $lang     Language code: selects the preferred name form and is passed to getLabel().
 * @param array  $options  Flags 'birth/death places', 'occupations' and 'subjects' enable the
 *                         corresponding description parts; also forwarded to getRelations().
 *
 * @return string The EAC-CPF XML.
 */
function generate_eac($resource, $end, $lang, $options)
{
    global $identityArray;
    global $processed;
    $processed[] = $resource;

    // Escapes a raw value for use in XML text or attribute content.
    $esc = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1, 'UTF-8');
    };
    // Text of the first node matched by $query, or null when nothing matches.
    $firstValue = function (DOMXPath $xpath, $query) {
        $nodes = $xpath->query($query);
        if ($nodes === false || $nodes->length === 0) {
            return null;
        }
        return $nodes->item(0)->nodeValue;
    };
    // Part of a DBpedia URI that follows the first "resource/".
    $localId = function ($uri) {
        $tail = strstr($uri, 'resource/');
        return substr($tail, strpos($tail, '/') + 1);
    };
    // Display name for a linked resource: the global $identityArray entry when
    // present, otherwise the label loaded from the resource's own RDF.
    $resolveName = function ($url) use ($lang, $localId) {
        global $identityArray;
        if (is_array($identityArray) && array_key_exists($url, $identityArray)) {
            return $identityArray[$url];
        }
        $label = getLabel(loadTempRDF($localId($url)), $lang);
        return $label['label'];
    };

    $id = $localId($resource);

    // Load DBpedia RDF.
    $dbRDF = new DOMDocument();
    $dbRDF->load('http://dbpedia.org/data/' . $id . '.rdf');
    $dxpath = new DOMXpath($dbRDF);
    $dxpath->registerNamespace('rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    $dxpath->registerNamespace('dbpedia-owl', "http://dbpedia.org/ontology/");
    $dxpath->registerNamespace('rdfs', "http://www.w3.org/2000/01/rdf-schema#");
    $dxpath->registerNamespace('ns7', "http://live.dbpedia.org/ontology/");
    // These two prefixes are used in queries below; an unregistered prefix makes
    // DOMXPath::query() fail instead of returning a node list.
    $dxpath->registerNamespace('dbpprop', "http://dbpedia.org/property/");
    $dxpath->registerNamespace('dcterms', "http://purl.org/dc/terms/");

    $xml = '<?xml version="1.0" encoding="utf-8"?><eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">';

    /************ CONTROL ************/
    $xml .= '<control>';
    $xml .= '<recordId>' . $esc(strtolower($id)) . '</recordId>';

    // Get VIAF RDF, if a VIAF id exists (the last one listed wins).
    $viafId = '';
    foreach ($dxpath->query('//dbpprop:viaf') as $node) {
        $viafId = $node->nodeValue;
    }
    $hasViaf = strlen($viafId) > 0;
    $vxpath = null;
    if ($hasViaf) {
        $viafRDF = new DOMDocument();
        $viafRDF->load('http://viaf.org/viaf/' . $viafId . '/rdf.xml');
        $vxpath = new DOMXPath($viafRDF);
        $vxpath->registerNamespace('rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        $vxpath->registerNamespace('owl', "http://www.w3.org/2002/07/owl#");
        $vxpath->registerNamespace('schema', "http://schema.org/");
    }

    $xml .= '<maintenanceAgency><agencyName>Agency Name</agencyName></maintenanceAgency>';
    $xml .= '<maintenanceHistory><maintenanceEvent><eventType>created</eventType><eventDateTime standardDateTime="' . date(DATE_W3C) . '"/><agentType>machine</agentType><agent>xEAC dbpedia PHP</agent></maintenanceEvent></maintenanceHistory>';
    $xml .= '<conventionDeclaration><abbreviation>WIKIPEDIA</abbreviation><citation>Wikipedia/DBpedia</citation></conventionDeclaration>';
    $xml .= '<sources><source xlink:type="simple" xlink:href="' . $esc($resource) . '"/>';
    if ($hasViaf) {
        $xml .= '<source xlink:type="simple" xlink:href="http://viaf.org/viaf/' . $esc($viafId) . '/"/>';
    }
    $xml .= '</sources>';
    $xml .= '</control><cpfDescription>';

    /************ IDENTITY ************/
    $xml .= '<identity>';

    // Entity type: person when an rdf:type points at foaf:Person, else family.
    // rdf:resource is an attribute, hence the "@".
    $types = $dxpath->query('//rdf:type[@rdf:resource="http://xmlns.com/foaf/0.1/Person"]');
    if ($types !== false && $types->length > 0) {
        $xml .= '<entityType>person</entityType>';
    } else {
        $xml .= '<entityType>family</entityType>';
    }

    // Other entity ids.
    $xml .= '<entityId>' . $esc($resource) . '</entityId>';
    // Ids of matching records listed by VIAF, skipping DBpedia itself.
    if ($hasViaf) {
        foreach ($vxpath->query("//rdf:Description[rdf:type/@rdf:resource='http://xmlns.com/foaf/0.1/Person']/schema:sameAs") as $ele) {
            $sameAs = $ele->getAttribute('rdf:resource');
            if (!strstr($sameAs, 'dbpedia')) {
                $xml .= '<entityId>' . $esc($sameAs) . '</entityId>';
            }
        }
    }

    foreach ($dxpath->query('//rdfs:label') as $ele) {
        $labelLang = $ele->getAttribute('xml:lang');
        $xml .= '<nameEntry xml:lang="' . $esc($labelLang) . '"><part>' . $esc($ele->nodeValue) . '</part>';
        // The label in the requested language is the preferred form, others are alternatives.
        if ($labelLang == $lang) {
            $xml .= '<preferredForm>WIKIPEDIA</preferredForm>';
        } else {
            $xml .= '<alternativeForm>WIKIPEDIA</alternativeForm>';
        }
        $xml .= '</nameEntry>';
    }
    $xml .= '</identity>';

    /************ DESCRIPTION ************/
    $xml .= '<description>';

    // DBpedia abstract -> eac:biogHist.
    // NOTE(review): values returned by getLabel() (and names resolved further
    // down) are inserted as-is because it is not visible here whether they are
    // already XML-escaped — confirm and escape if they are raw.
    $abstracts = $dxpath->query("//dbpedia-owl:abstract");
    $abstract = getLabel($abstracts, $lang);
    $xml .= '<biogHist><abstract xml:lang="' . $abstract['lang'] . '" localType="wikipedia">' . $abstract['label'] . '</abstract></biogHist>';

    // existDates: taken from VIAF when available, otherwise from DBpedia (inconsistent).
    // $dbBirth/$dbDeath stay null unless DBpedia supplies both dates.
    $dbBirth = null;
    $dbDeath = null;
    if ($hasViaf) {
        $xml .= getExistDates($firstValue($vxpath, '//schema:birthDate'), $firstValue($vxpath, '//schema:deathDate'));
    } else {
        $gStart = $firstValue($dxpath, '//*[local-name()="birthDate"][@rdf:datatype="http://www.w3.org/2001/XMLSchema#date"]');
        $gEnd = $firstValue($dxpath, '//*[local-name()="deathDate"][@rdf:datatype="http://www.w3.org/2001/XMLSchema#date"]');
        if ($gStart !== null && $gEnd !== null && strlen($gStart) > 0 && strlen($gEnd) > 0) {
            $dbBirth = $gStart;
            $dbDeath = $gEnd;
            $xml .= '<existDates><dateRange>';
            $xml .= '<fromDate standardDate="' . $esc($gStart) . '">' . getDateTextual($gStart) . '</fromDate>';
            $xml .= '<toDate standardDate="' . $esc($gEnd) . '">' . getDateTextual($gEnd) . '</toDate>';
            $xml .= '</dateRange></existDates>';
        }
    }

    // Birth and death places.
    if (!empty($options['birth/death places'])) {
        $bdPlaces = $dxpath->query('descendant::dbpedia-owl:birthPlace|descendant::dbpedia-owl:deathPlace');
        foreach ($bdPlaces as $place) {
            $isBirth = $place->localName == 'birthPlace';
            $url = $place->getAttribute('rdf:resource');
            $name = $resolveName($url);

            $xml .= '<place><placeEntry localVocabulary="' . $esc($url) . '">' . $name . '</placeEntry>';
            $xml .= $isBirth ? '<placeRole>Place of Birth</placeRole>' : '<placeRole>Place of Death</placeRole>';

            // Add the birth or death date, only when one is actually available.
            $gDate = null;
            if ($hasViaf) {
                $rawDate = $firstValue($vxpath, $isBirth ? '//schema:birthDate' : '//schema:deathDate');
                if ($rawDate !== null && $rawDate !== '') {
                    $gDate = normalizeDate($rawDate);
                }
            } else {
                $gDate = $isBirth ? $dbBirth : $dbDeath;
            }
            if ($gDate !== null) {
                $xml .= '<date standardDate="' . $esc($gDate) . '">' . getDateTextual($gDate) . '</date>';
            }
            $xml .= '</place>';
        }
    }

    // Occupations.
    if (!empty($options['occupations'])) {
        $occupations = $dxpath->query('descendant::rdf:Description[@rdf:about="' . $resource . '"]/dbpedia-owl:occupation');
        foreach ($occupations as $occupation) {
            $url = $occupation->getAttribute('rdf:resource');
            $name = $resolveName($url);
            $xml .= '<occupation>';
            $xml .= '<term vocabularySource="' . $esc($url) . '">' . $name . '</term>';
            $xml .= '</occupation>';
        }
    }

    // Subjects.
    if (!empty($options['subjects'])) {
        $subjects = $dxpath->query('descendant::rdf:Description[@rdf:about="' . $resource . '"]/dcterms:subject');
        foreach ($subjects as $subject) {
            $url = $subject->getAttribute('rdf:resource');
            $name = $resolveName($url);
            $xml .= '<localDescription localType="subject">';
            $xml .= '<term vocabularySource="' . $esc($url) . '">' . $name . '</term>';
            $xml .= '</localDescription>';
        }
    }
    $xml .= '</description>';

    /************ RELATIONS ************/
    $xml .= getRelations($dxpath, $resource, $end, $lang, $options);

    // Close EAC-CPF.
    $xml .= '</cpfDescription></eac-cpf>';
    return $xml;
}
/**
 * Builds one link object per (record type, relation, target) combination.
 *
 * For every entry of $rectypes the relations are fetched with getRelations(),
 * and for every relation the targets are fetched with getTargets().
 *
 * Each returned link is a stdClass with:
 *   - source:      position of the record type in $rectypes
 *   - target:      value returned by getIndex($rectypes, $target)
 *   - relation:    a copy of the relation object whose count is the target's count
 *   - targetcount: the target's count
 *
 * @param mixed $system   System reference, passed through to getRelations() and getTargets()
 * @param array $rectypes Record types to inspect
 *
 * @return array List of link objects
 */
function getLinks($system, $rectypes)
{
    $links = array();
    $total = sizeof($rectypes);

    // Go through all rectypes
    for ($i = 0; $i < $total; $i++) {
        // Find relations
        $relations = getRelations($system, $rectypes[$i]);

        // Find all targets for each relation
        foreach ($relations as $relation) {
            // Get counts by target
            $targets = getTargets($system, $rectypes[$i], $relation);

            // Construct a link for each target
            foreach ($targets as $target) {
                $link = new stdClass();

                // Records
                $link->source = $i;
                $link->target = getIndex($rectypes, $target);
                // Objects are assigned by handle: without a copy every link of
                // this relation would share one object, and the count written
                // below would overwrite the count seen by all earlier links.
                $link->relation = clone $relation;

                // Counts
                $link->targetcount = $target->count;
                $link->relation->count = $target->count;

                $links[] = $link;
            }
        }
    }

    return $links;
}