/**
 * Adds the dataset metrics described at
 * https://github.com/bio2rdf/bio2rdf-scripts/wiki/Bio2RDF-Dataset-Metrics
 * to every entry of $endpoint_arr.
 *
 * An entry is processed only when both its "endpoint_url" and "graph_uri"
 * values are present and non-empty; any other entry is left untouched.
 * For a processed entry, each metric is obtained by building a query
 * location with q1() .. q10(), fetching it with file_get_contents(), and
 * handing the trimmed response to the matching parser. The parsed value is
 * stored under the metric's key in that entry.
 *
 * @param array $endpoint_arr map of endpoint name => details array; modified in place
 * @return array the same, modified, $endpoint_arr
 */
function retrieveStatistics(&$endpoint_arr) {
    // Nothing to do for an empty (or null) list; also avoids count()/foreach
    // complaints on a null argument.
    if (empty($endpoint_arr)) {
        return $endpoint_arr;
    }

    // Result key => array(query builder, response parser), listed in the
    // order in which the keys are added to each endpoint entry.
    $metrics = array(
        "triples" => array("q1", "getNumOfTriples"),
        "unique_subjects" => array("q2", "getNumOfSubjects"),
        "unique_predicates" => array("q3", "getNumOfPredicates"),
        "unique_objects" => array("q4", "getNumOfObjects"),
        "unique_types" => array("q5", "getNumOfTypes"),
        // unique predicate-object links and their frequencies
        "predicate_object_links" => array("q6", "getPredObjFreq"),
        // unique predicate-literal links and their frequencies
        "predicate_literals" => array("q7", "getPredLitLinks"),
        // unique subject-predicate-unique object links and their frequencies
        "subject_count_predicate_object_count" => array("q8", "getSubPredObjLinks"),
        // unique subject-predicate-unique literal links and their frequencies
        "subject_count_predicate_literal_count" => array("q9", "getSubPredLitLinks"),
        // unique subject type-predicate-object type links and their frequencies
        "subject_type_predicate_object_type" => array("q10", "getSubTypePredObjType"),
    );

    foreach ($endpoint_arr as $name => $details) {
        // Treat a missing key like an empty value instead of raising an
        // "undefined index" notice.
        $endpoint_url = isset($details["endpoint_url"]) ? $details["endpoint_url"] : "";
        $graph_uri = isset($details["graph_uri"]) ? $details["graph_uri"] : "";

        if (strlen($endpoint_url) == 0 || strlen($graph_uri) == 0) {
            continue;
        }

        foreach ($metrics as $key => $metric) {
            list($buildQuery, $parseResponse) = $metric;
            // Best effort: "@" keeps a failing fetch quiet. file_get_contents()
            // then returns false, which trim() turns into "", so the parser
            // decides what an empty response means.
            $response = trim(@file_get_contents($buildQuery($endpoint_url, $graph_uri)));
            $endpoint_arr[$name][$key] = $parseResponse($response);
        }
    }

    return $endpoint_arr;
}
/**
 * Adds the dataset metrics described at
 * https://github.com/bio2rdf/bio2rdf-scripts/wiki/Bio2RDF-Dataset-Metrics
 * to every entry of $endpoint_arr.
 *
 * An entry is processed only when both its "endpoint_url" and "graph_uri"
 * values are present and non-empty. For a processed entry, each metric is
 * obtained by building a query location with the metric's query builder,
 * fetching it with file_get_contents(), and handing the trimmed response to
 * the matching parser. The parsed value is stored under the metric's key in
 * that entry. Entries that cannot be processed are left untouched and
 * reported through a warning that is echoed once all entries have been
 * visited.
 *
 * @param array $endpoint_arr map of endpoint name => details array; modified in place
 * @return array the same, modified, $endpoint_arr
 */
function retrieveStatistics(&$endpoint_arr) {
    // Nothing to do for an empty (or null) list; also avoids count()/foreach
    // complaints on a null argument.
    if (empty($endpoint_arr)) {
        return $endpoint_arr;
    }

    // Result key => array(query builder, response parser, builder takes graph uri?),
    // listed in the order in which the keys are added to each endpoint entry.
    $metrics = array(
        "nsnscounts" => array("nsQ", "getNSNSCounts", true),
        "triples" => array("q1", "getNumOfTriples", true),
        // the date query is built from the endpoint url alone
        "date" => array("getDatasetDateQuery", "getDate2", false),
        "unique_subjects" => array("q2", "getNumOfSubjects", true),
        "unique_predicates" => array("q3", "getNumOfPredicates", true),
        "unique_objects" => array("q4", "getNumOfObjects", true),
        "unique_types" => array("q5", "getNumOfTypes", true),
        // unique predicate-object links and their frequencies
        "predicate_object_links" => array("q6", "getPredObjFreq", true),
        // unique predicate-literal links and their frequencies
        "predicate_literals" => array("q7", "getPredLitLinks", true),
        // unique subject-predicate-unique object links and their frequencies
        "subject_count_predicate_object_count" => array("q8", "getSubPredObjLinks", true),
        // unique subject-predicate-unique literal links and their frequencies
        "subject_count_predicate_literal_count" => array("q9", "getSubPredLitLinks", true),
        // unique subject type-predicate-object type links and their frequencies
        "subject_type_predicate_object_type" => array("q10", "getSubTypePredObjType", true),
    );

    $warn = "";

    foreach ($endpoint_arr as $name => $details) {
        // Treat a missing key like an empty value instead of raising an
        // "undefined index" notice.
        $endpoint_url = isset($details["endpoint_url"]) ? $details["endpoint_url"] : "";
        $graph_uri = isset($details["graph_uri"]) ? $details["graph_uri"] : "";

        if (strlen($endpoint_url) == 0 || strlen($graph_uri) == 0) {
            $warn .= "WARNING :: Endpoint " . $name . " does not have all of required information! (missing either the endpoint or graph uri!)!\n";
            continue;
        }

        foreach ($metrics as $key => $metric) {
            list($buildQuery, $parseResponse, $usesGraph) = $metric;
            // Best effort: "@" keeps a failing fetch quiet. file_get_contents()
            // then returns false, which trim() turns into "", so the parser
            // decides what an empty response means.
            $response = trim(@file_get_contents(
                $usesGraph ? $buildQuery($endpoint_url, $graph_uri) : $buildQuery($endpoint_url)
            ));
            $endpoint_arr[$name][$key] = $parseResponse($response);
        }
    }

    // Emit all collected warnings in one go.
    if (strlen($warn)) {
        echo $warn;
    }

    return $endpoint_arr;
}