/
enlargeNeoDatabase.php
110 lines (78 loc) · 3.15 KB
/
enlargeNeoDatabase.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<?php
require_once("CoauthorNetworkCrawler.php");
require_once('vendor/autoload.php');
use Everyman\Neo4j\Cypher\Query;
//TODO: A better algorithm is needed to refine the affiliation string.
//So far we only keep the comma-separated phrase that contains one of the possibleAffiliationKeywords.
/**
 * Reduce a free-form affiliation string to the one comma-separated phrase
 * that names the institution.
 *
 * Keywords are tried in the order they are listed, so a phrase containing
 * "university" wins over one containing "institute" even when the latter
 * appears earlier in the string. Matching is case-insensitive; the returned
 * phrase keeps its original casing and is trimmed.
 *
 * @param string $affiliation Raw affiliation text, phrases separated by commas.
 * @return string The matching phrase, or "" when no phrase contains a keyword.
 */
function filterAffiliation($affiliation){
    $possibleAffiliationKeywords = array("university", "institute", "academy", "laboratory");
    $segments = explode(",", $affiliation);

    foreach($possibleAffiliationKeywords as $needle){
        foreach($segments as $segment){
            $lowered = strtolower($segment);
            if(strpos($lowered, $needle) === FALSE){
                continue;
            }
            return trim($segment);
        }
    }

    // No phrase mentioned any of the keywords.
    return "";
}
/**
 * Make sure the coauthor network of an author has been crawled into Neo4j.
 *
 * - No Person node with this name: the author is crawled with the
 *   affiliation "University of Missouri", then the node is flagged with
 *   hasSearched = 1.
 * - Node exists and already has the hasSearched property: nothing is done.
 * - Node exists without hasSearched: the node is flagged first, then the
 *   author is crawled using the affiliation stored on the node (reduced by
 *   filterAffiliation()), or "" when none is stored.
 *
 * @param string $unifiedName Author name, matched against Person.name.
 * @return void
 */
function enlargeNeoDatabase($unifiedName){
    // Nothing sensible can be looked up or crawled without a name.
    if(!is_string($unifiedName) || trim($unifiedName) === ""){
        return;
    }

    // Connection settings, shared by the query client and the crawler.
    $host = 'localhost';
    $port = 7474;
    $user = 'neo4j';
    $password = 'muresearch';

    $client = new Everyman\Neo4j\Client($host, $port);
    $client->getTransport()
        ->setAuth($user, $password);

    // The name arrives from the request, so it is bound as a Cypher
    // parameter ({name}) instead of being concatenated into the query text.
    $params = array('name' => $unifiedName);
    $run = function($template) use ($client, $params){
        $query = new Query($client, $template, $params);
        return $query->getResultSet();
    };

    $markSearched = 'match (u:Person {name: {name}}) set u.hasSearched = 1';

    //Check if the author exists
    $matches = $run('match (u:Person {name: {name}}) return u');
    if(count($matches) === 0){//Author does not exist. Must be of Mizzou
        $crawler = new CoauthorNetworkCrawler($unifiedName, "University of Missouri");
        $crawler->insertDB($host, $port, $user, $password);
        // The flag is set after the insert, as the original code did.
        $run($markSearched);
        return;
    }

    //Check if we need to crawl the network for this author while the author already exists.
    $flags = $run('match (u:Person {name: {name}}) return has(u.hasSearched) as hasSearched');
    foreach($flags as $flagRow){
        if($flagRow['hasSearched']){//The user has been searched before. No need to re-search it
            return;
        }

        //The user has NOT been searched before.
        //Get the affiliation of this user; default to "" so the crawler
        //never receives an undefined value when no row comes back.
        $affiliation = "";
        $affiliationRows = $run('match (u:Person {name: {name}}) return u.affiliation as affiliation');
        foreach($affiliationRows as $row){
            $affiliation = empty($row['affiliation']) ? "" : filterAffiliation($row['affiliation']);
        }

        // NOTE(review): the flag is set before the crawl (original order kept);
        // if the crawl fails the author stays marked as searched - confirm intended.
        $run($markSearched);

        $crawler = new CoauthorNetworkCrawler($unifiedName, $affiliation);
        $crawler->insertDB($host, $port, $user, $password);
        return;
    }
}
// Request entry point: expects a POST field "data" holding a JSON object
// with a "name" key, e.g. data={"name":"Liu, Yang"}.
if(!empty($_POST['data']) && is_string($_POST['data'])){
    // Decode the payload exactly as received first: stripping slashes from
    // well-formed JSON would destroy legitimate escapes such as \" or \u00e9.
    $d = json_decode($_POST['data'], true);
    if(!is_array($d)){
        // Fallback for servers that add slashes to request data (magic quotes).
        $d = json_decode(stripslashes($_POST['data']), true);
    }
    // Only proceed when the payload decoded to an object with a string name.
    if(is_array($d) && isset($d['name']) && is_string($d['name'])){
        enlargeNeoDatabase($d['name']);
    }
}
//enlargeNeoDatabase("Valliyodan, Babu");
//enlargeNeoDatabase("Liu, Yang");
?>