/**
 * Walks the XML files in a directory and records, per activity hierarchy level,
 * which IATI activities carry particular elements and attributes.
 *
 * Files for which xml_child_exists($xml, "//iati-organisation") is true are
 * skipped. Most result buckets are lists of iati-identifier strings keyed by
 * hierarchy; an activity may appear more than once in a bucket (for example once
 * per matching transaction or participating-org), so callers that need distinct
 * activities must de-duplicate.
 *
 * Relies on two helpers defined outside this function: xml_child_exists() and
 * look_up_lang().
 *
 * @param string       $dir         Directory to scan. A trailing slash is optional.
 * @param string|false $single_file When set, only the file with exactly this name
 *                                  is processed; every other entry is skipped.
 *
 * @return array|false FALSE as soon as any file is found to contain a DOCTYPE
 *                     declaration; otherwise an array with the keys
 *                     "no-activities", "activities_with_at_least_one",
 *                     "no_activity_dates", "activity_by_year", "hierarchies",
 *                     "activities_with_attribute", "document_links",
 *                     "result_element", "conditions",
 *                     "participating_org_accountable",
 *                     "participating_org_implementing", "budget", "identifiers",
 *                     "transaction_type_commitment",
 *                     "transaction_type_disbursement",
 *                     "transaction_type_expenditure", "no_disbursements",
 *                     "no_tracable_transactions", "no_incoming_funds",
 *                     "activities_with_location", "activities_with_coordinates",
 *                     "activities_with_adminstrative",
 *                     "activities_sector_assumed_dac",
 *                     "activities_sector_declared_dac", "most_recent" and
 *                     "activies_in_country_lang".
 */
function count_attributes($dir, $single_file = FALSE) {
    // These three are part of the returned structure but nothing in this
    // function fills them any more; they are kept so the return shape is stable.
    $no_activity_dates = array();
    $activities_with_at_least_one = array();
    $activity_by = array();

    $no_activities = array();
    $found_hierarchies = array();
    $activities_with_attribute = array();
    $document_links = array();
    $result_element = array();
    $conditions = array();
    $participating_org_accountable = array();
    $participating_org_implementing = array();
    $budget = array();
    $identifiers = array();
    $transaction_type_commitment = array();
    $transaction_type_disbursement = array();
    $transaction_type_expenditure = array();
    $no_disbursements = array();
    $no_incoming_funds = array();
    $no_tracable_transactions = array();
    $most_recent = array();
    $activities_with_location = array();
    $activities_with_coordinates = array();
    $activities_with_adminstrative = array();
    $activities_sector_assumed_dac = array();
    $activities_sector_declared_dac = array();
    $activies_in_country_lang = array();

    // Used to build unique placeholder ids for activities without an iati-identifier.
    $missing_id_count = 0;

    $xml_namespace = 'http://www.w3.org/XML/1998/namespace';

    // libxml_disable_entity_loader() is deprecated from PHP 8.0, so only toggle
    // it on older runtimes.
    $guard_entity_loader = PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');

    // Accept $dir with or without a trailing separator.
    $base_dir = rtrim($dir, '/\\') . DIRECTORY_SEPARATOR;

    if ($handle = opendir($dir)) {
        while (false !== ($file = readdir($handle))) {
            if ($file == "." || $file == "..") {
                continue;
            }
            // Handy for testing just one file in a directory.
            if ($single_file && $file != $single_file) {
                continue;
            }

            $path = $base_dir . $file;
            if (!is_file($path)) {
                // Sub-directories and other non-files cannot be parsed as XML.
                continue;
            }

            $raw_xml = file_get_contents($path);
            if ($raw_xml === false || $raw_xml === '') {
                // Unreadable or empty file: nothing to parse.
                continue;
            }

            // Guard against XML injection: a DOCTYPE should never be present in
            // these files, so its presence aborts the whole run.
            // see: http://phpsecurity.readthedocs.org/en/latest/Injection-Attacks.html
            $collapsedXML = preg_replace("/[[:space:]]/", '', $raw_xml);
            if (preg_match("/<!DOCTYPE/i", $collapsedXML)) {
                closedir($handle);
                return FALSE;
            }

            if ($guard_entity_loader) {
                $previous_loader_state = libxml_disable_entity_loader(true);
            }
            $dom = new DOMDocument();
            $loaded = $dom->loadXML($raw_xml);
            if ($guard_entity_loader) {
                // Always restore the previous setting, whatever the parse outcome.
                libxml_disable_entity_loader($previous_loader_state);
            }
            if (!$loaded) {
                // Malformed XML: skip this file.
                continue;
            }

            // Second line of defence: reject a DOCTYPE the text scan did not
            // catch, with the same outcome as the text scan above.
            foreach ($dom->childNodes as $child) {
                if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                    closedir($handle);
                    return FALSE;
                }
            }

            $xml = simplexml_import_dom($dom);
            if (!$xml) {
                continue;
            }
            if (xml_child_exists($xml, "//iati-organisation")) {
                // Organisation files are excluded.
                continue;
            }

            foreach ($xml->{"iati-activity"} as $activity) {
                // A missing, empty or "0" hierarchy attribute is treated as level 0.
                $hierarchy = (string) $activity->attributes()->hierarchy;
                if ($hierarchy === '' || $hierarchy === '0') {
                    $hierarchy = 0;
                }
                $found_hierarchies[] = $hierarchy;

                // Make sure the numeric counters exist for this level.
                if (!isset($no_activities[$hierarchy])) {
                    $no_activities[$hierarchy] = 0;
                }
                if (!isset($no_disbursements[$hierarchy])) {
                    $no_disbursements[$hierarchy] = 0;
                }
                if (!isset($no_incoming_funds[$hierarchy])) {
                    $no_incoming_funds[$hierarchy] = 0;
                }
                if (!isset($no_tracable_transactions[$hierarchy])) {
                    $no_tracable_transactions[$hierarchy] = 0;
                }
                if (!isset($most_recent[$hierarchy])) {
                    $most_recent[$hierarchy] = 0;
                }
                $no_activities[$hierarchy]++;

                // Identifier as written in the file (empty string when absent).
                $activity_id = (string) $activity->{'iati-identifier'};

                // Presence of <document-link>, <conditions>, <result>.
                if (count($activity->{"document-link"}) > 0) {
                    $document_links[$hierarchy][] = $activity_id;
                }
                if (count($activity->conditions) > 0) {
                    $conditions[$hierarchy][] = $activity_id;
                }
                if (count($activity->result) > 0) {
                    $result_element[$hierarchy][] = $activity_id;
                }

                // Participating organisations by role.
                foreach ($activity->{"participating-org"} as $participating_org) {
                    $role = (string) $participating_org->attributes()->role;
                    if ($role == "Implementing") {
                        $participating_org_implementing[$hierarchy][] = $activity_id;
                    }
                    if ($role == "Accountable") {
                        $participating_org_accountable[$hierarchy][] = $activity_id;
                    }
                }

                // Budget / planned disbursement.
                if (count($activity->budget) > 0 || count($activity->{"planned-disbursement"}) > 0) {
                    $budget[$hierarchy][] = $activity_id;
                }

                // Unique identifier check: an identifier is "good" when it starts
                // with the reporting-org ref. Activities without an identifier get
                // a numbered placeholder so they land in the "bad" pile.
                if (!isset($activity->{'iati-identifier'})) {
                    $iati_identifier = "noIdentifierGiven" . $missing_id_count;
                    $missing_id_count++;
                } else {
                    $iati_identifier = $activity_id;
                }
                $identifier_verdict = "bad";
                if (isset($activity->{'reporting-org'}) && isset($activity->{'reporting-org'}->attributes()->ref)) {
                    $reporting_org_ref = (string) $activity->{'reporting-org'}->attributes()->ref;
                    // Haystack is the identifier, needle is the ref; the strict
                    // === 0 means "found at the very start" (strpos returns false
                    // when not found, which must not count as a match).
                    if ($reporting_org_ref !== '' && strpos($iati_identifier, $reporting_org_ref) === 0) {
                        $identifier_verdict = "good";
                    }
                }
                $identifiers[$hierarchy][$identifier_verdict][] = $iati_identifier;

                // Financial transactions, by transaction-type code.
                $transactions = $activity->transaction;
                if (isset($transactions) && count($transactions) > 0) {
                    foreach ($transactions as $transaction) {
                        if (!isset($transaction->{'transaction-type'})) {
                            continue;
                        }
                        $transaction_type = (string) $transaction->{'transaction-type'}->attributes()->{'code'};
                        if ($transaction_type == "C") {
                            $transaction_type_commitment[$hierarchy][] = $activity_id;
                        }
                        if ($transaction_type == "D") {
                            $transaction_type_disbursement[$hierarchy][] = $activity_id;
                            $no_disbursements[$hierarchy]++;
                            // A disbursement is traceable when it names a receiver-org.
                            if (isset($transaction->{"receiver-org"})) {
                                $no_tracable_transactions[$hierarchy]++;
                            }
                        }
                        if ($transaction_type == "IF") {
                            $no_incoming_funds[$hierarchy]++;
                            // An incoming-funds transaction is traceable when it names a provider-org.
                            if (isset($transaction->{"provider-org"})) {
                                $no_tracable_transactions[$hierarchy]++;
                            }
                        }
                        if ($transaction_type == "E") {
                            $transaction_type_expenditure[$hierarchy][] = $activity_id;
                        }
                    }
                }

                // Locations: any location, coordinates, administrative areas.
                $locations = $activity->location;
                if (isset($locations) && count($locations) > 0) {
                    $activities_with_location[$hierarchy][] = $activity_id;
                    foreach ($locations as $location) {
                        if (isset($location->coordinates)) {
                            $activities_with_coordinates[$hierarchy][] = $activity_id;
                        }
                        if (isset($location->administrative)) {
                            // Reset for every location so a value from an earlier
                            // location cannot leak into this one.
                            $adm1 = '';
                            $adm2 = '';
                            if (isset($location->administrative->attributes()->adm1)) {
                                $adm1 = (string) $location->administrative->attributes()->adm1;
                            }
                            if (isset($location->administrative->attributes()->adm2)) {
                                $adm2 = (string) $location->administrative->attributes()->adm2;
                            }
                            if ($adm1 !== '' || $adm2 !== '') {
                                $activities_with_adminstrative[$hierarchy][] = $activity_id;
                            }
                        }
                    }
                }

                // Sectors: no vocabulary attribute is recorded as "assumed DAC",
                // vocabulary="DAC" as "declared DAC"; other vocabularies are ignored.
                $sectors = $activity->sector;
                if (isset($sectors) && count($sectors) > 0) {
                    foreach ($sectors as $sector) {
                        if (!isset($sector->attributes()->vocabulary)) {
                            $activities_sector_assumed_dac[$hierarchy][] = $activity_id;
                        } elseif ((string) $sector->attributes()->vocabulary == "DAC") {
                            $activities_sector_declared_dac[$hierarchy][] = $activity_id;
                        }
                    }
                }

                // Most recent last-updated-datetime seen at this level (unix time).
                // strtotime() returns false for a missing/unparseable value, which
                // never compares greater than the stored timestamp.
                $last_updated = strtotime((string) $activity->attributes()->{'last-updated-datetime'});
                if ($last_updated > $most_recent[$hierarchy]) {
                    $most_recent[$hierarchy] = $last_updated;
                }

                // Activity dates, with the actual/planned variants folded into
                // "start" and "end"; any other type value is kept as-is.
                foreach ($activity->{"activity-date"} as $activity_date) {
                    $type = (string) $activity_date->attributes()->type;
                    if ($type == "start-actual" || $type == "start-planned") {
                        $type = "start";
                    }
                    if ($type == "end-actual" || $type == "end-planned") {
                        $type = "end";
                    }
                    $activities_with_attribute[$hierarchy][$type][] = $activity_id;
                }

                // Languages. Done once per activity (not once per activity-date),
                // so activities without dates are checked too.
                $title_langs = array();
                $description_langs = array();
                $country_langs = array();

                // xml:lang on the activity is the fallback for child elements
                // that do not declare their own.
                $default_lang = (string) $activity->attributes($xml_namespace)->{'lang'};

                foreach ($activity->{"recipient-country"} as $country) {
                    $code = (string) $country->attributes()->code;
                    // look_up_lang() is defined elsewhere; presumably it returns
                    // the language code for a country code - TODO confirm.
                    $country_langs[] = look_up_lang($code);
                }

                foreach ($activity->title as $title) {
                    $title_lang = (string) $title->attributes($xml_namespace)->{'lang'};
                    $title_langs[] = ($title_lang === '') ? $default_lang : $title_lang;
                }

                foreach ($activity->description as $description) {
                    $description_lang = (string) $description->attributes($xml_namespace)->{'lang'};
                    $description_langs[] = ($description_lang === '') ? $default_lang : $description_lang;
                }

                $all_langs = array_unique(array_merge($description_langs, $title_langs));

                // Record the activity once for each recipient-country language
                // that is used on a title or description. Empty/non-string
                // look-ups are ignored and the comparison is strict, so an
                // unknown country cannot "match" an undeclared language.
                foreach ($country_langs as $lang) {
                    if (is_string($lang) && $lang !== '' && in_array($lang, $all_langs, true)) {
                        $activies_in_country_lang[$hierarchy][] = $activity_id;
                    }
                }
            } //end foreach activity
        } //end while
        closedir($handle);
    }

    // Distinct hierarchy levels, in ascending order and re-indexed from 0.
    $found_hierarchies = array_unique($found_hierarchies);
    sort($found_hierarchies);

    return array(
        "no-activities" => $no_activities,
        "activities_with_at_least_one" => $activities_with_at_least_one,
        "no_activity_dates" => $no_activity_dates,
        "activity_by_year" => $activity_by,
        "hierarchies" => $found_hierarchies,
        "activities_with_attribute" => $activities_with_attribute,
        "document_links" => $document_links,
        "result_element" => $result_element,
        "conditions" => $conditions,
        "participating_org_accountable" => $participating_org_accountable,
        "participating_org_implementing" => $participating_org_implementing,
        "budget" => $budget,
        "identifiers" => $identifiers,
        "transaction_type_commitment" => $transaction_type_commitment,
        "transaction_type_disbursement" => $transaction_type_disbursement,
        "transaction_type_expenditure" => $transaction_type_expenditure,
        "no_disbursements" => $no_disbursements,
        "no_tracable_transactions" => $no_tracable_transactions,
        "no_incoming_funds" => $no_incoming_funds,
        "activities_with_location" => $activities_with_location,
        "activities_with_coordinates" => $activities_with_coordinates,
        "activities_with_adminstrative" => $activities_with_adminstrative,
        "activities_sector_assumed_dac" => $activities_sector_assumed_dac,
        "activities_sector_declared_dac" => $activities_sector_declared_dac,
        "most_recent" => $most_recent,
        "activies_in_country_lang" => $activies_in_country_lang
    );
}
if ($encoding != FALSE) { $basic['DetectEncoding'] = $encoding; } else { $basic['DetectEncoding'] = "Encoding: Not detected"; } //Activty or Organisation specific tests if (xml_child_exists($xml, "//iati-activity")) { //ignore organisation files $checking_activity_file = true; $basic['activities'] = count($xml->xpath("//iati-activity")); //$generated = $xml->attributes()->{'generated-datetime'}; //$version = $xml->attributes()->version; //$activities = count($xml->xpath("//iati-activity")); $hierarchies = $xml->xpath("//@hierarchy"); $basic['hierarchies'] = get_values($hierarchies, "int"); } elseif (xml_child_exists($xml, "//iati-organisation")) { $checking_organisation_file = true; $org_identifier = $xml->xpath("//iati-identifier"); $basic['org_iati_identifier'] = (string) $org_identifier[0]; //print_r($xml->xpath("//name")); die; $org_name = $xml->xpath("//name"); //a simplexml object $name = (string) $org_name[0]; $basic['org_name'] = $name; //$basic['org_name'] = $basic['org_name']->0; $org_ref = $xml->xpath("//reporting-org/@ref"); $basic['org_reporting_org_ref'] = (string) $org_ref[0]; $basic['org_recipient_country_budget'] = count($xml->xpath("//recipient-country-budget")); $basic['org_recipient_org_budget'] = count($xml->xpath("//recipient-org-budget")); $basic['org_total_budget'] = count($xml->xpath("//total-budget")); $basic['org_document_link'] = count($xml->xpath("//document-link"));