/**
 * Collect per-hierarchy statistics about the IATI activities found in the
 * XML files of a directory.
 *
 * Every regular file in $dir is parsed (organisation files are skipped) and
 * each <iati-activity> is inspected. Most of the returned arrays are keyed by
 * the activity's hierarchy level (0 when the attribute is absent or empty)
 * and hold the <iati-identifier> text of every activity that matched a check;
 * one entry is appended per matching element, so identifiers may repeat.
 *
 * Differences from the previous revision that callers may notice:
 *  - identifiers are "good" only when they really start with a non-empty
 *    reporting-org ref (the old comparison accepted almost everything);
 *  - the country-language check runs once per activity instead of once per
 *    <activity-date>, so activities without dates are no longer left out;
 *  - a DOCTYPE found by the DOM check now returns FALSE, like the textual
 *    check, instead of throwing.
 *
 * @param string      $dir         Directory to scan; a trailing separator is optional.
 * @param string|bool $single_file When truthy, only the file with this name is processed.
 *
 * @return array|false Map of counter name => data, or FALSE as soon as a file
 *                     containing a DOCTYPE declaration is encountered.
 */
function count_attributes($dir, $single_file = FALSE)
{
    // Never filled in by this function; kept so the result keeps its historical keys.
    $no_activity_dates = array();
    $activities_with_at_least_one = array();
    $activity_by = array();

    $no_activities = array();
    $found_hierarchies = array();
    $activities_with_attribute = array();
    $document_links = array();
    $result_element = array();
    $conditions = array();
    $participating_org_accountable = array();
    $participating_org_implementing = array();
    $budget = array();
    $identifiers = array();
    $transaction_type_commitment = array();
    $transaction_type_disbursement = array();
    $transaction_type_expenditure = array();
    $no_disbursements = array();
    $no_incoming_funds = array();
    $no_tracable_transactions = array();
    $most_recent = array();
    $activities_with_location = array();
    $activities_with_coordinates = array();
    $activities_with_adminstrative = array();
    $activities_sector_assumed_dac = array();
    $activities_sector_declared_dac = array();
    $activies_in_country_lang = array();
    // Suffix used to build a distinct dummy id for each activity lacking an identifier.
    $missing_id_counter = 0;

    $handle = opendir($dir);
    if ($handle) {
        // Accept $dir with or without a trailing separator.
        $last_char = substr($dir, -1);
        $base = ($last_char === '/' || $last_char === '\\') ? $dir : $dir . '/';

        while (false !== ($file = readdir($handle))) {
            if ($file == "." || $file == "..") {
                continue;
            }
            if ($single_file && $file != $single_file) {
                // Only the requested file is wanted (handy for testing one file).
                continue;
            }

            $xml = count_attributes_load_xml($base . $file);
            if ($xml === false) {
                // DOCTYPE detected: abort the whole run, but do not leak the handle.
                closedir($handle);
                return FALSE;
            }
            if ($xml === null) {
                // Not a readable, parseable XML file.
                continue;
            }
            if (xml_child_exists($xml, "//iati-organisation")) {
                // Organisation files are excluded.
                continue;
            }

            foreach ($xml->{"iati-activity"} as $activity) {
                $hierarchy = (string) $activity['hierarchy'];
                if (!$hierarchy) {
                    // Missing, empty or "0" all count as the top level.
                    $hierarchy = 0;
                }
                $found_hierarchies[] = $hierarchy;

                if (!isset($no_activities[$hierarchy])) {
                    $no_activities[$hierarchy] = 0;
                }
                $no_activities[$hierarchy]++;
                if (!isset($no_disbursements[$hierarchy])) {
                    $no_disbursements[$hierarchy] = 0;
                }
                if (!isset($no_incoming_funds[$hierarchy])) {
                    $no_incoming_funds[$hierarchy] = 0;
                }
                if (!isset($no_tracable_transactions[$hierarchy])) {
                    $no_tracable_transactions[$hierarchy] = 0;
                }

                // Identifier text as published (empty string when the element is absent).
                $activity_id = (string) $activity->{'iati-identifier'};

                // Presence of <document-link>, <conditions>, <result>.
                if (count($activity->{"document-link"}) > 0) {
                    $document_links[$hierarchy][] = $activity_id;
                }
                if (count($activity->conditions) > 0) {
                    $conditions[$hierarchy][] = $activity_id;
                }
                if (count($activity->result) > 0) {
                    $result_element[$hierarchy][] = $activity_id;
                }

                // Participating organisations, by role.
                foreach ($activity->{"participating-org"} as $participating_org) {
                    $role = (string) $participating_org['role'];
                    if ($role == "Implementing") {
                        $participating_org_implementing[$hierarchy][] = $activity_id;
                    }
                    if ($role == "Accountable") {
                        $participating_org_accountable[$hierarchy][] = $activity_id;
                    }
                }

                // Budget / planned disbursement.
                if (count($activity->budget) > 0 || count($activity->{"planned-disbursement"}) > 0) {
                    $budget[$hierarchy][] = $activity_id;
                }

                // Unique identifier check: the identifier must start with the
                // reporting-org ref. Activities without an identifier get a
                // dummy one so they are counted in the "bad" pile.
                if (isset($activity->{'iati-identifier'})) {
                    $iati_identifier = $activity_id;
                } else {
                    $iati_identifier = "noIdentifierGiven" . $missing_id_counter;
                    $missing_id_counter++;
                }
                $reporting_org_ref = '';
                if (isset($activity->{'reporting-org'}['ref'])) {
                    $reporting_org_ref = (string) $activity->{'reporting-org'}['ref'];
                }
                // strpos(haystack, needle): strict === 0 so that "not found"
                // (false) is not mistaken for "found at offset 0".
                if ($reporting_org_ref !== '' && strpos($iati_identifier, $reporting_org_ref) === 0) {
                    $identifiers[$hierarchy]["good"][] = $iati_identifier;
                } else {
                    $identifiers[$hierarchy]["bad"][] = $iati_identifier;
                }

                // Financial transactions.
                foreach ($activity->transaction as $transaction) {
                    if (!isset($transaction->{'transaction-type'})) {
                        continue;
                    }
                    $transaction_type = (string) $transaction->{'transaction-type'}['code'];
                    switch ($transaction_type) {
                        case "C":
                            $transaction_type_commitment[$hierarchy][] = $activity_id;
                            break;
                        case "D":
                            $transaction_type_disbursement[$hierarchy][] = $activity_id;
                            $no_disbursements[$hierarchy]++;
                            // A disbursement is traceable when it names its receiver-org.
                            if (isset($transaction->{"receiver-org"})) {
                                $no_tracable_transactions[$hierarchy]++;
                            }
                            break;
                        case "IF":
                            $no_incoming_funds[$hierarchy]++;
                            // Incoming funds are traceable when they name their provider-org.
                            if (isset($transaction->{"provider-org"})) {
                                $no_tracable_transactions[$hierarchy]++;
                            }
                            break;
                        case "E":
                            $transaction_type_expenditure[$hierarchy][] = $activity_id;
                            break;
                    }
                }

                // Locations.
                if (count($activity->location) > 0) {
                    $activities_with_location[$hierarchy][] = $activity_id;
                    foreach ($activity->location as $location) {
                        if (isset($location->coordinates)) {
                            $activities_with_coordinates[$hierarchy][] = $activity_id;
                        }
                        if (isset($location->administrative)) {
                            // Read afresh for every location so values never
                            // leak from a previous location or activity.
                            $adm1 = (string) $location->administrative['adm1'];
                            $adm2 = (string) $location->administrative['adm2'];
                            if ($adm1 !== '' || $adm2 !== '') {
                                $activities_with_adminstrative[$hierarchy][] = $activity_id;
                            }
                        }
                    }
                }

                // Sectors: no vocabulary attribute is assumed to mean DAC.
                foreach ($activity->sector as $sector) {
                    if (!isset($sector['vocabulary'])) {
                        $activities_sector_assumed_dac[$hierarchy][] = $activity_id;
                    } elseif ((string) $sector['vocabulary'] == "DAC") {
                        $activities_sector_declared_dac[$hierarchy][] = $activity_id;
                    }
                }

                // Most recent last-updated-datetime seen at this hierarchy level.
                if (!isset($most_recent[$hierarchy])) {
                    $most_recent[$hierarchy] = 0;
                }
                $last_updated = strtotime((string) $activity['last-updated-datetime']);
                if ($last_updated !== false && $last_updated > $most_recent[$hierarchy]) {
                    $most_recent[$hierarchy] = $last_updated;
                }

                // Activity dates, with actual/planned folded into start/end.
                foreach ($activity->{"activity-date"} as $activity_date) {
                    $type = (string) $activity_date['type'];
                    if ($type == "start-actual" || $type == "start-planned") {
                        $type = "start";
                    } elseif ($type == "end-actual" || $type == "end-planned") {
                        $type = "end";
                    }
                    $activities_with_attribute[$hierarchy][$type][] = $activity_id;
                }

                // Languages: is a title or description available in the
                // language looked up for one of the recipient countries?
                // Elements without xml:lang inherit the activity's xml:lang.
                $default_lang = (string) $activity->attributes('http://www.w3.org/XML/1998/namespace')->{'lang'};
                $all_langs = array_unique(array_merge(
                    count_attributes_declared_langs($activity->description, $default_lang),
                    count_attributes_declared_langs($activity->title, $default_lang)
                ));
                foreach ($activity->{"recipient-country"} as $country) {
                    $country_lang = look_up_lang((string) $country['code']);
                    // A falsy lookup result is not a language; skipping it avoids
                    // a spurious loose match against an empty default language.
                    // NOTE(review): look_up_lang() is defined elsewhere - confirm what it returns for unknown codes.
                    if ($country_lang && in_array($country_lang, $all_langs)) {
                        $activies_in_country_lang[$hierarchy][] = $activity_id;
                    }
                }
            }
        }
        closedir($handle);
    }

    $found_hierarchies = array_unique($found_hierarchies);
    sort($found_hierarchies);

    return array(
        "no-activities" => $no_activities,
        "activities_with_at_least_one" => $activities_with_at_least_one,
        "no_activity_dates" => $no_activity_dates,
        "activity_by_year" => $activity_by,
        "hierarchies" => $found_hierarchies,
        "activities_with_attribute" => $activities_with_attribute,
        "document_links" => $document_links,
        "result_element" => $result_element,
        "conditions" => $conditions,
        "participating_org_accountable" => $participating_org_accountable,
        "participating_org_implementing" => $participating_org_implementing,
        "budget" => $budget,
        "identifiers" => $identifiers,
        "transaction_type_commitment" => $transaction_type_commitment,
        "transaction_type_disbursement" => $transaction_type_disbursement,
        "transaction_type_expenditure" => $transaction_type_expenditure,
        "no_disbursements" => $no_disbursements,
        "no_tracable_transactions" => $no_tracable_transactions,
        "no_incoming_funds" => $no_incoming_funds,
        "activities_with_location" => $activities_with_location,
        "activities_with_coordinates" => $activities_with_coordinates,
        "activities_with_adminstrative" => $activities_with_adminstrative,
        "activities_sector_assumed_dac" => $activities_sector_assumed_dac,
        "activities_sector_declared_dac" => $activities_sector_declared_dac,
        "most_recent" => $most_recent,
        "activies_in_country_lang" => $activies_in_country_lang,
    );
}

/**
 * Safely load one XML file for count_attributes().
 *
 * Guards against XML entity injection by rejecting any document that carries
 * a DOCTYPE, first with a quick textual scan and then by inspecting the
 * parsed DOM.
 *
 * @param string $path Path of the file to load.
 *
 * @return SimpleXMLElement|null|false The document; NULL when the path is not
 *         a readable, non-empty, parseable XML file (or its root element is
 *         empty); FALSE when a DOCTYPE was detected.
 */
function count_attributes_load_xml($path)
{
    if (!is_file($path)) {
        return null;
    }
    $raw = file_get_contents($path);
    if ($raw === false || trim($raw) === '') {
        return null;
    }

    // Quick detection of a DOCTYPE, ignoring any whitespace tricks.
    $collapsed = preg_replace("/[[:space:]]/", '', $raw);
    if (preg_match("/<!DOCTYPE/i", $collapsed)) {
        return false;
    }

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
    // external entity loading is already off by default; only toggle it on
    // older versions, and always restore the previous setting.
    $toggle_loader = PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');
    $previous = $toggle_loader ? libxml_disable_entity_loader(true) : null;

    $dom = new DOMDocument();
    $loaded = $dom->loadXML($raw);
    $has_doctype = false;
    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            $has_doctype = true;
            break;
        }
    }

    if ($toggle_loader) {
        libxml_disable_entity_loader($previous);
    }

    if ($has_doctype) {
        return false;
    }
    if (!$loaded) {
        return null;
    }
    $xml = simplexml_import_dom($dom);
    // An element with no children and no attributes is falsy; treat it as "nothing to count".
    return $xml ? $xml : null;
}

/**
 * List the language of each given element for count_attributes().
 *
 * @param SimpleXMLElement $elements     Elements to inspect (e.g. all <title> children).
 * @param string           $default_lang Language used for elements without their own xml:lang.
 *
 * @return array One language string per element, in document order.
 */
function count_attributes_declared_langs($elements, $default_lang)
{
    $langs = array();
    foreach ($elements as $element) {
        $lang = (string) $element->attributes('http://www.w3.org/XML/1998/namespace')->{'lang'};
        $langs[] = ($lang === '') ? $default_lang : $lang;
    }
    return $langs;
}
Exemplo n.º 2
0
 if ($encoding != FALSE) {
     $basic['DetectEncoding'] = $encoding;
 } else {
     $basic['DetectEncoding'] = "Encoding: Not detected";
 }
 //Activty or Organisation specific tests
 if (xml_child_exists($xml, "//iati-activity")) {
     //ignore organisation files
     $checking_activity_file = true;
     $basic['activities'] = count($xml->xpath("//iati-activity"));
     //$generated = $xml->attributes()->{'generated-datetime'};
     //$version = $xml->attributes()->version;
     //$activities = count($xml->xpath("//iati-activity"));
     $hierarchies = $xml->xpath("//@hierarchy");
     $basic['hierarchies'] = get_values($hierarchies, "int");
 } elseif (xml_child_exists($xml, "//iati-organisation")) {
     $checking_organisation_file = true;
     $org_identifier = $xml->xpath("//iati-identifier");
     $basic['org_iati_identifier'] = (string) $org_identifier[0];
     //print_r($xml->xpath("//name")); die;
     $org_name = $xml->xpath("//name");
     //a simplexml object
     $name = (string) $org_name[0];
     $basic['org_name'] = $name;
     //$basic['org_name'] = $basic['org_name']->0;
     $org_ref = $xml->xpath("//reporting-org/@ref");
     $basic['org_reporting_org_ref'] = (string) $org_ref[0];
     $basic['org_recipient_country_budget'] = count($xml->xpath("//recipient-country-budget"));
     $basic['org_recipient_org_budget'] = count($xml->xpath("//recipient-org-budget"));
     $basic['org_total_budget'] = count($xml->xpath("//total-budget"));
     $basic['org_document_link'] = count($xml->xpath("//document-link"));