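/**
     * Export the laws of every structural unit whose parent is $parent_id, then recurse into
     * each of those units. Every law found is written out as a JSON, a plain-text, and an XML
     * file beneath the current edition's downloads directory.
     *
     * @param mixed $parent_id ID of the parent structural unit, or NULL for top-level units.
     */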
    function export_structure($parent_id)
    {
        /*
         * Overview: select every structural unit of the current edition whose parent is
         * $parent_id (or which has no parent, when $parent_id is NULL). For each unit, fetch its
         * laws and write each one to disk as JSON, plain text and XML, then recurse into the
         * unit so that its children are exported as well. Nothing is returned; failures to
         * write a file are logged and stop the export of the current unit's laws.
         */

        /*
         * Define the location of the downloads directory.
         */
        $downloads_dir = WEB_ROOT . '/downloads/';
        $downloads_dir .= $this->edition['slug'] . '/';
        $structure_sql = '	SELECT structure_unified.*
							FROM structure
							LEFT JOIN structure_unified
								ON structure.id = structure_unified.s1_id';
        $structure_args = array();
        if (isset($parent_id)) {
            $structure_sql .= ' WHERE parent_id = :parent_id';
            $structure_args[':parent_id'] = $parent_id;
        } else {
            $structure_sql .= ' WHERE parent_id IS NULL';
        }
        $structure_sql .= ' AND edition_id = :edition_id';
        $structure_args[':edition_id'] = $this->edition_id;
        $structure_statement = $this->db->prepare($structure_sql, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
        $structure_result = $structure_statement->execute($structure_args);
        if ($structure_result === FALSE) {
            /*
             * execute() only returned a boolean, so the error details have to be read from the
             * statement. Index 2 of errorInfo() holds the driver-specific error message.
             */
            $error_info = $structure_statement->errorInfo();
            $error_message = isset($error_info[2]) ? $error_info[2] : '';
            echo '<p>' . $structure_sql . '</p>';
            echo '<p>' . $error_message . '</p>';
            return;
        }
        /*
         * Fetch one row at a time, as an associative array, to save memory.
         */
        while ($item = $structure_statement->fetch(PDO::FETCH_ASSOC)) {
            /*
             * Figure out the URL for this structural unit by iterating through the "identifier"
             * columns in this row.
             */
            $identifier_parts = array();
            foreach ($item as $key => $value) {
                if (preg_match('/s[0-9]_identifier/', $key) == 1) {
                    /*
                     * Higher-level structural elements (e.g., titles) will have blank columns in
                     * structure_unified, so we want to omit any blank values. Because a valid
                     * structural unit identifier is "0" (Virginia does this), we check the string
                     * length, rather than using empty().
                     */
                    if (strlen($value) > 0) {
                        $identifier_parts[] = urlencode($value);
                    }
                }
            }
            $identifier_parts = array_reverse($identifier_parts);
            $token = implode('/', $identifier_parts);
            /*
             * This is slightly different from how we handle permalinks since we don't want to
             * overwrite files if current has changed.
             */
            $url = '/';
            if (defined('LAW_LONG_URLS') && LAW_LONG_URLS === TRUE) {
                $url .= $token . '/';
            }
            /*
             * Now we can use our data to build the child law identifiers. When
             * INCLUDES_REPEALED is TRUE, laws flagged as repealed in laws_meta are filtered out.
             */
            if (INCLUDES_REPEALED !== TRUE) {
                $laws_sql = '	SELECT id, structure_id, section AS section_number, catch_line
								FROM laws
								WHERE structure_id = :s_id
								AND edition_id = :edition_id
								ORDER BY order_by, section';
            } else {
                $laws_sql = '	SELECT laws.id, laws.structure_id, laws.section AS section_number,
								laws.catch_line
								FROM laws
								LEFT OUTER JOIN laws_meta
									ON laws_meta.law_id = laws.id AND laws_meta.meta_key = "repealed"
								WHERE structure_id = :s_id
								AND (laws_meta.meta_value = "n" OR laws_meta.meta_value IS NULL)
								AND edition_id = :edition_id
								ORDER BY order_by, section';
            }
            $laws_args = array(':s_id' => $item['s1_id'], ':edition_id' => $this->edition_id);
            $laws_statement = $this->db->prepare($laws_sql, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
            $laws_result = $laws_statement->execute($laws_args);
            /*
             * NOTE(review): rowCount() on a SELECT is driver-dependent according to the PDO
             * documentation; this relies on the driver in use reporting the row count.
             */
            if ($laws_result !== FALSE && $laws_statement->rowCount() > 0) {
                /*
                 * Establish the path of our code JSON storage directory.
                 */
                $json_dir = $downloads_dir . 'code-json' . $url;
                $this->mkdir($json_dir);
                /*
                 * Set a flag telling us that we may write JSON.
                 */
                $write_json = TRUE;
                /*
                 * Establish the path of our code text storage directory.
                 */
                $text_dir = $downloads_dir . 'code-text' . $url;
                $this->mkdir($text_dir);
                /*
                 * Set a flag telling us that we may write text.
                 */
                $write_text = TRUE;
                /*
                 * Establish the path of our code XML storage directory.
                 */
                $xml_dir = $downloads_dir . 'code-xml' . $url;
                $this->mkdir($xml_dir);
                /*
                 * Set a flag telling us that we may write XML.
                 */
                $write_xml = TRUE;
                /*
                 * Create a new instance of the Parser class, so that we have access to its
                 * get_structure_labels() method.
                 */
                $parser = new Parser(array('db' => $this->db, 'logger' => $this->logger, 'downloads_dir' => $this->downloads_dir, 'downloads_url' => $this->downloads_url));
                /*
                 * Create a new instance of the class that handles information about individual laws.
                 */
                $laws = new Law();
                /*
                 * Iterate through every section number, to pass to the Laws class.
                 */
                while ($section = $laws_statement->fetch(PDO::FETCH_OBJ)) {
                    /*
                     * Instruct the Law class on what, specifically, it should retrieve.
                     */
                    $laws->config->get_text = TRUE;
                    $laws->config->get_structure = TRUE;
                    $laws->config->get_amendment_attempts = FALSE;
                    $laws->config->get_court_decisions = TRUE;
                    $laws->config->get_metadata = TRUE;
                    $laws->config->get_references = TRUE;
                    $laws->config->get_related_laws = TRUE;
                    /*
                     * Pass the requested section number to Law.
                     */
                    $laws->law_id = $section->id;
                    $laws->edition_id = $this->edition_id;
                    unset($law, $section);
                    /*
                     * Get a list of all of the basic information that we have about this section.
                     */
                    $law = $laws->get_law();
                    if ($law !== FALSE) {
                        /*
                         * Eliminate colons from section numbers, since some OSes can't handle colons in
                         * filenames.
                         */
                        $filename = str_replace(':', '_', $law->section_number);
                        /*
                         * Store the JSON file.
                         */
                        if ($write_json === TRUE) {
                            $json_path = $json_dir . $filename . '.json';
                            $success = file_put_contents($json_path, json_encode($law));
                            if ($success === FALSE) {
                                $this->logger->message('Could not write law JSON files "' . $json_path . '"', 9);
                                break;
                            } else {
                                $this->logger->message('Wrote file "' . $json_path . '"', 1);
                            }
                        }
                        /*
                         * Store the text file. This has to happen before the XML step, which
                         * strips plain_text from $law.
                         */
                        if ($write_text === TRUE) {
                            $text_path = $text_dir . $filename . '.txt';
                            $success = file_put_contents($text_path, $law->plain_text);
                            if ($success === FALSE) {
                                /*
                                 * Log the path only; the severity must be the second argument.
                                 */
                                $this->logger->message('Could not write law text files "' . $text_path . '"', 9);
                                break;
                            } else {
                                $this->logger->message('Wrote file "' . $text_path . '"', 1);
                            }
                        }
                        /*
                         * Store the XML file.
                         */
                        if ($write_xml === TRUE) {
                            /*
                             * We need to massage the $law object into matching the State Decoded
                             * XML standard. The first step towards this is removing unnecessary
                             * elements.
                             */
                            unset($law->plain_text);
                            unset($law->structure_contents);
                            unset($law->next_section);
                            unset($law->previous_section);
                            unset($law->amendment_years);
                            unset($law->dublin_core);
                            unset($law->section_id);
                            unset($law->structure_id);
                            unset($law->edition_id);
                            unset($law->full_text);
                            unset($law->formats);
                            unset($law->html);
                            $law->structure = $law->ancestry;
                            unset($law->ancestry);
                            $law->referred_to_by = $law->references;
                            unset($law->references);
                            /*
                             * Encode all entities as their proper Unicode characters, save for the
                             * few that are necessary in XML.
                             */
                            $law = html_entity_decode_object($law);
                            /*
                             * Quickly turn this into an XML string.
                             */
                            $xml = new SimpleXMLElement('<law />');
                            object_to_xml($law, $xml);
                            $xml = $xml->asXML();
                            /*
                             * Load the XML string into DOMDocument.
                             */
                            $dom = new DOMDocument();
                            $dom->loadXML($xml);
                            /*
                             * Simplify every reference, stripping them down to the cited sections.
                             */
                            $referred_to_by = $dom->getElementsByTagName('referred_to_by');
                            if (!empty($referred_to_by) && $referred_to_by->length > 0) {
                                $referred_to_by = $referred_to_by->item(0);
                                $references = $referred_to_by->getElementsByTagName('unit');
                                /*
                                 * Iterate backwards through our elements, because the node list
                                 * is live and we remove nodes from it as we go.
                                 */
                                for ($i = $references->length; --$i >= 0;) {
                                    $reference = $references->item($i);
                                    /*
                                     * Save the section number.
                                     */
                                    $section_number = trim($reference->getElementsByTagName('section_number')->item(0)->nodeValue);
                                    /*
                                     * Create a new element, named "reference," which contains
                                     * only the section number.
                                     */
                                    $element = $dom->createElement('reference', $section_number);
                                    $reference->parentNode->insertBefore($element, $reference);
                                    /*
                                     * Remove the "unit" node.
                                     */
                                    $reference->parentNode->removeChild($reference);
                                }
                            }
                            /*
                             * Simplify and reorganize every structural unit.
                             */
                            $structure = $dom->getElementsByTagName('structure');
                            if (!empty($structure) && $structure->length > 0) {
                                $structure = $structure->item(0);
                                $structural_units = $structure->getElementsByTagName('unit');
                                /*
                                 * Iterate backwards through our elements.
                                 */
                                for ($i = $structural_units->length; --$i >= 0;) {
                                    $unit = $structural_units->item($i);
                                    /*
                                     * Add the "level" attribute: the 1-based position of this
                                     * unit's label within the parser's structure labels.
                                     */
                                    $label = trim(strtolower($unit->getAttribute('label')));
                                    $level = $dom->createAttribute('level');
                                    $level->value = array_search($label, $parser->get_structure_labels()) + 1;
                                    $unit->appendChild($level);
                                    /*
                                     * Add the "identifier" attribute.
                                     */
                                    $identifier = $dom->createAttribute('identifier');
                                    $identifier->value = trim($unit->getElementsByTagName('identifier')->item(0)->nodeValue);
                                    $unit->appendChild($identifier);
                                    /*
                                     * Remove the "id" attribute from <unit>.
                                     */
                                    $unit->removeAttribute('id');
                                    /*
                                     * Store the name of this structural unit as the contents of <unit>.
                                     */
                                    $unit->nodeValue = trim($unit->getElementsByTagName('name')->item(0)->nodeValue);
                                    /*
                                     * Save these changes.
                                     */
                                    $structure->appendChild($unit);
                                }
                            }
                            /*
                             * Rename text units as text sections.
                             */
                            $text = $dom->getElementsByTagName('text');
                            if (!empty($text) && $text->length > 0) {
                                $text = $text->item(0);
                                $text_units = $text->getElementsByTagName('unit');
                                /*
                                 * Iterate backwards through our elements.
                                 */
                                for ($i = $text_units->length; --$i >= 0;) {
                                    $text_unit = $text_units->item($i);
                                    renameElement($text_unit, 'section');
                                }
                            }
                            /*
                             * Save the cleaned-up XML to the filesystem.
                             */
                            $success = file_put_contents($xml_dir . $filename . '.xml', $dom->saveXML());
                            if ($success === FALSE) {
                                $this->logger->message('Could not write law XML files', 9);
                                break;
                            }
                        }
                    }
                    // end the $law exists condition
                }
                // end the while() law iterator
            }
            // end the $laws condition

            /*
             * Descend into this structural unit to export its children.
             */
            $this->export_structure($item['s1_id']);
        }
        // end the while() structure iterator
    }
// Example #2
// 0
/**
 * Walk an object's properties and decode the HTML entities found in its strings.
 *
 * String properties are replaced by the result of decode_entities(); properties that are
 * themselves objects are processed recursively. Every other kind of value (numbers, arrays,
 * NULL, ...) is left untouched. The object is modified in place.
 *
 * @param object $obj The object whose string properties should be decoded.
 * @return object The same object that was passed in.
 */
function html_entity_decode_object($obj)
{
    foreach ($obj as $property => $current) {
        if (is_string($current)) {
            $obj->{$property} = decode_entities($current);
            continue;
        }

        if (is_object($current)) {
            $obj->{$property} = html_entity_decode_object($current);
        }
    }

    return $obj;
}