/**
 * Export a single structure
 *
 * For every structural unit whose parent is $parent_id (within the current edition), write
 * each law attached to that unit as JSON, plain text, and XML files beneath the edition's
 * downloads directory, then recurse into the unit so that its own children are exported in
 * the same way.
 *
 * When INCLUDES_REPEALED is TRUE, laws whose "repealed" metadata is anything other than "n"
 * (or missing) are left out of the export.
 *
 * @param int|null $parent_id ID of the structural unit whose children should be exported;
 *                            NULL selects the units that have no parent.
 * @return void
 */
function export_structure($parent_id) {

	/*
	 * Define the location of the downloads directory.
	 */
	$downloads_dir = WEB_ROOT . '/downloads/';
	$downloads_dir .= $this->edition['slug'] . '/';

	$structure_sql = ' SELECT structure_unified.*'
		. ' FROM structure'
		. ' LEFT JOIN structure_unified'
		. ' ON structure.id = structure_unified.s1_id';

	$structure_args = array();
	if (isset($parent_id)) {
		$structure_sql .= ' WHERE parent_id = :parent_id';
		$structure_args[':parent_id'] = $parent_id;
	} else {
		$structure_sql .= ' WHERE parent_id IS NULL';
	}

	$structure_sql .= ' AND edition_id = :edition_id';
	$structure_args[':edition_id'] = $this->edition_id;

	$structure_statement = $this->db->prepare($structure_sql, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
	$structure_result = $structure_statement->execute($structure_args);

	if ($structure_result === FALSE) {
		/*
		 * execute() only hands back a boolean, so the failure details have to be read from
		 * the statement itself.
		 */
		$error_info = $structure_statement->errorInfo();
		echo '<p>' . $structure_sql . '</p>';
		echo '<p>' . (isset($error_info[2]) ? $error_info[2] : 'Unknown database error') . '</p>';
		return;
	}

	/*
	 * Fetch one row at a time, rather than the whole result set, to save memory.
	 */
	while ($item = $structure_statement->fetch(PDO::FETCH_ASSOC)) {

		/*
		 * Because of the LEFT JOIN, a structure row without a structure_unified record comes
		 * back with every column NULL. Recursing with a NULL ID would select the top level
		 * again and never terminate, so skip such rows.
		 */
		if (!isset($item['s1_id'])) {
			continue;
		}

		/*
		 * Figure out the URL for this structural unit by iterating through the "identifier"
		 * columns in this row.
		 */
		$identifier_parts = array();
		foreach ($item as $key => $value) {
			if (preg_match('/s[0-9]_identifier/', $key) === 1) {
				/*
				 * Higher-level structural elements (e.g., titles) will have blank columns in
				 * structure_unified, so we want to omit any blank values. Because a valid
				 * structural unit identifier is "0" (Virginia does this), we check the string
				 * length, rather than using empty(). The cast keeps NULL columns from being
				 * passed to strlen().
				 */
				$value = (string) $value;
				if (strlen($value) > 0) {
					$identifier_parts[] = urlencode($value);
				}
			}
		}
		$identifier_parts = array_reverse($identifier_parts);
		$token = implode('/', $identifier_parts);

		/*
		 * This is slightly different from how we handle permalinks since we don't want to
		 * overwrite files if current has changed.
		 */
		$url = '/';
		if (defined('LAW_LONG_URLS') && LAW_LONG_URLS === TRUE) {
			$url .= $token . '/';
		}

		/*
		 * Now we can use our data to build the child law identifiers
		 */
		if (INCLUDES_REPEALED !== TRUE) {
			$laws_sql = ' SELECT id, structure_id, section AS section_number, catch_line'
				. ' FROM laws'
				. ' WHERE structure_id = :s_id'
				. ' AND edition_id = :edition_id'
				. ' ORDER BY order_by, section';
		} else {
			$laws_sql = ' SELECT laws.id, laws.structure_id, laws.section AS section_number, laws.catch_line'
				. ' FROM laws'
				. ' LEFT OUTER JOIN laws_meta'
				. ' ON laws_meta.law_id = laws.id'
				. ' AND laws_meta.meta_key = "repealed"'
				. ' WHERE structure_id = :s_id'
				. ' AND (laws_meta.meta_value = "n" OR laws_meta.meta_value IS NULL)'
				. ' AND edition_id = :edition_id'
				. ' ORDER BY order_by, section';
		}
		$laws_args = array(
			':s_id' => $item['s1_id'],
			':edition_id' => $this->edition_id,
		);

		$laws_statement = $this->db->prepare($laws_sql, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
		$laws_result = $laws_statement->execute($laws_args);

		/*
		 * NOTE(review): rowCount() is not guaranteed to report the size of a SELECT result on
		 * every PDO driver -- confirm that the driver in use supports this.
		 */
		if ($laws_result !== FALSE && $laws_statement->rowCount() > 0) {

			/*
			 * Establish the path of our code JSON storage directory.
			 */
			$json_dir = $downloads_dir . 'code-json' . $url;
			$this->mkdir($json_dir);

			/*
			 * Set a flag telling us that we may write JSON.
			 */
			$write_json = TRUE;

			/*
			 * Establish the path of our code text storage directory.
			 */
			$text_dir = $downloads_dir . 'code-text' . $url;
			$this->mkdir($text_dir);

			/*
			 * Set a flag telling us that we may write text.
			 */
			$write_text = TRUE;

			/*
			 * Establish the path of our code XML storage directory.
			 */
			$xml_dir = $downloads_dir . 'code-xml' . $url;
			$this->mkdir($xml_dir);

			/*
			 * Set a flag telling us that we may write XML.
			 */
			$write_xml = TRUE;

			/*
			 * Create a new instance of the Parser class, so that we have access to its
			 * get_structure_labels() method.
			 */
			$parser = new Parser(
				array(
					'db' => $this->db,
					'logger' => $this->logger,
					'downloads_dir' => $this->downloads_dir,
					'downloads_url' => $this->downloads_url,
				)
			);

			/*
			 * Create a new instance of the class that handles information about individual laws.
			 */
			$laws = new Law();

			/*
			 * Iterate through every section number, to pass to the Laws class.
			 */
			while ($section = $laws_statement->fetch(PDO::FETCH_OBJ)) {

				/*
				 * Instruct the Law class on what, specifically, it should retrieve.
				 */
				$laws->config->get_text = TRUE;
				$laws->config->get_structure = TRUE;
				$laws->config->get_amendment_attempts = FALSE;
				$laws->config->get_court_decisions = TRUE;
				$laws->config->get_metadata = TRUE;
				$laws->config->get_references = TRUE;
				$laws->config->get_related_laws = TRUE;

				/*
				 * Pass the requested section number to Law.
				 */
				$laws->law_id = $section->id;
				$laws->edition_id = $this->edition_id;

				unset($law, $section);

				/*
				 * Get a list of all of the basic information that we have about this section.
				 */
				$law = $laws->get_law();
				if ($law === FALSE) {
					continue;
				}

				/*
				 * Eliminate colons from section numbers, since some OSes can't handle colons in
				 * filenames.
				 */
				$filename = str_replace(':', '_', $law->section_number);

				/*
				 * Store the JSON file. A failed write abandons the remaining laws of this
				 * structural unit.
				 */
				if ($write_json === TRUE) {
					$json_path = $json_dir . $filename . '.json';
					$success = file_put_contents($json_path, json_encode($law));
					if ($success === FALSE) {
						$this->logger->message('Could not write law JSON files "' . $json_path . '"', 9);
						break;
					} else {
						$this->logger->message('Wrote file "' . $json_path . '"', 1);
					}
				}

				/*
				 * Store the text file.
				 */
				if ($write_text === TRUE) {
					$text_path = $text_dir . $filename . '.txt';
					$success = file_put_contents($text_path, $law->plain_text);
					if ($success === FALSE) {
						$this->logger->message('Could not write law text files "' . $text_path . '"', 9);
						break;
					} else {
						$this->logger->message('Wrote file "' . $text_path . '"', 1);
					}
				}

				/*
				 * Store the XML file.
				 */
				if ($write_xml === TRUE) {

					/*
					 * We need to massage the $law object into matching the State Decoded
					 * XML standard. The first step towards this is removing unnecessary
					 * elements.
					 */
					unset($law->plain_text);
					unset($law->structure_contents);
					unset($law->next_section);
					unset($law->previous_section);
					unset($law->amendment_years);
					unset($law->dublin_core);
					unset($law->section_id);
					unset($law->structure_id);
					unset($law->edition_id);
					unset($law->full_text);
					unset($law->formats);
					unset($law->html);
					$law->structure = $law->ancestry;
					unset($law->ancestry);
					$law->referred_to_by = $law->references;
					unset($law->references);

					/*
					 * Encode all entities as their proper Unicode characters, save for the
					 * few that are necessary in XML.
					 */
					$law = html_entity_decode_object($law);

					/*
					 * Quickly turn this into an XML string.
					 */
					$xml = new SimpleXMLElement('<law />');
					object_to_xml($law, $xml);
					$xml = $xml->asXML();

					/*
					 * Load the XML string into DOMDocument.
					 */
					$dom = new DOMDocument();
					$dom->loadXML($xml);

					/*
					 * Simplify every reference, stripping them down to the cited sections.
					 */
					$referred_to_by = $dom->getElementsByTagName('referred_to_by');
					if (!empty($referred_to_by) && $referred_to_by->length > 0) {
						$referred_to_by = $referred_to_by->item(0);
						$references = $referred_to_by->getElementsByTagName('unit');

						/*
						 * Iterate backwards through our elements, because the node list is
						 * live and we remove nodes from it as we go.
						 */
						for ($i = $references->length - 1; $i >= 0; $i--) {
							$reference = $references->item($i);

							/*
							 * Save the section number.
							 */
							$section_number = trim($reference->getElementsByTagName('section_number')->item(0)->nodeValue);

							/*
							 * Create a new element, named "reference," which contains only
							 * the section number. The value goes in as a text node, since
							 * createElement() does not escape the value it is given.
							 */
							$element = $dom->createElement('reference');
							$element->appendChild($dom->createTextNode($section_number));
							$reference->parentNode->insertBefore($element, $reference);

							/*
							 * Remove the "unit" node.
							 */
							$reference->parentNode->removeChild($reference);
						}
					}

					/*
					 * Simplify and reorganize every structural unit.
					 */
					$structure = $dom->getElementsByTagName('structure');
					if (!empty($structure) && $structure->length > 0) {
						$structure = $structure->item(0);
						$structural_units = $structure->getElementsByTagName('unit');

						/*
						 * The list of labels does not change while we walk the units, so
						 * look it up once rather than once per unit.
						 */
						$structure_labels = $parser->get_structure_labels();

						/*
						 * Iterate backwards through our elements.
						 */
						for ($i = $structural_units->length - 1; $i >= 0; $i--) {
							$unit = $structural_units->item($i);

							/*
							 * Add the "level" attribute. A label that is not found in the
							 * list yields FALSE, which the addition turns into level 1.
							 */
							$label = trim(strtolower($unit->getAttribute('label')));
							$level = $dom->createAttribute('level');
							$level->value = array_search($label, $structure_labels) + 1;
							$unit->appendChild($level);

							/*
							 * Add the "identifier" attribute.
							 */
							$identifier = $dom->createAttribute('identifier');
							$identifier->value = trim($unit->getElementsByTagName('identifier')->item(0)->nodeValue);
							$unit->appendChild($identifier);

							/*
							 * Remove the "id" attribute from <unit>.
							 */
							$unit->removeAttribute('id');

							/*
							 * Store the name of this structural unit as the contents of <unit>.
							 */
							$unit->nodeValue = trim($unit->getElementsByTagName('name')->item(0)->nodeValue);

							/*
							 * Save these changes. Appending moves the unit to the end of
							 * <structure>.
							 */
							$structure->appendChild($unit);
						}
					}

					/*
					 * Rename text units as text sections.
					 */
					$text = $dom->getElementsByTagName('text');
					if (!empty($text) && $text->length > 0) {
						$text = $text->item(0);
						$text_units = $text->getElementsByTagName('unit');

						/*
						 * Iterate backwards through our elements.
						 */
						for ($i = $text_units->length - 1; $i >= 0; $i--) {
							$text_unit = $text_units->item($i);
							renameElement($text_unit, 'section');
						}
					}

					/*
					 * Save the cleaned-up XML to the filesystem.
					 */
					$success = file_put_contents($xml_dir . $filename . '.xml', $dom->saveXML());
					if ($success === FALSE) {
						$this->logger->message('Could not write law XML files', 9);
						break;
					}
				}
			} // end the while() law iterator
		} // end the $laws condition

		/*
		 * Descend into this structural unit to export its children.
		 */
		$this->export_structure($item['s1_id']);
	} // end the while() structure iterator
}
/**
 * Walk an object's properties and run every string through decode_entities().
 *
 * Properties that are themselves objects are processed recursively. Values of any other
 * type (numbers, booleans, NULL, arrays) are left untouched. The object is modified in
 * place and also returned.
 *
 * @param object $obj The object whose string properties should be decoded.
 * @return object The same object that was passed in.
 */
function html_entity_decode_object($obj)
{
	foreach ($obj as $property => $current) {
		if (is_string($current)) {
			$obj->{$property} = decode_entities($current);
			continue;
		}

		if (is_object($current)) {
			$obj->{$property} = html_entity_decode_object($current);
		}
	}

	return $obj;
}