/**
  * Validate a data.json catalog that is either fetched from a URL or passed in as a string.
  *
  * When a URL is given the headers are inspected first. Files whose reported size
  * exceeds the configured `max_remote_size` are archived to disk, converted to a
  * line-oriented file and validated in chunks of 200 lines against 'federal-v1.1'.
  * Everything else is validated in memory against $schema.
  *
  * @param string|null $datajson_url  URL to fetch; when empty only $datajson is used
  * @param string|null $datajson      Raw JSON text (replaced by the download when a URL is fetched)
  * @param array|null  $headers       Pre-fetched headers; 'download_content_length' is the key read here
  * @param string|null $schema        'federal', 'non-federal', 'federal-v1.1', 'non-federal-v1.1';
  *                                   any other value is validated as a top-level array in chunks of 500
  * @param bool        $return_source When truthy, the filtered decoded catalog is returned under 'source'
  * @param mixed       $quality       When not false, QA results from datajson_qa() are returned under 'qa'
  * @param mixed       $component     Forwarded unchanged to datajson_qa()
  *
  * @return array On success: 'errors' (array or false), 'valid', 'valid_json', 'total_records' and,
  *               when available, 'qa', 'schema_version', 'download_content_length', 'source'.
  *               On failure: 'valid' => false plus the messages under 'fail'.
  *
  * @throws Exception Re-thrown from the streaming parser after the file handles are closed.
  */
 public function validate_datajson($datajson_url = null, $datajson = null, $headers = null, $schema = null, $return_source = false, $quality = false, $component = null)
 {
     // Initialise everything later branches read, so the no-URL path never
     // touches an undefined variable. null keeps the "not determined yet" meaning.
     $errors = array();
     $datajson_header = array();
     $datajson_processed = null;
     $valid_json = null;
     $raw_valid_json = null;

     if ($datajson_url) {
         $datajson_header = $headers ? $headers : $this->campaign->uri_header($datajson_url);
         // Max file size
         $max_remote_size = $this->config->item('max_remote_size');
         $length_unknown = empty($datajson_header['download_content_length']) || $datajson_header['download_content_length'] < 0;

         // Only download the data.json if we need to: the size is unknown or within the limit.
         // "<=" so that a file of exactly the maximum size is still downloaded
         // (the streaming branch below only takes files strictly larger than the limit).
         if ($length_unknown || $datajson_header['download_content_length'] <= $max_remote_size) {
             // Load the JSON
             $opts = array('http' => array('method' => "GET", 'user_agent' => "Data.gov data.json crawler"));
             $context = stream_context_create($opts);
             // Offset 0 means "from the start" on every PHP version. A negative offset is
             // treated as a seek from the end of the stream since PHP 7.1, which remote
             // streams cannot do. Reading one byte past the limit lets the size check
             // below detect oversized files whose length was not reported.
             $datajson = @file_get_contents($datajson_url, false, $context, 0, $max_remote_size + 1);
             if (!$datajson) {
                 $datajson = curl_from_json($datajson_url, false, false);
                 if (!$datajson) {
                     $errors[] = "File not found or couldn't be downloaded";
                 }
             }
         }

         // No usable length in the headers: fall back to the size of what was downloaded
         if (!empty($datajson) && $length_unknown) {
             $datajson_header['download_content_length'] = strlen($datajson);
         }

         // See if it exceeds max size; if so validate it from disk in chunks
         if (!empty($datajson_header['download_content_length']) && $datajson_header['download_content_length'] > $max_remote_size) {
             // Increase the timeout limit
             @set_time_limit(6000);
             $this->load->helper('file');
             $rawfile = $this->archive_file('datajson-lines', $this->current_office_id, $datajson_url);
             if ($rawfile) {
                 $is_cli = ($this->environment == 'terminal' || $this->environment == 'cron');
                 $outfile = $rawfile . '.lines.json';
                 $stream = fopen($rawfile, 'r');
                 $out_stream = fopen($outfile, 'w+');
                 $listener = new DataJsonParser();
                 $listener->out_file = $out_stream;
                 if ($is_cli) {
                     echo 'Attempting to convert to JSON lines' . PHP_EOL;
                 }
                 try {
                     $parser = new JsonStreamingParser_Parser($stream, $listener);
                     $parser->parse();
                 } catch (Exception $e) {
                     // Release both handles before propagating the failure
                     if (is_resource($stream)) {
                         fclose($stream);
                     }
                     if (is_resource($out_stream)) {
                         fclose($out_stream);
                     }
                     throw $e;
                 }
                 // Close both handles before the files are reopened or deleted.
                 // is_resource() guards against a handle the parser or listener already closed.
                 if (is_resource($stream)) {
                     fclose($stream);
                 }
                 if (is_resource($out_stream)) {
                     fclose($out_stream);
                 }
                 // Get the dataset count
                 $datajson_lines_count = $listener->_array_count;
                 // Delete temporary raw source file
                 unlink($rawfile);

                 $out_stream = fopen($outfile, 'r');
                 $chunk_size = 200;
                 $chunk_count = intval(ceil($datajson_lines_count / $chunk_size));
                 $response = array();
                 $response['errors'] = array();
                 if ($quality !== false) {
                     $response['qa'] = array();
                 }
                 // Progress output only belongs on the command line, like the message above
                 if ($is_cli) {
                     echo "Analyzing {$datajson_lines_count} lines in {$chunk_count} chunks of {$chunk_size} lines each" . PHP_EOL;
                 }
                 // The first line is prepended to every chunk so each chunk forms a complete document
                 $json_header = fgets($out_stream);
                 for ($chunk_cycle = 0; $chunk_cycle < $chunk_count; $chunk_cycle++) {
                     // Index of the first record of this chunk, used to offset error keys
                     $key_offset = $chunk_size * $chunk_cycle;
                     $buffer = '';
                     $counter = 0;
                     // Read exactly $chunk_size lines (fewer at end of file) so that
                     // $key_offset stays aligned with the records actually in the chunk
                     while ($counter < $chunk_size && ($line = fgets($out_stream)) !== false) {
                         $buffer .= $line;
                         $counter++;
                     }
                     if ($counter === 0) {
                         // Ran out of lines early; there is nothing left to validate
                         break;
                     }
                     $buffer = $json_header . $buffer;
                     // Drop the last two bytes (presumably the ",\n" written after each record --
                     // confirm against DataJsonParser) and close the array and the object
                     $buffer = substr($buffer, 0, strlen($buffer) - 2) . ']}';
                     $validator = $this->campaign->jsonschema_validator($buffer, 'federal-v1.1');
                     if (!empty($validator['errors'])) {
                         $response['errors'] = array_merge($response['errors'], $this->process_validation_errors($validator['errors'], $key_offset));
                     }
                     if ($quality !== false) {
                         $datajson_qa = $this->campaign->datajson_qa($buffer, 'federal-v1.1', $quality, $component);
                         if (!empty($datajson_qa)) {
                             $response['qa'] = array_merge_recursive($response['qa'], $datajson_qa);
                         }
                     }
                 }
                 if (is_resource($out_stream)) {
                     fclose($out_stream);
                 }
                 // Delete json lines file
                 unlink($outfile);

                 // Sum QA counts collected per chunk
                 if (!empty($response['qa'])) {
                     $response['qa'] = $this->sum_chunked_qa_counts($response['qa'], array('API_total', 'downloadURL_present', 'downloadURL_total', 'accessURL_present', 'accessURL_total', 'accessLevel_public', 'accessLevel_restricted', 'accessLevel_nonpublic', 'license_present', 'redaction_present', 'redaction_no_explanation'));
                 }
                 $response['valid'] = empty($response['errors']);
                 $response['valid_json'] = true;
                 $response['total_records'] = $datajson_lines_count;
                 if (!empty($datajson_header['download_content_length'])) {
                     $response['download_content_length'] = $datajson_header['download_content_length'];
                 }
                 if (empty($response['errors'])) {
                     $response['errors'] = false;
                 }
                 return $response;
             }
             $errors[] = "File not found or couldn't be downloaded";
         }

         // See if it's valid JSON ("<=" mirrors the download condition above)
         if (!empty($datajson) && $datajson_header['download_content_length'] <= $max_remote_size) {
             // See if raw file is valid
             $raw_valid_json = is_json($datajson);
             if ($raw_valid_json) {
                 $valid_json = true;
             } else {
                 // See if we can clean up the file to make it valid
                 $datajson_processed = json_text_filter($datajson);
                 $valid_json = is_json($datajson_processed);
             }
             if ($valid_json !== true) {
                 $errors[] = 'The validator was unable to determine if this was valid JSON';
             }
         }

         if (!empty($errors)) {
             $response = array(
                 'raw_valid_json' => $raw_valid_json,
                 'valid_json' => $valid_json,
                 'valid' => false,
                 'fail' => $errors,
                 'download_content_length' => isset($datajson_header['download_content_length']) ? $datajson_header['download_content_length'] : null
             );
             if ($valid_json && $return_source === false) {
                 $catalog = json_decode($datajson_processed);
                 if ($schema == 'federal-v1.1' or $schema == 'non-federal-v1.1') {
                     $response['total_records'] = count($catalog->dataset);
                 } else {
                     $response['total_records'] = count($catalog);
                 }
             }
             return $response;
         }
     }

     // filter string for json conversion if we haven't already
     if ($datajson && empty($datajson_processed)) {
         $datajson_processed = json_text_filter($datajson);
     }
     // verify it's valid json, unless that was already decided above
     if ($datajson_processed && $valid_json === null) {
         $valid_json = is_json($datajson_processed);
     }

     if (!($datajson_processed && $valid_json)) {
         $errors[] = "This does not appear to be valid JSON";
         $response = array('valid_json' => false, 'valid' => false, 'fail' => $errors);
         if (!empty($datajson_header['download_content_length'])) {
             $response['download_content_length'] = $datajson_header['download_content_length'];
         }
         return $response;
     }

     $datajson_decode = json_decode($datajson_processed);

     // A catalog that declares the v1.1 schema overrides a legacy or missing $schema
     if (!empty($datajson_decode->conformsTo) && $datajson_decode->conformsTo == 'https://project-open-data.cio.gov/v1.1/schema') {
         if ($schema !== 'federal-v1.1' && $schema !== 'non-federal-v1.1') {
             // Only an explicit 'non-federal' maps to the non-federal schema; everything else is federal
             $schema = ($schema === 'non-federal') ? 'non-federal-v1.1' : 'federal-v1.1';
         }
         $this->schema = $schema;
     }
     $is_v11_schema = ($schema === 'federal-v1.1' || $schema === 'non-federal-v1.1');

     if ($schema === 'federal-v1.1' && empty($datajson_decode->dataset)) {
         $errors[] = "This file does not appear to be using the federal-v1.1 schema";
         $response = array('raw_valid_json' => $raw_valid_json, 'valid_json' => $valid_json, 'valid' => false, 'fail' => $errors);
         return $response;
     }

     // v1.1 catalogs are validated as one document; anything else is split into chunks of records
     $chunk_size = 500;
     if ($is_v11_schema) {
         $datajson_chunks = array($datajson_decode);
     } else {
         $datajson_chunks = array_chunk($datajson_decode, $chunk_size);
     }

     $response = array();
     $response['errors'] = array();
     if ($quality !== false) {
         $response['qa'] = array();
     }
     // save detected schema version to output
     $response['schema_version'] = $schema;

     foreach ($datajson_chunks as $chunk_index => $chunk) {
         $chunk = json_encode($chunk);
         $validator = $this->campaign->jsonschema_validator($chunk, $schema);
         if (!empty($validator['errors'])) {
             // Index of the first record of this chunk (0 for the first or only chunk)
             $key_offset = $chunk_size * $chunk_index;
             $response['errors'] = $response['errors'] + $this->process_validation_errors($validator['errors'], $key_offset);
         }
         if ($quality !== false) {
             $datajson_qa = $this->campaign->datajson_qa($chunk, $schema, $quality, $component);
             if (!empty($datajson_qa)) {
                 $response['qa'] = array_merge_recursive($response['qa'], $datajson_qa);
             }
         }
     }

     // Sum QA counts collected per chunk
     if (!empty($response['qa'])) {
         $response['qa'] = $this->sum_chunked_qa_counts($response['qa'], array('accessURL_present', 'accessURL_total', 'accessLevel_public', 'accessLevel_restricted', 'accessLevel_nonpublic'));
     }

     // When the raw download was checked, report its validity rather than that of the filtered text
     if ($raw_valid_json !== null) {
         $valid_json = $raw_valid_json;
     }
     $response['valid'] = empty($response['errors']);
     $response['valid_json'] = $valid_json;
     if ($is_v11_schema) {
         // A catalog without a dataset list has no records; avoids count() on null
         $response['total_records'] = empty($datajson_decode->dataset) ? 0 : count($datajson_decode->dataset);
     } else {
         $response['total_records'] = count($datajson_decode);
     }
     if (!empty($datajson_header['download_content_length'])) {
         $response['download_content_length'] = $datajson_header['download_content_length'];
     }
     if (empty($response['errors'])) {
         $response['errors'] = false;
     }
     if ($return_source) {
         $response['source'] = filter_json($datajson_decode, $is_v11_schema);
     }
     return $response;
 }

 /**
  * Collapse QA results that were merged across chunks with array_merge_recursive().
  *
  * 'bureauCodes' and 'programCodes' are reduced to their keys, each field named in
  * $sum_array_fields that became an array is replaced by its sum, and so is every
  * array entry under 'validation_counts'.
  *
  * @param array $qa               Merged QA results
  * @param array $sum_array_fields Names of the counters to sum
  *
  * @return array The collapsed QA results
  */
 private function sum_chunked_qa_counts($qa, $sum_array_fields)
 {
     foreach (array('bureauCodes', 'programCodes') as $code_field) {
         if (!empty($qa[$code_field])) {
             $qa[$code_field] = array_keys($qa[$code_field]);
         }
     }
     foreach ($sum_array_fields as $array_field) {
         if (!empty($qa[$array_field]) && is_array($qa[$array_field])) {
             $qa[$array_field] = array_sum($qa[$array_field]);
         }
     }
     // Sum validation counts
     if (!empty($qa['validation_counts']) && is_array($qa['validation_counts'])) {
         foreach ($qa['validation_counts'] as $validation_key => $validation_count) {
             if (is_array($validation_count)) {
                 $qa['validation_counts'][$validation_key] = array_sum($validation_count);
             }
         }
     }
     return $qa;
 }
 /**
  * TO DO - when agencies provide a valid url, we should validate that before
  * the download.
  *
  * Read the archived file that has been downloaded in the campaign status method
  * and validate it against the schema for the given component.
  *
  * The method always sets 'total_records', 'download_content_length',
  * 'content_type', 'schema_version' and 'valid_json' on $status. Problems reading
  * or parsing the file are appended to $status['errors']; schema violations are
  * stored under $status['schema_errors'].
  *
  * @param array  $status    Status record to extend
  * @param string $file_path Path of the archived JSON file
  * @param string $component Component whose schema is loaded via datajson_schema()
  * @param string $real_url  Not used by this method
  *
  * @return array The updated status record
  */
 public function validate_archive_file_with_schema($status, $file_path, $component, $real_url)
 {
     $status['total_records'] = 0;
     $status['download_content_length'] = 0;
     $status['content_type'] = "application/json";
     $status['schema_version'] = "1.0";
     $data = null;

     // Read the file in one step; no separate handle is opened, so none can be left open
     $json = is_readable($file_path) ? file_get_contents($file_path) : false;

     if ($json === false) {
         $status['errors'][] = "Unable to open archived json file";
         $status['valid_json'] = false;
     } elseif (empty($json)) {
         $status['errors'][] = 'Archived json file is empty';
         $status['valid_json'] = false;
     } else {
         $is_valid = is_json($json);
         if (!$is_valid) {
             // Try to clean the text up and check again
             $json = json_text_filter($json);
             $is_valid = is_json($json);
         }
         if (!$is_valid) {
             $status['errors'][] = 'Invalid archived json file';
             $status['valid_json'] = false;
         } else {
             $status['download_content_length'] = strlen($json);
             $data = json_decode($json);
             // count() only accepts arrays/Countable on PHP 8; for other decoded values
             // report what older PHP versions returned (0 for null, 1 otherwise)
             if (is_array($data)) {
                 $status['total_records'] = count($data);
             } else {
                 $status['total_records'] = ($data === null) ? 0 : 1;
             }
             $status['valid_json'] = true;
         }
     }

     $schema = $this->datajson_schema($component);
     if (!empty($data)) {
         $validator = new JsonSchema\Validator();
         $validator->check($data, $schema);
         if (!$validator->isValid()) {
             $status['schema_errors'] = $validator->getErrors();
         }
     }
     return $status;
 }