Esempio n. 1
0
 /**
  * Export a record set as defined by the given search expression and the table_num for this exporter.
  * This function wraps the record-level exports using the settings 'wrap_before' and 'wrap_after' if they are set.
  * @param string $ps_exporter_code defines the exporter to use
  * @param SearchResult $po_result An existing SearchResult object
  * @param string $ps_filename Destination filename (we can't keep everything in memory here)
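  * @param array $pa_options
  *		request = request object for the current user. If a RequestHTTP is passed, records that user cannot read are skipped. progressCallback is only invoked when a request is passed.
  * 		progressCallback = callback function for asynchronous UI status reporting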
  *		showCLIProgressBar = Show command-line progress bar. Default is false.
  *		logDirectory = path to directory where logs should be written
  *		logLevel = KLogger constant for minimum log level to record. Default is KLogger::INFO. Constants are, in descending order of shrillness:
  *			KLogger::EMERG = Emergency messages (system is unusable)
  *			KLogger::ALERT = Alert messages (action must be taken immediately)
  *			KLogger::CRIT = Critical conditions
  *			KLogger::ERR = Error conditions
  *			KLogger::WARN = Warnings
  *			KLogger::NOTICE = Notices (normal but significant conditions)
  *			KLogger::INFO = Informational messages
  *			KLogger::DEBUG = Debugging messages
  * @return boolean success state
  */
 public static function exportRecordsFromSearchResult($ps_exporter_code, $po_result, $ps_filename, $pa_options = array())
 {
     // Exports each hit of $po_result with the single-record exporter and appends the output,
     // optionally framed by the mapping's 'wrap_before'/'wrap_after' settings, to $ps_filename.
     // Returns true on success, false if the result/mapping is unusable or the file cannot be written.
     if (!($po_result instanceof SearchResult)) {
         return false;
     }

     // Fall back to the temp directory when no usable log directory was passed.
     $vs_log_dir = caGetOption('logDirectory', $pa_options);
     if (!$vs_log_dir || !file_exists($vs_log_dir) || !is_writable($vs_log_dir)) {
         $vs_log_dir = caGetTempDirPath();
     }
     if (!($vn_log_level = caGetOption('logLevel', $pa_options))) {
         $vn_log_level = KLogger::INFO;
     }
     $o_log = new KLogger($vs_log_dir, $vn_log_level);

     // Start with empty exporter caches.
     ca_data_exporters::$s_exporter_cache = array();
     ca_data_exporters::$s_exporter_item_cache = array();

     $vb_show_cli_progress_bar = isset($pa_options['showCLIProgressBar']) && $pa_options['showCLIProgressBar'];
     $po_request = caGetOption('request', $pa_options, null);
     $vb_have_request = $po_request instanceof RequestHTTP;

     // Resolve the progress callback once instead of at every notification site.
     // NOTE(review): per-record notifications require a RequestHTTP instance, while the
     // start/failure/completion notifications only require a truthy request value.
     $vm_callback = (isset($pa_options['progressCallback']) && $pa_options['progressCallback']) ? $pa_options['progressCallback'] : null;

     if (!($t_mapping = ca_data_exporters::loadExporterByCode($ps_exporter_code))) {
         return false;
     }
     $va_errors = ca_data_exporters::checkMapping($ps_exporter_code);
     if (sizeof($va_errors) > 0) {
         if ($po_request && $vm_callback) {
             $vm_callback($po_request, 0, -1, _t('Export failed: %1', join("; ", $va_errors)), 0, memory_get_usage(true), 0);
         }
         return false;
     }

     $o_log->logInfo(_t("Starting SearchResult-based multi-record export for mapping %1.", $ps_exporter_code));
     $vn_start_time = time();
     $vs_wrap_before = $t_mapping->getSetting('wrap_before');
     $vs_wrap_after = $t_mapping->getSetting('wrap_after');
     $o_dm = $t_mapping->getAppDatamodel();

     // Without a table instance we can neither check access nor read primary keys; bail out
     // cleanly instead of failing with a fatal error on the first hit.
     $t_instance = $o_dm->getInstanceByTableNum($t_mapping->get('table_num'));
     if (!$t_instance) {
         $o_log->logError(_t("Could not load table instance for mapping %1", $ps_exporter_code));
         return false;
     }
     $vs_pk = $t_instance->primaryKey();
     $vn_table_num = $t_instance->tableNum();

     $vn_num_items = $po_result->numHits();
     $o_log->logInfo(_t("SearchResult contains %1 results. Now calling single-item export for each record.", $vn_num_items));

     // Appends one newline-terminated chunk to the destination file. A failed write is logged and
     // reported through the progress callback, and signalled to the caller via the return value.
     $vf_append = function ($ps_data) use ($ps_filename, $o_log, $po_request, $vm_callback, $vn_start_time) {
         if (file_put_contents($ps_filename, $ps_data . "\n", FILE_APPEND) !== false) {
             return true;
         }
         $vs_error = _t("Could not write to export file %1", $ps_filename);
         $o_log->logError($vs_error);
         if ($po_request && $vm_callback) {
             $vm_callback($po_request, 0, -1, _t('Export failed: %1', $vs_error), time() - $vn_start_time, memory_get_usage(true), 0);
         }
         return false;
     };

     if ($vs_wrap_before && !$vf_append($vs_wrap_before)) {
         return false;
     }
     if ($vb_show_cli_progress_bar) {
         print CLIProgressBar::start($vn_num_items, _t('Processing search result'));
     }
     if ($po_request && $vm_callback) {
         if ($vn_num_items > 0) {
             $vm_callback($po_request, 0, $vn_num_items, _t("Exporting result"), time() - $vn_start_time, memory_get_usage(true), 0);
         } else {
             $vm_callback($po_request, 0, -1, _t('Found no records to export'), time() - $vn_start_time, memory_get_usage(true), 0);
         }
     }

     $vn_num_processed = 0;

     // Optional header line: one label per mapping element, ordered by element number.
     if ($t_mapping->getSetting('CSV_print_field_names')) {
         $va_header = $va_header_sources = array();
         foreach ($t_mapping->getItems() as $va_mapping_item) {
             $va_settings = caUnserializeForDatabase($va_mapping_item['settings']);
             // Prefer the item's '_id' setting; it is optional, so don't read it blindly.
             $va_header_sources[(int) $va_mapping_item['element']] = !empty($va_settings['_id']) ? $va_settings['_id'] : $va_mapping_item['source'];
         }
         ksort($va_header_sources);
         foreach ($va_header_sources as $vs_source) {
             // Use the table's display label when the source starts with a known table name.
             $va_tmp = explode(".", $vs_source);
             if ($t_table = $o_dm->getInstanceByTableName($va_tmp[0], true)) {
                 $va_header[] = $t_table->getDisplayLabel($vs_source);
             } else {
                 $va_header[] = $vs_source;
             }
         }
         if (!$vf_append(join(",", $va_header))) {
             return false;
         }
     }

     $i = 0;
     while ($po_result->nextHit()) {
         // clear caches every once in a while. doesn't make much sense to keep them around while exporting
         if (++$i % 1000 == 0) {
             SearchResult::clearCaches();
             ca_data_exporters::clearCaches();
         }
         $vn_record_id = $po_result->get($vs_pk);

         // With a request at hand, silently skip records its user is not allowed to read.
         if ($vb_have_request && !caCanRead($po_request->getUserID(), $vn_table_num, $vn_record_id)) {
             continue;
         }

         $vs_item_export = ca_data_exporters::exportRecord($ps_exporter_code, $vn_record_id, array('logger' => $o_log));
         if (!$vf_append($vs_item_export)) {
             return false;
         }
         if ($vb_show_cli_progress_bar) {
             print CLIProgressBar::next(1, _t("Exporting records ..."));
         }
         $vn_num_processed++;
         if ($vb_have_request && $vm_callback) {
             $vm_callback($po_request, $vn_num_processed, $vn_num_items, _t("Exporting ... [%1/%2]", $vn_num_processed, $vn_num_items), time() - $vn_start_time, memory_get_usage(true), $vn_num_processed);
         }
     }

     if ($vs_wrap_after && !$vf_append($vs_wrap_after)) {
         return false;
     }
     if ($vb_show_cli_progress_bar) {
         print CLIProgressBar::finish();
     }
     if ($po_request && $vm_callback) {
         $vm_callback($po_request, $vn_num_items, $vn_num_items, _t('Export completed'), time() - $vn_start_time, memory_get_usage(true), $vn_num_processed);
     }
     return true;
 }
Esempio n. 2
0
 /**
  * Forces a full reindex of all rows in the database or, optionally, a single table
  *
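  * @param string|array $pa_table_names Name of a single table, or list of table names, to reindex. If omitted, all indexed tables are reindexed.
  * @param array $pa_options Reindexing options:
  *			showProgress = print progress messages. Default is true.
  *			interactiveProgressDisplay = advance the command-line progress bar after each indexed row (only when showProgress is on). Default is false.
  *			log
  *			callback = function to call with progress information every 100 rows and once on completion
  * @return null|false False if table names were given but none of them exist; null otherwise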
  */
 public function reindex($pa_table_names = null, $pa_options = null)
 {
     // Truncates the index for the given tables (or the whole index when none are given) and
     // re-indexes every row of each table that has fields to index.

     // Flag the process as reindexing. Guarded so that calling reindex() more than once in the
     // same process does not attempt to redefine the constant.
     if (!defined('__CollectiveAccess_IS_REINDEXING__')) {
         define('__CollectiveAccess_IS_REINDEXING__', 1);
     }
     $t_timer = new Timer();
     $pb_display_progress = isset($pa_options['showProgress']) ? (bool) $pa_options['showProgress'] : true;
     $pb_interactive_display = isset($pa_options['interactiveProgressDisplay']) ? (bool) $pa_options['interactiveProgressDisplay'] : false;

     // Accept any callable (function name, closure, array callable) rather than only strings.
     $ps_callback = false;
     if (isset($pa_options['callback']) && is_callable($pa_options['callback'])) {
         $ps_callback = $pa_options['callback'];
     }

     if ($pa_table_names) {
         if (!is_array($pa_table_names)) {
             $pa_table_names = array($pa_table_names);
         }
         // Truncate the index of each existing table and collect its info; unknown names are ignored.
         $va_table_names = array();
         foreach ($pa_table_names as $vs_table) {
             if ($this->opo_datamodel->tableExists($vs_table)) {
                 $vn_num = $this->opo_datamodel->getTableNum($vs_table);
                 if ($pb_display_progress) {
                     print "\nTRUNCATING {$vs_table}\n\n";
                 }
                 $this->opo_engine->truncateIndex($vn_num);
                 $t_instance = $this->opo_datamodel->getInstanceByTableName($vs_table, true);
                 $va_table_names[$vn_num] = array('name' => $vs_table, 'num' => $vn_num, 'displayName' => $t_instance->getProperty('NAME_PLURAL'));
             }
         }
         if (!sizeof($va_table_names)) {
             return false;
         }
     } else {
         // full reindex
         $this->opo_engine->truncateIndex();
         $va_table_names = $this->getIndexedTables();
     }

     $o_db = $this->opo_db;
     $va_names = array();
     if ($pb_display_progress || $ps_callback) {
         foreach ($va_table_names as $va_table_info) {
             $va_names[] = $va_table_info['displayName'];
         }
         if ($pb_display_progress) {
             print "\nWILL INDEX [" . join(", ", $va_names) . "]\n\n";
         }
     }

     // Counts tables actually indexed; tables without indexable fields are not counted.
     $vn_tc = 0;
     foreach ($va_table_names as $va_table_info) {
         $vs_table = $va_table_info['name'];
         $t_instance = $this->opo_datamodel->getInstanceByTableName($vs_table, true);
         $vn_table_num = $t_instance->tableNum();
         $vs_table_pk = $t_instance->primaryKey();
         $vs_display_name = $t_instance->getProperty('NAME_PLURAL');

         $va_fields_to_index = $this->getFieldsToIndex($vn_table_num);
         if (!is_array($va_fields_to_index) || sizeof($va_fields_to_index) == 0) {
             continue;
         }

         // Fetch the primary keys of all rows in the table.
         $qr_all = $o_db->query("SELECT " . $vs_table_pk . " FROM {$vs_table}");
         $vn_num_rows = $qr_all->numRows();
         if ($pb_display_progress) {
             print CLIProgressBar::start($vn_num_rows, _t('Indexing %1', $vs_display_name));
         }
         $vn_c = 0;
         $va_ids = $qr_all->getAllFieldValues($vs_table_pk);

         $va_element_ids = null;
         if (method_exists($t_instance, "getApplicableElementCodes")) {
             $va_element_ids = array_keys($t_instance->getApplicableElementCodes(null, false, false));
         }

         // Intrinsic fields to load per row; the primary key is always included so rows can be keyed by it.
         $va_intrinsic_list = $this->getFieldsToIndex($vs_table, $vs_table, array('intrinsicOnly' => true));
         if (!is_array($va_intrinsic_list)) {
             $va_intrinsic_list = array();
         }
         $va_intrinsic_list[$vs_table_pk] = array();
         $vs_field_list = join(", ", array_keys($va_intrinsic_list));

         $va_field_data = array();
         foreach ($va_ids as $vn_i => $vn_id) {
             if (!($vn_i % 200)) {
                 // Pre-load attribute values for next 200 items to index; improves index performance
                 $va_id_slice = array_slice($va_ids, $vn_i, 200);
                 if ($va_element_ids) {
                     ca_attributes::prefetchAttributes($o_db, $vn_table_num, $va_id_slice, $va_element_ids);
                 }
                 $qr_field_data = $o_db->query("\n\t\t\t\t\t\tSELECT " . $vs_field_list . " \n\t\t\t\t\t\tFROM {$vs_table}\n\t\t\t\t\t\tWHERE {$vs_table_pk} IN (?)\t\n\t\t\t\t\t", array($va_id_slice));
                 $va_field_data = array();
                 while ($qr_field_data->nextRow()) {
                     $va_field_data[(int) $qr_field_data->get($vs_table_pk)] = $qr_field_data->getRow();
                 }
                 SearchResult::clearCaches();
             }

             // The row may be missing from the batch (e.g. removed since the id list was read);
             // pass null in that case instead of reading an undefined index.
             $va_row = isset($va_field_data[$vn_id]) ? $va_field_data[$vn_id] : null;
             $this->indexRow($vn_table_num, $vn_id, $va_row, true);

             if ($pb_display_progress && $pb_interactive_display) {
                 CLIProgressBar::setMessage("Memory: " . caGetMemoryUsage());
                 print CLIProgressBar::next();
             }
             // Report progress every 100 rows.
             if ($ps_callback && !($vn_c % 100)) {
                 $ps_callback($vn_c, $vn_num_rows, null, null, (double) $t_timer->getTime(2), memory_get_usage(true), $va_table_names, $vn_table_num, $vs_display_name, $vn_tc + 1);
             }
             $vn_c++;
         }
         $qr_all->free();
         unset($t_instance);
         if ($pb_display_progress && $pb_interactive_display) {
             print CLIProgressBar::finish();
         }
         $this->opo_engine->optimizeIndex($vn_table_num);
         $vn_tc++;
     }

     if ($pb_display_progress) {
         print "\n\n\nDone! [Indexing for " . join(", ", $va_names) . " took " . caFormatInterval((double) $t_timer->getTime(4)) . "]\n";
         print "Note that if you're using an external search service like ElasticSearch, the data may only now be sent to the actual service because it was buffered until now. So you still might have to wait a while for the script to finish.\n";
     }
     if ($ps_callback) {
         $ps_callback(1, 1, _t('Elapsed time: %1', caFormatInterval((double) $t_timer->getTime(2))), _t('Index rebuild complete!'), (double) $t_timer->getTime(2), memory_get_usage(true), $va_table_names, null, null, sizeof($va_table_names));
     }
 }