/**
 * Execute the word discovery process.
 *
 * Steps, for the data source described by the request object:
 *   - if no word list is supplied, load the words that do not yet have a
 *     definition from this source;
 *   - for each word, ask the source for its dictionary entries;
 *   - store every returned entry (only when the "save_words" config flag
 *     is set) and echo it;
 *   - when the source returns no entries for a word, record that on the
 *     word list via update_word_list_word().
 *
 * Progress and errors are echoed to the output as they occur; the method
 * returns nothing. If the initial word list cannot be loaded, the error is
 * echoed and the method returns without processing anything.
 *
 * @param array $word_array optional list of words
 */
public function crawl($word_array = null)
{
    // prepare for long processing time
    ini_set('default_socket_timeout', 36000);
    ini_set('max_execution_time', 36000);

    $jrequest = $this->create_request();
    $jword = new JentiWord($this->config);

    // Loose comparison kept on purpose: an empty array is treated the same
    // as "no list given" and triggers the lookup below.
    if ($word_array == null) {
        $word_array = $jword->get_words_without_definition($jrequest);
        if ($jword->error) {
            echo $jword->error;
            return;
        }
    }

    // send every message to the browser immediately
    ob_implicit_flush(TRUE);
    $this->log_open();

    try {
        $msg = get_class($this) . ": processing " . count($word_array)
            . " words from " . $jrequest->service_name;
        $this->echo_msg_flush($msg);

        foreach ($word_array as $word) {
            $word = trim($word);
            $dictionary_word_array = $jrequest->get_word($word);

            // A failed request may hand back something that is not countable
            // (e.g. null/false); normalise it so the count() below cannot
            // raise a TypeError and abort the whole crawl.
            if (!is_array($dictionary_word_array)
                && !($dictionary_word_array instanceof \Countable)
            ) {
                $dictionary_word_array = [];
            }

            if ($jrequest->error) {
                $this->echo_msg_flush($jrequest->error);
            } else {
                foreach ($dictionary_word_array as $word_info) {
                    if ($this->config["save_words"] == true) {
                        $jword->add_word($word_info);
                    }
                    $word_info["ERROR_ARRAY"] = $jword->error_array;
                    $this->echo_word_info($word_info);
                }
            }

            if (count($dictionary_word_array) == 0) {
                // data source cannot provide this word
                // NOTE(review): this also runs when the request itself
                // failed and returned nothing -- confirm that is intended
                // for transient errors.
                $avoid_info = [
                    "WORD" => $word,
                    "LANGUAGE_CODE" => $jrequest->language_code,
                    "SOURCE_NAME" => $jrequest->service_name,
                ];
                $jword->update_word_list_word($avoid_info);
                if ($jword->error) {
                    $this->echo_msg_flush($jword->error);
                }
            }
        }
    } finally {
        // Always close the log and restore output buffering behaviour,
        // even if processing a word throws.
        $this->log_close();
        ob_implicit_flush(FALSE);
    }
}