/** * import datas into your wallabag * @return boolean */ public function import() { if (isset($_FILES['file']) && $_FILES['file']['tmp_name']) { Tools::logm('Import stated: parsing file'); // assume, that file is in json format $str_data = file_get_contents($_FILES['file']['tmp_name']); $data = json_decode($str_data, true); if ($data === null) { //not json - assume html $html = new simple_html_dom(); $html->load_file($_FILES['file']['tmp_name']); $data = array(); $read = 0; foreach (array('ol', 'ul') as $list) { foreach ($html->find($list) as $ul) { foreach ($ul->find('li') as $li) { $tmpEntry = array(); $a = $li->find('a'); $tmpEntry['url'] = $a[0]->href; $tmpEntry['tags'] = $a[0]->tags; $tmpEntry['is_read'] = $read; if ($tmpEntry['url']) { $data[] = $tmpEntry; } } # the second <ol/ul> is for read links $read = sizeof($data) && $read ? 0 : 1; } } } // for readability structure foreach ($data as $record) { if (is_array($record)) { $data[] = $record; foreach ($record as $record2) { if (is_array($record2)) { $data[] = $record2; } } } } $urlsInserted = array(); //urls of articles inserted foreach ($data as $record) { $url = trim(isset($record['article__url']) ? $record['article__url'] : (isset($record['url']) ? $record['url'] : '')); if ($url and !in_array($url, $urlsInserted)) { $title = isset($record['title']) ? $record['title'] : _('Untitled - Import - ') . '</a> <a href="./?import">' . _('click to finish import') . '</a><a>'; $body = isset($record['content']) ? $record['content'] : ''; $isRead = isset($record['is_read']) ? intval($record['is_read']) : (isset($record['archive']) ? intval($record['archive']) : 0); $isFavorite = isset($record['is_fav']) ? intval($record['is_fav']) : (isset($record['favorite']) ? intval($record['favorite']) : 0); // insert new record $id = $this->store->add($url, $title, $body, $this->user->getId(), $isFavorite, $isRead); if ($id) { $urlsInserted[] = $url; //add if (isset($record['tags']) && trim($record['tags'])) { $tags = explode(',', $record['tags']); foreach ($tags as $tag) { $entry_id = $id; $tag_id = $this->store->retrieveTagByValue($tag); if ($tag_id) { $this->store->setTagToEntry($tag_id['id'], $entry_id); } else { $this->store->createTag($tag); $tag_id = $this->store->retrieveTagByValue($tag); $this->store->setTagToEntry($tag_id['id'], $entry_id); } } } } } } $i = sizeof($urlsInserted); if ($i > 0) { $this->messages->add('s', _('Articles inserted: ') . $i . _('. Please note, that some may be marked as "read".')); } Tools::logm('Import of articles finished: ' . $i . ' articles added (w/o content if not provided).'); } else { $this->messages->add('e', _('Did you forget to select a file?')); } // file parsing finished here // now download article contents if any // check if we need to download any content $recordsDownloadRequired = $this->store->retrieveUnfetchedEntriesCount($this->user->getId()); if ($recordsDownloadRequired == 0) { // nothing to download $this->messages->add('s', _('Import finished.')); Tools::logm('Import finished completely'); Tools::redirect(); } else { // if just inserted - don't download anything, download will start in next reload if (!isset($_FILES['file'])) { // download next batch Tools::logm('Fetching next batch of articles...'); $items = $this->store->retrieveUnfetchedEntries($this->user->getId(), IMPORT_LIMIT); $purifier = $this->_getPurifier(); foreach ($items as $item) { $url = new Url(base64_encode($item['url'])); if ($url->isCorrect()) { Tools::logm('Fetching article ' . $item['id']); $content = Tools::getPageContent($url); $title = $content['rss']['channel']['item']['title'] != '' ? $content['rss']['channel']['item']['title'] : _('Untitled'); $body = $content['rss']['channel']['item']['description'] != '' ? $content['rss']['channel']['item']['description'] : _('Undefined'); // clean content to prevent xss attack $title = $purifier->purify($title); $body = $purifier->purify($body); $this->store->updateContentAndTitle($item['id'], $title, $body, $this->user->getId()); Tools::logm('Article ' . $item['id'] . ' updated.'); } else { Tools::logm('Unvalid URL (' . $item['url'] . ') to fetch for article ' . $item['id']); } } } } return array('includeImport' => true, 'import' => array('recordsDownloadRequired' => $recordsDownloadRequired, 'recordsUnderDownload' => IMPORT_LIMIT, 'delay' => IMPORT_DELAY * 1000)); }