/**
 * Normalises the links and images of a feed HTML fragment.
 *
 * For every anchor:
 *  - an image-less anchor that is_feed_image() accepts is turned into a
 *    thumbnail (its old href becomes the image source) linking to the article;
 *  - "….php?u=<encoded url>&…" redirect links are replaced by the decoded
 *    target URL and their inline JS handlers are blanked;
 *  - site-relative hrefs are prefixed with https://www.facebook.com.
 * Every <img> found in the parsed fragment additionally receives the
 * "img-thumbnail fb-thumb" classes.
 *
 * @param string $str         HTML fragment to clean up.
 * @param string $article_url URL that generated thumbnails should link to.
 *
 * @return string The rewritten HTML, or $str unchanged when it cannot be parsed.
 */
function cleanse_urls($str, $article_url)
{
    $html = SimpleHtmlDom\str_get_html($str);
    if (!$html) {
        // The parser yields false instead of a DOM for input it refuses
        // (e.g. an empty string); calling find() on that would be fatal.
        return $str;
    }

    $redirect_markers = ['.php?u=http%3a%2f%2f', '.php?u=https%3a%2f%2f'];
    $marker_prefix_length = 7; // strlen('.php?u=')

    foreach ($html->find('a') as $element) {
        // Fix cross posts, especially those from LMMSChallenge facebook
        if (!$element->find('img') && is_feed_image($element)) {
            // Escape the feed-supplied URL so it cannot break out of the
            // src attribute; entities that are already encoded are kept.
            $src = htmlspecialchars((string) $element->href, ENT_QUOTES, 'UTF-8', false);
            $element->innertext = '<img class="img-thumbnail fb-thumb" src="' . $src . '"/>';
            $element->href = $article_url;
        }

        // Fix unnecessary facebook redirects: locate the earliest marker.
        $lower_href = strtolower((string) $element->href);
        $pos = false;
        foreach ($redirect_markers as $marker) {
            $found = strpos($lower_href, $marker);
            if ($found !== false && ($pos === false || $found < $pos)) {
                $pos = $found;
            }
        }

        if ($pos !== false) {
            // Remove js callbacks
            $element->onmouseover = '';
            $element->onclick = '';
            // Isolate the encoded target, strip the trailing facebook
            // params, then decode the proper url.
            $encoded_target = substr((string) $element->href, $pos + $marker_prefix_length);
            $element->href = urldecode(explode('&', $encoded_target)[0]);
        }

        // Fix relative facebook URLs
        if (str_startswith($element->href, '/')) {
            $element->href = 'https://www.facebook.com' . $element->href;
        }
    }

    foreach ($html->find('img') as $img) {
        $img->class = ($img->class ? $img->class . ' ' : '') . 'img-thumbnail fb-thumb';
    }

    return $html->save();
}
/**
 * Prepares a forum/atom HTML fragment for display in the feed.
 *
 * Every image except smilies (src containing "/smilies/") is replaced by a
 * thumbnail produced through scale_image(..., 200) and wrapped in a link to
 * $href. Paragraphs whose class is exactly "dummy" are replaced by a
 * <small class="feed-small"> element with the same inner HTML.
 *
 * @param string $atom HTML fragment to rewrite.
 * @param string $href URL the generated thumbnails link to.
 *
 * @return mixed The parsed DOM object carrying the rewritten markup, or
 *               $atom unchanged when the parser could not build a DOM.
 *               NOTE(review): callers presumably rely on the DOM object's
 *               string conversion when printing the result - confirm.
 */
function cleanse_html($atom, $href)
{
    $html = SimpleHtmlDom\str_get_html($atom);
    if (!$html) {
        // The parser yields false for input it refuses (e.g. an empty
        // string); hand the input back instead of failing on find().
        return $atom;
    }

    // Escape once; entities already present in the URL are left untouched.
    $link = htmlspecialchars((string) $href, ENT_QUOTES, 'UTF-8', false);
    $thumb_class = 'img img-thumbnail forum-thumb';

    foreach ($html->find('img') as $element) {
        // Skip smilies
        if (str_contains((string) $element->src, '/smilies/')) {
            continue;
        }
        $element->outertext = '<a href="' . $link . '"><img class="' . $thumb_class
            . '" src="' . scale_image($element->src, 200) . '"></a>';
    }

    foreach ($html->find('p') as $element) {
        // Strict comparison: a valueless class attribute is not "dummy".
        if ($element->class === 'dummy') {
            $element->outertext = '<small class="feed-small">' . $element->innertext . '</small>';
        }
    }

    // The former `$html->save;` statement was a property read with no effect
    // and has been dropped; the DOM object itself is still what is returned.
    return $html;
}
/**
 * Sends the message described by the request over the account's SMTP
 * server and appends a copy to the account's sent box.
 *
 * Request fields read here: account, to, cc, bcc, subject, content,
 * originalMessageId and (from the POST bag) windowId.
 *
 * @param mixed $request Request object exposing get() and a `request` bag.
 *
 * @return mixed Result of responseAJAX() with a `status` of "success" or
 *               "error" and a human-readable `message`.
 */
public function sendAction($request)
{
    $account = $this->repo('Account')->find($request->get('account'));
    if (!$account) {
        return $this->responseAJAX([
            'status' => 'error',
            'message' => 'Brak skonfigurowanego konta pocztowego, z którego można wysłać wiadomość.',
        ]);
    }

    $this->repo('Account')->decryptPasswords($account);
    $this->completeAccountWithPassword($account);

    try {
        $to = new EmailAddressParser($request->get('to'));
        $cc = new EmailAddressParser($request->get('cc'));
        $bcc = new EmailAddressParser($request->get('bcc'));

        $message = \Swift_Message::newInstance();

        $content = $request->get('content');
        // The parser yields false instead of a DOM for input it refuses
        // (e.g. an empty body); in that case the raw content is sent as-is.
        $contentObj = \SimpleHtmlDom\str_get_html($content);

        if ($request->get('originalMessageId')) {
            $originalMessage = $this->openBoxFromRequest($request)
                ->readMessage($request->get('originalMessageId'), MailReader::READ_BODY_ATTACHMENTS);

            if ($originalMessage) {
                foreach ($originalMessage->attachments as $attachment) {
                    // Embed the original attachment in the new message and
                    // keep the content ID assigned to it.
                    $newAttId = $message->embed(\Swift_Image::fromPath($attachment->filePath));

                    if (!$contentObj) {
                        // No DOM, hence no inline references to rewrite.
                        continue;
                    }

                    // Elements tagged with the attachment's ID in the
                    // original message are pointed at the newly embedded
                    // copy; the marker attribute is no longer needed then.
                    $elements = $contentObj->find('*[data-attid="cid:' . $attachment->id . '"]');
                    foreach ($elements as $element) {
                        $element->src = $newAttId;
                        $element->removeAttribute('data-attid');
                    }
                }
            }
        }

        if ($contentObj) {
            $content = $contentObj->save();
        }

        $message
            ->setSubject($request->get('subject'))
            ->setFrom([$account->getSmtpUsername() => $account->getSenderName()])
            ->setTo($to->getAsPairs())
            ->setBody($content, 'text/html');

        if ($cc->hasAny()) {
            $message->setCc($cc->getAsPairs());
        }

        if ($bcc->hasAny()) {
            $message->setBcc($bcc->getAsPairs());
        }

        // Files uploaded for this compose window are attached as-is.
        // NOTE(review): windowId comes straight from the request and ends up
        // in a filesystem path - confirm it is validated upstream.
        $attachmentsDir = BASEPATH . '/app/Cache/outgoing-attachments/'
            . $this->request()->getSession()->getId() . '/'
            . $request->request->get('windowId');

        if (is_dir($attachmentsDir)) {
            foreach (new \DirectoryIterator($attachmentsDir) as $fileInfo) {
                if ($fileInfo->isDot() || $fileInfo->isDir()) {
                    continue;
                }
                $message->attach(\Swift_Attachment::fromPath($fileInfo->getPathname()));
            }
        }

        $transport = \Swift_SmtpTransport::newInstance(
            $account->getSmtpHost(),
            $account->getSmtpPort(),
            $account->getSmtpSecurity()
        )
            ->setUsername($account->getSmtpUsername())
            ->setPassword($account->getSmtpPassword());

        $mailer = \Swift_Mailer::newInstance($transport);

        // Send the message
        $result = $mailer->send($message);

        // NOTE(review): the copy is stored and the statistic is written even
        // when $result is falsy - confirm whether that is intended.
        $this->openBoxFromRequest($request)->appendMessage(
            $message->toString(),
            $this->imap($account)->getSentBoxName(),
            '\\Seen'
        );

        // Save stat
        if ($this->openSettings('app')->get('mod.mail.stat.savesentmailsinfo') == 1) {
            $stat = new SentMail();
            $stat->setUserId($this->user()->getId());
            $stat->setDate(time());
            $this->repo('SentMail')->save($stat);
        }
    } catch (\Exception $e) {
        return $this->responseAJAX(['status' => 'error', 'message' => $e->getMessage()]);
    }

    // A falsy send result used to be reported as an error carrying the
    // success text; the message now matches the status.
    return $this->responseAJAX(
        $result
            ? ['status' => 'success', 'message' => 'Wiadomość została wysłana.']
            : ['status' => 'error', 'message' => 'Nie udało się wysłać wiadomości.']
    );
}
// 初始化curl $list_handle = curl_init(); // curl配置参数 $options = [CURLOPT_URL => $list_url . $i, CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => $cmd->rand_user_agent()]; curl_setopt_array($list_handle, $options); // 执行curl $pageContent = curl_exec($list_handle); if (curl_errno($list_handle)) { $cmd->beep(); $cmd->alert('curl出错:' . curl_error($list_handle)); $cmd->alert('出错的URL是:' . $list_url . $i); } else { curl_close($list_handle); $cmd->alert("第 {$i} 列表页html内容获取成功!"); } $html = SimpleHtmlDom\str_get_html($pageContent); $thread_lists = $html->find('tr[class=tr3 t_one]'); foreach ($thread_lists as $thread_tr) { // 标题和链接地址 foreach ($thread_tr->find('td h3 a') as $url) { $title = iconv('gb2312', 'utf-8//IGNORE', $url->innertext); $url = "http://t66y.com/" . $url->href; } // 回复数量 foreach ($thread_tr->find('td[class=tal f10 y-style]') as $comment) { $comments = $comment->innertext; } // 作者 foreach ($thread_tr->find('a[class=bl]') as $author) { $author = iconv('gb2312', 'utf-8', $author->innertext); }
for ($j = $key_start; $j < $key_end; $j++) { // 初始化curl $thread_handle = curl_init(); // curl配置参数 $options = [CURLOPT_URL => $threads[$j]['url'], CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => $cmd->rand_user_agent()]; curl_setopt_array($thread_handle, $options); // 执行curl $thread_content = curl_exec($thread_handle); if (curl_errno($thread_handle)) { $cmd->beep(); $cmd->alert('curl出错:' . curl_error($thread_handle)); $cmd->alert('出错的URL是:' . $threads[$j]['url']); } else { curl_close($thread_handle); } $html = SimpleHtmlDom\str_get_html($thread_content); // 查找正文中所有的图片url并写入数据库 if (is_object($html)) { foreach ($html->find('input') as $element) { if ($element->src != '') { $cmd->alert($element->src); // 写入数据库 try { $pdo = new PDO('mysql:host=127.0.0.1;dbname=caoliu;charset=utf8', 'root', '881224'); $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); $stmt = $pdo->prepare("insert into test_img_url (thread_id, url) VALUES (:thread_id, :url)"); $stmt->bindParam(':thread_id', $threads[$j]['id']); $stmt->bindParam(':url', $element->src); $stmt->execute(); } catch (PDOException $error) { $cmd->alert($error->getMessage());
} ?> <h1>标题关键词排名前20</h1> <?php try { $stmt = $pdo->query("select title from thread"); $result = $stmt->fetchAll(); } catch (PDOException $error) { $cmd->alert($error->getMessage()); } // 将标题组装成一个回车符分隔的字符串 foreach ($result as $value) { // 先去掉标题中类似 [10p] 的部分,每行都有,代表帖子的图片数量 $title = preg_replace("/[0-9][0-9]P/", ' ', $value['title']); $text .= SimpleHtmlDom\str_get_html($title)->plaintext . "\n"; } $cws = new PSCWS4(); $cws->set_charset('utf8'); // 编码 $cws->set_dict(ROOT_PATH . '/vendor/scws/pscws4/dict/dict.utf8.xdb'); // 加载字典文件 $cws->set_rule(ROOT_PATH . '/vendor/scws/pscws4/etc/rules.ini'); // 人名地名规则 //$cws->set_multi(3); $cws->set_ignore(true); // 忽略标点 //$cws->set_debug(true); $cws->set_duality(true); // 对 单字 格外进行二元法匹配 $cws->send_text($text);