Example #1
0
/**
 * Rewrites the links and images of a Facebook feed fragment for display.
 *
 * - An anchor without an <img> child that is_feed_image() accepts is turned into
 *   a thumbnail of its target, and the anchor is pointed at $article_url.
 * - Facebook redirect links (".php?u=<encoded url>&...") are replaced by the
 *   decoded target URL and lose their onmouseover/onclick handlers.
 * - Hrefs starting with "/" are prefixed with https://www.facebook.com.
 * - Every <img> the parser finds gets "img-thumbnail fb-thumb" appended to its class.
 *
 * @param string $str         HTML fragment to clean.
 * @param string $article_url URL that generated thumbnails should link to.
 *
 * @return string The rewritten markup, or $str unchanged when it could not be parsed.
 */
function cleanse_urls($str, $article_url)
{
    $html = SimpleHtmlDom\str_get_html($str);
    // The parser does not always hand back a DOM object (e.g. for empty input);
    // return the markup untouched instead of crashing on ->find().
    if (!is_object($html)) {
        return $str;
    }

    // Length of the redirect script suffix that precedes the encoded target URL.
    $redirect_prefix_length = strlen('.php?u=');

    foreach ($html->find('a') as $element) {
        // Fix cross posts, especially those from LMMSChallenge facebook
        if (!$element->find('img') && is_feed_image($element)) {
            $element->innertext = '<img class="img-thumbnail fb-thumb" src="' . $element->href . '"/>';
            $element->href = $article_url;
        }

        // Fix unnecessary facebook redirects. A single case-insensitive match
        // covers both http and https targets and yields the exact offset of the
        // first occurrence, including offset 0.
        $href = (string) $element->href;
        if (preg_match('/\.php\?u=https?%3a%2f%2f/i', $href, $match, PREG_OFFSET_CAPTURE)) {
            // Remove js callbacks
            $element->onmouseover = '';
            $element->onclick = '';
            // Strip out the facebook params (everything from the first "&" on)
            $redirect = explode('&', $href)[0];
            // Isolate and decode the proper url
            $element->href = urldecode(substr($redirect, $match[0][1] + $redirect_prefix_length));
        }

        // Fix relative facebook URLs
        if (str_startswith($element->href, '/')) {
            $element->href = 'https://www.facebook.com' . $element->href;
        }
    }

    foreach ($html->find('img') as $img) {
        // Keep any existing classes and append the thumbnail classes.
        $img->class = ($img->class ? $img->class . ' ' : '') . 'img-thumbnail fb-thumb';
    }

    return $html->save();
}
Example #2
0
/**
 * Prepares forum feed markup for display.
 *
 * Every <img> whose src does not contain "/smilies/" is replaced by a thumbnail
 * (src passed through scale_image() with 200) wrapped in a link to $href, and
 * every <p class="dummy"> is replaced by a <small class="feed-small"> carrying
 * the same inner markup.
 *
 * @param string $atom HTML fragment to clean.
 * @param string $href URL the generated thumbnail links point to.
 *
 * @return mixed The parsed DOM object with the replacements applied; callers are
 *               responsible for serialising it (e.g. via save()). When $atom
 *               cannot be parsed it is returned unchanged.
 */
function cleanse_html($atom, $href)
{
    $html = SimpleHtmlDom\str_get_html($atom);
    // The parser does not always hand back a DOM object (e.g. for empty input);
    // return the input untouched instead of crashing on ->find().
    if (!is_object($html)) {
        return $atom;
    }

    $class = 'img img-thumbnail forum-thumb';
    foreach ($html->find('img') as $element) {
        // Skip smilies
        if (str_contains($element->src, '/smilies/')) {
            continue;
        }
        $element->outertext = '<a href="' . $href . '"><img class="' . $class . '" src="' . scale_image($element->src, 200) . '"></a>';
    }

    foreach ($html->find('p') as $element) {
        if ($element->class == 'dummy') {
            $element->outertext = '<small class="feed-small">' . $element->innertext . '</small>';
        }
    }

    return $html;
}
Example #3
0
 /**
  * Sends the message described by the request through the selected account's
  * SMTP server and appends a copy to the account's sent box.
  *
  * Request fields read here: account, to, cc, bcc, subject, content,
  * originalMessageId and (from the POST bag) windowId.
  *
  * @param mixed $request Request object exposing get() and a `request` bag.
  *
  * @return mixed The AJAX response built by responseAJAX(); status is "error"
  *               when no account is found, when any exception is raised, or when
  *               the mailer reports no successful recipient.
  */
 public function sendAction($request)
 {
     $account = $this->repo('Account')->find($request->get('account'));
     if (!$account) {
         return $this->responseAJAX(['status' => 'error', 'message' => 'Brak skonfigurowanego konta pocztowego, z którego można wysłać wiadomość.']);
     }
     $this->repo('Account')->decryptPasswords($account);
     $this->completeAccountWithPassword($account);
     try {
         $to = new EmailAddressParser($request->get('to'));
         $cc = new EmailAddressParser($request->get('cc'));
         $bcc = new EmailAddressParser($request->get('bcc'));
         $message = \Swift_Message::newInstance();
         $content = $request->get('content');
         $contentObj = \SimpleHtmlDom\str_get_html($content);
         // The parser does not always return a DOM object (e.g. for an empty
         // body). In that case the raw content is sent as-is and the inline
         // image rewriting below is skipped.
         $hasDom = is_object($contentObj);
         if ($request->get('originalMessageId')) {
             $originalMessage = $this->openBoxFromRequest($request)->readMessage($request->get('originalMessageId'), MailReader::READ_BODY_ATTACHMENTS);
             if ($originalMessage) {
                 foreach ($originalMessage->attachments as $attachment) {
                     // Embed the original attachment in the new message and keep its new ID.
                     $newAttId = $message->embed(\Swift_Image::fromPath($attachment->filePath));
                     if (!$hasDom) {
                         continue;
                     }
                     // Find the elements that were tagged with the attachment's original ID.
                     $elements = $contentObj->find('*[data-attid="cid:' . $attachment->id . '"]');
                     foreach ($elements as $element) {
                         // Point the inline image at the newly embedded attachment.
                         $element->src = $newAttId;
                         // The marker attribute only carried the original attachment ID;
                         // it is no longer needed in the outgoing message.
                         $element->removeAttribute('data-attid');
                     }
                 }
             }
         }
         if ($hasDom) {
             $content = $contentObj->save();
         }
         $message->setSubject($request->get('subject'))->setFrom(array($account->getSmtpUsername() => $account->getSenderName()))->setTo($to->getAsPairs())->setBody($content, 'text/html');
         if ($cc->hasAny()) {
             $message->setCc($cc->getAsPairs());
         }
         if ($bcc->hasAny()) {
             $message->setBcc($bcc->getAsPairs());
         }
         // windowId comes from the client and becomes a path segment, so it must be
         // a plain directory name: no separators and no "." / ".." traversal.
         $windowId = (string) $request->request->get('windowId');
         if ($windowId !== basename($windowId) || $windowId === '.' || $windowId === '..') {
             throw new \InvalidArgumentException('Nieprawidłowy identyfikator okna.');
         }
         $attachmentsDir = BASEPATH . '/app/Cache/outgoing-attachments/' . $this->request()->getSession()->getId() . '/' . $windowId;
         if (is_dir($attachmentsDir)) {
             foreach (new \DirectoryIterator($attachmentsDir) as $fileInfo) {
                 if ($fileInfo->isDot() || $fileInfo->isDir()) {
                     continue;
                 }
                 $message->attach(\Swift_Attachment::fromPath($fileInfo->getPathname()));
             }
         }
         $transport = \Swift_SmtpTransport::newInstance($account->getSmtpHost(), $account->getSmtpPort(), $account->getSmtpSecurity())->setUsername($account->getSmtpUsername())->setPassword($account->getSmtpPassword());
         $mailer = \Swift_Mailer::newInstance($transport);
         // Send the message
         $result = $mailer->send($message);
         // Store a copy, flagged as seen, in the account's sent box.
         $this->openBoxFromRequest($request)->appendMessage($message->toString(), $this->imap($account)->getSentBoxName(), '\\Seen');
         // Save stat
         if ($this->openSettings('app')->get('mod.mail.stat.savesentmailsinfo') == 1) {
             $stat = new SentMail();
             $stat->setUserId($this->user()->getId());
             $stat->setDate(time());
             $this->repo('SentMail')->save($stat);
         }
     } catch (\Exception $e) {
         return $this->responseAJAX(['status' => 'error', 'message' => $e->getMessage()]);
     }
     // NOTE(review): the "sent" message text is returned even when $result is
     // falsy and the status is "error" — confirm whether that is intended.
     return $this->responseAJAX(['status' => $result ? 'success' : 'error', 'message' => 'Wiadomość została wysłana.']);
 }
 // Fetch one list page ($list_url . $i) with curl and pull the title, link,
 // reply count and author out of each thread row.
 // $list_url, $i and $cmd are defined outside this fragment.
 // Initialise curl
 $list_handle = curl_init();
 // curl options: return the body as a string and send a user agent from $cmd
 $options = [CURLOPT_URL => $list_url . $i, CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => $cmd->rand_user_agent()];
 curl_setopt_array($list_handle, $options);
 // Execute the request
 $pageContent = curl_exec($list_handle);
 if (curl_errno($list_handle)) {
     // Report the curl error and the failing URL.
     // NOTE(review): the handle is only closed in the success branch, and
     // parsing below still runs on whatever curl_exec() returned.
     $cmd->beep();
     $cmd->alert('curl出错:' . curl_error($list_handle));
     $cmd->alert('出错的URL是:' . $list_url . $i);
 } else {
     curl_close($list_handle);
     $cmd->alert("第 {$i} 列表页html内容获取成功!");
 }
 $html = SimpleHtmlDom\str_get_html($pageContent);
 // Each thread is a table row selected by its class attribute.
 $thread_lists = $html->find('tr[class=tr3 t_one]');
 foreach ($thread_lists as $thread_tr) {
     // Title and link URL. The title is converted from gb2312 to utf-8; $url is
     // then reassigned from the anchor element to the absolute URL string.
     foreach ($thread_tr->find('td h3 a') as $url) {
         $title = iconv('gb2312', 'utf-8//IGNORE', $url->innertext);
         $url = "http://t66y.com/" . $url->href;
     }
     // Reply count
     foreach ($thread_tr->find('td[class=tal f10 y-style]') as $comment) {
         $comments = $comment->innertext;
     }
     // Author: $author is reassigned from the element to its utf-8 text.
     foreach ($thread_tr->find('a[class=bl]') as $author) {
         $author = iconv('gb2312', 'utf-8', $author->innertext);
     }
 // Download each thread page in $threads[$key_start .. $key_end - 1] and store
 // the src of every <input> element that has one in the test_img_url table.
 // $threads, $key_start, $key_end and $cmd are defined outside this fragment.
 for ($j = $key_start; $j < $key_end; $j++) {
     // Initialise curl
     $thread_handle = curl_init();
     // curl options: return the body as a string and send a user agent from $cmd
     $options = [CURLOPT_URL => $threads[$j]['url'], CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => $cmd->rand_user_agent()];
     curl_setopt_array($thread_handle, $options);
     // Execute the request
     $thread_content = curl_exec($thread_handle);
     if (curl_errno($thread_handle)) {
         // Report the curl error and the failing URL.
         // NOTE(review): the handle is only closed in the success branch.
         $cmd->beep();
         $cmd->alert('curl出错:' . curl_error($thread_handle));
         $cmd->alert('出错的URL是:' . $threads[$j]['url']);
     } else {
         curl_close($thread_handle);
     }
     $html = SimpleHtmlDom\str_get_html($thread_content);
     // Find all image URLs in the post body and write them to the database.
     // The is_object() check skips pages the parser could not turn into a DOM.
     if (is_object($html)) {
         // The URLs are read from the src attribute of <input> elements.
         foreach ($html->find('input') as $element) {
             if ($element->src != '') {
                 $cmd->alert($element->src);
                 // Write to the database
                 // NOTE(review): a new PDO connection is opened for every element
                 // and the credentials are hard-coded here — consider moving both
                 // out of the loop and into configuration.
                 try {
                     $pdo = new PDO('mysql:host=127.0.0.1;dbname=caoliu;charset=utf8', 'root', '881224');
                     $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
                     $stmt = $pdo->prepare("insert into test_img_url (thread_id, url) VALUES (:thread_id, :url)");
                     $stmt->bindParam(':thread_id', $threads[$j]['id']);
                     $stmt->bindParam(':url', $element->src);
                     $stmt->execute();
                 } catch (PDOException $error) {
                     // Database errors are only reported through $cmd.
                     $cmd->alert($error->getMessage());
Example #6
0
}
?>

<h1>标题关键词排名前20</h1>
<?php 
// Load every thread title, flatten the titles into one text blob and feed it
// to the PSCWS4 word segmenter. $pdo, $cmd and ROOT_PATH come from outside this
// fragment.
try {
    $stmt = $pdo->query("select title from thread");
    $result = $stmt->fetchAll();
} catch (PDOException $error) {
    // NOTE(review): the error is only reported; unless alert() stops the
    // script, $result is undefined when the loop below runs.
    $cmd->alert($error->getMessage());
}
// Join the titles into a single newline-separated string.
// NOTE(review): $text is appended to without being initialised in this
// fragment — presumably it is set earlier; confirm.
foreach ($result as $value) {
    // First strip the image-count marker (e.g. "[10P]") that each title carries.
    // The pattern replaces two digits followed by an uppercase "P" with a space.
    $title = preg_replace("/[0-9][0-9]P/", ' ', $value['title']);
    // Parse the title as HTML and keep only its plain text.
    $text .= SimpleHtmlDom\str_get_html($title)->plaintext . "\n";
}
$cws = new PSCWS4();
// Character encoding
$cws->set_charset('utf8');
// Load the dictionary file
$cws->set_dict(ROOT_PATH . '/vendor/scws/pscws4/dict/dict.utf8.xdb');
// Rules for personal and place names
$cws->set_rule(ROOT_PATH . '/vendor/scws/pscws4/etc/rules.ini');
//$cws->set_multi(3);
// Ignore punctuation
$cws->set_ignore(true);
//$cws->set_debug(true);
// Additionally apply bigram matching to single characters
$cws->set_duality(true);
$cws->send_text($text);