/**
 * Parses the given markup with SimpleHTMLDOM and returns the resulting document.
 *
 * The document is queried for <image> elements, but the loop over them is
 * currently empty. Afterwards the document is serialised with save() and
 * loaded again before being returned.
 *
 * @param mixed $html Markup handed to SimpleHTMLDOM::str_get_html().
 * @return mixed The value produced by str_get_html() (the document, not a string).
 */
public static function handleImages($html)
{
    $parser = new \SimpleHTMLDOM();
    $document = $parser->str_get_html($html);

    $images = $document->find('image');
    foreach ($images as $image) {
        // No per-image processing is implemented.
    }

    // Round-trip through save()/load() so the returned document is freshly parsed.
    $serialized = $document->save();
    $document->load($serialized);

    return $document;
}
/**
 * Scrapes the pharmacy listing on healthfamily.co and stores one Pharmacy
 * record per listing block (div.h125), then renders the "index" view.
 *
 * Only result page 1 is fetched. The address is saved as an empty string;
 * records are saved with validation disabled (save(FALSE)).
 */
public function actionIndex()
{
    $lastPage = 1;

    for ($page = 1; $page <= $lastPage; $page++) {
        $simpleHTML = new SimpleHTMLDOM();
        $html = $simpleHTML->file_get_html("http://www.healthfamily.co/danh-sach-nha-thuoc-hieu-thuoc?p={$page}&r=100#.VavHvXj0HuU");
        if (!is_object($html)) {
            // The page could not be fetched or parsed, so there is nothing to import from it.
            continue;
        }

        foreach ($html->find('div.h125') as $element) {
            // find() without an index returns a list of nodes; request the first
            // match explicitly so that ->innertext is read from a node.
            $nameLink = $element->find('h3 a', 0);
            if (!is_object($nameLink)) {
                // Listing without a name link: skip it rather than store a nameless record.
                continue;
            }

            // $address = $element->find('p.date-time', 0)->innertext;
            $pharmacy = new Pharmacy();
            $pharmacy->name = $nameLink->innertext;
            $pharmacy->address = "";
            $pharmacy->save(FALSE);
        }
    }

    $this->render('index');
}
/**
 * Rewrites the HTML of every item in $data into the simplified tag format.
 *
 * For each item, $data[$i]['html'] is parsed and replaced in place:
 *  - <img>              becomes <smile>{src}</smile>
 *  - <iframe>           becomes <video>{src}</video>
 *  - <picture>          becomes <image>{srcset of its first child}</image>
 *  - nodes matched by the "comment" selector are removed
 *  - <photo-collection> is removed and its "params" value is copied to
 *    $data[$i]['photo-collection'] (the last one wins if there are several)
 *  - the document is passed to clearTags() for 'div[class=b-article_in-img]'
 *
 * Items whose markup does not yield a document are left untouched.
 *
 * @param array $data Items indexed 0..n-1, each with an 'html' key; modified by reference.
 */
public function postProcessing(&$data)
{
    \Yii::import('ext.SimpleHTMLDOM.SimpleHTMLDOM');
    //\Yii::log(print_r($data, true), 'info', 'api');

    // Elements that are replaced by a wrapper tag around their "src" attribute.
    $srcWrappers = array('img' => 'smile', 'iframe' => 'video');

    /**@todo: clear*/
    $total = count($data);
    for ($i = 0; $i < $total; $i++) {
        $simpleDom = new \SimpleHTMLDOM();
        $html = $simpleDom->str_get_html($data[$i]['html']);
        if (!is_object($html)) {
            // str_get_html() did not return a document; calling find() on it
            // would be a fatal error, so keep this item's html as it is.
            continue;
        }

        foreach ($srcWrappers as $tag => $wrapper) {
            foreach ($html->find($tag) as $node) {
                $node->outertext = '<' . $wrapper . '>' . $node->src . '</' . $wrapper . '>';
            }
            // Re-parse after each pass so the next selector runs on the rewritten markup.
            $html->load($html->save());
        }

        foreach ($html->find('picture') as $picture) {
            // A <picture> without children has no first child to read srcset from.
            $firstChild = $picture->first_child();
            $srcset = is_object($firstChild) ? $firstChild->srcset : '';
            $picture->outertext = '<image>' . $srcset . '</image>';
        }
        $html->load($html->save());

        /*foreach ($html->find('a') as $link) {
            $link->outertext = '<link><src>' . $link->href . '</src><title>' . $link->innertext . '</title></link>';
        }
        $html->load($html->save());*/

        foreach ($html->find('comment') as $comment) {
            $comment->outertext = '';
        }
        $html->load($html->save());

        foreach ($html->find('photo-collection') as $collection) {
            $data[$i]['photo-collection'] = $collection->params;
            $collection->outertext = '';
        }
        $html->load($html->save());

        $this->clearTags($html, 'div[class=b-article_in-img]');

        /*$tags = array('div', 'strong', 'del', 'em', 'b', 'u', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',);
        foreach ($tags as $key => $tag) {
            $this->clearTags($html, $tag);
            //\Yii::log((string)$html, 'info', 'api');
        }*/
        //\Yii::log(print_r($html->nodes, true), 'info', 'api');

        $html->load($html->save());
        $data[$i]['html'] = $html->outertext;
    }
}
/**
 * Bulk-imports Youku videos as lessons of a course.
 *
 * When a "url" field is posted, the page at that URL is fetched and every
 * link matched by the selector for the recognised URL kind (playlist page or
 * show/episode page) is stored as a Lesson pointing at the Youku SWF player.
 * The request is then redirected to lesson/editByCourse. If the URL is of no
 * recognised kind, or the page cannot be loaded, no lessons are created and
 * the redirect still happens.
 *
 * Without a posted "url", the "youkuPlayList" partial is rendered for the course.
 *
 * @param int $courseId Primary key of the course the lessons are attached to.
 */
public function actionYoukuPlayList($courseId = 0)
{
    if (isset($_POST['url'])) {
        $url = $_POST['url'];

        $urlDict = array(
            'playlist' => array('pattern' => "/\\/playlist_show\\/id_/i", 'selector' => '#list1_1 .items li.v_title a'),
            'episode' => array('pattern' => "/\\/show_page\\/id_/i", 'selector' => '#episode li.ititle_w a'),
        );

        // Pick the link selector for the first URL kind that matches.
        $selector = null;
        foreach ($urlDict as $item) {
            if (preg_match($item['pattern'], $url)) {
                $selector = $item['selector'];
                break;
            }
        }

        $html = false;
        if ($selector !== null) {
            Yii::import('ext.SimpleHTMLDOM.SimpleHTMLDOM');
            // NOTE(review): $url comes straight from the request and is fetched
            // server-side; the patterns above only check part of the path, not
            // the host. Consider restricting the allowed hosts.
            $simpleHTML = new SimpleHTMLDOM();
            $html = $simpleHTML->file_get_html($url);
        }

        if (is_object($html)) {
            $pattern = "/id_(.*)\\/?\\.html/i";

            // Each matched link is one video of the playlist / show.
            foreach ($html->find($selector) as $elem) {
                // Links whose href carries no video id cannot be turned into a player URL.
                if (!preg_match($pattern, $elem->href, $matches) || empty($matches[1])) {
                    continue;
                }

                $lesson = new Lesson();
                $lesson->courseId = $courseId;
                // Prefer the link's title attribute, falling back to its inner markup.
                $lesson->title = $elem->getAttribute("title") ? $elem->getAttribute("title") : $elem->innertext;
                // http://player.youku.com/player.php/sid/XNTk5MTQ3OTQ0/v.swf
                $lesson->url = "http://player.youku.com/player.php/sid/{$matches[1]}/v.swf";
                $lesson->addTime = time();
                $lesson->save();
            }
        }

        $this->redirect(array('lesson/editByCourse', 'courseId' => $courseId));
        Yii::app()->end();
    }

    $course = Course::model()->findByPk($courseId);
    $this->renderPartial("youkuPlayList", array('course' => $course), false, true);
}