/**
 * Extract the page title, meta description and meta keywords from a web page.
 *
 * The HTML comes from $pageContent when it is supplied; otherwise it is
 * fetched from $websiteUrl through Spider::getContent(), whose result is
 * read at the 'page' key.
 *
 * Each value found is either collected into the returned array
 * ($returVal true) or handed to WebsiteController::addInputValue()
 * ($returVal false; presumably this pre-fills a form input - confirm
 * against WebsiteController).
 *
 * @param string $websiteUrl  URL to crawl; "http://" is prepended when the
 *                            URL carries no http/https scheme. Ignored when
 *                            $pageContent is given.
 * @param mixed  $keyInput    When non-empty, only the meta keywords are
 *                            extracted (title and description are skipped).
 * @param string $pageContent Already-downloaded HTML; skips the fetch.
 * @param bool   $returVal    True to return the values, false to push them
 *                            to WebsiteController::addInputValue().
 *
 * @return array Subset of the keys 'page_title', 'page_description' and
 *               'page_keywords'. Always an array; empty when nothing was
 *               found, when the URL is unusable, or when $returVal is false.
 */
public function crawlMetaData($websiteUrl, $keyInput = '', $pageContent = '', $returVal = false)
{
    # Initialised up front so that every exit path returns an array.
    $metaInfo = array();

    if (empty($pageContent)) {
        # Nothing to crawl unless the URL has at least one word character.
        if (!preg_match('/\w+/', $websiteUrl)) {
            return $metaInfo;
        }

        # Prepend a scheme only when none is present, so that https:// URLs
        # are not turned into "http://https://...".
        if (!preg_match('/^https?:\/\//i', $websiteUrl)) {
            $websiteUrl = 'http://' . $websiteUrl;
        }

        $spider = new Spider();
        $ret = $spider->getContent($websiteUrl);
    } else {
        $ret = array('page' => $pageContent);
    }

    if (empty($ret['page'])) {
        return $metaInfo;
    }

    $page = $ret['page'];

    # Each entry is array(result key, form input name, extracted value).
    $found = array();

    # meta name => array(result key, form input name), in output order.
    $metaTargets = array();

    if (empty($keyInput)) {
        # meta title; attributes on the <title> tag are tolerated
        if (preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $page, $matches) && $matches[1] !== '') {
            $found[] = array('page_title', 'webtitle', $matches[1]);
        }

        $metaTargets['description'] = array('page_description', 'webdescription');
    }

    $metaTargets['keywords'] = array('page_keywords', 'webkeywords');

    foreach ($metaTargets as $metaName => $target) {
        $name = preg_quote($metaName, '/');

        foreach (array('"', "'") as $quote) {
            $nameAttr = '\sname\s*=\s*' . $quote . $name . $quote;
            $contentAttr = '\scontent\s*=\s*' . $quote . '([^' . $quote . ']*)' . $quote;

            # [^>] keeps every match inside a single <meta ...> tag; a lazy
            # ".*?" could pair one tag's name with a later tag's content.
            $patterns = array(
                '/<meta\b[^>]*?' . $nameAttr . '[^>]*?' . $contentAttr . '/i',
                '/<meta\b[^>]*?' . $contentAttr . '[^>]*?' . $nameAttr . '/i',
            );

            foreach ($patterns as $pattern) {
                if (preg_match($pattern, $page, $matches) && $matches[1] !== '') {
                    $found[] = array($target[0], $target[1], $matches[1]);
                    # First usable value wins; move on to the next meta name.
                    continue 3;
                }
            }
        }
    }

    foreach ($found as $entry) {
        list($resultKey, $inputName, $value) = $entry;

        if ($returVal) {
            $metaInfo[$resultKey] = $value;
        } else {
            WebsiteController::addInputValue($value, $inputName);
        }
    }

    return $metaInfo;
}