/**
 * Record an incoming WeChat push message and run the follow-up actions.
 *
 * Steps, in order:
 *  1. Parse the XML payload and pick the child element that represents the
 *     message "content" for its MsgType (falls back to <Content>).
 *  2. Store one WxHistory row that also keeps the raw XML.
 *  3. If the content is a valid URL, pass it to SpiderService::add().
 *  4. If it is a text message starting with "#", make sure the sender has a
 *     User and a UserOpenidUnionid binding (creating both when missing) and
 *     store the text, with leading "#" characters stripped, as a
 *     UserMessageHistory row.
 *
 * A payload that cannot be parsed is still written to WxHistory (with empty
 * type / openids / content) so the raw text is not lost; nothing else is done
 * for it.
 *
 * @param string $xml    Raw XML body of the push message.
 * @param string $source Origin tag, stored unchanged on the history row.
 * @return void
 */
public static function add($xml, $source = "")
{
    $date_now = date("Y-m-d H:i:s");

    // MsgType => name of the XML child whose value is stored as "content".
    // Any type not listed here reads <Content>.
    $content_field_by_type = [
        "location"   => "Label",
        "voice"      => "Recognition",
        "image"      => "PicUrl",
        "link"       => "Title",
        "shortvideo" => "ThumbMediaId",
        "event"      => "Event",
    ];

    $type = "";
    $from_openid = "";
    $to_openid = "";
    $content = "";

    // simplexml_load_string() returns false for empty/malformed XML; only
    // read fields when parsing succeeded instead of dereferencing false.
    $data = simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NOCDATA);
    if ($data !== false) {
        // Cast once so plain strings (not SimpleXMLElement objects) flow into
        // the models, md5() and substr() below.
        $type = trim((string)$data->MsgType);
        $from_openid = (string)$data->FromUserName;
        $to_openid = (string)$data->ToUserName;

        $content_field = isset($content_field_by_type[$type])
            ? $content_field_by_type[$type]
            : "Content";
        $content = trim((string)$data->{$content_field});
    }

    $model_wx_history = new WxHistory();
    $model_wx_history->from_openid = $from_openid;
    $model_wx_history->to_openid = $to_openid;
    $model_wx_history->type = $type;
    $model_wx_history->content = $content;
    $model_wx_history->text = $xml;
    $model_wx_history->source = $source;
    $model_wx_history->created_time = $date_now;
    $model_wx_history->save(0);

    if (filter_var($content, FILTER_VALIDATE_URL) !== false) {
        SpiderService::add($content);
    }

    // Only text messages whose first character is "#" are handled further.
    if ($type !== "text" || strncmp($content, "#", 1) !== 0) {
        return;
    }

    $bind_info = UserOpenidUnionid::findOne(['other_openid' => $from_openid]);
    if (!$bind_info) {
        $unique_name = md5($from_openid);
        $user_info = User::findOne(['unique_name' => $unique_name]);
        if (!$user_info) {
            $model_user = new User();
            $model_user->nickname = "微信用户" . substr($from_openid, -10);
            $model_user->unique_name = $unique_name;
            $model_user->updated_time = $date_now;
            $model_user->created_time = $date_now;
            $model_user->save(0);
            $user_info = $model_user;
        }

        $model_bind = new UserOpenidUnionid();
        $model_bind->uid = $user_info['uid'];
        $model_bind->openid = $from_openid;
        $model_bind->unionid = '';
        $model_bind->other_openid = $from_openid;
        $model_bind->updated_time = $date_now;
        $model_bind->created_time = $date_now;
        $model_bind->save(0);

        // Continue with the binding just created; previously $bind_info stayed
        // empty here, so the first "#" message of a new sender was dropped.
        $bind_info = $model_bind;
    }

    $model_message = new UserMessageHistory();
    $model_message->uid = $bind_info['uid'];
    $model_message->type = 1;
    $model_message->content = ltrim($content, "#");
    $model_message->status = 1;
    $model_message->updated_time = $date_now;
    $model_message->created_time = $date_now;
    $model_message->save(0);
}
/**
 * Fetch a page and extract its article title and body HTML.
 *
 * The body is the inner HTML of <div class="entry-content"> (up to the
 * closing </div> that precedes <footer>); the title is the inner HTML of
 * <h1 class="entry-title">. Every <img> src found in the body is passed
 * through $this->downImg() and rewritten to the returned URL with
 * "?format=/w/300" appended.
 *
 * @param string $url Page to fetch via $this->getContentByUrl().
 * @return array Empty when nothing could be fetched; otherwise holds
 *               'content' and/or 'title' for whichever parts were matched.
 */
private function crawl_devtang($url)
{
    $ret = [];

    $content = $this->getContentByUrl($url);
    if (!$content) {
        return $ret;
    }

    $reg_rule = "/<div\\s*class=\"entry-content\">(.*?)<\\/div>\\s*<footer>/is";
    preg_match($reg_rule, $content, $matches);
    if ($matches && $matches[1]) {
        $ret['content'] = trim($matches[1]);
    }

    /* Replace img sources. */
    // !empty() also covers the case where the body regex did not match and
    // the 'content' key was never set.
    if (!empty($ret['content'])) {
        preg_match_all('/<\\s*img\\s+[^>]*?src\\s*=\\s*(\'|\\")(.*?)\\1[^>]*?\\/?\\s*>/i', $ret['content'], $match_img);

        // original src => rewritten URL; built first and applied in one pass.
        $replacements = [];
        if (!empty($match_img[2])) {
            foreach ($match_img[2] as $_img_src) {
                // Skip empty sources and ones already handled, so each
                // distinct image is downloaded only once.
                if ($_img_src === '' || isset($replacements[$_img_src])) {
                    continue;
                }

                if (!preg_match("/^http/", $_img_src)) {
                    $tmp_down_url = "http://" . SpiderService::getDomain("devtang") . $_img_src;
                } else {
                    $tmp_down_url = $_img_src;
                }

                $replacements[$_img_src] = $this->downImg($tmp_down_url) . "?format=/w/300";
            }
        }

        if ($replacements) {
            // strtr() with a map tries the longest key first and never
            // rescans text it has already substituted, so a rewritten URL
            // cannot be corrupted by a later replacement.
            $ret['content'] = strtr($ret['content'], $replacements);
        }
    }

    $reg_rule = "/<h1\\s*class=\"entry-title\">(.*?)<\\/h1>/is";
    preg_match($reg_rule, $content, $matches);
    if ($matches && $matches[1]) {
        $ret['title'] = trim($matches[1]);
    }

    return $ret;
}