/**
 * Parse a company detail page served by gcis.nat.gov.tw into a plain object.
 *
 * The raw page is passed through Big52003::iconv(), its Big5 charset declaration is rewritten
 * to UTF-8, and every CNS glyph <img> placeholder is replaced by the return value of
 * CNS2UTF8::convert() before the markup is loaded into a DOMDocument.
 *
 * Pages whose 4th <table> starts with one of the foreign / mainland-China company titles are
 * delegated to parseForeignCompany() / parseChinaCompany(). All other pages are read from the
 * elements with ids Tab01 (basic data), Tab02 (directors and supervisors) and Tab03 (managers).
 *
 * NOTE(review): a basic-data row with an unrecognised label prints a "[TODO1]" diagnostic and
 * terminates the process with exit; that behaviour is kept as-is.
 *
 * @param string $content Raw HTML of the detail page (presumably Big5-encoded - confirm with caller).
 *
 * @return mixed For regular pages a stdClass whose property names are the page's own field labels,
 *               plus '董監事名單' and '經理人名單' lists unless '公司狀況' is '核准認許';
 *               otherwise whatever parseForeignCompany() / parseChinaCompany() returns.
 *
 * @throws Exception When an expected tab/table is missing or a representative link has no banNo.
 */
public function parseFile($content)
{
    $doc = new DOMDocument();
    $content = str_replace(
        'text/html; charset=Big5',
        'text/html; charset=UTF-8',
        Big52003::iconv($content)
    );

    // Glyph placeholders look like this in the page and are swapped for the converted character:
    // <img src='http://gcis.nat.gov.tw/CNSServlet/KaiCGI1?page=3&code=3A62&size=12&background=ffffff&foreground=000000' onclick='javascript:this.src="http://gcis.nat.gov.tw/CNSServlet/KaiCGI1?page=3&code=3A62&size=36&background=ffffff&foreground=000000";' border='0' align='absmiddle' />
    $content = preg_replace_callback(
        '#<img src=\'http://gcis.nat.gov.tw/CNSServlet/KaiCGI1\\?page=([^&]*)&code=([^&]*)&([^\']*)\' onclick=\'([^\']*)\' border=\'0\' align=\'absmiddle\' />#',
        function ($matches) {
            return CNS2UTF8::convert($matches[1], $matches[2]);
        },
        $content
    );

    // Warnings from loadHTML are suppressed on purpose, as in the original code.
    @$doc->loadHTML($content);

    // First line of a text, trimmed on both sides.
    $first_line = function ($text) {
        return trim(explode("\n", trim($text))[0]);
    };

    // Text of a node's first child; '' when that child is missing or is not a text node.
    $leading_text = function ($node) {
        $child = $node->childNodes->item(0);

        return $child instanceof DOMText ? $child->wholeText : '';
    };

    // "<y>年<m>月<d>日" -> {year, month, day}; 1911 is added to the year. Null when the pattern is absent.
    $parse_date = function ($text) {
        if (!preg_match('#(.*)年(.*)月(.*)日#', $text, $matches)) {
            return null;
        }
        $date = new stdClass();
        $date->year = 1911 + intval($matches[1]);
        $date->month = intval($matches[2]);
        $date->day = intval($matches[3]);

        return $date;
    };

    // Second <table> nested inside the element with the given id; the page is rejected when absent.
    $find_tab_table = function ($tab_id) use ($doc) {
        $tab_dom = $doc->getElementById($tab_id);
        $inner_table_dom = $tab_dom ? $tab_dom->getElementsByTagName('table')->item(1) : null;
        if (!$inner_table_dom) {
            throw new Exception('不知道的 HTML');
        }

        return $inner_table_dom;
    };

    // The 4th table's first line tells foreign / mainland-China company pages apart.
    $title_table_dom = $doc->getElementsByTagName('table')->item(3);
    $page_title = $title_table_dom ? $first_line($title_table_dom->nodeValue) : '';
    if (in_array($page_title, array('外國公司報備基本資料', '外國公司認許基本資料'), true)) {
        return self::parseForeignCompany($doc);
    }
    if (in_array($page_title, array('大陸公司許可報備基本資料', '大陸公司許可基本資料'), true)) {
        return self::parseChinaCompany($doc);
    }

    // Basic information
    $info = new stdClass();
    $text_columns = array(
        '統一編號', '公司狀況', '公司名稱', '資本總額(元)', '實收資本額(元)',
        '代表人姓名', '公司所在地', '登記機關', '股權狀況',
    );
    $date_columns = array('核准設立日期', '最後核准變更日期', '停業日期(起)', '停業日期(迄)', '延展開業日期(迄)');

    foreach ($find_tab_table('Tab01')->getElementsByTagName('tr') as $tr_dom) {
        $td_doms = $tr_dom->getElementsByTagName('td');
        $key_dom = $td_doms->item(1);
        if (!$key_dom) {
            continue;
        }
        $column = trim($leading_text($key_dom));

        if (in_array($column, $text_columns, true)) {
            $info->{$column} = $first_line($leading_text($td_doms->item(2)));
        } elseif (in_array($column, $date_columns, true)) {
            $info->{$column} = $parse_date($first_line($leading_text($td_doms->item(2))));
        } elseif ('所營事業資料' === $column) {
            $list_table_dom = $td_doms->item(2)->getElementsByTagName('table')->item(0);
            $list = array();
            foreach ($list_table_dom->getElementsByTagName('td') as $td_dom) {
                $lines = explode("\n", $leading_text($td_dom));
                $code_line = isset($lines[1]) ? trim($lines[1]) : '';
                // NOTE(review): the `*` quantifier also matches the empty string, so this guard
                // cannot fire today; left unchanged to avoid rejecting pages that currently parse.
                if (!preg_match('#^([A-Z0-9]*)#', $code_line, $matches)) {
                    throw new Exception('事業代號不正確');
                }
                $list[] = array($matches[1], isset($lines[2]) ? trim($lines[2]) : '');
            }
            $info->{$column} = $list;
        } elseif ($column === '' || preg_match('/查詢「/', $column)) {
            // Spacer rows and "查詢「…" rows carry no data.
        } elseif (strpos($key_dom->nodeValue, '原營利事業登記證所登載之營業項目資料') !== false) {
            // `!== false` matters: a match at offset 0 is still a match.
            $info->{'舊營業項目資料'} = 'http://gcis.nat.gov.tw'
                . $key_dom->getElementsByTagName('a')->item(0)->getAttribute('href');
        } else {
            // NOTE(review): unknown row - prints a diagnostic and stops the whole process.
            echo '[TODO1]' . trim($key_dom->nodeValue) . ' ' . $td_doms->item(2)->nodeValue . "\n";
            exit;
        }
    }

    $status = isset($info->{'公司狀況'}) ? $info->{'公司狀況'} : null;
    if ('核准認許' !== $status) {
        // Directors and supervisors list (row 0 is skipped, as in the original loop).
        $tr_doms = $find_tab_table('Tab02')->getElementsByTagName('tr');
        $list = array();
        for ($i = 1; $i < $tr_doms->length; $i++) {
            $td_doms = $tr_doms->item($i)->getElementsByTagName('td');
            $row = new stdClass();
            $row->{'序號'} = trim($td_doms->item(1)->nodeValue);
            $row->{'職稱'} = trim($td_doms->item(2)->nodeValue);
            $row->{'姓名'} = trim($td_doms->item(3)->nodeValue);

            $corp_dom = $td_doms->item(4);
            $corp_text = trim($corp_dom->nodeValue);
            if ($corp_text === '') {
                $row->{'所代表法人'} = '';
            } else {
                $a_dom = $corp_dom->getElementsByTagName('a')->item(0);
                if (!$a_dom) {
                    // No link: only the name is available, 0 takes the place of the banNo.
                    $row->{'所代表法人'} = array(0, $corp_text);
                } else {
                    if (!preg_match('#banNo=(.*)#', $a_dom->getAttribute('href'), $matches)) {
                        throw new Exception('請處理法人');
                    }
                    $row->{'所代表法人'} = array($matches[1], trim($a_dom->nodeValue));
                }
            }

            $row->{'出資額'} = trim($td_doms->item(5)->nodeValue);
            $list[] = $row;
        }
        $info->{'董監事名單'} = $list;

        // Managers list (row 0 is skipped, as in the original loop).
        $tr_doms = $find_tab_table('Tab03')->getElementsByTagName('tr');
        $list = array();
        for ($i = 1; $i < $tr_doms->length; $i++) {
            $td_doms = $tr_doms->item($i)->getElementsByTagName('td');
            $row = new stdClass();
            $row->{'序號'} = trim($td_doms->item(1)->nodeValue);
            $row->{'姓名'} = trim($td_doms->item(2)->nodeValue);
            $row->{'到職日期'} = $parse_date(trim($td_doms->item(3)->nodeValue));
            $list[] = $row;
        }
        $info->{'經理人名單'} = $list;
    }

    return $info;
}