Пример #1
0
 /**
  * Load (once) and return the CNS-to-Unicode lookup table.
  *
  * Every file matching maps/cns2unicode/cns_unicode*.txt (relative to this
  * file's parent directory) is read line by line. Lines starting with '#'
  * are skipped. Each remaining line is expected to look like
  * "<page>-<code>\t<unicode>"; lines that do not have both a tab-separated
  * value and a '-' in the first field are ignored instead of polluting the
  * table with empty keys.
  *
  * The result is cached in self::$_maps, so the files are parsed at most
  * once per process.
  *
  * @return array Map indexed as [int page][lower-cased code] => the second
  *               tab-separated field of the line, stored verbatim.
  */
 protected static function _getMap()
 {
     if (!is_null(self::$_maps)) {
         return self::$_maps;
     }

     // glob() returns false on error; treat that the same as "no files".
     $files = glob(__DIR__ . '/../maps/cns2unicode/cns_unicode*.txt');
     if (false === $files) {
         $files = array();
     }

     $maps = array();
     foreach ($files as $file) {
         $fp = fopen($file, 'r');
         if (false === $fp) {
             // fopen() has already raised a warning; never hand a
             // non-resource to fgets().
             continue;
         }
         while (false !== ($line = fgets($fp))) {
             // fgets() never returns an empty string, so $line[0] is safe.
             if ('#' === $line[0]) {
                 continue;
             }
             $fields = explode("\t", trim($line));
             if (count($fields) < 2) {
                 // Blank line or line without a tab: nothing to map.
                 continue;
             }
             $parts = explode('-', $fields[0]);
             if (count($parts) < 2) {
                 continue;
             }
             $maps[intval($parts[0])][strtolower($parts[1])] = $fields[1];
         }
         // Release the handle; the original leaked one per map file.
         fclose($fp);
     }

     self::$_maps = $maps;
     return self::$_maps;
 }
Пример #2
0
 /**
  * Parse one company detail page into a plain object.
  *
  * Steps, in order:
  *  1. Run the content through Big52003::iconv() and rewrite the Big5
  *     charset declaration to UTF-8 (presumably the input is Big5-encoded
  *     HTML; confirm against the caller).
  *  2. Replace every KaiCGI1 glyph <img> tag with the result of
  *     CNS2UTF8::convert(page, code).
  *  3. If the first line of the 4th <table> names a foreign / mainland
  *     company page, delegate to parseForeignCompany() / parseChinaCompany().
  *  4. Otherwise read the basic-data table inside #Tab01 and, unless the
  *     company status is '核准認許', the director table (#Tab02) and the
  *     manager table (#Tab03).
  *
  * Dates written as "<y>年<m>月<d>日" become objects with year/month/day
  * integer properties, where year = 1911 + <y> (i.e. the year is treated as
  * an ROC/Minguo year); anything else becomes null.
  *
  * NOTE: a row whose label is not recognised prints a "[TODO1]" line and
  * terminates the script with exit. That behaviour is kept as-is.
  *
  * @param string $content Raw page content.
  *
  * @return mixed A stdClass keyed by the page's column labels, or whatever
  *               parseForeignCompany() / parseChinaCompany() return.
  *
  * @throws Exception When #Tab01, #Tab02 or #Tab03 is missing where it is
  *                   required, or when a director's link has no banNo.
  */
 public function parseFile($content)
 {
     $doc = new DOMDocument();
     $content = str_replace('text/html; charset=Big5', 'text/html; charset=UTF-8', Big52003::iconv($content));
     // Characters rendered as images are swapped for text. Sample tag:
     //<img src='http://gcis.nat.gov.tw/CNSServlet/KaiCGI1?page=3&code=3A62&size=12&background=ffffff&foreground=000000' onclick='javascript:this.src="http://gcis.nat.gov.tw/CNSServlet/KaiCGI1?page=3&code=3A62&size=36&background=ffffff&foreground=000000";' border='0' align='absmiddle' />
     $content = preg_replace_callback('#<img src=\'http://gcis.nat.gov.tw/CNSServlet/KaiCGI1\\?page=([^&]*)&code=([^&]*)&([^\']*)\' onclick=\'([^\']*)\' border=\'0\' align=\'absmiddle\' />#', function ($matches) {
         return CNS2UTF8::convert($matches[1], $matches[2]);
     }, $content);
     // '@' silences the warnings loadHTML() raises while parsing.
     @$doc->loadHTML($content);

     // Shared by the basic-data dates and the manager start dates.
     $parseDate = function ($text) {
         if (!preg_match('#(.*)年(.*)月(.*)日#', $text, $matches)) {
             return null;
         }
         $date = new stdClass();
         $date->year = 1911 + intval($matches[1]);
         $date->month = intval($matches[2]);
         $date->day = intval($matches[3]);
         return $date;
     };

     // The first line of the 4th table identifies special page types.
     // Computed once instead of once per comparison.
     $title_dom = $doc->getElementsByTagName('table')->item(3);
     $title = $title_dom ? trim(explode("\n", trim($title_dom->nodeValue))[0]) : '';
     if ('外國公司報備基本資料' == $title or '外國公司認許基本資料' == $title) {
         return self::parseForeignCompany($doc);
     }
     if ('大陸公司許可報備基本資料' == $title or '大陸公司許可基本資料' == $title) {
         return self::parseChinaCompany($doc);
     }

     // Basic data
     $info = new StdClass();
     $table_dom = $doc->getElementById('Tab01');
     if (!$table_dom) {
         throw new Exception('不知道的 HTML');
     }

     $text_columns = array('統一編號', '公司狀況', '公司名稱', '資本總額(元)', '實收資本額(元)', '代表人姓名', '公司所在地', '登記機關', '股權狀況');
     $date_columns = array('核准設立日期', '最後核准變更日期', '停業日期(起)', '停業日期(迄)', '延展開業日期(迄)');

     $base_table_dom = $table_dom->getElementsByTagName('table')->item(1);
     foreach ($base_table_dom->getElementsByTagName('tr') as $tr_dom) {
         // Look the cells up once per row; item(1) is the label cell and
         // item(2) the value cell.
         $td_doms = $tr_dom->getElementsByTagName('td');
         $key_dom = $td_doms->item(1);
         if (!$key_dom) {
             continue;
         }
         $column = trim($key_dom->childNodes->item(0)->wholeText);

         if (in_array($column, $text_columns, true)) {
             // Plain text value: keep only the first line of the cell.
             $value_dom = $td_doms->item(2)->childNodes->item(0);
             $info->{$column} = trim(explode("\n", trim($value_dom->wholeText))[0]);
         } elseif (in_array($column, $date_columns, true)) {
             $value_dom = $td_doms->item(2)->childNodes->item(0);
             $info->{$column} = $parseDate(trim(explode("\n", trim($value_dom->wholeText))[0]));
         } elseif ('所營事業資料' == $column) {
             // Business items: each cell of the nested table yields a
             // (code, description) pair taken from its 2nd and 3rd lines.
             $value_dom = $td_doms->item(2);
             $list_table_dom = $value_dom->getElementsByTagName('table')->item(0);
             $list = array();
             foreach ($list_table_dom->getElementsByTagName('td') as $td_dom) {
                 $lines = explode("\n", $td_dom->childNodes->item(0)->wholeText);
                 // NOTE(review): '*' lets this pattern match an empty
                 // prefix, so the exception only fires if preg_match()
                 // itself fails. Left unchanged to keep existing output.
                 if (!preg_match('#^([A-Z0-9]*)#', trim($lines[1]), $matches)) {
                     throw new Exception('事業代號不正確');
                 }
                 $list[] = array($matches[1], trim($lines[2]));
             }
             $info->{$column} = $list;
         } elseif ($column == '' or preg_match('/查詢「/', $column)) {
             // Spacer rows and lookup-link rows carry no data.
         } elseif (false !== strpos($key_dom->nodeValue, '原營利事業登記證所登載之營業項目資料')) {
             // strpos() returns 0 for a match at the very start of the
             // string, so it must be compared against false explicitly.
             $info->{'舊營業項目資料'} = 'http://gcis.nat.gov.tw' . $key_dom->getElementsByTagName('a')->item(0)->getAttribute('href');
         } else {
             // Unknown row: report it and stop, as the original code does.
             echo '[TODO1]' . trim($key_dom->nodeValue) . ' ' . $td_doms->item(2)->nodeValue . "\n";
             exit;
         }
     }

     if ('核准認許' !== $info->{'公司狀況'}) {
         // Directors and supervisors
         $table_dom = $doc->getElementById('Tab02');
         if (!$table_dom) {
             // Same failure mode as a missing Tab01, instead of a fatal
             // "member function on null" error.
             throw new Exception('不知道的 HTML');
         }
         $tr_doms = $table_dom->getElementsByTagName('table')->item(1)->getElementsByTagName('tr');
         $list = array();
         // Row 0 is skipped (presumably the header row).
         for ($i = 1; $i < $tr_doms->length; $i++) {
             $td_doms = $tr_doms->item($i)->getElementsByTagName('td');
             $row = new StdClass();
             $row->{'序號'} = trim($td_doms->item(1)->nodeValue);
             $row->{'職稱'} = trim($td_doms->item(2)->nodeValue);
             $row->{'姓名'} = trim($td_doms->item(3)->nodeValue);
             $corp_dom = $td_doms->item(4);
             if (trim($corp_dom->nodeValue) != '') {
                 $a_dom = $corp_dom->getElementsByTagName('a')->item(0);
                 if (!$a_dom) {
                     // Represented entity without a link: id recorded as 0.
                     $row->{'所代表法人'} = array(0, trim($corp_dom->nodeValue));
                 } else {
                     $link = $a_dom->getAttribute('href');
                     if (!preg_match('#banNo=(.*)#', $link, $matches)) {
                         throw new Exception('請處理法人');
                     }
                     $row->{'所代表法人'} = array($matches[1], trim($a_dom->nodeValue));
                 }
             } else {
                 $row->{'所代表法人'} = '';
             }
             $row->{'出資額'} = trim($td_doms->item(5)->nodeValue);
             $list[] = $row;
         }
         $info->{'董監事名單'} = $list;

         // Managers
         $table_dom = $doc->getElementById('Tab03');
         if (!$table_dom) {
             throw new Exception('不知道的 HTML');
         }
         $tr_doms = $table_dom->getElementsByTagName('table')->item(1)->getElementsByTagName('tr');
         $list = array();
         for ($i = 1; $i < $tr_doms->length; $i++) {
             $td_doms = $tr_doms->item($i)->getElementsByTagName('td');
             $row = new StdClass();
             $row->{'序號'} = trim($td_doms->item(1)->nodeValue);
             $row->{'姓名'} = trim($td_doms->item(2)->nodeValue);
             $row->{'到職日期'} = $parseDate(trim($td_doms->item(3)->nodeValue));
             $list[] = $row;
         }
         $info->{'經理人名單'} = $list;
     }

     return $info;
 }