Example #1
0
/**
 * Populates the given job with a fixed, hard-coded set of example documents
 * about data mining, suitable for trying out clustering.
 *
 * Each fixture entry is a (title, URL) pair. Nothing is returned; the job is
 * modified in place through Carrot2Job::addDocument().
 *
 * @param Carrot2Job $job job that receives the example documents
 */
function addExampleDocuments(Carrot2Job $job)
{
    // One (title, url) pair per line; order is preserved from the original fixture.
    $fixtures = array(
        array("Data Mining - Wikipedia", "http://en.wikipedia.org/wiki/Data_mining"),
        array("KD Nuggets", "http://www.kdnuggets.com/"),
        array("The Data Mine", "http://www.the-data-mine.com/"),
        array("DMG", "http://www.dmg.org/"),
        array("Two Crows: Data mining glossary", "http://www.twocrows.com/glossary.htm"),
        array("Jeff Ullman's Data Mining Lecture Notes", "http://www-db.stanford.edu/~ullman/mining/mining.html"),
        array("Thearling.com", "http://www.thearling.com/"),
        array("Data Mining", "http://www.eco.utexas.edu/~norman/BUS.FOR/course.mat/Alex"),
        array("CCSU - Data Mining Book", "http://www.ccsu.edu/datamining/resources.html"),
        array("Data Mining: Practical Machine Learning Tools and Techniques", "http://www.cs.waikato.ac.nz/~ml/weka/book.html"),
        array("Data Mining - Monografias.com", "http://www.monografias.com/trabajos/datamining/datamining.shtml"),
        array("Amazon.com: Data Mining: Books: Pieter Adriaans,Dolf Zantinge", "http://www.amazon.com/exec/obidos/tg/detail/-/0201403803?v=glance"),
        array("DMReview", "http://www.dmreview.com/"),
        array("Data Mining @ CCSU", "http://www.ccsu.edu/datamining"),
        array("What is Data Mining", "http://www.megaputer.com/dm/dm101.php3"),
        array("Electronic Statistics Book: Data Mining Techniques", "http://www.statsoft.com/textbook/stdatmin.html"),
        array("data mining - a definition from Whatis.com - see also: data miner, data analysis", "http://searchcrm.techtarget.com/sDefinition/0,,sid11_gci211901,00.html"),
        array("St@tServ - About Data Mining", "http://www.statserv.com/datamining.html"),
        array("DATA MINING 2005", "http://www.wessex.ac.uk/conferences/2005/data05"),
        array("Investor Home - Data Mining Book", "http://www.investorhome.com/mining.htm"),
        array("SAS | Data Mining and Text Data Mining", "http://www.sas.com/technologies/data_mining"),
        array("Data Mining Student Notes, QUB", "http://www.pcc.qub.ac.uk/tec/courses/datamining/stu_notes/dm_book_1.html"),
        array("Data Mining", "http://datamining.typepad.com/data_mining"),
        array("Two Crows Corporation", "http://www.twocrows.com/"),
        array("Statistical Data Mining Tutorials", "http://www.autonlab.org/tutorials"),
        array("Data Mining: An Introduction", "http://databases.about.com/library/weekly/aa100700a.htm"),
        array("Data Mining Project", "http://research.microsoft.com/dmx/datamining"),
        array("An Introduction to Data Mining", "http://www.thearling.com/text/dmwhite/dmwhite.htm"),
        array("Untangling Text Data Mining", "http://www.sims.berkeley.edu/~hearst/papers/acl99/acl99-tdm.html"),
        array("Data Mining Technologies", "http://www.data-mine.com/"),
        array("SQL Server Data Mining Tutorials", "http://www.sqlserverdatamining.com/"),
        array("Data Warehousing Information Center", "http://www.dwinfocenter.org/"),
        array("ITworld.com - Data mining Tutorials", "http://www.itworld.com/App/110/050805datamining"),
        array("IBM Research | Almaden Research Center | Computer Science", "http://www.almaden.ibm.com/cs/quest"),
        array("Data Mining and Discovery", "http://www.aaai.org/AITopics/html/mining.html"),
        array("Data Mining: An Overview", "http://www.fas.org/irp/crs/RL31798.pdf"),
        array("Data Mining", "http://www.gr-fx.com/graf-fx.htm"),
        array("Data Mining Benchmarking Association (DMBA)", "http://www.dmbenchmarking.com/"),
        array("Data Mining", "http://www.computerworld.com/databasetopics/businessintelligence/datamining"),
        array("National Center for Data Mining (NCDM) - University of Illinois at Chicago", "http://www.ncdm.uic.edu/"),
    );

    foreach ($fixtures as $entry) {
        list($title, $url) = $entry;
        // The second argument is deliberately an empty string
        // (presumably the document body/snippet -- confirm against Carrot2Job::addDocument).
        $job->addDocument($title, '', $url);
    }
}
Example #2
0
 /**
  * Processes the provided Carrot2 job by POSTing it to the DCS at $this->baseurl.
  *
  * The request is sent as a multipart form: the job's attributes, the forced
  * 'dcs.output.format' => 'XML' entry, the documents serialized by
  * generateXml() (only when the job has any), and the optional source,
  * algorithm and query values.
  *
  * @param Carrot2Job $job job describing what to cluster
  * @return Carrot2Result processing results, as produced by extractResponse()
  * @throws Carrot2Exception in case of unrecoverable errors, e.g. no connection
  *                          to DCS, cURL initialization failure, or an HTTP
  *                          status of 400 or above
  */
 public function cluster(Carrot2Job $job)
 {
     $curl = curl_init($this->baseurl);
     if ($curl === false) {
         // Without this guard, every subsequent curl_* call would operate on
         // `false` instead of reporting the failure through Carrot2Exception.
         throw new Carrot2Exception('Could not initialize cURL session');
     }

     // Prepare request parameters
     $fields = array_merge($job->getAttributes(), array('dcs.output.format' => 'XML'));
     $documents = $job->getDocuments();
     if (count($documents) > 0) {
         $fields['dcs.c2stream'] = $this->generateXml($documents);
     }
     self::addIfNotNull($fields, 'dcs.source', $job->getSource());
     self::addIfNotNull($fields, 'dcs.algorithm', $job->getAlgorithm());
     self::addIfNotNull($fields, 'query', $job->getQuery());

     // Make POST request.
     // No explicit Content-Type header is set: because CURLOPT_POSTFIELDS is an
     // array, cURL encodes the body as multipart/form-data and emits the correct
     // header (including the boundary) itself. The previous hand-written value,
     // 'multipart/formdata', was not a valid media type.
     curl_setopt_array($curl, array(
         CURLOPT_POST => true,
         CURLOPT_HEADER => false,
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_POSTFIELDS => $fields,
     ));
     $response = curl_exec($curl);

     // Transport-level failure (DNS, connection refused, timeout, ...)
     $error = curl_errno($curl);
     if ($error !== 0) {
         throw new Carrot2Exception(curl_error($curl));
     }

     // The request went through, but the server reported a client/server error
     $httpStatus = curl_getinfo($curl, CURLINFO_HTTP_CODE);
     if ($httpStatus >= 400) {
         throw new Carrot2Exception('HTTP error occurred, error code: ' . $httpStatus);
     }

     return $this->extractResponse($response);
 }