/
Backpage.php
109 lines (96 loc) · 3.15 KB
/
Backpage.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
<?php
namespace app\models\glabs\categories;
use app\commands\GlabsController;
use app\models\glabs\objects\ObjectException;
use app\models\glabs\objects\Backpage as Object;
use app\models\glabs\sites\BaseSite;
use PHPHtmlParser\Dom;
use PHPHtmlParser\Exceptions\CurlException;
/**
* Class of categories of backpage.com.
*
* @package glabs
* @author Nikolaj Rudakov <nnrudakov@gmail.com>
* @copyright 2016
*/
class Backpage extends BaseCategory
{
    /**
     * Prepares the category URLs for the summary layout and delegates to the parent.
     *
     * Sets the static page parameter to '&page=' and appends '?layout=summary'
     * to every URL before passing all arguments on to the parent constructor.
     *
     * @inheritdoc
     *
     * @param string[] $url        Category URLs; must be an array because it is mapped over.
     * @param mixed    $title      Passed through to the parent unchanged.
     * @param mixed    $categoryId Passed through to the parent unchanged.
     * @param mixed    $type       Passed through to the parent unchanged.
     * @param mixed    $count      Passed through to the parent unchanged.
     */
    public function __construct($url, $title, $categoryId, $type, $count)
    {
        // NOTE(review): this is static state, so it is shared with every class
        // that reads self::$pageParam - confirm no other category runs concurrently.
        self::$pageParam = '&page=';
        $url = array_map(function ($item) { return $item . '?layout=summary'; }, $url);
        parent::__construct($url, $title, $categoryId, $type, $count);
    }

    /**
     * Loads one listing page, collects its objects and follows pagination
     * until enough objects are collected or the page reports no matches.
     *
     * @inheritdoc
     *
     * @param string $url Page URL to load.
     *
     * @return bool Always true once collecting has finished.
     *
     * @throws CurlException When loading fails for a reason other than a
     *                       timeout or a 525 response.
     */
    protected function collectObjects($url)
    {
        if (!array_key_exists($url, $this->collectedCount)) {
            $this->collectedCount[$url] = 0;
        }

        $dom = new Dom();
        try {
            $dom->loadFromUrl($url, [], GlabsController::$curl);
        } catch (CurlException $e) {
            $message = $e->getMessage();
            // Retry when EITHER marker is present. The previous sequential
            // checks required both at once, so a plain timeout (or a plain
            // 525) was always rethrown instead of retried.
            $isRetryable = false !== strpos($message, 'timed out')
                || false !== strpos($message, '525');
            if (!$isRetryable) {
                // Rethrow the original exception to keep its code and trace.
                throw $e;
            }

            GlabsController::showMessage(' ...trying again', false);

            return $this->collectObjects($url);
        }

        // end collect. no results
        if (false !== strpos((string) $dom, 'No matches found.')) {
            return true;
        }

        $this->checkTotalObjects($dom);

        /* @var \PHPHtmlParser\Dom\AbstractNode $span */
        foreach ($dom->find('.summaryHeader') as $span) {
            if ($this->isEnoughCollect()) {
                break;
            }

            /* @var \PHPHtmlParser\Dom\AbstractNode $link */
            if ($link = $span->find('a', 0)) {
                $href = $link->getAttribute('href');
                // Skip links that were already collected.
                if (in_array($href, $this->collected, true)) {
                    continue;
                }

                // `Object` is this file's alias for app\models\glabs\objects\Backpage.
                // NOTE(review): `object` is a reserved class name since PHP 7.2,
                // so the alias only works on older runtimes.
                $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
                try {
                    $object->setPrice();
                } catch (ObjectException $e) {
                    // Objects whose price cannot be set are not collected.
                    continue;
                }

                $this->collected[] = $href;
                $this->objects[] = $object;
                $this->collectedCount[$url]++;
                BaseSite::$doneObjects++;
                BaseSite::progress();
            }
        }

        if (!$this->isEnoughCollect()) {
            // Send the page just loaded as the referer of the next request.
            $curl = GlabsController::$curl;
            $curl::$referer = $url;
            // Strip the current page parameter before building the next paged URL.
            $url = str_replace(self::$pageParam . self::$page, '', $url);
            // A falsy page counter jumps straight to 2; afterwards it grows by 1.
            self::$page += self::$page ? 1 : 2;

            return $this->collectObjects($this->getPagedUrl($url));
        }

        return true;
    }

    /**
     * Applies a default limit of 2500 objects when no count was requested.
     *
     * @inheritdoc
     *
     * @param Dom $dom Loaded page; not used by this implementation.
     *
     * @return bool Always true.
     */
    protected function checkTotalObjects($dom)
    {
        if (!$this->count) {
            $this->count = $this->needCount = 2500;
        }

        return true;
    }
}