forked from gonejack/tumblr-images
/
fetch.php
211 lines (153 loc) · 6.25 KB
/
fetch.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
<?php
// Script entry point: run the request handler defined below as soon as the file is loaded.
main();
/**
 * Request handler.
 *
 * Reads the `url` query parameter, downloads that page, extracts the image
 * urls found in it and then either redirects to the single image or streams
 * all of them back as one zip download. Every failure path terminates the
 * script through one of the *AndExit helpers.
 */
function main() {
    $boolHasValidUrl = isset($_GET['url']) && filter_var($_GET['url'], FILTER_VALIDATE_URL);
    if (!$boolHasValidUrl) {
        exit('Hello World!');
    }

    $strRequestedUrl = $_GET['url'];

    // Download the HTML of the requested page.
    $strHtml = getPageSource($strRequestedUrl);
    if (!$strHtml) {
        echoImageNotFoundTextFileAndExit($strRequestedUrl);
    }

    // Extract the candidate image urls from the HTML.
    $arrCandidateUrls = parseImagesUrls($strHtml);
    $intCandidateCount = count($arrCandidateUrls);
    if ($intCandidateCount === 0) {
        // Nothing to download: answer with the error document.
        echoImageNotFoundTextFileAndExit($strRequestedUrl);
    }
    if ($intCandidateCount === 1) {
        // A single image does not need fetching or packing; let the browser load it directly.
        redirectAndExit(array_pop($arrCandidateUrls));
    }

    // Not every candidate is reachable, so try each one and keep the ones that answer.
    $arrFetched = fetchImages($arrCandidateUrls);
    $arrReachableUrls = $arrFetched['validImagesUrls'];
    $intReachableCount = count($arrReachableUrls);
    if ($intReachableCount === 0) {
        echoImageNotFoundTextFileAndExit($strRequestedUrl);
    }
    if ($intReachableCount === 1) {
        // Only one image survived: redirect instead of building a one-file archive.
        redirectAndExit(array_pop($arrReachableUrls));
    }

    // Several images are available: bundle them and send the archive.
    $strArchive = makeZipPack($arrFetched['imageStrings'], $arrReachableUrls);
    outputZipPackAsFileDownload($strArchive);
}
/**
 * Download the HTML source of a post page.
 *
 * The url is first fetched as given. When that yields fewer than 100 bytes the
 * short form of the url (everything up to and including "/post/<digits>") is
 * tried instead. A result that is still shorter than 100 bytes counts as a
 * failure.
 *
 * Only http:// and https:// urls are fetched. The value originates from the
 * query string and FILTER_VALIDATE_URL does not restrict the scheme, so
 * without this check a url such as file:///etc/passwd would make
 * file_get_contents() read a local file.
 *
 * @param string $strUrl url of the page to download
 * @return bool|string the page source, or false when it could not be retrieved
 */
function getPageSource($strUrl) {
    if (!is_string($strUrl) || !preg_match('<^https?://>i', $strUrl)) {
        return false;
    }

    $strPageSource = @file_get_contents($strUrl);

    //Tumblr has two URL types, try the short one when the long one failed to be accessed.
    if ($strPageSource === false || strlen($strPageSource) < 100) {
        $strPageSource = false;

        if (preg_match('<http.+/post/\d+>', $strUrl, $arrMatch)) {
            $strPageSource = @file_get_contents($arrMatch[0]);
        }

        //check one more time
        if ($strPageSource === false || strlen($strPageSource) < 100) {
            $strPageSource = false;
        }
    }

    return $strPageSource;
}
/**
 * Extract image urls from HTML page source, keeping one url per image.
 *
 * The pattern matches `content="..."` / `src="..."` attributes that point at
 * `<digits>.media.tumblr.com` files named `tumblr_<id>_<size>.<png|jpg|gif>`.
 * The same image usually appears in several sizes; only the largest size of
 * each image is returned, in order of first appearance.
 *
 * Images are grouped by the directory segment in front of the file name when
 * there is one. That segment is optional in the pattern, so urls without it
 * are grouped by their file name minus the `_<size>.<ext>` suffix; otherwise
 * all such urls would share one empty key and only a single image would
 * survive.
 *
 * @param string $strPageSource HTML to scan
 * @return array list of image urls (empty when nothing matched)
 */
function parseImagesUrls($strPageSource) {
    $arrReturnUrls = array();
    $strRegPatten = "<(?:content|src)=\"((?:https?://\d+\.media\.tumblr\.com)/(?:(\w+)/)?(?:tumblr_\w+_(1280|540|500|400|250)\.(?:png|jpg|gif)))\">i";

    if (!preg_match_all($strRegPatten, (string) $strPageSource, $arrMatches, PREG_SET_ORDER)) {
        return $arrReturnUrls;
    }

    $arrLargest = array(); #array( imageKey => array('url' => url, 'size' => size), ...)

    foreach ($arrMatches as $arrMatch) {
        $strUrl  = $arrMatch[1];
        $strHash = $arrMatch[2];
        $intSize = (int) $arrMatch[3];

        // The "/" prefix keeps name-based keys from ever colliding with a hash (\w+ cannot contain "/").
        $strKey = $strHash !== ''
            ? $strHash
            : '/' . preg_replace('<_\d+\.\w+$>', '', basename($strUrl));

        //filter, keep the url which represents the max size of the image.
        if (!isset($arrLargest[$strKey]) || $arrLargest[$strKey]['size'] < $intSize) {
            $arrLargest[$strKey] = array('url' => $strUrl, 'size' => $intSize);
        }
    }

    foreach ($arrLargest as $arrItem) {
        $arrReturnUrls[] = $arrItem['url'];
    }

    return $arrReturnUrls;
}
/**
 * Download every image url and keep the ones that could be retrieved.
 *
 * A url is kept when file_get_contents() returns data and the status parsed
 * from the response headers is 200, 301 or 304. The two returned lists are
 * filled in lockstep, so entry N of one belongs to entry N of the other.
 *
 * @param array $arrImagesUrls urls to download
 * @return array array('imageStrings' => raw image data, 'validImagesUrls' => matching urls)
 */
function fetchImages($arrImagesUrls) {
    $arrImageStrings = array();
    $arrValidImagesUrls = array();

    foreach ($arrImagesUrls as $strCandidateUrl) {
        $strBody = @file_get_contents($strCandidateUrl);

        if ($strBody !== false) {
            // $http_response_header is filled in by PHP's HTTP stream wrapper after the request above.
            $mixStatus = parseHeaders($http_response_header, 'status');

            if (in_array($mixStatus, array(200, 301, 304))) {
                $arrImageStrings[] = $strBody;
                $arrValidImagesUrls[] = $strCandidateUrl;
            }
        }
    }

    return array(
        'imageStrings' => $arrImageStrings,
        'validImagesUrls' => $arrValidImagesUrls,
    );
}
/**
 * Parse a set of HTTP headers.
 *
 * When the first entry is a status line ("HTTP/1.1 404 Not Found") its code
 * and reason phrase are stored under the keys 'status' and 'status_text'.
 * Every remaining "Name: value" line is stored under its lower-cased name.
 * Lines without a colon (for example the additional status lines that show up
 * after a redirect) are skipped.
 *
 * @param array  $headers The php headers to be parsed (e.g. $http_response_header)
 * @param string $header  [optional] The name of the header to be retrieved (case-insensitive)
 * @return mixed A header value (or null when it is absent) if a header name is passed;
 *               An array with all the headers otherwise
 */
function parseHeaders(array $headers, $header = null) {
    $output = array();

    if (isset($headers[0]) && 'HTTP' === substr($headers[0], 0, 4)) {
        // Limit of 3 keeps multi-word reason phrases such as "Not Found" intact.
        $statusParts = explode(' ', $headers[0], 3);
        $output['status'] = isset($statusParts[1]) ? $statusParts[1] : null;
        $output['status_text'] = isset($statusParts[2]) ? $statusParts[2] : null;
        unset($headers[0]);
    }

    foreach ($headers as $v) {
        // Split on the first colon only: values such as dates and urls contain colons themselves.
        $h = preg_split('/:\s*/', $v, 2);
        if (count($h) < 2) {
            continue;
        }
        $output[strtolower($h[0])] = $h[1];
    }

    if (null !== $header) {
        $key = strtolower($header);
        return isset($output[$key]) ? $output[$key] : null;
    }

    return $output;
}
/**
 * Send the browser to the image itself and stop the script.
 *
 * Emits a `Location` header with status 301, replacing any previously set
 * header of the same name, then terminates.
 *
 * @param string $strImageUrl direct url of the image
 */
function redirectAndExit($strImageUrl) {
    $strLocationHeader = 'Location: ' . $strImageUrl;
    header($strLocationHeader, true, 301);
    exit;
}
/**
 * Send a small HTML document, as a file download, saying that no images were
 * found at the given url, then stop the script.
 *
 * The url comes from the request, so it is HTML-escaped before being placed
 * in the markup; otherwise quotes or angle brackets in it could break out of
 * the attribute and inject script. The download file name contains spaces,
 * slashes and colons, so it is sent as a quoted string, as required for a
 * valid Content-Disposition header.
 *
 * @param string $strUrl the page url that was requested
 */
function echoImageNotFoundTextFileAndExit($strUrl) {
    $strSafeUrl = htmlspecialchars((string) $strUrl, ENT_QUOTES, 'UTF-8');

    header('Content-Type: text/html');
    header('Content-Disposition: attachment; filename="' . date('Y/M/j/D G:i:s') . '.htm"');
    echo "No tumblr images found at <a href='$strSafeUrl' target='_self'><i>$strSafeUrl</i></a>";
    exit;
}
/**
 * Build a zip archive, in memory, from the downloaded images.
 *
 * Entry N of $arrImageStrings is stored under the base name of entry N of
 * $arrImageUrls. The archive is produced by the ZipFile class (presumably
 * defined in zip.lib.php, which is loaded here).
 *
 * @param array $arrImageStrings raw image data
 * @param array $arrImageUrls    urls the data was downloaded from, in the same order
 * @return string the archive as returned by ZipFile::file()
 */
function makeZipPack($arrImageStrings, $arrImageUrls) {
    require_once('zip.lib.php');

    $zipGenerator = new ZipFile();
    $intTotal = sizeof($arrImageStrings);
    $intIndex = 0;

    while ($intIndex < $intTotal) {
        $zipGenerator->addFile($arrImageStrings[$intIndex], basename($arrImageUrls[$intIndex]));
        $intIndex++;
    }

    return $zipGenerator->file();
}
/**
 * Send the zip archive to the client as a file download.
 *
 * Emits the content type, the exact byte length and an attachment
 * disposition whose file name is the current date and time, then writes the
 * archive. The file name contains spaces, slashes and colons, so it is sent
 * as a quoted string; an unquoted value with those characters is not a valid
 * Content-Disposition parameter.
 *
 * @param string $strZipString raw bytes of the zip archive
 */
function outputZipPackAsFileDownload($strZipString) {
    header('Content-Type: application/zip');
    header('Content-Length: ' . strlen($strZipString));
    header('Content-Disposition: attachment; filename="' . date('Y/M/j/D G:i:s') . '.zip"');
    echo $strZipString;
}