/**
 * Handle a GET request.
 *
 * Any node can receive a GET, so misses have to be handled gracefully.
 * ("target node" means a node that is supposed to store the requested data.)
 *
 *  if we are NOT a target node for the requested file
 *      redirect (301) to the first node that supposedly holds the data,
 *      or send a 404 when no such node is known
 *  else
 *      if the file exists at the final path
 *          send the file to the client
 *      else if we are configured to self heal
 *          call selfHeal()
 *      else
 *          send a 404 to the client
 *      endif
 *  endif
 *
 * selfHeal() sends its own response (the healed file, a 404 when the data
 * cannot be found, or a 500 when the temp file cannot be opened), so this
 * method only sends a 404 on its behalf when it explicitly returns false.
 * Any exception thrown while healing is logged and answered with a 500.
 *
 * @return void
 */
public function handle()
{
    if (!$this->iAmATarget()) {
        // Not our data: pick the first target node and redirect the client there.
        $nodes = $this->getTargetNodes();
        if ($nodes) {
            WebDFS_Helper::send301(
                join('/', array($nodes[0]['staticUrl'], $this->params['pathHash'], $this->params['name']))
            );
        } else {
            // Nowhere to redirect to; answer with a 404 instead of an empty response.
            WebDFS_Helper::send404($this->params['name']);
        }
        return;
    }

    if (file_exists($this->finalPath)) {
        $this->sendFile();
        return;
    }

    if (!$this->canSelfHeal()) {
        WebDFS_Helper::send404($this->params['name']);
        return;
    }

    try {
        // Strict comparison on purpose: a selfHeal() that returns nothing (null)
        // has already responded to the client, and a second 404 here would
        // corrupt that response.
        if ($this->selfHeal() === false) {
            WebDFS_Helper::send404($this->params['name']);
        }
    } catch (Exception $e) {
        $this->errorLog('selfHeal', $e->getMessage(), $e->getTraceAsString());
        WebDFS_Helper::send500();
    }
}
/**
 * Self heal.
 *
 * Self heal accomplishes one of two things depending on when and why it is called:
 *  - it can automatically move data from an old config when the cluster is scaled, and
 *  - it can fetch a copy of some data from a peer server and save it to disk
 *    when the data has been lost; say, when a server failed.
 *
 * The process is initiated when we have been asked for data that is supposedly
 * stored on our disk and we cannot find it. At that point one of the following is true:
 *
 *  1) The data was never put on disk and this is simply a request for non-existent data
 *     (we currently have no reliable way to tell what is supposedly on our disk).
 *  2) The data is missing or corrupted and we need to heal ourselves.
 *  3) New servers or disks have been added to the cluster configuration and we are
 *     performing an auto move operation.
 *
 * Because we have to assume we "might" have been asked to store the data at some
 * point in the past, we search for it before answering with a 404.
 *
 * Algorithm:
 *  - open the temp path for writing (on failure: log, send a 500 and stop)
 *  - unless this request is itself an auto move request, walk the data configs from
 *    the highest index (oldest) down to index 0 (the current config) and try to
 *    download the file from every node other than ourselves; the outgoing request
 *    carries the auto move context header
 *  - if no peer had the file: remove the temp file and send a 404
 *  - otherwise: when the peer declared a content length, verify that many bytes were
 *    written; then saveData(), sendFile(), and, to complete an auto move, ask the
 *    source node to delete its copy when that node is NOT in the current target list
 *
 * @return bool always true: by the time this method returns it has already sent a
 *              response to the client (the healed file, a 404 or a 500), so callers
 *              that test the result must not send another one
 *
 * @throws WebDFS_Exception when the number of bytes written to the temp file does not
 *                          match the content length declared by the peer
 */
public function selfHeal()
{
    $filename = $this->params['name'];
    $tmpPath = $this->tmpPath;

    $fd = fopen($tmpPath, "wb+");
    if (!$fd) {
        $this->errorLog('selfHealNoFile', $tmpPath, $filename);
        WebDFS_Helper::send500();
        return true;
    }

    $copiedFrom = null;
    $info = null;
    $healed = false;

    if ($this->params['getContext'] != self::GET_CONTEXT_AUTOMOVE) {
        $headers = array(self::HEADER_GET_CONTEXT . ': ' . self::GET_CONTEXT_AUTOMOVE);

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($curl, CURLOPT_FILE, $fd);
        curl_setopt($curl, CURLOPT_TIMEOUT, 10);
        curl_setopt($curl, CURLOPT_HEADER, false);
        curl_setopt($curl, CURLOPT_FAILONERROR, true);
        curl_setopt($curl, CURLOPT_BINARYTRANSFER, true);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);

        $totalConfigs = count($this->config['data']);
        for ($configIdx = $totalConfigs - 1; $configIdx >= 0; $configIdx--) {
            if ($configIdx == 0) {
                // 0 means we are looking at the most current config
                $nodes = $this->getTargetNodes();
            } else {
                $config = $this->config['data'][$configIdx];
                $locClass = $config['locatorClassName'];
                $locator = new $locClass($config);
                $nodes = $locator->findNodes($filename);
            }

            if (!$nodes) {
                // nothing to ask under this config
                continue;
            }

            foreach ($nodes as $node) {
                // Skip the node entry that describes ourselves: requesting the
                // file from ourselves would be wasted resources and pointless.
                if ($node['proxyUrl'] == $this->config['thisProxyUrl']) {
                    continue;
                }

                $url = join('/', array($node['staticUrl'], $this->params['pathHash'], $filename));
                curl_setopt($curl, CURLOPT_URL, $url);
                curl_exec($curl);
                $info = curl_getinfo($curl);

                if (!curl_errno($curl) && $info['http_code'] < 400) {
                    $copiedFrom = $node;
                    $this->debugLog('autoMove');
                    $healed = true;
                    break 2;
                }

                // Discard whatever a failed transfer left behind. ftruncate()
                // does not move the file pointer, so rewind as well; otherwise
                // the next download would be written after a zero-filled gap.
                ftruncate($fd, 0);
                rewind($fd);
            }
        }

        // Release the transfer handle before closing the stream it writes to.
        curl_close($curl);
    }

    fclose($fd);

    if (!$healed) {
        // we cannot find the data: remove the temp file and send a 404
        unlink($tmpPath);
        WebDFS_Helper::send404($filename);
        return true;
    }

    // At this point the temp file holds the downloaded data.
    // Check that we wrote all of the data as dictated by the content length
    // header. cURL reports -1 when the peer sent no Content-Length, in which
    // case there is nothing to compare against.
    clearstatcache();
    $fileSize = filesize($tmpPath);
    $expectedSize = $info['download_content_length'];
    if ($expectedSize >= 0 && $fileSize != $expectedSize) {
        unlink($tmpPath);
        $msg = sprintf($this->config['exceptionMsgs']['incompleteWrite'], $expectedSize, $fileSize);
        throw new WebDFS_Exception($msg);
    }

    // save the data and return the file to the caller
    $this->saveData();
    $this->sendFile();

    // If the node we copied from is not in the current target node list,
    // this was an auto move: ask the source node to delete its copy.
    $position = $this->getTargetNodePosition(null, $copiedFrom['proxyUrl']);
    if ($position == WebDFS::POSITION_NONE) {
        $this->sendDeleteForHeal($copiedFrom['proxyUrl'] . '/' . $filename);
    }

    return true;
}