/**
 * Imports every regular file found beneath $this->dir as an aMediaItem.
 *
 * The tree is walked recursively. Files that live in a dot folder (or are
 * themselves dotfiles) below the import root, and files named Thumbs.db,
 * are skipped. Every other file is validated against the configured MIME
 * types, turned into an aMediaItem titled after its filename and tagged
 * with the slugified names of the folders between the import root and the
 * file. Once saveFile() has succeeded the source file is deleted from the
 * import folder.
 *
 * Progress and problems are reported through $this->giveFeedback() using
 * the categories "info", "warning", "error", "completed" and "total".
 */
public function go()
{
  $dir_iterator = new RecursiveDirectoryIterator($this->dir);
  $iterator = new RecursiveIteratorIterator($dir_iterator, RecursiveIteratorIterator::SELF_FIRST);
  $count = 0;
  $mimeTypes = aMediaTools::getOption('mime_types');
  // It comes back as a mapping of extensions to types, get the types
  $extensions = array_keys($mimeTypes);
  $mimeTypes = array_values($mimeTypes);

  // Pattern that strips the import root (normalized to exactly one trailing
  // slash) from the front of a pathname. It does not depend on the file
  // being visited, so it is built once rather than on every iteration.
  $dir = preg_replace('/\\/$/', '', $this->dir) . '/';
  $dirPattern = '/^' . preg_quote($dir, '/') . '/';

  foreach ($iterator as $sfile)
  {
    if (!$sfile->isFile())
    {
      continue;
    }
    $file = $sfile->getPathname();

    // The portion of the path that isn't dir (i.e. the folder structure they used).
    // TODO: not Microsoft-friendly, might matter in some setting
    $relevant = preg_replace($dirPattern, '', $file);

    // Ignore all dot folders to avoid trouble with svn and friends. The test
    // is made on the path below the import root: a dot component in the root
    // itself (e.g. "./import") must not cause every file to be skipped.
    if (preg_match('/(^|\\/)\\./', $relevant))
    {
      $this->giveFeedback("info", "Ignoring dotfile", $file);
      continue;
    }

    $pathinfo = pathinfo($file);
    // basename and filename seem backwards to me, but that's how it is in the PHP docs and
    // sure enough that's how it behaves
    if ($pathinfo['basename'] === 'Thumbs.db')
    {
      continue;
    }

    $vfp = new aValidatorFilePersistent(
      array('mime_types' => $mimeTypes, 'validated_file_class' => 'aValidatedFile', 'required' => false),
      array('mime_types' => 'The following file types are accepted: ' . implode(', ', $extensions)));
    $guid = aGuid::generate();
    try
    {
      $vf = $vfp->clean(array('newfile' => array('tmp_name' => $file, 'name' => $pathinfo['basename']), 'persistid' => $guid));
    }
    catch (Exception $e)
    {
      $this->giveFeedback("warning", "Not supported or corrupt", $file);
      continue;
    }

    $item = new aMediaItem();

    // Every path component except the last one (the filename) becomes a tag.
    // We don't strictly need to be this harsh, but it's safe and definitely
    // takes care of some things we definitely can't allow, like periods
    // (which cause mod_rewrite problems with pretty Symfony URLs).
    // TODO: clean it up in a nicer way without being UTF8-clueless
    // (aTools::slugify is UTF8-safe)
    $components = preg_split('/\\//', $relevant);
    $tags = array();
    // Built by value rather than with a by-reference foreach, so no reference
    // into $tags is left dangling after the loop.
    foreach (array_slice($components, 0, -1) as $component)
    {
      $tags[] = aTools::slugify($component);
    }

    $item->title = aMediaTools::filenameToTitle($pathinfo['basename']);
    $item->setTags($tags);
    if (!strlen($item->title))
    {
      $this->giveFeedback("error", "Files must have a basename", $file);
      continue;
    }

    // The preSaveFile / save / saveFile dance is necessary because
    // the sluggable behavior doesn't kick in until save and the stored file
    // needs a slug based filename.
    if (!$item->preSaveFile($vf))
    {
      $this->giveFeedback("error", "Save failed", $file);
      continue;
    }
    $item->save();
    if (!$item->saveFile($vf))
    {
      $this->giveFeedback("error", "Save failed", $file);
      // Don't leave a database record behind without its file
      $item->delete();
      continue;
    }

    // Imported successfully: remove the source so it is not imported again
    unlink($file);
    $count++;
    $this->giveFeedback("completed", $count, $file);
  }
  $this->giveFeedback("total", $count);
}
/**
 * Imports media published by the stored embed-service accounts that is not
 * yet present as an aMediaItem.
 *
 * For every aEmbedMediaAccount whose service is still configured, the
 * account's items are paged through with browseUser(). URLs that no
 * aMediaItem.service_url already holds are then imported: the service is
 * asked for the item's info and thumbnail, and a new aMediaItem is saved.
 *
 * @param array $arguments Task arguments (not read by this method)
 * @param array $options   Task options; 'connection', when non-empty, names
 *                         the database to initialize
 */
protected function execute($arguments = array(), $options = array())
{
  // We need a basic context so we can call helpers to format text
  sfContext::createInstance($this->configuration);
  // initialize the database connection
  $databaseManager = new sfDatabaseManager($this->configuration);
  $databaseManager->getDatabase(!empty($options['connection']) ? $options['connection'] : null)->getConnection();
  // PDO connection not so useful, get the doctrine one.
  // NOTE(review): the return value was never used; the call is retained in
  // case callers rely on it happening here - confirm and remove if not.
  Doctrine_Manager::connection();

  $accounts = Doctrine::getTable('aEmbedMediaAccount')->findAll();
  foreach ($accounts as $a)
  {
    $perPage = 50;
    $service = aMediaTools::getEmbedService($a->service);
    if (!$service)
    {
      // An account for a service that has been deconfigured
      continue;
    }

    // Collect the URLs of everything this account has published
    $page = 1;
    $serviceUrls = array();
    while (true)
    {
      $results = $service->browseUser($a->username, $page, $perPage);
      if ($results === false)
      {
        // We hit the rate limit, the account is bad, etc. Just
        // be tolerant and retry later. Would be nice to distinguish
        // these cases but it's not that hard to figure out an
        // account is gone
        break;
      }
      foreach ($results['results'] as $result)
      {
        $serviceUrls[] = $result['url'];
      }
      // We hit the end of the results for this user
      if (!count($results['results']))
      {
        break;
      }
      $page++;
    }

    // The same URL can show up on two pages (e.g. if the listing shifts
    // while we are paging); import each URL at most once.
    $serviceUrls = array_values(array_unique($serviceUrls));

    $existingServiceUrls = array();
    if (count($serviceUrls))
    {
      $existingServiceUrls = Doctrine::getTable('aMediaItem')->createQuery('m')->select('m.service_url')->andWhereIn('m.service_url', $serviceUrls)->execute(array(), Doctrine::HYDRATE_SINGLE_SCALAR);
      // Single-scalar hydration may hand back a bare value rather than a
      // list when exactly one row matches (TODO confirm against the Doctrine
      // version in use); array_flip() needs an array either way.
      if (!is_array($existingServiceUrls))
      {
        $existingServiceUrls = (($existingServiceUrls === null) || ($existingServiceUrls === false)) ? array() : array($existingServiceUrls);
      }
    }
    // Flip so membership can be tested with isset()
    $existingServiceUrls = array_flip($existingServiceUrls);

    foreach ($serviceUrls as $serviceUrl)
    {
      if (isset($existingServiceUrls[$serviceUrl]))
      {
        // Already imported
        continue;
      }
      // If Doctrine becomes a performance problem I could use PDO
      // and set lucene_dirty to let that clean itself up later
      $id = $service->getIdFromUrl($serviceUrl);
      $info = $service->getInfo($id);
      if (!$info)
      {
        // We are not actually allowed meaningful access to this video. Password protected for example
        continue;
      }
      $item = new aMediaItem();
      $item->setTitle($info['title']);
      // We want tags to be lower case, and slashes break routes in most server configs.
      $info['tags'] = str_replace('/', '-', aString::strtolower($info['tags']));
      $item->setTags($info['tags']);
      $item->setDescription(aHtml::textToHtml($info['description']));
      $item->setCredit($info['credit']);
      $item->setServiceUrl($info['url']);
      $item->setType($service->getType());

      // The dance is this: get the thumbnail if there is one;
      // call preSaveFile to learn the width, height and format
      // before saving; save; and then saveFile to copy it to a
      // filename based on the slug, which is unknown until after save
      $tmpFile = null;
      // Stays null unless a local copy of the thumbnail was actually made
      $thumbnailCopy = null;
      $thumbnail = $service->getThumbnail($id);
      if ($thumbnail)
      {
        // Grab a local copy of the thumbnail, and get the pain
        // over with all at once in a predictable way if
        // the service provider fails to give it to us.
        $tmpFile = aFiles::getTemporaryFilename();
        if (copy($thumbnail, $tmpFile))
        {
          $thumbnailCopy = $tmpFile;
          $item->preSaveFile($thumbnailCopy);
        }
      }
      $item->save();
      // Only hand saveFile() a thumbnail that was really downloaded; if the
      // copy failed the item is kept without a thumbnail.
      if ($thumbnailCopy !== null)
      {
        $item->saveFile($thumbnailCopy);
      }
      // Don't let temporary files pile up over a long run
      if (($tmpFile !== null) && file_exists($tmpFile))
      {
        unlink($tmpFile);
      }
      $item->free();
    }
  }
}
/**
 * Finds the media item for $src by slug, importing the file as a new
 * aMediaItem when there is none yet.
 *
 * Adds an image or PDF (todo: should scan file extensions properly & import word docs etc).
 *
 * The slug is derived from the path of $src (query string and hash
 * included) plus its extension. A source whose import failed is remembered
 * in $this->failedMedia and is not attempted again.
 *
 * @param string $src        Path or http(s) URL of the file; anything else is
 *                           looked up relative to $this->imagesDir
 * @param string $returnType 'path' to get a path back, any other value to get
 *                           the media item id
 * @param bool   $tag        Whether a newly created item is tagged with the
 *                           directory components of $src
 * @return mixed For 'path': the uploads path of a newly created item,
 *               otherwise the extension-less path of $src. For any other
 *               return type: the media item id, or null when the import failed
 */
protected function findOrAddMediaItem($src, $returnType = 'id', $tag = true)
{
  $mediaId = null;
  $info = pathinfo($src);
  $path = $info['dirname'] . '/' . $info['filename'];
  $dirname = $info['dirname'];

  // Move any query string or hash string into the filename and out of the "extension"
  if (isset($info['extension']))
  {
    $qat = strpos($info['extension'], '?');
    if ($qat !== false)
    {
      $path .= substr($info['extension'], $qat);
      $info['extension'] = substr($info['extension'], 0, $qat);
    }
    $hashat = strpos($info['extension'], '#');
    if ($hashat !== false)
    {
      $path .= substr($info['extension'], $hashat);
      $info['extension'] = substr($info['extension'], 0, $hashat);
    }
    // Extension should be a clean Unix path component
    $info['extension'] = preg_replace('/[^\\w]/', '', $info['extension']);
  }

  // Remove any scheme and hostname before splitting for tags, also dump case
  // differences. The slash after the host is optional so that a file sitting
  // directly at the root of a host does not get the scheme and host as tags.
  $dirname = strtolower(preg_replace('|^\\w+://[^/]*/?|', '', $dirname));
  $tags = array();
  // The loop variable must not be called $tag: that would overwrite the
  // $tag parameter that is consulted further down.
  foreach (preg_split('#/#', $dirname) as $component)
  {
    // Skips empty components as well as "." and other single characters
    if (strlen($component) > 1)
    {
      $tags[] = $component;
    }
  }

  $extension = isset($info['extension']) ? $info['extension'] : 'unknown';
  $slug = aTools::slugify($path . "-{$extension}");

  // We need to encode spaces but not slashes...
  $src = str_replace(' ', '%20', $src);
  // Anything that is not an http(s) URL is a file below the images folder
  if (!preg_match('/^https?:/', $src))
  {
    $src = $this->imagesDir . '/' . $src;
  }

  $result = $this->sql->query('SELECT id FROM a_media_item WHERE slug = :slug', array('slug' => $slug));
  if (isset($result[0]['id']))
  {
    $mediaId = $result[0]['id'];
  }
  else
  {
    $mediaItem = new aMediaItem();
    $mediaItem->setTitle($slug);
    $mediaItem->setSlug($slug);
    if ($extension === 'pdf')
    {
      $mediaItem->setType('pdf');
    }
    else
    {
      $mediaItem->setType('image');
    }

    $filename = $mediaItem->getOriginalPath($extension);
    if (file_exists($filename))
    {
      // Avoids costly double imports of media
      $mediaItem->preSaveFile($filename);
    }
    elseif (!isset($this->failedMedia[$src]))
    {
      $tmpFile = aFiles::getTemporaryFilename();
      try
      {
        if (!copy($src, $tmpFile))
        {
          throw new sfException(sprintf('Could not copy file: %s', $src));
        }
        if (!$mediaItem->saveFile($tmpFile))
        {
          throw new sfException(sprintf('Could not save file: %s', $src));
        }
      }
      catch (Exception $e)
      {
        // Remember the failure so this source is not fetched again
        $this->failedMedia[$src] = true;
      }
      if (file_exists($tmpFile))
      {
        unlink($tmpFile);
      }
    }

    if (!isset($this->failedMedia[$src]))
    {
      $this->sql->fastSaveMediaItem($mediaItem);
      if ($tag)
      {
        $this->sql->fastSaveTags('aMediaItem', $mediaItem->id, $tags);
      }
      $mediaId = $mediaItem->id;
      // getOriginalPath needs a context, ugh
      // NOTE(review): the extension is hard-coded to "pdf" even when the item
      // was typed as an image - confirm that 'path' is only requested for PDFs.
      $path = '/uploads/media_items/' . $mediaItem->slug . '.original.pdf';
      $mediaItem->free(true);
    }
  }

  if ($returnType === 'path')
  {
    return $path;
  }
  return $mediaId;
}