/**
 * Set up the password box and, when requested, process a password change.
 *
 * Reads the "pwb_action", "pw1" and "pw2" request variables via
 * get_http_var(). If a person is signed on and the action is 'set_password',
 * the two entries are validated; when they pass, the new password is handed
 * to the person's password() method and the change is committed.
 *
 * The outcome is left in $this->err (validation message) or $this->info
 * (success message). Both remain null when no change was attempted.
 *
 * @param string|null $pagepath Value stored in $this->pagepath. When null,
 *                              the path component of the current request
 *                              URI is used instead.
 */
function __construct($pagepath = null) {
    $this->P = person_if_signed_on();

    // "pwb_" prefix means passwordbox
    $this->action = get_http_var('pwb_action');
    $this->pw1 = get_http_var('pw1', null);
    $this->pw2 = get_http_var('pw2', null);
    $this->err = null;
    $this->info = null;

    if (is_null($pagepath)) {
        // no specific page - use the _current_ one.
        $foo = crack_url($_SERVER['REQUEST_URI']);
        $this->pagepath = $foo['path'];
    } else {
        // use the one supplied.
        $this->pagepath = $pagepath;
    }

    // nobody signed on - nothing further to process
    if (is_null($this->P)) {
        return;
    }

    if ($this->action == 'set_password') {
        if (is_null($this->pw1) || is_null($this->pw2)) {
            $this->err = "Please type your new password twice";
        } elseif (strlen($this->pw1) < 5 || strlen($this->pw2) < 5) {
            $this->err = "Your password must be at least 5 characters long";
        } elseif ($this->pw1 !== $this->pw2) {
            // Strict comparison is required here: with loose "!=", PHP
            // compares two numeric strings as numbers, so different entries
            // such as "10000" and "1.0e4" would be treated as a match.
            $this->err = "Please type the same password twice";
        } else {
            // all looks good. do it.
            $this->P->password($this->pw1);
            db_commit();
            $this->info = 'Password changed';
        }
    }
}
/**
 * Insert or update the article described by $this->cleaned_data.
 *
 * Steps, in order:
 *  - fill in a blank srcorg from the permalink's host name,
 *  - UPDATE the article row when an id is present, otherwise INSERT a new
 *    row and pick up its generated id,
 *  - make sure article_url holds the permalink for this article,
 *  - replace the journo_attr rows with the journos named in the
 *    comma-separated 'authors' field,
 *  - queue the article in article_needs_indexing,
 *  - re-run update_status()/save() on any submitted articles for the same
 *    url.
 *
 * @return array The cleaned data, with 'srcorg' and 'id' filled in.
 */
function save() {
    $data = $this->cleaned_data;
    $fields = array('title', 'byline', 'description', 'pubdate', 'permalink', 'srcorg', 'status');

    // if srcorg left blank, fill it out using domainname
    if (!$data['srcorg']) {
        $parts = crack_url($data['permalink']);
        $domain = strtolower($parts['host']);
        $data['srcorg'] = $this->find_or_create_publication($domain);
    }

    // all set - time to upsert!
    // One bound value and one placeholder per column, in $fields order.
    $values = array();
    $placeholders = array();
    foreach ($fields as $f) {
        $values[] = $data[$f];
        $placeholders[] = '?';
    }

    if ($data['id']) {
        // update
        $sql = "UPDATE article SET (" . join(',', $fields) . ") = (" . join(',', $placeholders) . ") WHERE id=?";
        $values[] = $data['id'];
        db_do($sql, $values);

        // make sure article_url has permalink (srcurl might be different, but we'll assume it's there already)
        db_do("DELETE FROM article_url WHERE url=? AND article_id=?", $data['permalink'], $data['id']);
        db_do("INSERT INTO article_url (url,article_id) VALUES (?,?)", $data['permalink'], $data['id']);
    } else {
        // create new
        $sql = "INSERT INTO article (id," . join(',', $fields) . ",firstseen,lastseen) VALUES (DEFAULT," . join(',', $placeholders) . ",NOW(),NOW()) RETURNING id";
        $data['id'] = db_getOne($sql, $values);

        // set up article_url
        db_do("INSERT INTO article_url (url,article_id) VALUES (?,?)", $data['permalink'], $data['id']);
    }

    // set attributed journos
    db_do("DELETE FROM journo_attr WHERE article_id=?", $data['id']);

    // Collect the journo refs, dropping blanks (an empty authors field or a
    // stray trailing comma would otherwise bind '' into the IN list).
    $refs = array();
    foreach (explode(',', $data['authors']) as $a) {
        $a = trim($a);
        if ($a !== '') {
            $refs[] = $a;
        }
    }

    if ($refs) {
        // first bound value is the article id, followed by one per ref
        $params = array_merge(array($data['id']), $refs);
        $ref_placeholders = array_fill(0, count($refs), '?');
        db_do("INSERT INTO journo_attr (journo_id,article_id) SELECT id,? FROM journo WHERE ref IN (" . join(',', $ref_placeholders) . ")", $params);
    }

    // queue for xapian indexing
    db_do("DELETE FROM article_needs_indexing WHERE article_id=?", $data['id']);
    db_do("INSERT INTO article_needs_indexing (article_id) VALUES (?)", $data['id']);

    // check for any submitted articles for this url that could now be resolved
    $submitted = SubmittedArticle::fetch_by_url($data['permalink']);
    foreach ($submitted as $s) {
        $s->update_status();
        $s->save();
    }

    # TODO:
    # log the action
    return $data;
}
/**
 * Handle a pingback request: verify that the source page links to the
 * target, work out which journo the target belongs to, and record the
 * source as an approved weblink for that journo.
 *
 * @param mixed $method Not used by this function.
 * @param array $params Two-element list: source URI, then target URI.
 * @param mixed $extra  Not used by this function.
 *
 * @return int|string One of the numeric fault codes below on failure, or a
 *                    thank-you message string on success.
 *                    0x10 - the source URI does not exist
 *                    0x11 - the source does not contain the target URI
 *                    0x21 - the target URI cannot be used as a target
 *                    0x30 - the pingback has already been registered
 */
function handle_pingback($method, $params, $extra) {
    list($sourceURI, $targetURI) = $params;

    if (!is_string($sourceURI)) {
        CRAPLOG("0x10\n");
        return 0x10; // "The source URI does not exist."
    }
    if (!is_string($targetURI)) {
        CRAPLOG("0x21\n");
        return 0x21; // "The specified target URI cannot be used as a target."
    }

    // The source URI comes straight from the caller, so only ever fetch
    // http(s) resources. Without this, file_get_contents() would happily
    // open local paths or other stream wrappers (file://, php://, ...).
    // NOTE: this does not stop requests to internal http hosts.
    $scheme = strtolower((string)parse_url($sourceURI, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        CRAPLOG("0x10\n");
        return 0x10; // "The source URI does not exist."
    }

    // fetch the source URI to verify that the source does indeed link to the target
    $html = file_get_contents($sourceURI);
    if ($html === FALSE) {
        CRAPLOG("0x10\n");
        return 0x10; // "The source URI does not exist."
    }

    // cheesy conversion to utf-8
    $encoding = mb_detect_encoding($html, 'UTF-8, ISO-8859-1, windows-1252', true);
    if ($encoding === FALSE) {
        // detection failed - treat the text as utf-8 rather than passing
        // an invalid encoding to mb_convert_encoding()
        $encoding = 'UTF-8';
    }
    $html = mb_convert_encoding($html, 'UTF-8', $encoding);
    $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

    if (strpos($html, $targetURI) === FALSE) {
        CRAPLOG("0x11\n");
        return 0x11; // "The source URI does not contain a link to the target URI, and so cannot be used as a source."
    }

    // check URL, try and extract journo ref
    $bits = crack_url($targetURI);
    $path = $bits['path'];
    $m = array();
    $ref = null;
    if (preg_match("%([a-zA-Z0-9]+-[-a-zA-Z0-9]+)/?%", $path, $m)) {
        $ref = $m[1];
    }
    if ($ref === null) {
        CRAPLOG("0x21\n");
        return 0x21; // "The specified target URI cannot be used as a target."
    }

    // valid journo?
    $journo = db_getRow("SELECT * FROM journo WHERE ref=? AND status='a'", $ref);
    if ($journo === null) {
        CRAPLOG("0x21 (invalid journo)\n");
        return 0x21; // "The specified target URI cannot be used as a target."
    }

    // try and extract title to use as description (falls back to the source URI)
    $desc = $sourceURI;
    $m = array();
    // "s" lets "." span newlines so titles broken over several lines are
    // found; the whitespace collapse below then flattens them.
    if (preg_match('!<title[^>]*>(.*?)</title>!is', $html, $m)) {
        $title = trim(preg_replace('/\\s+/', ' ', $m[1]));
        if ($title !== '') {
            $desc = $title;
        }
    }

    // already got this pingback?
    if (db_getOne("SELECT id FROM journo_weblink WHERE journo_id=? AND url=? AND approved=true", $journo['id'], $sourceURI)) {
        CRAPLOG("0x30\n");
        return 0x30; // "The pingback has already been registered."
    }

    // OK. time to add it!
    $sql = <<<EOT
INSERT INTO journo_weblink (journo_id, url, description, approved, kind, rank)
    VALUES ( ?,?,?,true,'pingback',500)
EOT;
    db_do($sql, $journo['id'], $sourceURI, $desc);
    db_commit();

    CRAPLOG("added.\n");
    return "Ping registered - thanks";
}
/**
 * Fetch one page of a journo's articles, newest first.
 *
 * Rows from "article" and "journo_other_articles" are merged with a single
 * UNION ALL query, then each row is passed through article_augment() and
 * given a 'buzz' entry and a fallback 'srcorgname'.
 *
 * @param array $journo Journo row; only its 'id' entry is read here.
 * @param int   $limit  Value bound to the query's LIMIT.
 * @param int   $offset Value bound to the query's OFFSET.
 *
 * @return mixed Whatever db_getAll() returned, with each row decorated.
 */
function journo_collectArticles(&$journo, $limit = 10, $offset = 0) {
    // A UNION matches columns by position, not by name, so both SELECT
    // lists must keep exactly the same column order.
    $sql = <<<EOT
(
    SELECT a.id,a.title,a.description,a.pubdate,a.permalink, o.prettyname as srcorgname, a.srcorg,a.total_bloglinks,a.total_comments
        FROM article a
            INNER JOIN journo_attr attr ON a.id=attr.article_id
            INNER JOIN organisation o ON o.id=a.srcorg
        WHERE a.status='a' AND attr.journo_id=?
    UNION ALL
    SELECT NULL as id, title, NULL as description, pubdate, url as permalink, publication as srcorgname, NULL as srcorg, 0 as total_bloglinks, 0 as total_comments
        FROM journo_other_articles
        WHERE status='a' AND journo_id=?
)
ORDER BY pubdate DESC
LIMIT ?
OFFSET ?
EOT;

    $journo_id = $journo['id'];
    $rows = db_getAll($sql, $journo_id, $journo_id, $limit, $offset);

    // Decorate every row in place.
    foreach ($rows as &$row) {
        // adds the pretty pubdate and friends
        article_augment($row);

        // rows coming from journo_other_articles carry a NULL id and get no buzz
        $row['buzz'] = is_null($row['id']) ? '' : BuzzFragment($row);

        // no publication name? fall back to the permalink's host name
        if (!$row['srcorgname']) {
            $url_parts = crack_url($row['permalink']);
            $row['srcorgname'] = $url_parts['host'];
        }
    }
    unset($row); // break the dangling reference left by the by-reference loop

    return $rows;
}
/**
 * Check whether a user-supplied string looks like a usable article URL.
 *
 * @param string $url The URL as entered.
 *
 * @return string|null A human-readable complaint when the URL is rejected,
 *                     or null when it looks acceptable.
 */
function is_sane_article_url($url) {
    // Spaces inside the (trimmed) URL usually mean a headline was entered
    // instead of a link.
    $space_at = strpos(trim($url), ' ');
    if ($space_at !== false) {
        return "URLs should not contain spaces";
    }

    $bits = crack_url($url);
    if ($bits === false) {
        return "Please enter the full url of this article";
    }

    // A missing scheme is treated as http://
    $raw_scheme = ($bits['scheme'] == '') ? 'http' : $bits['scheme'];

    $host = trim($bits['host']);
    $scheme = trim(strtolower($raw_scheme));
    $path = trim($bits['path']);
    $query = trim($bits['query']);

    if ($host === '') {
        return "Please enter the full url of this article";
    }

    // Only web links are accepted - no ftp: or internal file: links.
    if (!in_array($scheme, array('http', 'https'), true)) {
        return 'Sorry, "' . $scheme . '://" urls are not supported';
    }

    // A bare site root is not an article: require either a real path or a
    // query string.
    $path_is_blank = in_array($path, array('', '/'), true);
    if ($path_is_blank && $query === '') {
        return "Please enter the FULL url of this article";
    }

    return null;
}
/**
 * Return the host name of a URL with any leading "www." removed.
 *
 * @param string $url The URL to take the host from.
 *
 * @return string The host without a leading "www.", or an empty string when
 *                no host could be extracted from the URL.
 */
function pretty_domain($url) {
    $bits = crack_url($url);
    if ($bits === false || !isset($bits['host'])) {
        return '';
    }

    // The dot must be escaped: an unescaped "." matches any character, so a
    // host like "wwwfoo.com" would lose its first four characters.
    return preg_replace('/^www\./', '', $bits['host']);
}