/**
 * Flush the queue of post IDs: re-save each post and ensure its group links exist.
 *
 * For every ID in self::$queue the post is loaded and saved, after which a
 * Postgroup row is created for each distinct group referenced by the post's
 * articles that does not have one yet. Errors are swallowed on purpose
 * (best effort): a failing post does not stop the remaining ones.
 * PostCat::Update() is called once after all posts have been handled.
 *
 * Entries that were processed successfully are removed from the queue so a
 * later call does not save the same posts again; entries that failed stay
 * queued and are retried on the next call.
 *
 * @return int Number of queued posts that were loaded and saved without error.
 */
public static function Update()
{
    $result = 0;
    $processed = array();

    // foreach iterates over a copy of the array, so the queue itself is only
    // modified after the loop has finished.
    foreach (self::$queue as $key => $postid) {
        try {
            $post = Post::FindByID($postid);
            $post->Save(); // Automatically calls Recalculate() before saving

            // Update post groups
            try {
                $query = 'SELECT DISTINCT("articlegroup"."groupid") FROM "articles" FORCE INDEX("postid") INNER JOIN "articlegroup" ON ("articlegroup"."articleid" = "articles"."id") WHERE "articles"."postid" = ?';
                $rs = static::$conn->Execute($query, $post->id);
                while ($row = $rs->Fetch()) {
                    try {
                        // Only used as an existence check; the record itself is not needed
                        Postgroup::Find(array('postid' => (int) $post->id, 'groupid' => (int) $row['groupid']));
                    } catch (ActiveRecord_NotFoundException $e) {
                        // Create
                        $postgroup = new Postgroup();
                        $postgroup->postid = $post->id;
                        $postgroup->groupid = $row['groupid'];
                        $postgroup->created = $post->created;
                        $postgroup->Save();
                    }
                }
            } catch (Exception $e) {
                // Ignore errors: a failed group update must not undo the saved post
            }

            $processed[] = $key;
            $result++;
        } catch (Exception $e) {
            // Ignore errors: the entry stays queued and is retried on the next call
        }
    }

    PostCat::Update();

    // Drain what was handled so repeated calls do not reprocess the same posts
    foreach ($processed as $key) {
        unset(self::$queue[$key]);
    }

    return $result;
}
/**
 * Group unassigned articles into posts, author by author, then run maintenance.
 *
 * Steps, in order:
 *  1. Collect authors whose posts are all hidden and whose hidden count exceeds
 *     self::SPAM_POST_THRESHOLD; their articles are skipped.
 *  2. For every remaining author with recent articles that have no post yet
 *     ("postid" = 0), process articles inside one transaction for at most
 *     self::TIME_LIMIT seconds: match each article to an existing post or
 *     create a new one, link the matching articles and queue the post.
 *  3. Call DetectSpam().
 *  4. Re-save three batches of posts (hidden-but-listed, unlisted-but-visible,
 *     least recently updated) and call PostCat::Update().
 *
 * Progress is written to the output as HTML.
 *
 * @return void
 * @throws Exception Any error raised while processing an author is rethrown
 *                   after the author's transaction has been rolled back.
 */
public function Run()
{
    echo $this->Title('Posts');

    $total_new = 0;
    $total_found = 0;
    $total_ignored = 0;

    // Lets not bother with spammers
    $skipauthors = array();
    $query = 'SELECT "authorid",COUNT(*) AS "total",SUM("hidden") AS "hidden" FROM "posts" GROUP BY "authorid" ';
    $rs = static::$conn->Execute($query);
    while ($row = $rs->Fetch()) {
        // Total posts equals hidden posts
        if ($row['total'] == $row['hidden'] && $row['hidden'] > self::SPAM_POST_THRESHOLD) {
            // Cast to int: the IDs are concatenated into the NOT IN (...) list below
            $authorid = (int) $row['authorid'];
            $skipauthors[$authorid] = $authorid;
        }
    }

    // Process new articles by author - each author gets a limited time slice
    // (self::TIME_LIMIT) so that spam can't hold up useful posts
    $authors = array();
    $query = 'SELECT "articles"."authorid","authors"."name" FROM "articles" '
        . 'LEFT JOIN "authors" ON ("authors"."id" = "articles"."authorid") '
        . 'WHERE ("articles"."postid" = 0) AND ("articles"."created" > ?) '
        . (count($skipauthors) > 0 ? 'AND ("articles"."authorid" NOT IN (' . implode(',', $skipauthors) . ')) ' : '')
        . 'GROUP BY "articles"."authorid" '
        . 'ORDER BY RAND() '; // Ensures that if a single author still holds up script too long, next time luck might change
    $rs = static::$conn->Execute($query, time() - self::MATCH_RECENT);
    while ($row = $rs->Fetch()) {
        $authors[$row['authorid']] = $row['name'];
    }

    foreach ($authors as $author_id => $author_name) {
        try {
            // Begin transaction
            static::$conn->BeginTransaction();

            $start = GetMicroTime();
            echo '<p><i>Processing articles posted by <b>' . SafeHTML($author_name) . '</b></i><br />';

            $count = 0;
            $new = 0;
            $found = 0;
            $ignored = 0;
            $skip = array();

            while (GetMicroTime() - $start < self::TIME_LIMIT) {
                // Get article not yet associated with a post. Articles are walked in
                // ascending ID order, so everything up to the last ignored ID is excluded.
                $query = 'SELECT "id","authorid","subject","post_date" FROM "articles" '
                    . 'WHERE ("postid" = 0) AND ("authorid" = ?) AND ("created" > ?) '
                    . (count($skip) > 0 ? 'AND ("id" > ' . intval(end($skip)) . ') ' : '')
                    . 'ORDER BY "id" ASC LIMIT 0,1';
                $rs = static::$conn->Execute($query, $author_id, time() - self::MATCH_RECENT);
                $row = $rs->Fetch();

                // Abort if no more articles
                if (!$row) {
                    break;
                }

                // Reset
                $this->sql = $row['subject'];
                $this->sql_alt = $row['subject'];
                $this->parts = null;
                $this->pos = null;

                // Create Queries
                $this->CreateQueries();

                echo SafeHTML($row['subject']) . ' - ';

                $strategy = $this->DetermineStrategy($row);

                // If first pass failed, try a second time
                if ($strategy == self::STRATEGY_IGNORE) {
                    $strategy = $this->SecondPass($row);
                }

                // Skip ignored
                if ($strategy == self::STRATEGY_IGNORE) {
                    $ignored++;
                    $skip[$row['id']] = $row['id'];
                    echo ' - <i><b style="color:#990000">Ignored</b></i><br />';
                    continue;
                }

                // The same pattern and date window are used for the lookup and the update
                $pattern = ($strategy == self::STRATEGY_PRIMARY) ? $this->sql : $this->sql_alt;
                $date_from = $row['post_date'] - self::MATCH_RANGE;
                $date_to = $row['post_date'] + self::MATCH_RANGE;

                // Find existing post (if any)
                $query = 'SELECT "postid" FROM "articles" WHERE ("postid" != 0) AND ("subject" LIKE ?) AND ("authorid" = ?) AND ("post_date" > ?) AND ("post_date" < ?) LIMIT 0,1';
                $rs = static::$conn->Execute($query, $pattern, $row['authorid'], $date_from, $date_to);
                $match = $rs->Fetch();

                if ($match) {
                    $found++;
                    $post = Post::FindByID($match['postid']);
                    echo ' - <i><b style="color:#000099">Found</b></i><br />';
                } else {
                    $new++;
                    $post = new Post();
                    // We need the post ID so save first
                    $post->Save();
                    echo ' - <i><b style="color:#009900">New</b></i><br />';
                }

                // Update articles
                $query = 'UPDATE "articles" SET "postid" = ? WHERE ("postid" = 0) AND ("subject" LIKE ?) AND ("authorid" = ?) AND ("post_date" > ?) AND ("post_date" < ?)';
                static::$conn->Execute($query, $post->id, $pattern, $row['authorid'], $date_from, $date_to);

                // Queue post recalculation + post group update (initializes all missing fields for new posts too)
                Post::Queue($post->id);

                // Sleep for 0.1 seconds to give database time to process
                usleep(100000);

                // NOTE(review): the check runs before the increment and uses ">", so up to
                // ARTICLE_LIMIT + 2 non-ignored articles are handled per author, and ignored
                // articles are never counted. Behaviour left unchanged - confirm the intent.
                if ($count > self::ARTICLE_LIMIT) {
                    break;
                }
                $count++;
            }

            // Update all posts in one go (in case one post was updated more than once)
            Post::Update();

            // Commit transaction
            static::$conn->Commit();
        } catch (Exception $e) {
            // Rollback on error
            static::$conn->Rollback();
            throw $e;
        }

        echo '<b>' . $new . '</b> posts added, <b>' . $found . '</b> posts updated, <b>' . $ignored . '</b> posts ignored (<b>' . number_format(GetMicroTime() - $start, 2) . '</b> seconds)</p>';

        $total_new += $new;
        $total_found += $found;
        $total_ignored += $ignored;

        // Abort if processing all authors takes too much time
        if (GetMicroTime() - $this->time['start'] > self::MAX_TIME_LIMIT) {
            break;
        }
    }

    echo '<p><i>Total <b>' . $total_new . '</b> posts added, <b>' . $total_found . '</b> posts updated, <b>' . $total_ignored . '</b> posts ignored</i></p>';

    // Detect spam
    $this->DetectSpam();

    // Maintenance: re-save selected batches of posts. The queries run in the listed order.
    $maintenance = array(
        // Last 1000 posts that are marked hidden yet are still being listed (=BAD!)
        'SELECT "posts"."id" FROM "posts" LEFT JOIN "postcat" ON ("postcat"."postid" = "posts"."id") '
            . 'WHERE ("postcat"."postid" IS NOT NULL) '
            . 'AND ("posts"."hidden" = 1) '
            . 'ORDER BY "posts"."id" DESC '
            . 'LIMIT 0,1000 ',
        // Last 100 posts that are not being listed even though they're not marked hidden
        // (ie: they were still incomplete when last time encountered)
        'SELECT "posts"."id" FROM "posts" LEFT JOIN "postcat" ON ("postcat"."postid" = "posts"."id") '
            . 'WHERE ("postcat"."postid" IS NULL) '
            . 'AND ("posts"."hidden" = 0) '
            . 'AND ("posts"."post_date" < ' . (time() - 600) . ') '
            . 'AND ("posts"."files" > 1) '
            . 'AND ("posts"."files" < ' . self::MAX_POST_FILES_LIMIT . ') '
            . 'AND ("posts"."size" >= 1048576) '
            . 'ORDER BY "posts"."post_date" DESC '
            . 'LIMIT 0,100 ',
        // The 100 posts that have been last updated longest time ago
        'SELECT "id" FROM "posts" ORDER BY "updated" ASC LIMIT 0,100 ',
    );

    foreach ($maintenance as $query) {
        $rs = static::$conn->Execute($query);
        while ($row = $rs->Fetch()) {
            // NOTE(review): uses Post::Find() where the matching code above uses
            // Post::FindByID() - presumably equivalent for a scalar ID; verify.
            $post = Post::Find($row['id']);
            $post->Save();
        }
    }

    PostCat::Update();
}