From 3b43a463dd1e21c03b96cc8ff1f77c74056bb9d3 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Tue, 24 Feb 2015 11:37:51 -0800
Subject: [PATCH 01/23] Enable e10s

---
 slimtest_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/slimtest_config.py b/slimtest_config.py
index b6f9cde..a614f59 100644
--- a/slimtest_config.py
+++ b/slimtest_config.py
@@ -33,6 +33,7 @@
     'vars':
       {
         'test': [ 'benchtester', 'test_memory_usage.py' ],
+        'e10s': True,
       }
   },
 };

From 2087e30e4b29f4abf8738eba655a524ed4ec901b Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Tue, 24 Feb 2015 18:00:50 -0800
Subject: [PATCH 02/23] Update DB schemas to support e10s

- Breaks out metadata into its own columns/tables
- Stores value type and kind in data table
- Adds process name to data table
---
 benchtester/BenchTester.py | 40 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/benchtester/BenchTester.py b/benchtester/BenchTester.py
index f75a2f8..9e4fbf6 100644
--- a/benchtester/BenchTester.py
+++ b/benchtester/BenchTester.py
@@ -15,7 +15,14 @@
 import mercurial, mercurial.ui, mercurial.hg, mercurial.commands
 import time
 
+# Database version, bump this when incompatible DB changes are made
+gVersion = 1
+
 gTableSchemas = [
+  # benchtester_version - the database version, can be used for upgrade scripts
+  '''CREATE TABLE IF NOT EXISTS
+      "benchtester_version" ("version" INTEGER NOT NULL UNIQUE)''',
+
   # Builds - info on builds we have tests for
   '''CREATE TABLE IF NOT EXISTS
       "benchtester_builds" ("id" INTEGER PRIMARY KEY NOT NULL,
@@ -35,12 +42,26 @@
       "benchtester_datapoints" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                                 "name" VARCHAR NOT NULL UNIQUE)''',
 
+  # Procs - names of processes
+  '''CREATE TABLE IF NOT EXISTS
+      "benchtester_procs" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+                           "name" VARCHAR NOT NULL UNIQUE)''',
+
+  # Checkpoints - names of checkpoints
+  '''CREATE TABLE IF NOT EXISTS
+      "benchtester_checkpoints" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+                                 "name" VARCHAR NOT NULL UNIQUE)''',
+
   # Data - datapoints from tests
   '''CREATE TABLE IF NOT EXISTS
       "benchtester_data" ("test_id" INTEGER NOT NULL,
                           "datapoint_id" INTEGER NOT NULL,
+                          "checkpoint_id" INTEGER NOT NULL,
+                          "proc_id" INTEGER NOT NULL,
+                          "iteration" INTEGER NOT NULL,
                           "value" INTEGER NOT NULL,
-                          "meta" VARCHAR)''',
+                          "units" INTEGER NOT NULL,
+                          "kind" INTEGER NOT NULL)''',
 
   # Some default indexes
   '''CREATE INDEX IF NOT EXISTS test_lookup ON benchtester_tests ( name, build_id DESC )''',
@@ -264,11 +285,28 @@ def _open_db(self):
       self.sqlitedb = self.args['sqlitedb'] = None
       return False
     try:
+      db_exists = os.path.exists(self.args['sqlitedb'])
+
       sql_path = os.path.abspath(self.args['sqlitedb'])
       self.sqlite = sqlite3.connect(sql_path, timeout=900)
       cur = self.sqlite.cursor()
+
+      if db_exists:
+        # make sure the version matches
+        cur.execute("SELECT `version` FROM `benchtester_version` WHERE `version` = ?", [ gVersion ])
+        row = cur.fetchone()
+        version = row[0] if row else None
+        if version != gVersion:
+          self.error("Incompatible versions: %s is version %s, current version is %s" % (self.args['sqlitedb'], version, gVersion))
+          self.sqlitedb = self.args['sqlitedb'] = None
+          return False
+
       for schema in gTableSchemas:
         cur.execute(schema)
+
+      if not db_exists:
+        cur.execute("INSERT INTO `benchtester_version` (`version`) VALUES (?)", [ gVersion ])
+
       # Create/update build ID
       cur.execute("SELECT `time`, `id` FROM `benchtester_builds` WHERE `name` = ?", [ self.buildname ])
       buildrow = cur.fetchone()

From 1627924cffc4be5f1f34600458cd8639cf1ea182 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 25 Feb 2015 11:13:46 -0800
Subject: [PATCH 03/23] Make checkpoint always include kind and unit

- Always report kind and unit
- Don't convert to strings, just report the integer value
---
 benchtester/checkpoint.js | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/benchtester/checkpoint.js b/benchtester/checkpoint.js
index 67fa3b6..9a9c17c 100644
--- a/benchtester/checkpoint.js
+++ b/benchtester/checkpoint.js
@@ -26,40 +26,18 @@ function createCheckpoint(aLabel) {
       aProcess = "Main"
     }
 
-    var unitname;
-    switch (aUnits) {
-      // Old builds had no units field and assumed bytes
-      case undefined:
-      case Ci.nsIMemoryReporter.UNITS_BYTES:
-        break;
-      case Ci.nsIMemoryReporter.UNITS_COUNT:
-        unitname = "cnt";
-        break;
-      case Ci.nsIMemoryReporter.UNITS_PERCENTAGE:
-        unitname = "pct";
-        break;
-      default:
-        // Unhandled
-        return;
-    }
-
-    // For types with non-bytes units the value is
-    //   { 'unit': 'percent', 'val': 1234 }
-    // For bytes it is just a number, so as not to bloat output (we end up
-    // exporting 11k+ reporters on newer builds)
     if (!result['memory'][aProcess]) {
       result['memory'][aProcess] = {}
     }
 
     if (result['memory'][aProcess][aPath]) {
-      if (unitname)
-        result['memory'][aProcess][aPath]['val'] += aAmount;
-      else
-        result['memory'][aProcess][aPath] += aAmount;
-    } else if (unitname) {
-      result['memory'][aProcess][aPath] = { 'unit': unitname, 'val': aAmount };
+      result['memory'][aProcess][aPath]['val'] += aAmount;
     } else {
-      result['memory'][aProcess][aPath] = aAmount;
+      result['memory'][aProcess][aPath] = {
+        'unit': aUnits,
+        'val': aAmount,
+        'kind': aKind
+      };
     }
 
     if (aKind !== undefined && aKind == Ci.nsIMemoryReporter.KIND_HEAP

From 9432382d709706bb097893ca4c0667a4e32725d5 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 25 Feb 2015 11:15:04 -0800
Subject: [PATCH 04/23] Keep memory reports in checkpoint format

Removes intermediate conversion step.
---
 benchtester/MarionetteTest.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/benchtester/MarionetteTest.py b/benchtester/MarionetteTest.py
index 297642c..6fd3a45 100644
--- a/benchtester/MarionetteTest.py
+++ b/benchtester/MarionetteTest.py
@@ -127,23 +127,7 @@ def run_test(self, testname, testvars={}):
 
     self.endurance_results = runner.testvars.get("results", [])
 
-    results = list()
-    for x in range(len(self.endurance_results)):
-      iteration = self.endurance_results[x]
-      for checkpoint in iteration:
-        iternum = x + 1
-        label = checkpoint['label']
-        # TODO(ER): Handle all process entries
-        for memtype,memval in checkpoint['memory']['Main'].items():
-          if type(memval) is dict:
-            prefix = memval['unit'] + ":"
-            memval = memval['val']
-          else:
-            prefix = ""
-          datapoint = [ "%s%s" % (prefix, memtype), memval, "%s:%u" % (label, iternum) ]
-          results.append(datapoint)
-
-    if not self.tester.add_test_results(testname, results, not failures):
+    if not self.tester.add_test_results(testname, self.endurance_results, not failures):
       return self.error("Failed to save test results")
     if failures:
       return self.error("%u failures occured during test run" % failures)

From 43496f795f8300e1d7235b93c6132f0d2b717efa Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 25 Feb 2015 11:23:04 -0800
Subject: [PATCH 05/23] Properly insert data into new database format

- Process and checkpoint names are properly inserted
- All new columns in benchtester_data are filled
---
 benchtester/BenchTester.py | 92 +++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 22 deletions(-)

diff --git a/benchtester/BenchTester.py b/benchtester/BenchTester.py
index 9e4fbf6..0af09fc 100644
--- a/benchtester/BenchTester.py
+++ b/benchtester/BenchTester.py
@@ -7,12 +7,13 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
-import sys
-import os
 import argparse
+import mercurial, mercurial.ui, mercurial.hg, mercurial.commands
+import os
+import re
 import sqlite3
 import subprocess
-import mercurial, mercurial.ui, mercurial.hg, mercurial.commands
+import sys
 import time
 
 # Database version, bump this when incompatible DB changes are made
@@ -160,6 +161,69 @@ def load_module(self, modname):
 
     return True
 
+  def insert_results(self, test_id, results):
+    # - results is an array of iterations
+    # - iterations is an array of checkpoints
+    # - checkpoint is a dict with: label, memory
+    # - memory is a dict of processes
+    cur = self.sqlite.cursor()
+
+    for x, iteration in enumerate(results):
+      iternum = x + 1
+      for checkpoint in iteration:
+        label = checkpoint['label']
+
+        # insert checkpoint name, get checkpoint_id
+        cur.execute("SELECT id FROM benchtester_checkpoints WHERE name = ?", (label, ))
+        row = cur.fetchone()
+        checkpoint_id = row[0] if row else None
+        if checkpoint_id is None:
+          cur.execute("INSERT INTO benchtester_checkpoints(name) VALUES (?)", (label, ))
+          checkpoint_id = cur.lastrowid
+
+        for process_name, memory in checkpoint['memory'].iteritems():
+          # memory is a dictionary of datapoint_name: { val, unit, kind }
+
+          # Strip pid portion of process name
+          process_re = r'(.*)\s+\(.*\)'
+          m = re.match(process_re, process_name)
+          if m:
+            process_name = m.group(1)
+
+          # insert process name, get process_id
+          cur.execute("SELECT id FROM benchtester_procs WHERE name = ?", (process_name, ))
+          row = cur.fetchone()
+          process_id = row[0] if row else None
+          if process_id is None:
+            cur.execute("INSERT INTO benchtester_procs(name) VALUES (?)", (process_name, ))
+            process_id = cur.lastrowid
+
+          # insert datapoint names
+          insertbegin = time.time()
+          self.info("Inserting %u datapoints into DB" % len(memory))
+          cur.executemany("INSERT OR IGNORE INTO `benchtester_datapoints`(name) "
+                          "VALUES (?)",
+                          ( [ k ] for k in memory.iterkeys() ))
+          self.sqlite.commit()
+          self.info("Filled datapoint names in %.02fs" % (time.time() - insertbegin))
+
+          # insert datapoint values
+          insertbegin = time.time()
+          cur.executemany("INSERT INTO `benchtester_data` "
+                          "SELECT ?, p.id, ?, ?, ?, ?, ?, ? FROM `benchtester_datapoints` p "
+                          "WHERE p.name = ?",
+                          ( [ test_id,
+                              checkpoint_id,
+                              process_id,
+                              iternum,
+                              dp['val'],
+                              dp['unit'],
+                              dp['kind'],
+                              name ]
+                            for name, dp in memory.iteritems() if dp ))
+          self.sqlite.commit()
+          self.info("Filled datapoint values in %.02fs" % (time.time() - insertbegin))
+
   # datapoints a list of the format [ [ "key", value, "meta"], ... ].
   # Duplicate keys are allowed. Value is numeric and required, meta is an
   # optional string (see db format)
@@ -186,27 +250,11 @@ def add_test_results(self, testname, datapoints, succeeded=True):
 
         if datapoints:
           testid = cur.fetchone()[0]
-          insertbegin = time.time()
-          self.info("Inserting %u datapoints into DB" % len(datapoints))
-          cur.executemany("INSERT OR IGNORE INTO `benchtester_datapoints`(name) "
-                          "VALUES (?)",
-                          ([ datapoint[0] ] for datapoint in datapoints))
-          self.sqlite.commit()
-          self.info("Filled datapoint names in %.02fs" % (time.time() - insertbegin))
-          insertbegin = time.time()
-          # If val is a list, it is interpreted as [ value, meta ]
-          cur.executemany("INSERT INTO `benchtester_data` "
-                          "SELECT ?, p.id, ?, ? FROM `benchtester_datapoints` p "
-                          "WHERE p.name = ?",
-                          ( [ testid,
-                              dp[1],
-                              dp[2] if len(dp) > 2 else None,
-                              dp[0] ]
-                            for dp in datapoints ))
-          self.sqlite.commit()
-          self.info("Filled datapoint values in %.02fs" % (time.time() - insertbegin))
+          self.insert_results(testid, datapoints)
       except Exception, e:
         self.error("Failed to insert data into sqlite, got '%s': %s" % (type(e), e))
+        import traceback
+        traceback.print_exc()
         self.sqlite.rollback()
         return False
     return True

From 44fafdd1d502ba75ac99d4543b5339f6057e4083 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 25 Feb 2015 15:36:14 -0800
Subject: [PATCH 06/23] Handle new DB format in create_graph_json.py

Uses new DB fields. Currently only handles the Main process and ignores
the 'kind' field.
---
 create_graph_json.py | 48 +++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/create_graph_json.py b/create_graph_json.py
index ce9ca08..2809d7c 100755
--- a/create_graph_json.py
+++ b/create_graph_json.py
@@ -86,6 +86,14 @@
   }
 }
 
+# Mapping of unit values to names
+unit_map = {
+    0: 'bytes',
+    1: 'cnt',
+    #2 => UNITS_COUNT_CUMULATIVE, currently this isn't handled
+    3: 'pct'
+}
+
 # Reuse default tests for android, but s/Iteration 5/Iteration 1/
 for k, v in gTests['Slimtest-TalosTP5-Slow']['series'].iteritems():
   if type(v['datapoint']) is list:
@@ -269,30 +277,38 @@ def _findNode(nodes, datapoint, nodeize):
         nodeize = False
 
       # Pull all data for latest run of this test on this build
-      allrows = cur.execute('''SELECT p.name AS datapoint, d.value, d.meta
-                               FROM benchtester_data d, benchtester_datapoints p
-                               WHERE test_id = ? AND p.id = d.datapoint_id
+      allrows = cur.execute('''SELECT dp.name AS datapoint,
+                                      c.name AS checkpoint,
+                                      p.name AS process,
+                                      d.iteration, d.value, d.units, d.kind
+                               FROM benchtester_data d,
+                                    benchtester_datapoints dp,
+                                    benchtester_procs p,
+                                    benchtester_checkpoints c
+                               WHERE test_id = ? AND dp.id = d.datapoint_id
+                                                 AND c.id = d.checkpoint_id
+                                                 AND p.id = d.proc_id
                             ''', [testdata[testname]['id']])
 
+      # NB: For now kind is ignored, anything but the Main process is ignored
+
       # Sort data, splitting it up into nodes if requested. Calculate the value
       # of each node - either a sum of its childnodes, or its explicit value if
       # given. The idea is to reduce the amount of data juggling the frontend
       # needs to do.
       for row in allrows:
-        # If the datapoint begins with "AAA:..." then the datapoint has
-        # non-bytes units, and we include _units and strip the prefix
-        datapoint = row['datapoint']
-        units = datapoint.find(':', 0, 4)
-        if units != -1:
-          (units, datapoint) = datapoint.split(':', 1)
-        else:
-          units = 'bytes'
+        if row['process'] != 'Main':
+          continue
 
-        # The 'meta' field in the db holds "CheckpointName:Iteration". Prefix
-        # these on to the reporter name, e.g. "Iteration 1/MaxMem/<reporter>" so
-        # they fit nicely into a tree.
-        meta = row['meta'].split(':')
-        datapoint = "Iteration %u/%s/%s" % (int(meta[1]), meta[0], datapoint)
+        datapoint = row['datapoint']
+        units = unit_map.get(row['units'])
+        if not units:
+          print("skipping unhandled unit %s for %s" % (row['units'], datapoint))
+          continue
+
+        # Prefix the reporter name, e.g. "Iteration 1/MaxMem/<reporter>" so
+        # that it fits nicely into a tree.
+        datapoint = "Iteration %u/%s/%s" % (row['iteration'], row['checkpoint'], datapoint)
 
         if nodeize:
           # Note that we perserve null values as 'none', to differentiate missing data from values of 0

From d866329d89fc4d3049f590ec601111efb1a79c67 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Thu, 26 Feb 2015 15:40:45 -0800
Subject: [PATCH 07/23] v0 to v1 update script

Based on the original update_database.py script.
---
 util/update_database_v0_v1.py | 190 ++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100755 util/update_database_v0_v1.py

diff --git a/util/update_database_v0_v1.py b/util/update_database_v0_v1.py
new file mode 100755
index 0000000..644e017
--- /dev/null
+++ b/util/update_database_v0_v1.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright © 2012 Mozilla Corporation
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# Converts a database using the older unversioned format to the v1 format.
+
+import os
+import re
+import sqlite3
+import sys
+import time
+
+sys.path.append(os.path.join('.', 'benchtester'))
+
+# We need gTableSchemas to create the new database
+try:
+  import BenchTester
+except:
+  sys.stderr.write("Couldn't find benchtester in current directory. Run me from the root!\n");
+  sys.exit(1);
+
+
+# memory report 'kind' constants
+KIND_NONHEAP = 0
+KIND_HEAP = 1
+KIND_OTHER = 2
+
+# memory report 'units' constants
+UNITS_BYTES = 0
+UNITS_COUNT = 1
+UNITS_COUNT_CUMULATIVE = 2
+UNITS_PERCENTAGE = 3
+
+if len(sys.argv) < 2:
+  sys.stderr.write("Usage: %s <database>\n" % (sys.argv[0],));
+  sys.stderr.write("  will create a new database named <database>.new in the\n");
+  sys.stderr.write("  newer format. The optional second parameter is one or\n");
+  sys.stderr.write("  more tests (by name) to omit from the new database.\n");
+  sys.exit(1);
+
+if not os.path.exists(sys.argv[1]):
+  sys.stderr.write("Database '%s' does not exist" % (sys.argv[1],))
+  sys.exit(1)
+
+newdb = sys.argv[1] + '.new'
+print("Creating %s..." % (newdb,))
+sql = sqlite3.connect(newdb, timeout=900)
+sql.row_factory = sqlite3.Row
+cur = sql.cursor()
+for schema in BenchTester.gTableSchemas:
+  print(schema)
+  cur.execute(schema)
+
+# This will speed things up significantly at the expense of ~4GiB memory usage
+cur.execute('''PRAGMA cache_size = -4000000''')
+cur.execute('''PRAGMA temp_store = 2''')
+# The new database is empty if we don't reach COMMIT, so we don't particularly
+# care if we corrupt it. This also significantly speeds up the operation.
+cur.execute('''PRAGMA journal_mode = OFF''')
+cur.execute('''PRAGMA synchronous = OFF''')
+
+# Open old db
+print("Opening %s..." % (sys.argv[1],))
+cur.execute('''ATTACH DATABASE ? AS old''', [ sys.argv[1] ])
+
+print("Counting rows...")
+cur.execute('SELECT COUNT(*) FROM old.benchtester_tests')
+totalrows = cur.fetchone()[0]
+print("%u total tests" % totalrows)
+
+#
+# Determine format of old DB
+#
+
+db_version = None
+try:
+  cur.execute('SELECT * FROM old.benchtester_version LIMIT 1')
+  db_version = cur.fetchone()['version']
+except sqlite3.OperationalError:
+  db_version = 0
+
+if db_version == BenchTester.gVersion:
+  print("Database is up to date, version = %s" % db_version)
+  sys.exit(1)
+elif db_version != 0 or BenchTester.gVersion != 1:
+  print("This script currently only handles 0 => 1")
+  sys.exit(1)
+else:
+  print("Upgrading db version from %s to %s" % (db_version, BenchTester.gVersion))
+
+starttime = time.time()
+
+# Set the DB version
+cur.execute('INSERT INTO benchtester_version(version) VALUES ( ? )', (BenchTester.gVersion, ))
+
+# Add the benchtester_checkpoints
+cur.execute('SELECT DISTINCT meta FROM old.benchtester_data')
+checkpoints = set([ row['meta'].split(':')[0] for row in cur.fetchall() ])
+cur.executemany('INSERT INTO benchtester_checkpoints(name) '
+                'VALUES (?)', ( [ checkpoint ] for checkpoint in checkpoints ))
+
+print("[%.02fs] Inserted %d checkpoints" % ((time.time() - starttime), len(checkpoints)))
+
+# Add an entry for Main in benchtester_procs
+cur.execute('INSERT INTO benchtester_procs(name) VALUES ( ? )', ('Main', ))
+
+# Fill in the datapoints table
+cur.execute('SELECT DISTINCT name AS datapoint '
+            'FROM old.benchtester_datapoints d ')
+
+# Given an old datapoint name, returns [ newname, units ]
+def splitunits(dp):
+  units = UNITS_BYTES
+  match = re.match(r'(cnt|pct):(.*)', dp)
+
+  if match:
+    dp = match.group(2)
+    units = UNITS_COUNT if match.group(1) == 'cnt' else UNITS_PERCENTAGE
+
+  return [ dp, units ]
+
+datapoints = set(( splitunits(row['datapoint'])[0] for row in cur.fetchall() ))
+
+print("[%.02fs] Selected %d datapoints" % ((time.time() - starttime), len(datapoints)))
+
+# Insert all datapoint names
+cur.executemany('INSERT OR IGNORE INTO benchtester_datapoints(name) '
+                'VALUES (?)',
+                ( [ dp ] for dp in datapoints ))
+
+print("[%.02fs] Inserted %d datapoints" % ((time.time() - starttime), len(datapoints)))
+
+# Copy the builds table
+cur.execute('INSERT INTO benchtester_builds(id, name, time) '
+            'SELECT id, name, time from old.benchtester_builds ')
+
+print("[%.02fs] Copied benchtester_builds" % (time.time() - starttime))
+
+# Copy the tests table
+cur.execute('INSERT INTO benchtester_tests(id, name, time, build_id, successful) '
+            'SELECT id, name, time, build_id, successful FROM old.benchtester_tests')
+
+print("[%.02fs] Copied benchtester_tests" % (time.time() - starttime))
+
+# Fill in the new benchtester_data table
+data = cur.execute('SELECT d.test_id, p.name AS datapoint, d.value, d.meta '
+                   'FROM old.benchtester_data d '
+                   'JOIN old.benchtester_datapoints p '
+                   'ON d.datapoint_id = p.id ')
+
+def splitmeta(meta):
+  return meta.split(':')
+
+def rowify(row):
+  dp, units = splitunits(row['datapoint'])
+  checkpoint, iteration = splitmeta(row['meta'])
+  proc_id = 1 # there's just the Main process in version 0
+
+  # we just say kind is heap if under explicit, other if not
+  kind = KIND_HEAP if dp.startswith('explicit') else KIND_OTHER
+
+  return [ row['test_id'],
+           proc_id,
+           int(iteration),
+           row['value'],
+           units,
+           kind,
+           dp,
+           checkpoint ]
+
+# Insert data
+cur.executemany('INSERT INTO benchtester_data(test_id,datapoint_id,checkpoint_id,proc_id,iteration,value,units,kind) '
+                'SELECT ?, p.id, c.id, ?, ?, ?, ?, ? '
+                'FROM benchtester_datapoints p, '
+                '     benchtester_checkpoints c '
+                'WHERE p.name = ? AND c.name = ?',
+                ( rowify(row) for row in data.fetchall() ))
+
+print("[%.02fs] Inserted benchtester_data" % (time.time() - starttime))
+
+sql.commit()
+
+print("[%.02fs] Committed everything" % (time.time() - starttime))
+

From ac493e8f868e68cbda738ff7e20d3710f1f9b356 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Thu, 26 Feb 2015 18:50:15 -0800
Subject: [PATCH 08/23] Add note about supported versions

---
 benchtester/checkpoint.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/benchtester/checkpoint.js b/benchtester/checkpoint.js
index 9a9c17c..ade57cf 100644
--- a/benchtester/checkpoint.js
+++ b/benchtester/checkpoint.js
@@ -70,6 +70,8 @@ function createCheckpoint(aLabel) {
   var memMgr = Cc["@mozilla.org/memory-reporter-manager;1"].
       getService(Ci.nsIMemoryReporterManager);
 
+  // NB: |memMgr.getReports| was added in Fx28, we do not support releases
+  //     prior to that.
   memMgr.getReports(addReport, null, onFinish, null, /* anonymize */ false);
 }
 

From 806e9aee5a37a0d84cf328d132fae6fea686857a Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Fri, 27 Feb 2015 09:23:20 -0800
Subject: [PATCH 09/23] Rename 'memory' => 'reports'

---
 benchtester/BenchTester.py | 14 +++++++-------
 benchtester/checkpoint.js  | 18 +++++++++---------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/benchtester/BenchTester.py b/benchtester/BenchTester.py
index 0af09fc..ef9a474 100644
--- a/benchtester/BenchTester.py
+++ b/benchtester/BenchTester.py
@@ -164,8 +164,8 @@ def load_module(self, modname):
   def insert_results(self, test_id, results):
     # - results is an array of iterations
     # - iterations is an array of checkpoints
-    # - checkpoint is a dict with: label, memory
-    # - memory is a dict of processes
+    # - checkpoint is a dict with: label, reports
+    # - reports is a dict of processes
     cur = self.sqlite.cursor()
 
     for x, iteration in enumerate(results):
@@ -181,8 +181,8 @@ def insert_results(self, test_id, results):
           cur.execute("INSERT INTO benchtester_checkpoints(name) VALUES (?)", (label, ))
           checkpoint_id = cur.lastrowid
 
-        for process_name, memory in checkpoint['memory'].iteritems():
-          # memory is a dictionary of datapoint_name: { val, unit, kind }
+        for process_name, reports in checkpoint['reports'].iteritems():
+          # reports is a dictionary of datapoint_name: { val, unit, kind }
 
           # Strip pid portion of process name
           process_re = r'(.*)\s+\(.*\)'
@@ -200,10 +200,10 @@ def insert_results(self, test_id, results):
 
           # insert datapoint names
           insertbegin = time.time()
-          self.info("Inserting %u datapoints into DB" % len(memory))
+          self.info("Inserting %u datapoints into DB" % len(reports))
           cur.executemany("INSERT OR IGNORE INTO `benchtester_datapoints`(name) "
                           "VALUES (?)",
-                          ( [ k ] for k in memory.iterkeys() ))
+                          ( [ k ] for k in reports.iterkeys() ))
           self.sqlite.commit()
           self.info("Filled datapoint names in %.02fs" % (time.time() - insertbegin))
 
@@ -220,7 +220,7 @@ def insert_results(self, test_id, results):
                               dp['unit'],
                               dp['kind'],
                               name ]
-                            for name, dp in memory.iteritems() if dp ))
+                            for name, dp in reports.iteritems() if dp ))
           self.sqlite.commit()
           self.info("Filled datapoint values in %.02fs" % (time.time() - insertbegin))
 
diff --git a/benchtester/checkpoint.js b/benchtester/checkpoint.js
index ade57cf..9c2c406 100644
--- a/benchtester/checkpoint.js
+++ b/benchtester/checkpoint.js
@@ -13,7 +13,7 @@ function createCheckpoint(aLabel) {
   var result = {
     label: aLabel,
     timestamp: new Date(),
-    memory: {},
+    reports: {},
   };
 
   var knownHeap = {};
@@ -26,14 +26,14 @@ function createCheckpoint(aLabel) {
       aProcess = "Main"
     }
 
-    if (!result['memory'][aProcess]) {
-      result['memory'][aProcess] = {}
+    if (!result['reports'][aProcess]) {
+      result['reports'][aProcess] = {}
     }
 
-    if (result['memory'][aProcess][aPath]) {
-      result['memory'][aProcess][aPath]['val'] += aAmount;
+    if (result['reports'][aProcess][aPath]) {
+      result['reports'][aProcess][aPath]['val'] += aAmount;
     } else {
-      result['memory'][aProcess][aPath] = {
+      result['reports'][aProcess][aPath] = {
         'unit': aUnits,
         'val': aAmount,
         'kind': aKind
@@ -56,11 +56,11 @@ function createCheckpoint(aLabel) {
    */
   function onFinish(aClosure) {
     // Calculate heap-unclassified for each process
-    var keys = Object.keys(result['memory']);
+    var keys = Object.keys(result['reports']);
     for (var idx = 0; idx < keys.length; idx++) {
       let proc = keys[idx];
-      result['memory'][proc]['explicit/heap-unclassified'] = 
-          result['memory'][proc]['heap-allocated'] - knownHeap[proc];
+      result['reports'][proc]['explicit/heap-unclassified'] =
+          result['reports'][proc]['heap-allocated'] - knownHeap[proc];
     }
 
     marionetteScriptFinished(result);

From defd51c6a89a0b000d57c00abd9b1b621769fc53 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Thu, 19 Mar 2015 16:20:59 -0700
Subject: [PATCH 10/23] Explicitly set version to 0 when upgrading old DBs

---
 util/update_database.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/util/update_database.py b/util/update_database.py
index ad8a731..03558fe 100755
--- a/util/update_database.py
+++ b/util/update_database.py
@@ -89,6 +89,9 @@
   print("Database is already the newest format!")
   sys.exit(1)
 
+# Explicitly set the new version to 0.
+cur.execute("INSERT INTO `benchtester_version` (`version`) VALUES (?)", [ 0 ])
+
 # Copy all non-excluded tests
 # (this was added so I could drop the obsolete Slimtest-TalosTP5 test from old DBs)
 

From ed1dddbad15b100a50da2a33b21beb257998b414 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Thu, 26 Mar 2015 11:46:07 -0700
Subject: [PATCH 11/23] Cleanup create_graph_json.py formatting

---
 create_graph_json.py | 602 ++++++++++++++++++++++---------------------
 1 file changed, 308 insertions(+), 294 deletions(-)

diff --git a/create_graph_json.py b/create_graph_json.py
index 2809d7c..237a1a5 100755
--- a/create_graph_json.py
+++ b/create_graph_json.py
@@ -21,7 +21,10 @@
 import gzip
 
 # For looking up build rev numbers
-import mercurial, mercurial.ui, mercurial.hg, mercurial.commands
+import mercurial
+import mercurial.ui
+import mercurial.hg
+import mercurial.commands
 gMercurialRepo = "./mozilla-inbound"
 
 # Config for which tests to export
@@ -34,108 +37,109 @@
 #                    full datapoint name.
 #                    If a list is given, interpret as alternate names for the datapoint
 gTests = {
-  "Slimtest-TalosTP5-Slow" : {
-    "nodeize" : "/",
-    "dump" : True,
-    "series" : {
-      "MaxMemoryV2" : { "datapoint": "Iteration 5/TabsOpen/explicit" },
-      "MaxMemorySettledV2" : { "datapoint": "Iteration 5/TabsOpenSettled/explicit" },
-      "MaxMemoryForceGCV2" : { "datapoint": "Iteration 5/TabsOpenForceGC/explicit" },
-      "MaxMemoryResidentV2" : { "datapoint": "Iteration 5/TabsOpen/resident" },
-      "MaxMemoryResidentSettledV2" : { "datapoint": "Iteration 5/TabsOpenSettled/resident" },
-      "MaxMemoryResidentForceGCV2" : { "datapoint": "Iteration 5/TabsOpenForceGC/resident" },
-      "StartMemoryV2" : { "datapoint": "Iteration 1/Start/explicit" },
-      "StartMemoryResidentV2" : { "datapoint": "Iteration 1/Start/resident" },
-      "StartMemorySettledV2" : { "datapoint": "Iteration 1/StartSettled/explicit" },
-      "StartMemoryResidentSettledV2" : { "datapoint": "Iteration 1/StartSettled/resident" },
-      "EndMemoryV2" : { "datapoint": "Iteration 5/TabsClosed/explicit" },
-      "EndMemoryResidentV2" : { "datapoint": "Iteration 5/TabsClosed/resident" },
-      "EndMemorySettledV2" : { "datapoint": "Iteration 5/TabsClosedSettled/explicit" },
-      "EndMemoryForceGCV2" : { "datapoint": "Iteration 5/TabsClosedForceGC/explicit" },
-      "EndMemoryResidentSettledV2" : { "datapoint": "Iteration 5/TabsClosedSettled/resident" },
-      "EndMemoryResidentForceGCV2" : { "datapoint": "Iteration 5/TabsClosedForceGC/resident" },
-      "MaxHeapUnclassifiedV2" : { "datapoint": "Iteration 5/TabsOpenSettled/explicit/heap-unclassified" },
-      "MaxJSV2" : {
-        "datapoint": [
-          # As of Jul 2012
-          "Iteration 5/TabsOpenSettled/js-main-runtime",
-          # Pre-Jul 2012
-          "Iteration 5/TabsOpenSettled/explicit/js",
-          # Old ~FF4 reporters
-          "Iteration 5/TabsOpenSettled/js",
-          # Brief period in may 2011 before heap-used became explicit
-          "Iteration 5/TabsOpenSettled/heap-used/js"
-        ]
-      },
-      "MaxImagesV2" : {
-        "datapoint": [
-          "Iteration 5/TabsOpenSettled/explicit/images",
-          # Old ~FF4 reporters
-          "Iteration 5/TabsOpenSettled/images",
-          # Brief period in may 2011 before heap-used became explicit
-          "Iteration 5/TabsOpenSettled/heap-used/images"
-        ]
-      }
+    "Slimtest-TalosTP5-Slow": {
+        "nodeize": "/",
+        "dump": True,
+        "series": {
+            "MaxMemoryV2": {"datapoint": "Iteration 5/TabsOpen/explicit"},
+            "MaxMemorySettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/explicit"},
+            "MaxMemoryForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/explicit"},
+            "MaxMemoryResidentV2": {"datapoint": "Iteration 5/TabsOpen/resident"},
+            "MaxMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/resident"},
+            "MaxMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/resident"},
+            "StartMemoryV2": {"datapoint": "Iteration 1/Start/explicit"},
+            "StartMemoryResidentV2": {"datapoint": "Iteration 1/Start/resident"},
+            "StartMemorySettledV2": {"datapoint": "Iteration 1/StartSettled/explicit"},
+            "StartMemoryResidentSettledV2": {"datapoint": "Iteration 1/StartSettled/resident"},
+            "EndMemoryV2": {"datapoint": "Iteration 5/TabsClosed/explicit"},
+            "EndMemoryResidentV2": {"datapoint": "Iteration 5/TabsClosed/resident"},
+            "EndMemorySettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/explicit"},
+            "EndMemoryForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/explicit"},
+            "EndMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/resident"},
+            "EndMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/resident"},
+            "MaxHeapUnclassifiedV2": {"datapoint": "Iteration 5/TabsOpenSettled/explicit/heap-unclassified"},
+            "MaxJSV2": {
+                "datapoint": [
+                    # As of Jul 2012
+                    "Iteration 5/TabsOpenSettled/js-main-runtime",
+                    # Pre-Jul 2012
+                    "Iteration 5/TabsOpenSettled/explicit/js",
+                    # Old ~FF4 reporters
+                    "Iteration 5/TabsOpenSettled/js",
+                    # Brief period in may 2011 before heap-used became explicit
+                    "Iteration 5/TabsOpenSettled/heap-used/js"
+                ]
+            },
+            "MaxImagesV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenSettled/explicit/images",
+                    # Old ~FF4 reporters
+                    "Iteration 5/TabsOpenSettled/images",
+                    # Brief period in may 2011 before heap-used became explicit
+                    "Iteration 5/TabsOpenSettled/heap-used/images"
+                ]
+            }
+        }
+    },
+    "Android-ARMv6": {
+        "nodeize": "/",
+        "dump": True,
+        # See below
+        "series": {}
     }
-  },
-  "Android-ARMv6" : {
-    "nodeize" : "/",
-    "dump" : True,
-    # See below
-    "series" : {}
-  }
 }
 
 # Mapping of unit values to names
 unit_map = {
     0: 'bytes',
     1: 'cnt',
-    #2 => UNITS_COUNT_CUMULATIVE, currently this isn't handled
+    # 2 => UNITS_COUNT_CUMULATIVE, currently this isn't handled
     3: 'pct'
 }
 
 # Reuse default tests for android, but s/Iteration 5/Iteration 1/
 for k, v in gTests['Slimtest-TalosTP5-Slow']['series'].iteritems():
-  if type(v['datapoint']) is list:
-    out = []
-    for x in v['datapoint']:
-      out.append(re.sub('^Iteration 5', 'Iteration 1', x))
-  else:
-    out = re.sub('^Iteration 5', 'Iteration 1', v['datapoint'])
-  gTests['Android-ARMv6']['series']['Android'+k] = { "datapoint": out }
+    if type(v['datapoint']) is list:
+        out = []
+        for x in v['datapoint']:
+            out.append(re.sub('^Iteration 5', 'Iteration 1', x))
+    else:
+        out = re.sub('^Iteration 5', 'Iteration 1', v['datapoint'])
+    gTests['Android-ARMv6']['series']['Android' + k] = {"datapoint": out}
 
 
 # Python 2 compat
 if sys.hexversion < 0x03000000:
-  def bytes(string, **kwargs):
-    return string
+    def bytes(string, **kwargs):
+        return string
+
 
 def error(msg):
-  sys.stderr.write(msg + '\n')
-  sys.exit(1)
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
 
 if len(sys.argv) != 4:
-  error("Usage: %s <database> <seriesname> <outdir>" % sys.argv[0])
+    error("Usage: %s <database> <seriesname> <outdir>" % sys.argv[0])
 
 gDatabase = os.path.normpath(sys.argv[1])
 gSeriesName = sys.argv[2]
 gOutDir = os.path.normpath(sys.argv[3])
 
 if not os.path.isfile(gDatabase):
-  error("Database '%s' not found")
+    error("Database '%s' not found")
 
 if not os.path.isdir(gOutDir):
-  if os.path.exists(gOutDir):
-    error("File '%s' is not a directory" % gOutDir)
-  # Try to create
-  parentdir = os.path.dirname(gOutDir)
-  # dirname() returns '' for this-directory. Other os.path functions dont
-  # recognize '' as this-directory. ???.
-  if parentdir == '':
-    parentdir = '.'
-  if not os.path.isdir(parentdir):
-    error("'%s' is not a directory, cannot create folders in it" % parentdir)
-  os.mkdir(gOutDir)
+    if os.path.exists(gOutDir):
+        error("File '%s' is not a directory" % gOutDir)
+    # Try to create
+    parentdir = os.path.dirname(gOutDir)
+    # dirname() returns '' for this-directory. Other os.path functions dont
+    # recognize '' as this-directory. ???.
+    if parentdir == '':
+        parentdir = '.'
+    if not os.path.isdir(parentdir):
+        error("'%s' is not a directory, cannot create folders in it" % parentdir)
+    os.mkdir(gOutDir)
 
 sql = sqlite3.connect(gDatabase, timeout=900)
 sql.row_factory = sqlite3.Row
@@ -147,37 +151,43 @@ def error(msg):
 builds = cur.fetchall()
 hg_ui = None
 hg_repo = None
+
+
 def build_sort(build_a, build_b):
-  global hg_repo, hg_ui
-  if build_a['time'] != build_b['time']:
-    return 1 if build_a['time'] > build_b['time'] else -1
-  # Builds have equal timestamp, look up their revision number in repo if
-  # possible
-  if not hg_repo:
-    hg_ui = mercurial.ui.ui()
-    hg_repo = mercurial.hg.repository(hg_ui, gMercurialRepo)
-    hg_ui.readconfig(os.path.join(gMercurialRepo, ".hg", "hgrc"))
-    hg_ui.pushbuffer()
-    # Pull repo, but don't update so we don't conflict with whatever the test
-    # daemon is doing with it
-    mercurial.commands.pull(hg_ui, hg_repo, check=False, update=False)
-    hg_ui.popbuffer()
-
-  # Get revisions
-  try:
-    hg_ui.pushbuffer()
-    mercurial.commands.log(hg_ui, hg_repo, rev=[ "%s" % (build_a['name'],) ], template="{rev}", date="", user=None, follow=None)
-    a_rev = int(hg_ui.popbuffer())
-    hg_ui.pushbuffer()
-    mercurial.commands.log(hg_ui, hg_repo, rev=[ "%s" % (build_b['name'],) ], template="{rev}", date="", user=None, follow=None)
-    b_rev = int(hg_ui.popbuffer())
-  except Exception as e:
-    # mercurial throws all kinds of fun exceptions for bad input
-    print("WARNING: Couldn't lookup ordering of commits with identical timestamp: %s / %s (%s: %s)" % (build_a[1], build_b[1], type(e), e))
-    return 0
-
-  print("Builds %s and %s have identical timestamp, using rev numbers %u and %u" % (build_a['name'], build_b['name'], a_rev, b_rev))
-  return 1 if a_rev > b_rev else -1 if b_rev > a_rev else 0
+    global hg_repo, hg_ui
+    if build_a['time'] != build_b['time']:
+        return 1 if build_a['time'] > build_b['time'] else -1
+    # Builds have equal timestamp, look up their revision number in repo if
+    # possible
+    if not hg_repo:
+        hg_ui = mercurial.ui.ui()
+        hg_repo = mercurial.hg.repository(hg_ui, gMercurialRepo)
+        hg_ui.readconfig(os.path.join(gMercurialRepo, ".hg", "hgrc"))
+        hg_ui.pushbuffer()
+        # Pull repo, but don't update so we don't conflict with whatever the test
+        # daemon is doing with it
+        mercurial.commands.pull(hg_ui, hg_repo, check=False, update=False)
+        hg_ui.popbuffer()
+
+    # Get revisions
+    try:
+        hg_ui.pushbuffer()
+        mercurial.commands.log(
+            hg_ui, hg_repo, rev=["%s" % (build_a['name'],)], template="{rev}", date="", user=None, follow=None)
+        a_rev = int(hg_ui.popbuffer())
+        hg_ui.pushbuffer()
+        mercurial.commands.log(
+            hg_ui, hg_repo, rev=["%s" % (build_b['name'],)], template="{rev}", date="", user=None, follow=None)
+        b_rev = int(hg_ui.popbuffer())
+    except Exception as e:
+        # mercurial throws all kinds of fun exceptions for bad input
+        print("WARNING: Couldn't lookup ordering of commits with identical timestamp: %s / %s (%s: %s)" %
+              (build_a[1], build_b[1], type(e), e))
+        return 0
+
+    print("Builds %s and %s have identical timestamp, using rev numbers %u and %u" %
+          (build_a['name'], build_b['name'], a_rev, b_rev))
+    return 1 if a_rev > b_rev else -1 if b_rev > a_rev else 0
 
 print("Sorting builds...")
 builds = sorted(builds, cmp=build_sort)
@@ -196,88 +206,91 @@ def build_sort(build_a, build_b):
 gSeriesNames = [y for x in gTests.values() for y in x['series'].keys()]
 
 data = {
-  'series' : dict((n, []) for n in gSeriesNames),
-  'builds' : []
+    'series': dict((n, []) for n in gSeriesNames),
+    'builds': []
 }
 
 # Open the old file, if possible, to skip generating redundant data
 old_data = None
 old_series_file = os.path.join(gOutDir, gSeriesName + '.json.gz')
 if os.path.exists(old_series_file):
-  last_series = gzip.open(old_series_file, 'r')
-  old_data = json.loads(last_series.read())
-  last_series.close()
-  # Old builds by index
-  old_builds_map = {}
-  for i in range(len(old_data["builds"])):
-    old_builds_map[old_data["builds"][i]["revision"]] = i
+    last_series = gzip.open(old_series_file, 'r')
+    old_data = json.loads(last_series.read())
+    last_series.close()
+    # Old builds by index
+    old_builds_map = {}
+    for i in range(len(old_data["builds"])):
+        old_builds_map[old_data["builds"][i]["revision"]] = i
 
 # Helper to find a node by datapoint
+
+
 def _findNode(nodes, datapoint, nodeize):
-  node = nodes
-  if nodeize:
-    for branch in datapoint.split(nodeize):
-      if node and branch in node:
-        node = node[branch]
-      else:
-        return None
-    return node
-  else:
-    return nodes.get(datapoint)
+    node = nodes
+    if nodeize:
+        for branch in datapoint.split(nodeize):
+            if node and branch in node:
+                node = node[branch]
+            else:
+                return None
+        return node
+    else:
+        return nodes.get(datapoint)
 
 i = 0
 for build in builds:
-  i += 1
+    i += 1
 
-  # Lookup tests for this build
-  testdata = {}
-  for testname in gTests.keys():
-    testdata[testname] = { 'time' : None, 'id' : None, 'nodes' : {} }
+    # Lookup tests for this build
+    testdata = {}
+    for testname in gTests.keys():
+        testdata[testname] = {'time': None, 'id': None, 'nodes': {}}
 
-    # Get latest test for this build
-    cur.execute('''SELECT id, time FROM benchtester_tests
+        # Get latest test for this build
+        cur.execute('''SELECT id, time FROM benchtester_tests
                    WHERE name = ? AND build_id = ? AND successful = 1
                    ORDER BY time DESC LIMIT 1''', [testname, build['id']])
-    testrow = cur.fetchone()
-    if not testrow:
-      continue
-
-    testdata[testname]['time'] = testrow['time']
-    testdata[testname]['id'] = testrow['id']
-
-  test_ids = [testdata[testname]['id'] for testname in gTests.keys()]
-
-  #
-  # Determine if we should process this build or use the existing data
-  #
-  if old_data and build['name'] in old_builds_map and old_data['builds'][old_builds_map[build['name']]]['test_ids'] == test_ids:
-    print("[%u/%u] Using existing data for build %s" % (i, len(builds), build['name']))
-    oldindex = old_builds_map[build['name']]
-    data['builds'].append(old_data['builds'][oldindex])
-    for sname in gSeriesNames:
-      if sname in old_data['series']:
-        data['series'][sname].append(old_data['series'][sname][oldindex])
-      else:
-        # Fill null in for newly-added series. We'll regenerate these by hand
-        # if desired, but forcing-regenerate means we have to de-archive all
-        # old DBs when the datapoint may only be in recent tests anyway
-        data['series'][sname].append(None)
-  else:
-    print("[%u/%u] Processing build %s" % (i, len(builds), build['name']))
-    # Fill builds
-    data['builds'].append({ 'revision' : build['name'], 'time' : build['time'], 'test_ids' : test_ids })
+        testrow = cur.fetchone()
+        if not testrow:
+            continue
+
+        testdata[testname]['time'] = testrow['time']
+        testdata[testname]['id'] = testrow['id']
+
+    test_ids = [testdata[testname]['id'] for testname in gTests.keys()]
 
     #
-    # For each test gTests references, pull all of its data into testdata
+    # Determine if we should process this build or use the existing data
     #
-    for testname in gTests.keys():
-      if testname in gTests:
-        nodeize = gTests[testname].get('nodeize')
-      else:
-        nodeize = False
+    if old_data and build['name'] in old_builds_map and old_data['builds'][old_builds_map[build['name']]]['test_ids'] == test_ids:
+        print("[%u/%u] Using existing data for build %s" % (i, len(builds), build['name']))
+        oldindex = old_builds_map[build['name']]
+        data['builds'].append(old_data['builds'][oldindex])
+        for sname in gSeriesNames:
+            if sname in old_data['series']:
+                data['series'][sname].append(old_data['series'][sname][oldindex])
+            else:
+                # Fill null in for newly-added series. We'll regenerate these by hand
+                # if desired, but forcing-regenerate means we have to de-archive all
+                # old DBs when the datapoint may only be in recent tests anyway
+                data['series'][sname].append(None)
+    else:
+        print("[%u/%u] Processing build %s" % (i, len(builds), build['name']))
+        # Fill builds
+        data['builds'].append(
+            {'revision': build['name'], 'time': build['time'], 'test_ids': test_ids})
+
+        #
+        # For each test gTests references, pull all of its data into testdata
+        #
+        for testname in gTests.keys():
+            if testname in gTests:
+                nodeize = gTests[testname].get('nodeize')
+            else:
+                nodeize = False
 
-      # Pull all data for latest run of this test on this build
-      allrows = cur.execute('''SELECT dp.name AS datapoint,
+            # Pull all data for latest run of this test on this build
+            allrows = cur.execute('''SELECT dp.name AS datapoint,
                                       c.name AS checkpoint,
                                       p.name AS process,
                                       d.iteration, d.value, d.units, d.kind
@@ -290,137 +303,138 @@ def _findNode(nodes, datapoint, nodeize):
                                                  AND p.id = d.proc_id
                             ''', [testdata[testname]['id']])
 
-      # NB: For now kind is ignored, anything but the Main process is ignored
-
-      # Sort data, splitting it up into nodes if requested. Calculate the value
-      # of each node - either a sum of its childnodes, or its explicit value if
-      # given. The idea is to reduce the amount of data juggling the frontend
-      # needs to do.
-      for row in allrows:
-        if row['process'] != 'Main':
-          continue
-
-        datapoint = row['datapoint']
-        units = unit_map.get(row['units'])
-        if not units:
-          print("skipping unhandled unit %s for %s" % (row['units'], datapoint))
-          continue
-
-        # Prefix the reporter name, e.g. "Iteration 1/MaxMem/<reporter>" so
-        # that it fits nicely into a tree.
-        datapoint = "Iteration %u/%s/%s" % (row['iteration'], row['checkpoint'], datapoint)
-
-        if nodeize:
-          # Note that we perserve null values as 'none', to differentiate missing data from values of 0
-          cursor = testdata[testname]['nodes']
-          thisnode = datapoint.split(nodeize)
-          for n in range(len(thisnode)):
-            leaf = thisnode[n]
-            cursor.setdefault(leaf, {})
-            cursor = cursor[leaf]
-            # Nodes can have a value *and* childnodes, so we set _val for specific
-            # values, and _sum for derived childnodes
-            if n == len(thisnode) - 1:
-              cursor['_units'] = units
-              cursor['_val'] = row['value']
-
-            # discard() will make this the canonical units if no explicit value
-            # for this node shows up.
-            if '_childunits' in cursor and cursor['_childunits'] != units:
-              cursor['_childunits'] = 'mixed'
-            else:
-              cursor['_childunits'] = units
-
-            if not '_sum' in cursor or cursor['_sum'] == None:
-              cursor['_sum'] = row['value']
-            elif row['value'] != None:
-              cursor['_sum'] += row['value']
-        else:
-          # Flat data
-          # For types with units, we use [ 'unit', val ] pairs
-          val = [ units, row['value'] ] if units else row['value']
-          testdata[testname]['nodes'][row['datapoint']] = val
-
-    # Discard duplicate _sum/_val data after totalling, flatten node if there
-    # are no children
-    def discard(node):
-      # If no explicit value or units, use the sum/childunits
-      if '_val' not in node:
-        node['_val'] = node.get('_sum')
-      if '_units' not in node:
-        node['_units'] = node.get('_childunits')
-      if '_sum' in node:
-        del node['_sum']
-      if '_childunits' in node:
-        del node['_childunits']
-      # Bytes is the default unit
-      if node.get('_units') == 'bytes':
-        del node['_units']
-      for x in node:
-        if x not in [ '_val', '_units' ]:
-          discard(node[x])
-          # Just _val, no _units or _sum, replace node with just raw value
-          if len(node[x]) == 1:
-            node[x] = node[x]['_val']
-    for x in testdata:
-      discard(testdata[x]['nodes'])
-
-    #
-    # Build all series [[x,y], ...] from testdata object
-    #
-    for test, testinfo in gTests.items():
-      for sname, sinfo in testinfo['series'].items():
-        nodes = testdata[test]['nodes']
-        # Is this nodeized data?
-        nodeize = gTests[test].get('nodeize')
-
-        node = None
-        if type(sinfo['datapoint']) == list:
-          datapoint = None
-          # If datapoint has alternate names, find the first one defined in the
-          # nodes
-          for dp in sinfo['datapoint']:
-            node = _findNode(nodes, dp, nodeize)
-            if node:
-              break
-        else:
-          node = _findNode(nodes, sinfo['datapoint'], nodeize)
-
-        if nodeize:
-          if node == None:
-            value = None
-          elif type(node) in [ int, long ]:
-            value = node
-          else:
-            value = node.get('_val')
-        else:
-          # Flat data
-          value = node
-
-        data['series'][sname].append(value)
-
-    #
-    # Discard data for tests not requested to be dumped
-    #
-    for testname in testdata.keys():
-      if not testname in gTests.keys() or \
-         not gTests[testname].get('dump'):
-        del testdata[testname]
-
-    #
-    # Write out the test data for this build into <buildname>.json.gz
-    #
-    testfile = gzip.open(os.path.join(gOutDir, build['name'] + '.json.gz'), 'w', 9)
-    testfile.write(bytes(json.dumps(testdata, indent=2), encoding="utf-8"))
-    testfile.write(bytes('\n', encoding="utf-8"))
-    testfile.close()
+            # NB: For now kind is ignored, anything but the Main process is ignored
+
+            # Sort data, splitting it up into nodes if requested. Calculate the value
+            # of each node - either a sum of its childnodes, or its explicit value if
+            # given. The idea is to reduce the amount of data juggling the frontend
+            # needs to do.
+            for row in allrows:
+                if row['process'] != 'Main':
+                    continue
+
+                datapoint = row['datapoint']
+                units = unit_map.get(row['units'])
+                if not units:
+                    print("skipping unhandled unit %s for %s" % (row['units'], datapoint))
+                    continue
+
+                # Prefix the reporter name, e.g. "Iteration 1/MaxMem/<reporter>" so
+                # that it fits nicely into a tree.
+                datapoint = "Iteration %u/%s/%s" % (row['iteration'], row['checkpoint'], datapoint)
+
+                if nodeize:
+                    # Note that we perserve null values as 'none', to differentiate missing
+                    # data from values of 0
+                    cursor = testdata[testname]['nodes']
+                    thisnode = datapoint.split(nodeize)
+                    for n in range(len(thisnode)):
+                        leaf = thisnode[n]
+                        cursor.setdefault(leaf, {})
+                        cursor = cursor[leaf]
+                        # Nodes can have a value *and* childnodes, so we set _val for specific
+                        # values, and _sum for derived childnodes
+                        if n == len(thisnode) - 1:
+                            cursor['_units'] = units
+                            cursor['_val'] = row['value']
+
+                        # discard() will make this the canonical units if no explicit value
+                        # for this node shows up.
+                        if '_childunits' in cursor and cursor['_childunits'] != units:
+                            cursor['_childunits'] = 'mixed'
+                        else:
+                            cursor['_childunits'] = units
+
+                        if not '_sum' in cursor or cursor['_sum'] == None:
+                            cursor['_sum'] = row['value']
+                        elif row['value'] != None:
+                            cursor['_sum'] += row['value']
+                else:
+                    # Flat data
+                    # For types with units, we use [ 'unit', val ] pairs
+                    val = [units, row['value']] if units else row['value']
+                    testdata[testname]['nodes'][row['datapoint']] = val
+
+        # Discard duplicate _sum/_val data after totalling, flatten node if there
+        # are no children
+        def discard(node):
+            # If no explicit value or units, use the sum/childunits
+            if '_val' not in node:
+                node['_val'] = node.get('_sum')
+            if '_units' not in node:
+                node['_units'] = node.get('_childunits')
+            if '_sum' in node:
+                del node['_sum']
+            if '_childunits' in node:
+                del node['_childunits']
+            # Bytes is the default unit
+            if node.get('_units') == 'bytes':
+                del node['_units']
+            for x in node:
+                if x not in ['_val', '_units']:
+                    discard(node[x])
+                    # Just _val, no _units or _sum, replace node with just raw value
+                    if len(node[x]) == 1:
+                        node[x] = node[x]['_val']
+        for x in testdata:
+            discard(testdata[x]['nodes'])
+
+        #
+        # Build all series [[x,y], ...] from testdata object
+        #
+        for test, testinfo in gTests.items():
+            for sname, sinfo in testinfo['series'].items():
+                nodes = testdata[test]['nodes']
+                # Is this nodeized data?
+                nodeize = gTests[test].get('nodeize')
+
+                node = None
+                if type(sinfo['datapoint']) == list:
+                    datapoint = None
+                    # If datapoint has alternate names, find the first one defined in the
+                    # nodes
+                    for dp in sinfo['datapoint']:
+                        node = _findNode(nodes, dp, nodeize)
+                        if node:
+                            break
+                else:
+                    node = _findNode(nodes, sinfo['datapoint'], nodeize)
+
+                if nodeize:
+                    if node == None:
+                        value = None
+                    elif type(node) in [int, long]:
+                        value = node
+                    else:
+                        value = node.get('_val')
+                else:
+                    # Flat data
+                    value = node
+
+                data['series'][sname].append(value)
+
+        #
+        # Discard data for tests not requested to be dumped
+        #
+        for testname in testdata.keys():
+            if not testname in gTests.keys() or \
+               not gTests[testname].get('dump'):
+                del testdata[testname]
+
+        #
+        # Write out the test data for this build into <buildname>.json.gz
+        #
+        testfile = gzip.open(os.path.join(gOutDir, build['name'] + '.json.gz'), 'w', 9)
+        testfile.write(bytes(json.dumps(testdata, indent=2), encoding="utf-8"))
+        testfile.write(bytes('\n', encoding="utf-8"))
+        testfile.close()
 
 data['generated'] = time.time()
 data['series_info'] = {}
 for test in gTests.keys():
-  for series in gTests[test]['series'].keys():
-    data['series_info'][series] = gTests[test]['series'][series]
-    data['series_info'][series]['test'] = test
+    for series in gTests[test]['series'].keys():
+        data['series_info'][series] = gTests[test]['series'][series]
+        data['series_info'][series]['test'] = test
 
 print("[%u/%u] Finished, writing %s.json.gz" % (i, i, gSeriesName))
 # Write out all the generated series into series.json.gz

From 0d873be4f8d40b6927c251531e6829f7a890d365 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Fri, 27 Mar 2015 11:24:22 -0700
Subject: [PATCH 12/23] Fix calculating heap-unclassified

heap-unclassified wasn't updated to take into account the new reporting
format. This wraps the computed value in the proper report object
('unit', 'val', 'kind') and reads heap-allocated from its 'val' field.
---
 benchtester/checkpoint.js | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/benchtester/checkpoint.js b/benchtester/checkpoint.js
index 9c2c406..025e766 100644
--- a/benchtester/checkpoint.js
+++ b/benchtester/checkpoint.js
@@ -59,8 +59,11 @@ function createCheckpoint(aLabel) {
     var keys = Object.keys(result['reports']);
     for (var idx = 0; idx < keys.length; idx++) {
       let proc = keys[idx];
-      result['reports'][proc]['explicit/heap-unclassified'] =
-          result['reports'][proc]['heap-allocated'] - knownHeap[proc];
+      result['reports'][proc]['explicit/heap-unclassified'] = {
+        'unit': Ci.nsIMemoryReporter.UNITS_BYTES,
+        'val': result['reports'][proc]['heap-allocated']['val'] - knownHeap[proc],
+        'kind': Ci.nsIMemoryReporter.KIND_HEAP
+      };
     }
 
     marionetteScriptFinished(result);

From 277bf2f9ae548bbf90d5d18e5e2656437720fae6 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Fri, 27 Mar 2015 11:28:10 -0700
Subject: [PATCH 13/23] Add process name to node path

This adds the process name to the node path, e.g.:
    'Iteration 5/StartSettled/*Main*/heap-allocated'
Note: e10s currently only has one 'Main' process and one 'Web Content'
process.
---
 create_graph_json.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/create_graph_json.py b/create_graph_json.py
index 237a1a5..0d00a65 100755
--- a/create_graph_json.py
+++ b/create_graph_json.py
@@ -303,28 +303,25 @@ def _findNode(nodes, datapoint, nodeize):
                                                  AND p.id = d.proc_id
                             ''', [testdata[testname]['id']])
 
-            # NB: For now kind is ignored, anything but the Main process is ignored
+            # NB: For now kind is ignored
 
             # Sort data, splitting it up into nodes if requested. Calculate the value
             # of each node - either a sum of its childnodes, or its explicit value if
             # given. The idea is to reduce the amount of data juggling the frontend
             # needs to do.
             for row in allrows:
-                if row['process'] != 'Main':
-                    continue
-
                 datapoint = row['datapoint']
                 units = unit_map.get(row['units'])
                 if not units:
                     print("skipping unhandled unit %s for %s" % (row['units'], datapoint))
                     continue
 
-                # Prefix the reporter name, e.g. "Iteration 1/MaxMem/<reporter>" so
+                # Prefix the reporter name, e.g. "Iteration 1/StartSettled/Main/<reporter>" so
                 # that it fits nicely into a tree.
-                datapoint = "Iteration %u/%s/%s" % (row['iteration'], row['checkpoint'], datapoint)
+                datapoint = "Iteration %u/%s/%s/%s" % (row['iteration'], row['checkpoint'], row['process'], datapoint)
 
                 if nodeize:
-                    # Note that we perserve null values as 'none', to differentiate missing
+                    # Note that we preserve null values as 'none', to differentiate missing
                     # data from values of 0
                     cursor = testdata[testname]['nodes']
                     thisnode = datapoint.split(nodeize)

From deb1e39d06c59bc68dc6972074627a2ee34003aa Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Fri, 27 Mar 2015 11:30:48 -0700
Subject: [PATCH 14/23] Update series datapoints to handle process names

This also adds series datapoints for the 'Web Content' process.
---
 create_graph_json.py | 147 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 18 deletions(-)

diff --git a/create_graph_json.py b/create_graph_json.py
index 0d00a65..ec3413f 100755
--- a/create_graph_json.py
+++ b/create_graph_json.py
@@ -41,25 +41,111 @@
         "nodeize": "/",
         "dump": True,
         "series": {
-            "MaxMemoryV2": {"datapoint": "Iteration 5/TabsOpen/explicit"},
-            "MaxMemorySettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/explicit"},
-            "MaxMemoryForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/explicit"},
-            "MaxMemoryResidentV2": {"datapoint": "Iteration 5/TabsOpen/resident"},
-            "MaxMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/resident"},
-            "MaxMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/resident"},
-            "StartMemoryV2": {"datapoint": "Iteration 1/Start/explicit"},
-            "StartMemoryResidentV2": {"datapoint": "Iteration 1/Start/resident"},
-            "StartMemorySettledV2": {"datapoint": "Iteration 1/StartSettled/explicit"},
-            "StartMemoryResidentSettledV2": {"datapoint": "Iteration 1/StartSettled/resident"},
-            "EndMemoryV2": {"datapoint": "Iteration 5/TabsClosed/explicit"},
-            "EndMemoryResidentV2": {"datapoint": "Iteration 5/TabsClosed/resident"},
-            "EndMemorySettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/explicit"},
-            "EndMemoryForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/explicit"},
-            "EndMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/resident"},
-            "EndMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/resident"},
-            "MaxHeapUnclassifiedV2": {"datapoint": "Iteration 5/TabsOpenSettled/explicit/heap-unclassified"},
+            "MaxMemoryV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpen/Main/explicit",
+                    "Iteration 5/TabsOpen/explicit",
+                ]
+            },
+            "MaxMemorySettledV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenSettled/Main/explicit",
+                    "Iteration 5/TabsOpenSettled/explicit",
+                ]
+            },
+            "MaxMemoryForceGCV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenForceGC/Main/explicit",
+                    "Iteration 5/TabsOpenForceGC/explicit",
+                ]
+            },
+            "MaxMemoryResidentV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpen/Main/resident",
+                    "Iteration 5/TabsOpen/resident",
+                ]
+            },
+            "MaxMemoryResidentSettledV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenSettled/Main/resident",
+                    "Iteration 5/TabsOpenSettled/resident",
+                ]
+            },
+            "MaxMemoryResidentForceGCV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenForceGC/Main/resident",
+                    "Iteration 5/TabsOpenForceGC/resident",
+                ]
+            },
+            "StartMemoryV2": {
+                "datapoint": [
+                    "Iteration 1/Start/Main/explicit",
+                    "Iteration 1/Start/explicit",
+                ]
+            },
+            "StartMemoryResidentV2": {
+                "datapoint": [
+                    "Iteration 1/Start/Main/resident",
+                    "Iteration 1/Start/resident",
+                ]
+            },
+            "StartMemorySettledV2": {
+                "datapoint": [
+                    "Iteration 1/StartSettled/Main/explicit",
+                    "Iteration 1/StartSettled/explicit",
+                ]
+            },
+            "StartMemoryResidentSettledV2": {
+                "datapoint": [
+                    "Iteration 1/StartSettled/Main/resident",
+                    "Iteration 1/StartSettled/resident",
+                ]
+            },
+            "EndMemoryV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosed/Main/explicit",
+                    "Iteration 5/TabsClosed/explicit",
+                ]
+            },
+            "EndMemoryResidentV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosed/Main/resident",
+                    "Iteration 5/TabsClosed/resident",
+                ]
+            },
+            "EndMemorySettledV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosedSettled/Main/explicit",
+                    "Iteration 5/TabsClosedSettled/explicit",
+                ]
+            },
+            "EndMemoryForceGCV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosedForceGC/Main/explicit",
+                    "Iteration 5/TabsClosedForceGC/explicit",
+                ]
+            },
+            "EndMemoryResidentSettledV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosedSettled/Main/resident",
+                    "Iteration 5/TabsClosedSettled/resident",
+                ]
+            },
+            "EndMemoryResidentForceGCV2": {
+                "datapoint": [
+                    "Iteration 5/TabsClosedForceGC/Main/resident",
+                    "Iteration 5/TabsClosedForceGC/resident",
+                ]
+            },
+            "MaxHeapUnclassifiedV2": {
+                "datapoint": [
+                    "Iteration 5/TabsOpenSettled/Main/explicit/heap-unclassified",
+                    "Iteration 5/TabsOpenSettled/explicit/heap-unclassified",
+                ]
+            },
             "MaxJSV2": {
                 "datapoint": [
+                    "Iteration 5/TabsOpenSettled/Main/js-main-runtime",
                     # As of Jul 2012
                     "Iteration 5/TabsOpenSettled/js-main-runtime",
                     # Pre-Jul 2012
@@ -72,13 +158,34 @@
             },
             "MaxImagesV2": {
                 "datapoint": [
+                    "Iteration 5/TabsOpenSettled/Main/explicit/images",
                     "Iteration 5/TabsOpenSettled/explicit/images",
                     # Old ~FF4 reporters
                     "Iteration 5/TabsOpenSettled/images",
                     # Brief period in may 2011 before heap-used became explicit
                     "Iteration 5/TabsOpenSettled/heap-used/images"
                 ]
-            }
+            },
+
+            "Web Content MaxMemoryV2": {"datapoint": "Iteration 5/TabsOpen/Web Content/explicit"},
+            "Web Content MaxMemorySettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/Web Content/explicit"},
+            "Web Content MaxMemoryForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/Web Content/explicit"},
+            "Web Content MaxMemoryResidentV2": {"datapoint": "Iteration 5/TabsOpen/Web Content/resident"},
+            "Web Content MaxMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsOpenSettled/Web Content/resident"},
+            "Web Content MaxMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsOpenForceGC/Web Content/resident"},
+            "Web Content StartMemoryV2": {"datapoint": "Iteration 1/Start/Web Content/explicit"},
+            "Web Content StartMemoryResidentV2": {"datapoint": "Iteration 1/Start/Web Content/resident"},
+            "Web Content StartMemorySettledV2": {"datapoint": "Iteration 1/StartSettled/Web Content/explicit"},
+            "Web Content StartMemoryResidentSettledV2": {"datapoint": "Iteration 1/StartSettled/Web Content/resident"},
+            "Web Content EndMemoryV2": {"datapoint": "Iteration 5/TabsClosed/Web Content/explicit"},
+            "Web Content EndMemoryResidentV2": {"datapoint": "Iteration 5/TabsClosed/Web Content/resident"},
+            "Web Content EndMemorySettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/Web Content/explicit"},
+            "Web Content EndMemoryForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/Web Content/explicit"},
+            "Web Content EndMemoryResidentSettledV2": {"datapoint": "Iteration 5/TabsClosedSettled/Web Content/resident"},
+            "Web Content EndMemoryResidentForceGCV2": {"datapoint": "Iteration 5/TabsClosedForceGC/Web Content/resident"},
+            "Web Content MaxHeapUnclassifiedV2": {"datapoint": "Iteration 5/TabsOpenSettled/Web Content/explicit/heap-unclassified"},
+            "Web Content MaxJSV2": {"datapoint": "Iteration 5/TabsOpenSettled/Web Content/js-main-runtime"},
+            "Web Content MaxImagesV2": {"datapoint": "Iteration 5/TabsOpenSettled/Web Content/explicit/images"}
         }
     },
     "Android-ARMv6": {
@@ -99,6 +206,10 @@
 
 # Reuse default tests for android, but s/Iteration 5/Iteration 1/
 for k, v in gTests['Slimtest-TalosTP5-Slow']['series'].iteritems():
+    # Only use the "Main" entries as a template
+    if "Web Content" in k:
+        continue
+
     if type(v['datapoint']) is list:
         out = []
         for x in v['datapoint']:

From eac108defc8fff6d52676804aac34a802c221f5d Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Fri, 27 Mar 2015 11:34:30 -0700
Subject: [PATCH 15/23] Add e10s graphs

Updates to handle the new datapoint format. Adds series for 'Web
Content' and specifies the existing graphs are for 'Main'.
---
 html/slimyet.js | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/html/slimyet.js b/html/slimyet.js
index d45fd40..deb7b91 100644
--- a/html/slimyet.js
+++ b/html/slimyet.js
@@ -523,7 +523,7 @@ var gReleaseLookup = function() {
 // /data/areweslimyet.json and comments below. These are exported from the full
 // test database by create_graph_json.py
 var gSeries = {
-  "Resident Memory" : {
+  "Main Resident Memory" : {
     'StartMemoryResidentV2':         "RSS: Fresh start",
     'StartMemoryResidentSettledV2':  "RSS: Fresh start [+30s]",
     'MaxMemoryResidentV2':           "RSS: After TP5",
@@ -533,7 +533,7 @@ var gSeries = {
     'EndMemoryResidentSettledV2':    "RSS: After TP5, tabs closed [+30s]",
     'EndMemoryResidentForceGCV2':    "RSS: After TP5, tabs closed [+30s, forced GC]"
   },
-  "Explicit Memory" : {
+  "Main Explicit Memory" : {
     'StartMemoryV2':         "Explicit: Fresh start",
     'StartMemorySettledV2':  "Explicit: Fresh start [+30s]",
     'MaxMemoryV2':           "Explicit: After TP5",
@@ -543,10 +543,35 @@ var gSeries = {
     'EndMemorySettledV2':    "Explicit: After TP5, tabs closed [+30s]",
     'EndMemoryForceGCV2':    "Explicit: After TP5, tabs closed [+30s, forced GC]"
   },
-  "Miscellaneous Measurements" : {
+  "Main Miscellaneous Measurements" : {
     'MaxHeapUnclassifiedV2':  "Heap Unclassified: After TP5 [+30s]",
     'MaxJSV2':                "JS: After TP5 [+30s]",
     'MaxImagesV2':            "Images: After TP5 [+30s]"
+  },
+  "Web Content Resident Memory" : {
+    'Web Content StartMemoryResidentV2':         "RSS: Fresh start",
+    'Web Content StartMemoryResidentSettledV2':  "RSS: Fresh start [+30s]",
+    'Web Content MaxMemoryResidentV2':           "RSS: After TP5",
+    'Web Content MaxMemoryResidentSettledV2':    "RSS: After TP5 [+30s]",
+    'Web Content MaxMemoryResidentForceGCV2':    "RSS: After TP5 [+30s, forced GC]",
+    'Web Content EndMemoryResidentV2':           "RSS: After TP5, tabs closed",
+    'Web Content EndMemoryResidentSettledV2':    "RSS: After TP5, tabs closed [+30s]",
+    'Web Content EndMemoryResidentForceGCV2':    "RSS: After TP5, tabs closed [+30s, forced GC]"
+  },
+  "Web Content Explicit Memory" : {
+    'Web Content StartMemoryV2':         "Explicit: Fresh start",
+    'Web Content StartMemorySettledV2':  "Explicit: Fresh start [+30s]",
+    'Web Content MaxMemoryV2':           "Explicit: After TP5",
+    'Web Content MaxMemorySettledV2':    "Explicit: After TP5 [+30s]",
+    'Web Content MaxMemoryForceGCV2':    "Explicit: After TP5 [+30s, forced GC]",
+    'Web Content EndMemoryV2':           "Explicit: After TP5, tabs closed",
+    'Web Content EndMemorySettledV2':    "Explicit: After TP5, tabs closed [+30s]",
+    'Web Content EndMemoryForceGCV2':    "Explicit: After TP5, tabs closed [+30s, forced GC]"
+  },
+  "Web Content Miscellaneous Measurements" : {
+    'Web Content MaxHeapUnclassifiedV2':  "Heap Unclassified: After TP5 [+30s]",
+    'Web Content MaxJSV2':                "JS: After TP5 [+30s]",
+    'Web Content MaxImagesV2':            "Images: After TP5 [+30s]"
   }
 };
 
@@ -556,6 +581,9 @@ var gHgBaseUrl = 'https://hg.mozilla.org/integration/mozilla-inbound';
 // prepend 'Android' to series names.
 if (gQueryVars['mobile']) {
   for (var series in gSeries) {
+    if (series.indexOf('Web Content') === 0) // JS strings have no startswith(); prefix test
+      continue; // 'Web Content' groups get no Android-renamed series
+
     for (var dp in gSeries[series]) {
       gSeries[series]['Android'+dp] = gSeries[series][dp].replace("After TP5", "After tabs");
       delete gSeries[series][dp];
@@ -817,8 +845,8 @@ function memoryTreeNode(target, data, select, path, depth) {
 
   // TODO Use 'mixed' units as an indicator of container nodes instead of hard
   //      coding.
-  var showVal = depth >= 2;
-  var showPct = depth >= 3;
+  var showVal = depth >= 3;
+  var showPct = depth >= 4;
 
   // if select is passed as "a/b/c", split it so it is an array
   if (typeof(select) == "string") {

From 02d56432854ea207d241a64e9e6769d4d59f3b5b Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@mozilla.com>
Date: Fri, 27 Mar 2015 14:36:09 -0700
Subject: [PATCH 16/23] Support e10s in the about memory exporter

Adds handling of the process name in the datapoint tree.
---
 html/about_memory_worker.js | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/html/about_memory_worker.js b/html/about_memory_worker.js
index 720a83c..ffe3c7b 100644
--- a/html/about_memory_worker.js
+++ b/html/about_memory_worker.js
@@ -51,7 +51,8 @@ onmessage = function(aEvent) {
 // @param {aPath} The node path.
 // @param {aData} The data node.
 // @param {aReports} The array of report entries that is being built.
-function checkpointToAboutMemory(aPath, aData, aReports) {
+// @param {aProcess} The process this report is for.
+function checkpointToAboutMemory(aPath, aData, aReports, aProcess) {
   function defval(aObj) {
     if (typeof(aObj) == 'number') {
       return aObj;
@@ -99,7 +100,7 @@ function checkpointToAboutMemory(aPath, aData, aReports) {
     // This is a leaf node.
     var report = {
       description: "",
-      process: "Main Process",
+      process: aProcess + " Process",
       amount: defval(aData),
       units: units(aData),
       path: aPath,
@@ -113,7 +114,12 @@ function checkpointToAboutMemory(aPath, aData, aReports) {
   var node;
   while (node = childern.shift()) {
     var nodePath = aPath != "" ? aPath + '/' + node : node;
-    checkpointToAboutMemory(nodePath, aData[node], aReports);
+    var process = aProcess;
+    if (!process) {
+      process = nodePath;
+      nodePath = "";
+    }
+    checkpointToAboutMemory(nodePath, aData[node], aReports, process);
   }
 }
 

From 797177e4238ee9a896d257c52618e5380cd7262c Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Mon, 30 Mar 2015 11:50:07 -0700
Subject: [PATCH 17/23] Add proper wait for new tab to open

Use built in |wait_for_condition| to check if the new tab has been
opened rather than a short sleep.
---
 benchtester/test_memory_usage.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benchtester/test_memory_usage.py b/benchtester/test_memory_usage.py
index df29846..842317a 100644
--- a/benchtester/test_memory_usage.py
+++ b/benchtester/test_memory_usage.py
@@ -253,8 +253,7 @@ def open_and_focus(self):
                                                           {'anonid': 'tabs-newtab-button'}))
             newtab_button.click()
 
-            # Janky workaround to make sure the tab is loaded
-            time.sleep(0.25)
+            self.wait_for_condition(lambda mn: len(mn.window_handles) == tabs_loaded + 1)
 
             # NB: The tab list isn't sorted, so we do a set diff to determine
             #     which is the new tab

From 9b8e871f248bcfa8f2a48d7c2cb314d5e490af6c Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Mon, 30 Mar 2015 11:51:28 -0700
Subject: [PATCH 18/23] Disable e10s first time user banner

Prevent 'Thanks for using e10s' message from showing up. Also adds a few
more e10s prefs just in case.
---
 benchtester/MarionetteTest.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/benchtester/MarionetteTest.py b/benchtester/MarionetteTest.py
index 6fd3a45..a28561b 100644
--- a/benchtester/MarionetteTest.py
+++ b/benchtester/MarionetteTest.py
@@ -61,6 +61,13 @@ def run_test(self, testname, testvars={}):
       "browser.tabs.remote.autostart": e10s,
       "browser.tabs.remote.autostart.1": e10s,
       "browser.tabs.remote.autostart.2": e10s,
+      "browser.tabs.remote.autostart.3": e10s,
+      "browser.tabs.remote.autostart.4": e10s,
+      "browser.tabs.remote.autostart.5": e10s,
+      "browser.tabs.remote.autostart.6": e10s,
+
+      # prevent "You're using e10s!" dialog from showing up
+      "browser.displayedE10SNotice": 1000,
 
       # We're not testing flash memory usage. Also: it likes to crash in VNC sessions.
       "plugin.disable": True,

From b9895ab7252facbc76c2e53a6d9d299df2bda135 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 2 Sep 2015 15:05:02 -0700
Subject: [PATCH 19/23] Specify startup timeout to work around marionette bug

---
 benchtester/MarionetteTest.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchtester/MarionetteTest.py b/benchtester/MarionetteTest.py
index a28561b..c3685c6 100644
--- a/benchtester/MarionetteTest.py
+++ b/benchtester/MarionetteTest.py
@@ -94,7 +94,8 @@ def run_test(self, testname, testvars={}):
                     profile=profile,
                     logger=logger,
                     address="localhost:%d" % self.port,
-                    gecko_log=self.gecko_log)
+                    gecko_log=self.gecko_log,
+                    startup_timeout=60)
 
     # Add test
     testpath = os.path.join(*testvars['test'])

From f53030c275006c0dbb65ba03e0da946295a97d6e Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 2 Sep 2015 15:37:02 -0700
Subject: [PATCH 20/23] Update window handles after opening a new tab

It's possible for the window handle to change after opening a new tab
and then navigating to a real URL when e10s is enabled.
---
 benchtester/test_memory_usage.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/benchtester/test_memory_usage.py b/benchtester/test_memory_usage.py
index 88ce63d..192947b 100644
--- a/benchtester/test_memory_usage.py
+++ b/benchtester/test_memory_usage.py
@@ -245,6 +245,7 @@ def open_and_focus(self):
         """
         page_to_load = self._urls[self._pages_loaded % len(self._urls)]
         tabs_loaded = len(self._tabs)
+        is_new_tab = False
 
         if tabs_loaded < self._maxTabs and tabs_loaded <= self._pages_loaded:
             full_tab_list = self.marionette.window_handles
@@ -265,6 +266,8 @@ def open_and_focus(self):
             self._tabs.append(new_tabs[0])
             tabs_loaded += 1
 
+            is_new_tab = True
+
         tab_idx = self._pages_loaded % self._maxTabs
 
         tab = self._tabs[tab_idx]
@@ -282,6 +285,16 @@ def open_and_focus(self):
             self.marionette.navigate(page_to_load)
             self.logger.debug("loaded!")
 
+        # On e10s the tab handle can change after actually loading content
+        if is_new_tab:
+          # First build a set up w/o the current tab
+          old_tabs = set(self._tabs)
+          old_tabs.remove(tab)
+          # Perform a set diff to get the (possibly) new handle
+          [new_tab] = set(self.marionette.window_handles) - old_tabs
+          # Update the tab list at the current index to preserve the tab ordering
+          self._tabs[tab_idx] = new_tab
+
         # give the page time to settle
         time.sleep(self._perTabPause)
 

From 5ab874443d71844a546c1ea14a6c45ed44ab1c23 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 4 Nov 2015 14:56:54 -0800
Subject: [PATCH 21/23] Add support for multiple content processes

A '--process_count' argument is added to specify the number of content
processes to use with e10s enabled. The value defaults to 1.
---
 benchtester/MarionetteTest.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchtester/MarionetteTest.py b/benchtester/MarionetteTest.py
index 5958b3d..b7c8962 100644
--- a/benchtester/MarionetteTest.py
+++ b/benchtester/MarionetteTest.py
@@ -25,6 +25,9 @@ def __init__(self, parent):
     parent.add_argument('--gecko_log',
                         help="Logfile for gecko output. Defaults to 'gecko.log'",
                         default=None)
+    parent.add_argument('--process_count',
+                         help="Number of e10s processes to use",
+                         default=1)
     self.name = "MarionetteTest"
     self.parent = parent
 
@@ -34,6 +37,8 @@ def setup(self):
     self.endurance_results = None
     self.port = int(self.parent.args['marionette_port'])
     self.gecko_log = self.parent.args['gecko_log']
+    self.process_count = int(self.parent.args['process_count'])
+    self.info("Process Count: %d " % self.process_count)
 
     return True
 
@@ -65,6 +70,7 @@ def run_test(self, testname, testvars={}):
       "browser.tabs.remote.autostart.4": e10s,
       "browser.tabs.remote.autostart.5": e10s,
       "browser.tabs.remote.autostart.6": e10s,
+      "dom.ipc.processCount": self.process_count,
 
       # prevent "You're using e10s!" dialog from showing up
       "browser.displayedE10SNotice": 1000,

From f39f6441b598e9af49470c294e1401f65577b5ae Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 4 Nov 2015 15:23:08 -0800
Subject: [PATCH 22/23] Handle multiple instances of the same process type

Normalize process names to include a process number if there are
multiple instances.

Example mapping:
  [ "Main" => "Main",
    "Web Content (123)" => "Web Content",
    "Web Content (234)" => "Web Content 2",
    "Web Content (345)" => "Web Content 3" ]

The first instance of the process name excludes the number so as to
avoid breaking graph generation for the AWSY website which expects names
without numbers.
---
 benchtester/BenchTester.py             |  38 +++++++--
 tests/benchtester/test_bench_tester.py | 108 +++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 6 deletions(-)
 create mode 100644 tests/benchtester/test_bench_tester.py

diff --git a/benchtester/BenchTester.py b/benchtester/BenchTester.py
index ef9a474..d9ceaea 100644
--- a/benchtester/BenchTester.py
+++ b/benchtester/BenchTester.py
@@ -161,6 +161,36 @@ def load_module(self, modname):
 
     return True
 
+  @staticmethod
+  def map_process_names(process_names):
+    # Return a dict mapping each raw process name to a normalized name: the pid
+    # portion (whitespace + "(<digits>)") is cut off, and repeats of the same base
+    # name get " 2", " 3", ... in the iteration order of |process_names|.
+    # Given: [ "Main", "Web Content (123)", "Web Content (345)", "Web Content (678)" ]
+    # Mapping: "Main" => "Main",  "Web Content (123)" => "Web Content",
+    #          "Web Content (345)" => "Web Content 2",
+    #          "Web Content (678)" => "Web Content 3"
+    proc_name_counts = {}
+    proc_name_mapping = {}
+
+    for full_process_name in process_names:
+      # Capture everything before the "(<digits>)" pid portion as the base name
+      process_re = r'(.*)\s+\(\d+\)'
+      m = re.match(process_re, full_process_name)
+      if m:
+        proc_name = m.group(1)
+        if proc_name in proc_name_counts:
+          proc_name_counts[proc_name] += 1
+          proc_name_mapping[full_process_name] = "%s %d" % (proc_name, proc_name_counts[proc_name])
+        else:
+          # Leave the first entry w/o a number
+          proc_name_counts[proc_name] = 1
+          proc_name_mapping[full_process_name] = proc_name
+      else:
+        proc_name_mapping[full_process_name] = full_process_name
+
+    return proc_name_mapping
+
   def insert_results(self, test_id, results):
     # - results is an array of iterations
     # - iterations is an array of checkpoints
@@ -181,14 +211,10 @@ def insert_results(self, test_id, results):
           cur.execute("INSERT INTO benchtester_checkpoints(name) VALUES (?)", (label, ))
           checkpoint_id = cur.lastrowid
 
+        proc_name_mapping = self.map_process_names(checkpoint['reports'])
         for process_name, reports in checkpoint['reports'].iteritems():
           # reports is a dictionary of datapoint_name: { val, unit, kind }
-
-          # Strip pid portion of process name
-          process_re = r'(.*)\s+\(.*\)'
-          m = re.match(process_re, process_name)
-          if m:
-            process_name = m.group(1)
+          process_name = proc_name_mapping[process_name]
 
           # insert process name, get process_id
           cur.execute("SELECT id FROM benchtester_procs WHERE name = ?", (process_name, ))
diff --git a/tests/benchtester/test_bench_tester.py b/tests/benchtester/test_bench_tester.py
new file mode 100644
index 0000000..aa3be53
--- /dev/null
+++ b/tests/benchtester/test_bench_tester.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+import unittest
+
+# Janky hack to work around not having modules setup
+sys.path.insert(0, "../../benchtester")
+from BenchTester import BenchTester
+
+class BenchTesterTest(unittest.TestCase):
+
+  def test_process_name_mapping(self):
+    # One name w/ a pid suffix, one w/o: the pid is dropped, "Main" is unchanged
+    proc_names_list = [
+        "Main",
+        "Web Content (1234)"
+        ]
+
+    expected_mappings = {
+        "Main": "Main",
+        "Web Content (1234)": "Web Content"
+        }
+
+    proc_name_mappings = BenchTester.map_process_names(proc_names_list)
+    self.assertEqual(expected_mappings, proc_name_mappings)
+
+    # Multiple processes of one type: the 2nd and later get a numeric suffix
+    proc_names_list = [
+        "Main",
+        "Web Content (1234)",
+        "Web Content (2345)",
+        "Web Content (3456)"
+        ]
+
+    expected_mappings = {
+        "Main": "Main",
+        "Web Content (1234)": "Web Content",
+        "Web Content (2345)": "Web Content 2",
+        "Web Content (3456)": "Web Content 3"
+        }
+
+    proc_name_mappings = BenchTester.map_process_names(proc_names_list)
+    self.assertEqual(expected_mappings, proc_name_mappings)
+
+    # Multiple processes of several types: each type is numbered independently
+    proc_names_list = [
+        "Main",
+        "Web Content (1234)",
+        "Web Content (2345)",
+        "Web Content (3456)",
+        "GMP (1234)",
+        "GMP (2345)"
+        ]
+
+    expected_mappings = {
+        "Main": "Main",
+        "Web Content (1234)": "Web Content",
+        "Web Content (2345)": "Web Content 2",
+        "Web Content (3456)": "Web Content 3",
+        "GMP (1234)": "GMP",
+        "GMP (2345)": "GMP 2"
+        }
+
+    proc_name_mappings = BenchTester.map_process_names(proc_names_list)
+    self.assertEqual(expected_mappings, proc_name_mappings)
+
+    # Same names as dict keys. NOTE(review): expected numbering assumes the dict iterates in written order - confirm
+    proc_names_dict = {
+        "Main": [],
+        "Web Content (1234)": [],
+        "Web Content (2345)": [],
+        "Web Content (3456)": [],
+        "GMP (1234)": [],
+        "GMP (2345)": []
+        }
+
+    proc_name_mappings = BenchTester.map_process_names(proc_names_dict)
+    self.assertEqual(expected_mappings, proc_name_mappings)
+
+    # Different pid orderings: numbers follow iteration order, not pid value (same dict-order caveat - confirm)
+    proc_names_dict = {
+        "Main": [],
+        "Web Content (2345)": [],
+        "Web Content (1234)": [],
+        "Web Content (3456)": [],
+        "GMP (2345)": [],
+        "GMP (1234)": []
+        }
+
+    expected_mappings = {
+        "Main": "Main",
+        "Web Content (2345)": "Web Content",
+        "Web Content (1234)": "Web Content 2",
+        "Web Content (3456)": "Web Content 3",
+        "GMP (2345)": "GMP",
+        "GMP (1234)": "GMP 2"
+        }
+
+    proc_name_mappings = BenchTester.map_process_names(proc_names_dict)
+    self.assertEqual(expected_mappings, proc_name_mappings)
+
+
+if __name__ == '__main__':
+  unittest.main()

From 90caa55e63ab177ce9ffeb1f7f66188f6497d2f5 Mon Sep 17 00:00:00 2001
From: Eric Rahm <ericrahm@gmail.com>
Date: Wed, 11 Nov 2015 17:22:29 -0800
Subject: [PATCH 23/23] Use mozinstall to extract builds (#76)

---
 benchtester/BuildGetter.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/benchtester/BuildGetter.py b/benchtester/BuildGetter.py
index 3192315..c5c7302 100644
--- a/benchtester/BuildGetter.py
+++ b/benchtester/BuildGetter.py
@@ -22,6 +22,7 @@
 import urllib2
 
 import mozdownload
+import mozinstall
 
 PUSHLOG_BRANCH_MAP = {
   'mozilla-inbound': 'integration/mozilla-inbound',
@@ -152,6 +153,7 @@ def __init__(self, scraper_args, directory=None,
 
     self._branch = None
     self._extracted = directory
+    self._install_dir = None
     self._cleanup_dir = False
     self._prepared = False
     self._revision = None
@@ -202,14 +204,6 @@ def __init__(self, scraper_args, directory=None,
     
     self._valid = True
 
-  @staticmethod
-  def extract_build(src, dstdir):
-    """Extracts the given build to the given directory."""
-
-    # cross-platform FIXME, this is hardcoded to tar at the moment
-    with tarfile.open(src, mode='r:*') as tar:
-      tar.extractall(path=dstdir)
- 
   def prepare(self):
     """
     Prepares the build for testing.
@@ -228,7 +222,7 @@ def prepare(self):
     self._scraperTarget = self._scraper.filename
 
     _stat("Extracting build")
-    self.extract_build(self._scraper.filename, self._extracted)
+    self._install_dir = mozinstall.install(self._scraper.filename, self._extracted)
 
     self._prepared = True
     self._scraper = None
@@ -242,7 +236,7 @@ def cleanup(self):
       os.remove(self._scraperTarget)
 
       # remove the extracted archive
-      shutil.rmtree(os.path.join(self._extracted, "firefox"))
+      mozinstall.uninstall(self._install_dir)
 
     # remove the temp directory that was created
     if self._cleanup_dir:
@@ -259,8 +253,7 @@ def get_valid(self):
   def get_binary(self):
     if not self._prepared:
       raise Exception("Build is not prepared")
-    # FIXME More hard-coded linux stuff
-    return os.path.join(self._extracted, "firefox", "firefox")
+    return mozinstall.get_binary(self._install_dir, "firefox")
 
   def get_buildtime(self):
     return self._timestamp