comunic/3rdparty/pdf.js/test/stats/statcmp.js

/*jslint node: true */

'use strict';

var fs = require('fs');

// ttest is an optional dependency. When it cannot be loaded, `ttest` stays
// undefined and the significance column is omitted from the report.
var ttest;
try {
  ttest = require('ttest');
} catch (e) {
  console.log('\nttest is not installed -- to install, run ' +
              '"npm install ttest"');
  console.log('Continuing without significance test...\n');
}

var VALID_GROUP_BYS = ['browser', 'pdf', 'page', 'round', 'stat'];

function parseOptions() {
  var yargs = require('yargs')
    .usage('Compare the results of two stats files.\n' +
           'Usage:\n  $0 <BASELINE> <CURRENT> [options]')
    .demand(2)
    .string(['groupBy'])
    .describe('groupBy', 'How statistics should grouped. Valid options: ' +
              VALID_GROUP_BYS.join(' '))
    .default('groupBy', 'browser,stat');
  var result = yargs.argv;
  result.baseline = result._[0];
  result.current = result._[1];
  if (result.groupBy) {
    result.groupBy = result.groupBy.split(/[;, ]+/);
  }
  return result;
}

/*
 * Bucket the 'time' values of the given rows by a composite key.
 *
 * stats   - array of flattened rows (objects with a 'time' property)
 * groupBy - array of row field names; their values, joined with ',', form
 *           the bucket key
 *
 * Returns a prototype-less object mapping each key to an array of times, in
 * input order. A plain map is used (rather than an Array or {}) so that keys
 * such as "length" or "constructor" cannot collide with built-in properties.
 */
function group(stats, groupBy) {
  var vals = Object.create(null);
  for (var i = 0; i < stats.length; i++) {
    var stat = stats[i];
    var keyArr = [];
    for (var j = 0; j < groupBy.length; j++) {
      keyArr.push(stat[groupBy[j]]);
    }
    var key = keyArr.join(',');
    if (vals[key] === undefined) {
      vals[key] = [];
    }
    vals[key].push(stat['time']);
  }
  return vals;
}

/*
 * Expand the parsed stats file into a flat list with one row per timing
 * record. Each row carries the browser, page, pdf and round of its parent
 * entry plus the record's name (as 'stat') and its duration (end - start, as
 * 'time'). Unless the global options group by 'stat', only the rows named
 * 'Overall' are returned.
 */
function flatten(stats) {
  var flat = [];
  stats.forEach(function(entry) {
    var expanded = entry['stats'].map(function(timing) {
      return {
        browser: entry['browser'],
        page: entry['page'],
        pdf: entry['pdf'],
        round: entry['round'],
        stat: timing['name'],
        time: timing['end'] - timing['start']
      };
    });
    Array.prototype.push.apply(flat, expanded);
  });
  var groupedByStat = options.groupBy.indexOf('stat') >= 0;
  if (groupedByStat) {
    return flat;
  }
  // Without per-stat grouping only the overall timings are meaningful.
  return flat.filter(function(row) {
    return row.stat === 'Overall';
  });
}

/*
 * Pad the string form of `s` with spaces up to `length` characters. With
 * dir === 'left' the spaces go in front; otherwise (default: 'right') they
 * are appended. Values already `length` or longer are returned unpadded.
 */
function pad(s, length, dir) {
  var text = '' + s;
  // An array of n slots joined by ' ' yields n - 1 spaces.
  var slots = Math.max(0, length - text.length + 1);
  var filler = new Array(slots).join(' ');
  if (dir === 'left') {
    return filler + text;
  }
  return text + filler;
}

/*
 * Arithmetic mean of the values in `array`: their sum divided by
 * array.length (NaN for an empty array).
 */
function mean(array) {
  var total = 0;
  array.forEach(function(value) {
    total = total + value;
  });
  return total / array.length;
}

/*
 * Comparator for row key sorting.
 *
 * Keys are ','-joined field values. Fields are compared left to right:
 *  - a field that is entirely a decimal number compares numerically
 *    (so "2" sorts before "10");
 *  - any other field compares as a string ("2col.pdf" is NOT treated as 2);
 *  - a numeric field sorts before a non-numeric one, which keeps the
 *    ordering consistent when the two kinds are mixed.
 * If all shared fields are equal, the key with fewer fields sorts first.
 *
 * Returns a negative number, zero or a positive number, as Array#sort
 * expects.
 */
function compareRow(a, b) {
  var numeric = /^-?\d+(\.\d+)?$/;
  var fieldsA = a.split(',');
  var fieldsB = b.split(',');
  var shared = Math.min(fieldsA.length, fieldsB.length);
  for (var i = 0; i < shared; i++) {
    var aIsNumber = numeric.test(fieldsA[i]);
    var bIsNumber = numeric.test(fieldsB[i]);
    if (aIsNumber !== bIsNumber) {
      return aIsNumber ? -1 : 1;
    }
    var ai = aIsNumber ? Number(fieldsA[i]) : fieldsA[i];
    var bi = bIsNumber ? Number(fieldsB[i]) : fieldsB[i];
    if (ai < bi) {
      return -1;
    }
    if (ai > bi) {
      return 1;
    }
  }
  return fieldsA.length - fieldsB.length;
}

/*
 * Dump various stats in a table to compare the baseline and current results.
 *
 * baseline, current - flattened rows (see flatten) of the two stats files.
 *
 * Rows are grouped by options.groupBy; one table line is printed per group
 * found in the baseline. A group with no samples in the current results is
 * still listed, with 'n/a' in the columns that need current data.
 *
 * T-test Refresher:
 * If I understand t-test correctly, p is the probability that we'll observe
 * another test that is as extreme as the current result assuming the null
 * hypothesis is true. P is NOT the probability of the null hypothesis. The null
 * hypothesis in this case is that the baseline and current results will be the
 * same. It is generally accepted that you can reject the null hypothesis if the
 * p-value is less than 0.05. So if p < 0.05 we can reject the hypothesis that
 * the results are the same, which doesn't necessarily mean the results are
 * faster/slower but it can be implied.
 */
function stat(baseline, current) {
  var baselineGroup = group(baseline, options.groupBy);
  var currentGroup = group(current, options.groupBy);

  var keys = Object.keys(baselineGroup);
  keys.sort(compareRow);

  var labels = options.groupBy.slice(0);
  labels.push('Count', 'Baseline(ms)', 'Current(ms)', '+/-', '% ');
  if (ttest) {
    labels.push('Result(P<.05)');
  }
  var i, row, rows = [];
  // collect rows and measure column widths
  var width = labels.map(function(s) { return s.length; });
  rows.push(labels);
  for (var k = 0; k < keys.length; k++) {
    var key = keys[k];
    var baselineTimes = baselineGroup[key];
    // The key comes from the baseline, so the current results may lack it.
    var currentTimes = currentGroup[key] || [];
    var hasCurrent = currentTimes.length > 0;
    var baselineMean = mean(baselineTimes);
    var currentMean = hasCurrent ? mean(currentTimes) : NaN;
    var delta = currentMean - baselineMean;
    row = key.split(',');
    row.push('' + baselineTimes.length,
             '' + Math.round(baselineMean),
             hasCurrent ? '' + Math.round(currentMean) : 'n/a',
             hasCurrent ? '' + Math.round(delta) : 'n/a',
             hasCurrent ? (100 * delta / baselineMean).toFixed(2) : 'n/a');
    if (ttest) {
      // The test needs at least two samples on BOTH sides; otherwise treat
      // the difference as not significant.
      var p = (baselineTimes.length < 2 || currentTimes.length < 2) ? 1 :
               ttest(baselineTimes, currentTimes).pValue();
      if (p < 0.05) {
        row.push(currentMean < baselineMean ? 'faster' : 'slower');
      } else {
        row.push('');
      }
    }
    for (i = 0; i < row.length; i++) {
      width[i] = Math.max(width[i], row[i].length);
    }
    rows.push(row);
  }

  // add horizontal line
  var hline = width.map(function(w) { return new Array(w+1).join('-'); });
  rows.splice(1, 0, hline);

  // print output
  console.log('-- Grouped By ' + options.groupBy.join(', ') + ' --');
  var groupCount = options.groupBy.length;
  for (var r = 0; r < rows.length; r++) {
    row = rows[r];
    // group columns are left-aligned, numeric/result columns right-aligned
    for (i = 0; i < row.length; i++) {
      row[i] = pad(row[i], width[i], (i < groupCount) ? 'right' : 'left');
    }
    console.log(row.join(' | '));
  }
}

/*
 * Read, parse and flatten one stats file. On any failure (unreadable file,
 * invalid JSON, unexpected structure) the error is reported on stderr and the
 * process exits with status 1.
 */
function readStatsFile(path) {
  try {
    var contents = fs.readFileSync(path).toString();
    return flatten(JSON.parse(contents));
  } catch (e) {
    console.error('Error reading file "' + path + '": ' + e);
    // Non-zero status so that callers and scripts can detect the failure.
    process.exit(1);
  }
}

/*
 * Script entry point: load the baseline and current stats files named in the
 * global options and print the comparison table.
 */
function main() {
  var baseline = readStatsFile(options.baseline);
  var current = readStatsFile(options.current);
  stat(baseline, current);
}

// Parse the command line once, then run the comparison. The resulting
// `options` global is read by flatten(), stat() and main().
var options = parseOptions();
main();
First commit 2016-11-19 11:08:12 +00:00			`/jslint node: true /`

			`'use strict';`

			`var fs = require('fs');`

			`try {`
			`var ttest = require('ttest');`
			`} catch (e) {`
			`console.log('\nttest is not installed -- to intall, run "npm install ttest"');`
			`console.log('Continuing without significance test...\n');`
			`}`

			`var VALID_GROUP_BYS = ['browser', 'pdf', 'page', 'round', 'stat'];`

			`function parseOptions() {`
			`var yargs = require('yargs')`
			`.usage('Compare the results of two stats files.\n' +`
			`'Usage:\n $0 <BASELINE> <CURRENT> [options]')`
			`.demand(2)`
			`.string(['groupBy'])`
			`.describe('groupBy', 'How statistics should grouped. Valid options: ' +`
			`VALID_GROUP_BYS.join(' '))`
			`.default('groupBy', 'browser,stat');`
			`var result = yargs.argv;`
			`result.baseline = result._[0];`
			`result.current = result._[1];`
			`if (result.groupBy) {`
			`result.groupBy = result.groupBy.split(/[;, ]+/);`
			`}`
			`return result;`
			`}`

			`function group(stats, groupBy) {`
			`var vals = [];`
			`for (var i = 0; i < stats.length; i++) {`
			`var stat = stats[i];`
			`var keyArr = [];`
			`for (var j = 0; j < groupBy.length; j++) {`
			`keyArr.push(stat[groupBy[j]]);`
			`}`
			`var key = keyArr.join(',');`
			`if (vals[key] === undefined) {`
			`vals[key] = [];`
			`}`
			`vals[key].push(stat['time']);`
			`}`
			`return vals;`
			`}`

			`/*`
			`* Flatten the stats so that there's one row per stats entry.`
			`* Also, if results are not grouped by 'stat', keep only 'Overall' results.`
			`*/`
			`function flatten(stats) {`
			`var rows = [];`
			`stats.forEach(function(stat) {`
			`stat['stats'].forEach(function(s) {`
			`rows.push({`
			`browser: stat['browser'],`
			`page: stat['page'],`
			`pdf: stat['pdf'],`
			`round: stat['round'],`
			`stat: s['name'],`
			`time: s['end'] - s['start']`
			`});`
			`});`
			`});`
			`// Use only overall results if not grouped by 'stat'`
			`if (options.groupBy.indexOf('stat') < 0) {`
			`rows = rows.filter(function(s) { return s.stat === 'Overall'; });`
			`}`
			`return rows;`
			`}`

			`function pad(s, length, dir /* default: 'right' */) {`
			`s = '' + s;`
			`var spaces = new Array(Math.max(0, length - s.length + 1)).join(' ');`
			`return dir === 'left' ? spaces + s : s + spaces;`
			`}`

			`function mean(array) {`
			`function add(a, b) {`
			`return a + b;`
			`}`
			`return array.reduce(add, 0) / array.length;`
			`}`

			`/* Comparator for row key sorting. */`
			`function compareRow(a, b) {`
			`a = a.split(',');`
			`b = b.split(',');`
			`for (var i = 0; i < Math.min(a.length, b.length); i++) {`
			`var intA = parseInt(a[i], 10);`
			`var intB = parseInt(b[i], 10);`
			`var ai = isNaN(intA) ? a[i] : intA;`
			`var bi = isNaN(intB) ? b[i] : intB;`
			`if (ai < bi) {`
			`return -1;`
			`}`
			`if (ai > bi) {`
			`return 1;`
			`}`
			`}`
			`return 0;`
			`}`

			`/*`
			`* Dump various stats in a table to compare the baseline and current results.`
			`* T-test Refresher:`
			`* If I understand t-test correctly, p is the probability that we'll observe`
			`* another test that is as extreme as the current result assuming the null`
			`* hypothesis is true. P is NOT the probability of the null hypothesis. The null`
			`* hypothesis in this case is that the baseline and current results will be the`
			`* same. It is generally accepted that you can reject the null hypothesis if the`
			`* p-value is less than 0.05. So if p < 0.05 we can reject the results are the`
			`* same which doesn't necessarily mean the results are faster/slower but it can`
			`* be implied.`
			`*/`
			`function stat(baseline, current) {`
			`var baselineGroup = group(baseline, options.groupBy);`
			`var currentGroup = group(current, options.groupBy);`

			`var keys = Object.keys(baselineGroup);`
			`keys.sort(compareRow);`

			`var labels = options.groupBy.slice(0);`
			`labels.push('Count', 'Baseline(ms)', 'Current(ms)', '+/-', '% ');`
			`if (ttest) {`
			`labels.push('Result(P<.05)');`
			`}`
			`var i, row, rows = [];`
			`// collect rows and measure column widths`
			`var width = labels.map(function(s) { return s.length; });`
			`rows.push(labels);`
			`for (var k = 0; k < keys.length; k++) {`
			`var key = keys[k];`
			`var baselineMean = mean(baselineGroup[key]);`
			`var currentMean = mean(currentGroup[key]);`
			`row = key.split(',');`
			`row.push('' + baselineGroup[key].length,`
			`'' + Math.round(baselineMean),`
			`'' + Math.round(currentMean),`
			`'' + Math.round(currentMean - baselineMean),`
			`(100 * (currentMean - baselineMean) / baselineMean).toFixed(2));`
			`if (ttest) {`
			`var p = (baselineGroup[key].length < 2) ? 1 :`
			`ttest(baselineGroup[key], currentGroup[key]).pValue();`
			`if (p < 0.05) {`
			`row.push(currentMean < baselineMean ? 'faster' : 'slower');`
			`} else {`
			`row.push('');`
			`}`
			`}`
			`for (i = 0; i < row.length; i++) {`
			`width[i] = Math.max(width[i], row[i].length);`
			`}`
			`rows.push(row);`
			`}`

			`// add horizontal line`
			`var hline = width.map(function(w) { return new Array(w+1).join('-'); });`
			`rows.splice(1, 0, hline);`

			`// print output`
			`console.log('-- Grouped By ' + options.groupBy.join(', ') + ' --');`
			`var groupCount = options.groupBy.length;`
			`for (var r = 0; r < rows.length; r++) {`
			`row = rows[r];`
			`for (i = 0; i < row.length; i++) {`
			`row[i] = pad(row[i], width[i], (i < groupCount) ? 'right' : 'left');`
			`}`
			`console.log(row.join(' \| '));`
			`}`
			`}`

			`function main() {`
			`var baseline, current;`
			`try {`
			`var baselineFile = fs.readFileSync(options.baseline).toString();`
			`baseline = flatten(JSON.parse(baselineFile));`
			`} catch(e) {`
			`console.log('Error reading file "' + options.baseline + '": ' + e);`
			`process.exit(0);`
			`}`
			`try {`
			`var currentFile = fs.readFileSync(options.current).toString();`
			`current = flatten(JSON.parse(currentFile));`
			`} catch(e) {`
			`console.log('Error reading file "' + options.current + '": ' + e);`
			`process.exit(0);`
			`}`
			`stat(baseline, current);`
			`}`

			`var options = parseOptions();`
			`main();`