aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/etherpad/src/etherpad/statistics/statistics.js
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/etherpad/src/etherpad/statistics/statistics.js')
-rw-r--r--trunk/etherpad/src/etherpad/statistics/statistics.js1248
1 files changed, 1248 insertions, 0 deletions
diff --git a/trunk/etherpad/src/etherpad/statistics/statistics.js b/trunk/etherpad/src/etherpad/statistics/statistics.js
new file mode 100644
index 0000000..8174405
--- /dev/null
+++ b/trunk/etherpad/src/etherpad/statistics/statistics.js
@@ -0,0 +1,1248 @@
+/**
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS-IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import("dateutils.noon");
+import("execution");
+import("exceptionutils");
+import("fastJSON");
+import("fileutils.fileLineIterator");
+import("jsutils.*");
+import("sqlbase.sqlobj");
+
+import("etherpad.log");
+
+jimport("net.appjet.oui.GenericLoggerUtils");
+jimport("net.appjet.oui.LoggableFromJson");
+jimport("net.appjet.oui.FilterWrangler");
+jimport("java.lang.System.out.println");
+jimport("net.appjet.common.util.ExpiringMapping");
+
+var millisInDay = 86400*1000;
+
+function _stats() {
+ if (! appjet.cache.statistics) {
+ appjet.cache.statistics = {};
+ }
+ return appjet.cache.statistics;
+}
+
+function onStartup() {
+ execution.initTaskThreadPool("statistics", 1);
+ _scheduleNextDailyUpdate();
+
+ onReset();
+}
+
+function _info(m) {
+ log.info({type: 'statistics', message: m});
+}
+
+function _warn(m) {
+ log.info({type: 'statistics', message: m});
+}
+
+function _statData() {
+ return _stats().stats;
+}
+
+function getAllStatNames() {
+ return keys(_statData());
+}
+
+function getStatData(statName) {
+ return _statData()[statName];
+}
+
+function _setStatData(statName, data) {
+ _statData()[statName] = data;
+}
+
+function liveSnapshot(stat) {
+ var statObject;
+ if (typeof(stat) == 'string') {
+ // "stat" is the stat name.
+ statObject = getStatData(stat);
+ } else if (typeof(stat) == 'object') {
+ statObject = stat;
+ } else {
+ return;
+ }
+ return _callFunction(statObject.snapshot_f,
+ statObject.name, statObject.options, statObject.data);
+}
+
+// ------------------------------------------------------------------
+// stats processing
+// ------------------------------------------------------------------
+
+// some useful constants
+var LIVE = 'live';
+var HIST = 'historical';
+var HITS = 'hits';
+var UNIQ = 'uniques';
+var VALS = 'values';
+var HGRM = 'histogram';
+
+// helpers
+
+function _date(d) {
+ return new Date(d);
+}
+
+function _saveStat(day, name, value) {
+ var timestamp = Math.floor(day.valueOf() / 1000);
+ _info({statistic: name,
+ timestamp: timestamp,
+ value: value});
+ try {
+ sqlobj.insert('statistics', {
+ name: name,
+ timestamp: timestamp,
+ value: fastJSON.stringify(value)
+ });
+ } catch (e) {
+ var msg;
+ try {
+ msg = e.getMessage();
+ } catch (e2) {
+ try {
+ msg = e.toSource();
+ } catch (e3) {
+ msg = "(none)";
+ }
+ }
+ _warn("failed to save stat "+name+": "+msg);
+ }
+}
+
+function _convertScalaTopValuesToJs(topValues) {
+ var totalValue = topValues._1();
+ var countsMap = topValues._2();
+ countsObj = {};
+ countsMap.foreach(scalaF1(function(pair) { countsObj[pair._1()] = pair._2(); }));
+ return {total: totalValue, counts: countsObj};
+}
+
+function _fakeMap() {
+ var map = {}
+ return {
+ get: function(k) { return map[k]; },
+ put: function(k, v) { map[k] = v; },
+ remove: function(k) { delete map[k]; }
+ }
+}
+
+function _withinSecondsOf(numSeconds, t1, t2) {
+ return (t1 > t2-numSeconds*1000) && (t1 < t2+numSeconds*1000);
+}
+
+function _callFunction(functionName, arg1, arg2, etc) {
+ var f = this[functionName];
+ var args = Array.prototype.slice.call(arguments, 1);
+ return f.apply(this, args);
+}
+
+// trackers and other init functions
+
+function _hitTracker(trackerType, timescaleType) {
+ var className;
+ switch (trackerType) {
+ case HITS: className = "BucketedLastHits"; break;
+ case UNIQ: className = "BucketedUniques"; break;
+ case VALS: className = "BucketedValueCounts"; break;
+ case HGRM: className = "BucketedLastHitsHistogram"; break;
+ }
+ var tracker;
+ switch (timescaleType) {
+ case LIVE:
+ tracker = new net.appjet.oui[className](24*60*60*1000);
+ break;
+ case HIST:
+ // timescale just needs to be longer than a day.
+ tracker = new net.appjet.oui[className](365*24*60*60*1000, true);
+ break;
+ }
+
+ var conversionData = {
+ total_f: "count",
+ history_f: "history",
+ latest_f: "latest",
+ };
+ switch (trackerType) {
+ case HITS: case UNIQ:
+ conversionData.conversionFunction =
+ function(x) { return x; } // no conversion necessary.
+ break;
+ case VALS:
+ conversionData.conversionFunction = _convertScalaTopValuesToJs
+ break;
+ case HGRM:
+ conversionData.conversionFunction =
+ function(hFunc) { return function(pct) { return hFunc.apply(pct); } }
+ break;
+ }
+
+
+ return {
+ tracker: tracker,
+ conversionData: conversionData,
+ hit: function(d, n1, n2) {
+ d = _date(d);
+ if (n2 === undefined) {
+ this.tracker.hit(d, n1);
+ } else {
+ this.tracker.hit(d, n1, n2);
+ }
+ },
+ get total() {
+ return this.conversionData.conversionFunction(this.tracker[this.conversionData.total_f]());
+ },
+ history: function(bucketsPerSample, numSamples) {
+ var scalaArray = this.tracker[this.conversionData.history_f](bucketsPerSample, numSamples);
+ var jsArray = [];
+ for (var i = 0; i < scalaArray.length(); ++i) {
+ jsArray.push(this.conversionData.conversionFunction(scalaArray.apply(i)));
+ }
+ return jsArray;
+ },
+ latest: function(bucketsPerSample) {
+ return this.conversionData.conversionFunction(this.tracker[this.conversionData.latest_f](bucketsPerSample));
+ }
+ }
+}
+
+function _initCount(statName, options, timescaleType) {
+ return _hitTracker(HITS, timescaleType);
+}
+function _initUniques(statName, options, timescaleType) {
+ return _hitTracker(UNIQ, timescaleType);
+}
+function _initTopValues(statName, options, timescaleType) {
+ return _hitTracker(VALS, timescaleType);
+}
+function _initHistogram(statName, options, timescaleType) {
+ return _hitTracker(HGRM, timescaleType);
+}
+
+function _initLatencies(statName, options, type) {
+ var hits = _initTopValues(statName, options, type);
+ var latencies = _initTopValues(statName, options, type);
+
+ return {
+ hit: function(d, value, latency) {
+ hits.hit(d, value);
+ latencies.hit(d, value, latency);
+ },
+ hits: hits,
+ latencies: latencies
+ }
+}
+
+function _initDisconnectTracker(statName, options, timescaleType) {
+ return {
+ map: (timescaleType == LIVE ? new ExpiringMapping(60*1000) : _fakeMap()),
+ counter: _initCount(statName, options, timescaleType),
+ uniques: _initUniques(statName, options, timescaleType),
+ isLive: timescaleType == LIVE
+ }
+}
+
+// update functions
+
+function _updateCount(statName, options, logName, data, logObject) {
+ // println("update count: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource());
+ if (options.filter == null || options.filter(logObject)) {
+ data.hit(logObject.date, 1);
+ }
+}
+
+function _updateSum(statName, options, logName, data, logObject) {
+ // println("update sum: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource());
+ if (options.filter == null || options.filter(logObject)) {
+ data.hit(logObject.date, Math.round(Number(logObject[options.fieldName])));
+ }
+}
+
+function _updateUniquenessCount(statName, options, logName, data, logObject) {
+ // println("update uniqueness: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource());
+ if (options.filter == null || options.filter(logObject)) {
+ var value = logObject[options.fieldName];
+ if (value === undefined) { return; }
+ data.hit(logObject.date, value);
+ }
+}
+
+function _updateTopValues(statName, options, logName, data, logObject) {
+ // println("update topvalues: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource());
+
+ if (options.filter == null || options.filter(logObject)) {
+ var value = logObject[options.fieldName];
+ if (value === undefined) { return; }
+ if (options.canonicalizer) {
+ value = options.canonicalizer(value);
+ }
+ data.hit(logObject.date, value);
+ }
+}
+
+function _updateLatencies(statName, options, logName, data, logObject) {
+ // println("update latencies: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource());
+
+ if (options.filter == null || options.filter(logObject)) {
+ var value = logObject[options.fieldName];
+ var latency = logObject[options.latencyFieldName];
+ if (value === undefined) { return; }
+ data.hit(logObject.date, value, latency);
+ }
+}
+
+function _updateDisconnectTracker(statName, options, logName, data, logObject) {
+ if (logName == "frontend/padevents" && logObject.type != "userleave") {
+ // we only care about userleaves from the padevents log.
+ return;
+ }
+
+ var [evtPrefix, otherPrefix] =
+ (logName == "frontend/padevents" ? ["l-", "d-"] : ["d-", "l-"]);
+ var dateLong = logObject.date;
+ var userId = logObject.session;
+
+ var lastOtherEvent = data.map.get(otherPrefix+userId);
+ if (lastOtherEvent != null && _withinSecondsOf(60, dateLong, lastOtherEvent.date)) {
+ data.counter.hit(logObject.date, 1);
+ data.uniques.hit(logObject.date, userId);
+ data.map.remove(otherPrefix+userId);
+ if (data.isLive) {
+ log.custom("avoidable_disconnects",
+ {userId: userId,
+ errorMessage: lastOtherEvent.errorMessage || logObject.errorMessage});
+ }
+ } else {
+ data.map.put(evtPrefix+userId, {date: dateLong, message: logObject.errorMessage});
+ }
+}
+
+// snapshot functions
+
+function _lazySnapshot(snapshot) {
+ var total;
+ var history = {};
+ var latest = {};
+ return {
+ get total() {
+ if (total === undefined) {
+ total = snapshot.total;
+ }
+ return total;
+ },
+ history: function(bucketsPerSample, numSamples) {
+ if (history[""+bucketsPerSample+":"+numSamples] === undefined) {
+ history[""+bucketsPerSample+":"+numSamples] = snapshot.history(bucketsPerSample, numSamples);
+ }
+ return history[""+bucketsPerSample+":"+numSamples];
+ },
+ latest: function(bucketsPerSample) {
+ if (latest[""+bucketsPerSample] === undefined) {
+ latest[""+bucketsPerSample] = snapshot.latest(bucketsPerSample);
+ }
+ return latest[""+bucketsPerSample];
+ }
+ }
+}
+
+function _snapshotTotal(statName, options, data) {
+ return _lazySnapshot(data);
+}
+
+function _convertTopValue(topValue) {
+ var counts = topValue.counts;
+ var sortedValues = keys(counts).sort(function(x, y) {
+ return counts[y] - counts[x];
+ }).map(function(key) {
+ return { value: key, count: counts[key] };
+ });
+ return {count: topValue.total, topValues: sortedValues.slice(0, 50) };
+}
+
+function _snapshotTopValues(statName, options, data) {
+ var convertedData = {};
+
+ return _lazySnapshot({
+ get total() {
+ return _convertTopValue(data.total);
+ },
+ history: function(bucketsPerSample, numSamples) {
+ return data.history(bucketsPerSample, numSamples).map(_convertTopValue);
+ },
+ latest: function(bucketsPerSample) {
+ return _convertTopValue(data.latest(bucketsPerSample));
+ }
+ });
+}
+
+function _snapshotLatencies(statName, options, data) {
+ // convert the hits + total latencies into a topValues-style data object.
+ var hits = data.hits;
+ var totalLatencies = data.latencies;
+
+ function convertCountsObjects(latencyCounts, hitCounts) {
+ var mergedCounts = {}
+ keys(latencyCounts.counts).forEach(function(value) {
+ mergedCounts[value] =
+ Math.round(latencyCounts.counts[value] / (hitCounts.counts[value] || 1));
+ });
+ return {counts: mergedCounts, total: latencyCounts.total / (hitCounts.total || 1)};
+ }
+
+ // ...and then convert that object into a snapshot.
+ return _snapshotTopValues(statName, options, {
+ get total() {
+ return convertCountsObjects(totalLatencies.total, hits.total);
+ },
+ history: function(bucketsPerSample, numSamples) {
+ return mergeArrays(
+ convertCountsObjects,
+ totalLatencies.history(bucketsPerSample, numSamples),
+ hits.history(bucketsPerSample, numSamples));
+ },
+ latest: function(bucketsPerSample) {
+ return convertCountsObjects(totalLatencies.latest(bucketsPerSample), hits.latest(bucketsPerSample));
+ }
+ });
+}
+
+function _snapshotDisconnectTracker(statName, options, data) {
+ var topValues = {};
+ var counts = data.counter;
+ var uniques = data.uniques;
+ function topValue(counts, uniques) {
+ return {
+ count: counts,
+ topValues: [{value: "total_disconnects", count: counts},
+ {value: "disconnected_userids", count: uniques}]
+ }
+ }
+ return _lazySnapshot({
+ get total() {
+ return topValue(counts.total, uniques.total);
+ },
+ history: function(bucketsPerSample, numSamples) {
+ return mergeArrays(
+ topValue,
+ counts.history(bucketsPerSample, numSamples),
+ uniques.history(bucketsPerSample, numSamples));
+ },
+ latest: function(bucketsPerSample) {
+ return topValue(counts.latest(bucketsPerSample), uniques.latest(bucketsPerSample));
+ }
+ });
+}
+
+function _generateLogInterestMap(statNames) {
+ var interests = {};
+ statNames.forEach(function(statName) {
+ var logs = getStatData(statName).logNames;
+ logs.forEach(function(logName) {
+ if (! interests[logName]) {
+ interests[logName] = {};
+ }
+ interests[logName][statName] = true;
+ });
+ });
+ return interests;
+}
+
+
+// ------------------------------------------------------------------
+// stat generators
+// ------------------------------------------------------------------
+
+// statSpec has these properties
+// name
+// dataType - line, topvalues, histogram, etc.
+// logNames
+// init_f
+// update_f
+// snapshot_f
+// options - object containing any additional data, passed in to to the various functions.
+
+// init_f gets (statName, options, "live"|"historical")
+// update_f gets (statName, options, logName, data, logObject)
+// snapshot_f gets (statName, options, data)
+function addStat(statSpec) {
+ var statName = statSpec.name;
+ if (! getStatData(statName)) {
+ var initialData =
+ _callFunction(statSpec.init_f, statName, statSpec.options, LIVE);
+ _setStatData(statName, {
+ data: initialData,
+ });
+ }
+
+ var s = getStatData(statName);
+
+ s.options = statSpec.options;
+ s.name = statName;
+ s.logNames = statSpec.logNames;
+ s.dataType = statSpec.dataType;
+ s.historicalDays = ("historicalDays" in statSpec ? statSpec.historicalDays : 1);
+
+ s.init_f = statSpec.init_f;
+ s.update_f = statSpec.update_f;
+ s.snapshot_f = statSpec.snapshot_f;
+
+ function registerInterest(logName) {
+ if (! _stats().logNamesToInterestedStatNames[logName]) {
+ _stats().logNamesToInterestedStatNames[logName] = {};
+ }
+ _stats().logNamesToInterestedStatNames[logName][statName] = true;
+ }
+ statSpec.logNames.forEach(registerInterest);
+}
+
+function addSimpleCount(statName, historicalDays, logName, filter) {
+ addStat({
+ name: statName,
+ dataType: "line",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initCount",
+ update_f: "_updateCount",
+ snapshot_f: "_snapshotTotal",
+ options: { filter: filter },
+ historicalDays: historicalDays || 1
+ });
+}
+
+function addSimpleSum(statName, historicalDays, logName, filter, fieldName) {
+ addStat({
+ name: statName,
+ dataType: "line",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initCount",
+ update_f: "_updateSum",
+ snapshot_f: "_snapshotTotal",
+ options: { filter: filter, fieldName: fieldName },
+ historicalDays: historicalDays || 1
+ });
+}
+
+function addUniquenessCount(statName, historicalDays, logName, filter, fieldName) {
+ addStat({
+ name: statName,
+ dataType: "line",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initUniques",
+ update_f: "_updateUniquenessCount",
+ snapshot_f: "_snapshotTotal",
+ options: { filter: filter, fieldName: fieldName },
+ historicalDays: historicalDays || 1
+ })
+}
+
+function addTopValuesStat(statName, historicalDays, logName, filter, fieldName, canonicalizer) {
+ addStat({
+ name: statName,
+ dataType: "topValues",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initTopValues",
+ update_f: "_updateTopValues",
+ snapshot_f: "_snapshotTopValues",
+ options: { filter: filter, fieldName: fieldName, canonicalizer: canonicalizer },
+ historicalDays: historicalDays || 1
+ });
+}
+
+function addLatenciesStat(statName, historicalDays, logName, filter, fieldName, latencyFieldName) {
+ addStat({
+ name: statName,
+ dataType: "topValues",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initLatencies",
+ update_f: "_updateLatencies",
+ snapshot_f: "_snapshotLatencies",
+ options: { filter: filter, fieldName: fieldName, latencyFieldName: latencyFieldName },
+ historicalDays: historicalDays || 1
+ });
+}
+
+
+// RETURNING USERS
+
+function _initReturningUsers(statName, options, timescaleType) {
+ return { cache: {}, uniques: _initUniques(statName, options, timescaleType) };
+}
+
+function _returningUsersUserId(logObject) {
+ if (logObject.type == "userjoin") {
+ return logObject.userId;
+ }
+}
+
+function _returningUsersUserCreationDate(userId) {
+ var record = sqlobj.selectSingle('pad_cookie_userids', {id: userId});
+ if (record) {
+ return record.createdDate.getTime();
+ }
+}
+
+function _returningUsersAccountId(logObject) {
+ return logObject.proAccountId;
+}
+
+function _returningUsersAccountCreationDate(accountId) {
+ var record = sqlobj.selectSingle('pro_accounts', {id: accountId});
+ if (record) {
+ return record.createdDate.getTime();
+ }
+}
+
+
+function _updateReturningUsers(statName, options, logName, data, logObject) {
+ var userId = (options.useProAccountId ? _returningUsersAccountId(logObject) : _returningUsersUserId(logObject));
+ if (! userId) { return; }
+ var date = logObject.date;
+ if (! data.cache[""+userId]) {
+ var creationTime = (options.useProAccountId ? _returningUsersAccountCreationDate(userId) : _returningUsersUserCreationDate(userId));
+ if (! creationTime) { return; } // hm. weird case.
+ data.cache[""+userId] = creationTime;
+ }
+ if (data.cache[""+userId] < date - options.registeredNDaysAgo*24*60*60*1000) {
+ data.uniques.hit(logObject.date, ""+userId);
+ }
+}
+function _snapshotReturningUsers(statName, options, data) {
+ return _lazySnapshot(data.uniques);
+}
+
+function addReturningUserStat(statName, pastNDays, registeredNDaysAgo) {
+ addStat({
+ name: statName,
+ dataType: "line",
+ logNames: ["frontend/padevents"],
+ init_f: "_initReturningUsers",
+ update_f: "_updateReturningUsers",
+ snapshot_f: "_snapshotReturningUsers",
+ options: { registeredNDaysAgo: registeredNDaysAgo },
+ historicalDays: pastNDays
+ });
+}
+
+function addReturningProAccountStat(statName, pastNDays, registeredNDaysAgo) {
+ addStat({
+ name: statName,
+ dataType: "line",
+ logNames: ["frontend/request"],
+ init_f: "_initReturningUsers",
+ update_f: "_updateReturningUsers",
+ snapshot_f: "_snapshotReturningUsers",
+ options: { registeredNDaysAgo: registeredNDaysAgo, useProAccountId: true },
+ historicalDays: pastNDays
+ });
+}
+
+
+function addDisconnectStat() {
+ addStat({
+ name: "streaming_disconnects",
+ dataType: "topValues",
+ logNames: ["frontend/padevents", "frontend/reconnect", "frontend/disconnected_autopost"],
+ init_f: "_initDisconnectTracker",
+ update_f: "_updateDisconnectTracker",
+ snapshot_f: "_snapshotDisconnectTracker",
+ options: {}
+ });
+}
+
+// PAD STARTUP LATENCY
+function _initPadStartupLatency(statName, options, timescaleType) {
+ return {
+ recentGets: (timescaleType == LIVE ? new ExpiringMapping(60*1000) : _fakeMap()),
+ latencies: _initHistogram(statName, options, timescaleType),
+ }
+}
+
+function _updatePadStartupLatency(statName, options, logName, data, logObject) {
+ var session = logObject.session;
+ if (logName == "frontend/request") {
+ if (! ('padId' in logObject)) { return; }
+ var padId = logObject.padId;
+ if (! data.recentGets.get(session)) {
+ data.recentGets.put(session, {});
+ }
+ data.recentGets.get(session)[padId] = logObject.date;
+ }
+ if (logName == "frontend/padevents") {
+ if (logObject.type != 'userjoin') { return; }
+ if (! data.recentGets.get(session)) { return; }
+ var padId = logObject.padId;
+ var getTime = data.recentGets.get(session)[padId];
+ if (! getTime) { return; }
+ delete data.recentGets.get(session)[padId];
+ var latency = logObject.date - getTime;
+ if (latency < 60*1000) {
+ // latencies longer than 60 seconds don't represent data we care about for this stat.
+ data.latencies.hit(logObject.date, latency);
+ }
+ }
+}
+
+function _snapshotPadStartupLatency(statName, options, data) {
+ var latencies = data.latencies;
+ function convertHistogram(histogram_f) {
+ var deciles = {};
+ [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100].forEach(function(pct) {
+ deciles[""+pct] = histogram_f(pct);
+ });
+ return deciles;
+ }
+ return _lazySnapshot({
+ latencies: latencies,
+ get total() {
+ return convertHistogram(this.latencies.total);
+ },
+ history: function(bucketsPerSample, numSamples) {
+ return this.latencies.history(bucketsPerSample, numSamples).map(convertHistogram);
+ },
+ latest: function(bucketsPerSample) {
+ return convertHistogram(this.latencies.latest(bucketsPerSample));
+ }
+ });
+}
+
+function addPadStartupLatencyStat() {
+ addStat({
+ name: "pad_startup_times",
+ dataType: "histogram",
+ logNames: ["frontend/padevents", "frontend/request"],
+ init_f: "_initPadStartupLatency",
+ update_f: "_updatePadStartupLatency",
+ snapshot_f: "_snapshotPadStartupLatency",
+ options: {}
+ });
+}
+
+
+function _initSampleTracker(statName, options, timescaleType) {
+ return {
+ samples: Array(1440), // 1 hour at 1 sample/minute
+ nextSample: 0,
+ numSamples: 0
+ }
+}
+
+function _updateSampleTracker(statName, options, logName, data, logObject) {
+ if (options.filter && ! options.filter(logObject)) {
+ return;
+ }
+ if (options.fieldName && ! (options.fieldName in logObject)) {
+ return;
+ }
+ data.samples[data.nextSample] = (options.fieldName ? logObject[fieldName] : logObject);
+ data.nextSample++;
+ data.nextSample %= data.samples.length;
+ data.numSamples = Math.min(data.samples.length, data.numSamples+1);
+}
+
+function _snapshotSampleTracker(statName, options, data) {
+ function indexTransform(i) {
+ return (data.nextSample-data.numSamples+i + data.samples.length) % data.samples.length;
+ }
+ var merge_f = options.mergeFunction || function(a, b) { return a+b; }
+ var process_f = options.processFunction || function(a) { return a; }
+ function mergeValues(values) {
+ if (values.length <= 1) { return values[0]; }
+ var t = values[0];
+ for (var i = 1; i < values.length; ++i) {
+ t = merge_f(values[i], t);
+ }
+ return t;
+ }
+ return _lazySnapshot({
+ get total() {
+ var t = [];
+ for (var i = 0; i < data.numSamples; ++i) {
+ t.push(data.samples[indexTransform(i)]);
+ }
+ return process_f(mergeValues(t), t.length);
+ },
+ history: function(bucketsPerSample, numSamples) {
+ var allSamples = [];
+ for (var i = data.numSamples-1; i >= Math.max(0, data.numSamples - bucketsPerSample*numSamples); --i) {
+ allSamples.push(data.samples[indexTransform(i)]);
+ }
+ var out = [];
+ for (var i = 0; i < numSamples && i*bucketsPerSample < allSamples.length; ++i) {
+ var subArray = [];
+ for (var j = 0; j < bucketsPerSample && i*bucketsPerSample+j < allSamples.length; ++j) {
+ subArray.push(allSamples[i*bucketsPerSample+j]);
+ }
+ out.push(process_f(mergeValues(subArray), subArray.length));
+ }
+ return out.reverse();
+ },
+ latest: function(bucketsPerSample) {
+ var t = [];
+ for (var i = data.numSamples-1; i >= Math.max(0, data.numSamples-bucketsPerSample); --i) {
+ t.push(data.samples[indexTransform(i)]);
+ }
+ return process_f(mergeValues(t), t.length);
+ }
+ });
+}
+
+function addSampleTracker(statName, logName, filter, fieldName, mergeFunction, processFunction) {
+ addStat({
+ name: statName,
+ dataType: "histogram",
+ logNames: (logName instanceof Array ? logName : [logName]),
+ init_f: "_initSampleTracker",
+ update_f: "_updateSampleTracker",
+ snapshot_f: "_snapshotSampleTracker",
+ options: { filter: filter, fieldName: fieldName,
+ mergeFunction: mergeFunction, processFunction: processFunction }
+ });
+}
+
+function addCometLatencySampleTracker(statName) {
+ addSampleTracker(statName, "backend/server-events", typeMatcher("streaming-message-latencies"), null,
+ function(a, b) {
+ var ret = {};
+ ["count", "p50", "p90", "p95", "p99", "max"].forEach(function(key) {
+ ret[key] = (Number(a[key]) || 0) + (Number(b[key]) || 0);
+ });
+ return ret;
+ },
+ function(v, count) {
+ if (count == 0) {
+ return {
+ "50": 0, "90": 0, "95": 0, "99": 0, "100": 0
+ }
+ }
+ var ret = {count: v.count};
+ ["p50", "p90", "p95", "p99", "max"].forEach(function(key) {
+ ret[key] = (Number(v[key]) || 0)/(Number(count) || 1);
+ });
+ return {"50": Math.round(ret.p50/1000),
+ "90": Math.round(ret.p90/1000),
+ "95": Math.round(ret.p95/1000),
+ "99": Math.round(ret.p99/1000),
+ "100": Math.round(ret.max/1000)};
+ });
+}
+
+function addConnectionTypeSampleTracker(statName) {
+ var caredAboutFields = ["streaming", "longpolling", "shortpolling", "(unconnected)"];
+
+ addSampleTracker(statName, "backend/server-events", typeMatcher("streaming-connection-count"), null,
+ function(a, b) {
+ var ret = {};
+ caredAboutFields.forEach(function(k) {
+ ret[k] = (Number(a[k]) || 0) + (Number(b[k]) || 0);
+ });
+ return ret;
+ },
+ function(v, count) {
+ if (count == 0) {
+ return _convertTopValue({total: 0, counts: {}});
+ }
+ var values = {};
+ var total = 0;
+ caredAboutFields.forEach(function(k) {
+ values[k] = Math.round((Number(v[k]) || 0)/count);
+ total += values[k];
+ });
+ values["Total"] = total;
+ return _convertTopValue({
+ total: Math.round(total),
+ counts: values
+ });
+ });
+}
+
+// helpers for filter functions
+
+function expectedHostnames() {
+ var hostPart = appjet.config.listenHost || "localhost";
+ if (appjet.config.listenSecureHost != hostPart) {
+ hostPart = "("+hostPart+"|"+(appjet.config.listenSecureHost || "localhost")+")";
+ }
+ var ports = [];
+ if (appjet.config.listenPort != 80) {
+ ports.push(""+appjet.config.listenPort);
+ }
+ if (appjet.config.listenSecurePort != 443) {
+ ports.push(""+appjet.config.listenSecurePort);
+ }
+ var portPart = (ports.length > 0 ? ":("+ports.join("|")+")" : "");
+ return hostPart + portPart;
+}
+
+function fieldMatcher(fieldName, fieldValue) {
+ if (fieldValue instanceof RegExp) {
+ return function(logObject) {
+ return fieldValue.test(logObject[fieldName]);
+ }
+ } else {
+ return function(logObject) {
+ return logObject[fieldName] == fieldValue;
+ }
+ }
+}
+
+function typeMatcher(type) {
+ return fieldMatcher("type", type);
+}
+
+function invertMatcher(f) {
+ return function(logObject) {
+ return ! f(logObject);
+ }
+}
+
+function setupStatsCollector() {
+ var c;
+
+ function unwatchLog(logName) {
+ GenericLoggerUtils.clearWrangler(logName.split('/')[1], c.wranglers[logName]);
+ }
+ function watchLog(logName) {
+ c.wranglers[logName] = new Packages.net.appjet.oui.LogWrangler({
+ tell: function(lpb) {
+ c.queue.add({logName: logName, json: lpb.json()});
+ }
+ });
+ c.wranglers[logName].watch(logName.split('/')[1]);
+ }
+
+ c = _stats().liveCollector;
+ if (c) {
+ c.watchedLogs.forEach(unwatchLog);
+ delete c.wrangler;
+ } else {
+ c = _stats().liveCollector = {};
+ }
+ c.watchedLogs = keys(_stats().logNamesToInterestedStatNames);
+ c.queue = new java.util.concurrent.ConcurrentLinkedQueue();
+ c.wranglers = {};
+ c.watchedLogs.forEach(watchLog);
+
+ if (! c.updateTask || c.updateTask.isDone()) {
+ c.updateTask = execution.scheduleTask('statistics', "statisticsLiveUpdate", 2000, []);
+ }
+}
+
+serverhandlers.tasks.statisticsLiveUpdate = function() {
+ var c = _stats().liveCollector;
+ try {
+ while (true) {
+ var obj = c.queue.poll();
+ if (obj != null) {
+ var statNames =
+ keys(_stats().logNamesToInterestedStatNames[obj.logName]);
+ var logObject = fastJSON.parse(obj.json);
+ statNames.forEach(function(statName) {
+ var statObject = getStatData(statName);
+ _callFunction(statObject.update_f,
+ statName, statObject.options, obj.logName, statObject.data, logObject);
+ });
+ } else {
+ break;
+ }
+ }
+ } catch (e) {
+ println("EXCEPTION IN LIVE UPDATE: "+e+" / "+e.fileName+":"+e.lineNumber)
+ println(exceptionutils.getStackTracePlain(new net.appjet.bodylock.JSRuntimeException(String(e), e.javaException || e.rhinoException)));
+ } finally {
+ c.updateTask = execution.scheduleTask('statistics', "statisticsLiveUpdate", 2000, []);
+ }
+}
+
+function onReset() {
+ // this gets refilled every reset.
+ _stats().logNamesToInterestedStatNames = {};
+
+ // we'll want to keep around the live data, though, so this is conditionally set.
+ if (! _stats().stats) {
+ _stats().stats = {};
+ }
+
+ addSimpleCount("site_pageviews", 1, "frontend/request", null);
+ addUniquenessCount("site_unique_ips", 1, "frontend/request", null, "clientAddr");
+
+ addUniquenessCount("active_user_ids", 1, "frontend/padevents", typeMatcher("userjoin"), "userId");
+ addUniquenessCount("active_user_ids_7days", 7, "frontend/padevents", typeMatcher("userjoin"), "userId");
+ addUniquenessCount("active_user_ids_30days", 30, "frontend/padevents", typeMatcher("userjoin"), "userId");
+
+ addUniquenessCount("active_pro_accounts", 1, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)),
+ "proAccountId");
+ addUniquenessCount("active_pro_accounts_7days", 7, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)),
+ "proAccountId");
+ addUniquenessCount("active_pro_accounts_30days", 30, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)),
+ "proAccountId");
+
+
+ addUniquenessCount("active_pads", 1, "frontend/padevents", typeMatcher("userjoin"), "padId");
+ addSimpleCount("new_pads", 1, "frontend/padevents", typeMatcher("newpad"));
+
+ addSimpleCount("chat_messages", 1, "frontend/chat", null);
+ addUniquenessCount("active_chatters", 1, "frontend/chat", null, "userId");
+
+ addSimpleCount("exceptions", 1, "frontend/exception", null);
+
+ addSimpleCount("eepnet_trial_downloads", 1, "frontend/eepnet_download_info", null);
+
+ addSimpleSum("revenue", 1, "frontend/billing", typeMatcher("purchase-complete"), "dollars")
+
+ var hostRegExp = new RegExp("^https?:\\/\\/([-a-zA-Z0-9]+.)?"+expectedHostnames()+"\\/");
+ addTopValuesStat("top_referers", 1, "frontend/request",
+ invertMatcher(fieldMatcher(
+ "referer", hostRegExp)),
+ "referer");
+
+ addTopValuesStat("paths_404", 1, "frontend/request", fieldMatcher("statusCode", 404), "path");
+ addTopValuesStat("paths_500", 1, "frontend/request", fieldMatcher("statusCode", 500), "path");
+ addTopValuesStat("paths_exception", 1, "frontend/exception", null, "path");
+
+ addTopValuesStat("top_exceptions", 1, ["frontend/exception", "backend/exceptions"],
+ invertMatcher(fieldMatcher("trace", undefined)),
+ "trace", function(trace) {
+ var jstrace = trace.split("\n").filter(function(line) {
+ return /^\tat JS\$.*?\.js:\d+\)$/.test(line);
+ });
+ if (jstrace.length > 3) {
+ return "JS Exception:\n"+jstrace.slice(0, 10).join("\n").replace(/\t[^\(]*/g, "");
+ }
+ return trace.split("\n").slice(1, 10).join("\n").replace(/\t/g, "");
+ });
+
+ addReturningUserStat("users_1day_returning_7days", 1, 7);
+ addReturningUserStat("users_7day_returning_7days", 7, 7);
+ addReturningUserStat("users_30day_returning_7days", 30, 7);
+
+ addReturningUserStat("users_1day_returning_30days", 1, 30);
+ addReturningUserStat("users_7day_returning_30days", 7, 30);
+ addReturningUserStat("users_30day_returning_30days", 30, 30);
+
+ addReturningProAccountStat("pro_accounts_1day_returning_7days", 1, 7);
+ addReturningProAccountStat("pro_accounts_7day_returning_7days", 7, 7);
+ addReturningProAccountStat("pro_accounts_30day_returning_7days", 30, 7);
+
+ addReturningProAccountStat("pro_accounts_1day_returning_30days", 1, 30);
+ addReturningProAccountStat("pro_accounts_7day_returning_30days", 7, 30);
+ addReturningProAccountStat("pro_accounts_30day_returning_30days", 30, 30);
+
+
+ addDisconnectStat();
+ addTopValuesStat("disconnect_causes", 1, "frontend/avoidable_disconnects", null, "errorMessage");
+
+ var staticFileRegExp = /^\/static\/|^\/favicon.ico/;
+ addLatenciesStat("execution_latencies", 1, "backend/latency",
+ invertMatcher(fieldMatcher('path', staticFileRegExp)),
+ "path", "time");
+ addLatenciesStat("static_file_latencies", 1, "backend/latency",
+ fieldMatcher('path', staticFileRegExp),
+ "path", "time");
+
+ addUniquenessCount("disconnects_with_clientside_errors", 1,
+ ["frontend/reconnect", "frontend/disconnected_autopost"],
+ fieldMatcher("hasClientErrors", true), "uniqueId");
+
+ addTopValuesStat("imports_exports_counts", 1, "frontend/import-export",
+ typeMatcher("request"), "direction");
+
+ addPadStartupLatencyStat();
+
+ addCometLatencySampleTracker("streaming_latencies");
+ addConnectionTypeSampleTracker("streaming_connections");
+ // TODO: add more stats here.
+
+ setupStatsCollector();
+}
+
+//----------------------------------------------------------------
+// Log processing
+//----------------------------------------------------------------
+
+function _whichStats(statNames) {
+ var whichStats = _statData();
+ var logNamesToInterestedStatNames = _stats().logNamesToInterestedStatNames;
+
+ if (statNames) {
+ whichStats = {};
+ statNames.forEach(function(statName) { whichStats[statName] = getStatData(statName) });
+ logNamesToInterestedStatNames = _generateLogInterestMap(statNames);
+ }
+
+ return [whichStats, logNamesToInterestedStatNames];
+}
+
+function _initStatDataMap(statNames) {
+ var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames);
+
+ var statDataMap = {};
+
+ function initStat(statName, statObject) {
+ statDataMap[statName] =
+ _callFunction(statObject.init_f, statName, statObject.options, HIST);
+ }
+ eachProperty(whichStats, initStat);
+
+ return statDataMap;
+}
+
+function _saveStats(day, statDataMap, statNames) {
+ var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames);
+
+ function saveStat(statName, statObject) {
+ var value = _callFunction(statObject.snapshot_f,
+ statName, statObject.options, statDataMap[statName]).total;
+ if (typeof(value) != 'object') {
+ value = {value: value};
+ }
+ _saveStat(day, statName, value);
+ }
+ eachProperty(whichStats, saveStat);
+}
+
+function _processSingleDayLogs(day, logNamesToInterestedStatNames, statDataMap) {
+ var iterators = {};
+ keys(logNamesToInterestedStatNames).forEach(function(logName) {
+ var [prefix, logId] = logName.split("/");
+ var fileName = log.logFileName(prefix, logId, day);
+ if (! fileName) {
+ _info("No such file: "+logName+" on day "+day);
+ return;
+ }
+ iterators[logName] = fileLineIterator(fileName);
+ });
+
+ var numIterators = keys(iterators).length;
+ if (numIterators == 0) {
+ _info("No logs to process on day "+day);
+ return;
+ }
+ var sortedLogObjects = new java.util.PriorityQueue(numIterators,
+ new java.util.Comparator({
+ compare: function(o1, o2) { return o1.logObject.date - o2.logObject.date }
+ }));
+
+ function lineToLogObject(logName, json) {
+ return {logName: logName, logObject: fastJSON.parse(json)};
+ }
+
+ // begin by filling the queue with one object from each log.
+ eachProperty(iterators, function(logName, iterator) {
+ if (iterator.hasNext) {
+ sortedLogObjects.add(lineToLogObject(logName, iterator.next));
+ }
+ });
+
+ // update with all log objects, in date order (enforced by priority queue).
+ while (! sortedLogObjects.isEmpty()) {
+ var nextObject = sortedLogObjects.poll();
+ var logName = nextObject.logName;
+
+ keys(logNamesToInterestedStatNames[logName]).forEach(function(statName) {
+ var statObject = getStatData(statName);
+ _callFunction(statObject.update_f,
+ statName, statObject.options, logName, statDataMap[statName], nextObject.logObject);
+ });
+
+ // get next entry from this log, if there is one.
+ if (iterators[logName].hasNext) {
+ sortedLogObjects.add(lineToLogObject(logName, iterators[logName].next));
+ }
+ }
+}
+
+function processStatsForDay(day, statNames, statDataMap) {
+ var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames);
+
+ // process the logs, notifying the right statistics updaters.
+ _processSingleDayLogs(day, logNamesToInterestedStatNames, statDataMap);
+}
+
+//----------------------------------------------------------------
+// Daily update
+//----------------------------------------------------------------
+serverhandlers.tasks.statisticsDailyUpdate = function() {
+// do nothing for now.
+
+// dailyUpdate();
+};
+
+function _scheduleNextDailyUpdate() {
+ // Run at 1:11am every day
+ var now = +(new Date);
+ var tomorrow = new Date(now + 1000*60*60*24);
+ tomorrow.setHours(1);
+ tomorrow.setMinutes(11);
+ tomorrow.setMilliseconds(111);
+ log.info("Scheduling next daily statistics update for: "+tomorrow.toString());
+ var delay = +tomorrow - (+(new Date));
+ execution.scheduleTask("statistics", "statisticsDailyUpdate", delay, []);
+}
+
+function processStatsAsOfDay(date, statNames) {
+ var latestDay = noon(new Date(date - 1000*60*60*24));
+
+ _processLogsForNeededDays(latestDay, statNames);
+}
+
+function _processLogsForNeededDays(latestDay, statNames) {
+ if (! statNames) {
+ statNames = getAllStatNames();
+ }
+ var statDataMap = _initStatDataMap(statNames);
+
+ var agesToStats = [];
+ var atLeastOneStat = true;
+ for (var i = 0; atLeastOneStat; ++i) {
+ atLeastOneStat = false;
+ agesToStats[i] = [];
+ statNames.forEach(function(statName) {
+ var statData = getStatData(statName);
+ if (statData.historicalDays > i) {
+ atLeastOneStat = true;
+ agesToStats[i].push(statName);
+ }
+ });
+ }
+ agesToStats.pop();
+
+ for (var i = agesToStats.length-1; i >= 0; --i) {
+ var day = new Date(+latestDay - i*24*60*60*1000);
+ processStatsForDay(day, agesToStats[i], statDataMap);
+ }
+ _saveStats(latestDay, statDataMap, statNames);
+}
+
+function doDailyUpdate(date) {
+ var now = (date === undefined ? new Date() : date);
+ var yesterdayNoon = noon(new Date(+now - 1000*60*60*24));
+
+ _processLogsForNeededDays(yesterdayNoon);
+}
+
+function dailyUpdate() {
+ try {
+ doDailyUpdate();
+ } catch (ex) {
+ log.warn("statistics.dailyUpdate() failed: "+ex.toString());
+ } finally {
+ _scheduleNextDailyUpdate();
+ }
+} \ No newline at end of file