Skip to content

Commit 3138028

Browse files
Jason Bulicek authored and absrivastava committed
AA AM Wiring (#469)
* Bug Fixes, Code Refactoring
* fix bugs in traces decoupling, fix bugs in alerts connector
* fix more bugs with alerts connector
* more bug fixes, re-introduce metric_key as a value in anomaly and subscription requests
* revert subscription UI workflow to how it was previously, but wired in with new data model
* fix create subscription, add error handling for empty subscription
* fix issue with setState in the new subscription box
* fix issues regarding strong and weak anomalies, slightly modify UI pieces to reflect more information, change wording, refactor backend algorithms
* bug fix for rerendering alert history expand component on alert type tab switch
* bugfix
* switching flatten to merge in api response
* Fixing the arguments to the fetchOperations method. Removing console.log statement
* fix issues with interval, moving it into search and out of tabs state
* fix bug with interval removing other tabs
* add default value to alertFreqInSec (fixes isUnhealthy value in alerts)
* pass interval into getUnhealthyCount
* Formatting alerts properly
1 parent a843ddc commit 3138028

38 files changed

+502
-518
lines changed

deployment/terraform/templates/haystack-ui_json.tpl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99
"host": "${graphite_hostname}",
1010
"port": ${graphite_port}
1111
},
12+
"grpcOptions": {
13+
"grpc.max_receive_message_length": 52428800
14+
},
1215
"connectors": {
1316
"traces": {
1417
"connectorName": "haystack",
1518
"haystackHost": "${trace_reader_hostname}",
1619
"haystackPort": ${trace_reader_service_port},
1720
"serviceRefreshIntervalInSecs": 60,
1821
"fieldKeys": [${whitelisted_fields}],
19-
"grpcOptions": {
20-
"grpc.max_receive_message_length": 52428800
21-
}
2222
},
2323
"trends": {
2424
"connectorName": "haystack",

haystack-idl

server/config/base.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ module.exports = {
2929
// base64 and periodreplacement are supported, default to noop if none provided
3030
encoder: 'periodreplacement',
3131

32+
grpcOptions: {
33+
'grpc.max_receive_message_length': 10485760
34+
},
35+
3236
// this list defines subsystems for which UI should be enabled
3337
// traces connector must be present in connectors config
3438
connectors: {

server/connectors/alerts/haystack/alertsConnector.js

Lines changed: 87 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ const _ = require('lodash');
1919
const grpc = require('grpc');
2020

2121
const config = require('../../../config/config');
22-
const servicesConnector = require('../../services/servicesConnector');
22+
const servicesConnector = config.connectors.traces && require('../../services/servicesConnector'); // eslint-disable-line
2323

2424
const fetcher = require('../../operations/grpcFetcher');
2525
const services = require('../../../../static_codegen/anomaly/anomalyReader_grpc_pb');
@@ -28,83 +28,120 @@ const MetricpointNameEncoder = require('../../utils/encoders/MetricpointNameEnco
2828

2929
const metricpointNameEncoder = new MetricpointNameEncoder(config.encoder);
3030

31-
const grpcOptions = {
32-
'grpc.max_receive_message_length': 10485760, // todo: do I need these?
33-
...config.connectors.traces.grpcOptions
34-
};
31+
const grpcOptions = config.grpcOptions || {};
3532

3633
const connector = {};
3734
const client = new services.AnomalyReaderClient(
3835
`${config.connectors.alerts.haystackHost}:${config.connectors.alerts.haystackPort}`,
3936
grpc.credentials.createInsecure(),
4037
grpcOptions); // TODO make client secure
41-
const alertTypes = ['durationTP99', 'failureCount'];
38+
const alertTypes = ['duration', 'failure-span'];
4239
const getAnomaliesFetcher = fetcher('getAnomalies', client);
43-
const alertFreqInSec = config.connectors.alerts.alertFreqInSec; // TODO make this based on alert type
40+
const alertFreqInSec = config.connectors.alerts.alertFreqInSec || 300; // TODO make this based on alert type
4441

4542

4643
/**
 * Looks up the operations of a service through the services connector.
 *
 * The services connector is only loaded when a traces connector is configured,
 * so it may be falsy; in that case the falsy value itself is returned and no
 * lookup is attempted.
 *
 * @param {string} serviceName - service whose operations are requested
 * @returns {*} result of `servicesConnector.getOperations(serviceName)`, or the
 *     falsy `servicesConnector` value when the connector is unavailable
 */
function fetchOperations(serviceName) {
    if (!servicesConnector) {
        return servicesConnector;
    }

    return servicesConnector.getOperations(serviceName);
}
46+
47+
/**
 * Tells whether an anomaly response carries the given operation name and alert type.
 *
 * Labels are read from `labelsMap`, a list of `[key, value]` pairs. The operation is
 * taken from the 'operationName' label and the type from the 'metric_key' label.
 *
 * @param {?Object} alertToCheck - anomaly response to inspect; may be null/undefined
 * @param {string} operationName - operation name to match
 * @param {string} type - alert type to match against the 'metric_key' label
 * @returns {boolean} true only when both labels exist and both values match
 */
function sameOperationAndType(alertToCheck, operationName, type) {
    // A missing response or a response without labels can never match.
    if (!alertToCheck || !Array.isArray(alertToCheck.labelsMap)) {
        return false;
    }

    const findLabel = key => alertToCheck.labelsMap.find(label => label[0] === key);
    const operationLabel = findLabel('operationName');
    const typeLabel = findLabel('metric_key');

    // Always answer with a strict boolean rather than leaking undefined or a label pair.
    return Boolean(operationLabel && typeLabel)
        && operationLabel[1] === operationName
        && typeLabel[1] === type;
}
4955

5056
/**
 * Collapses a search-anomalies response into one alert summary per
 * (operationName, metric_key) pair.
 *
 * Responses that share the same operation and type have their anomaly lists
 * appended to each other before the most recent anomaly is picked. Responses
 * lacking an 'operationName' or 'metric_key' label are skipped. The input
 * response list is not modified.
 *
 * Anomaly timestamps are multiplied by 1000 before being compared with
 * `Date.now()`, i.e. they are treated as epoch seconds.
 *
 * @param {Object} data - plain-object form of the search-anomalies response
 * @param {Array} data.searchanomalyresponseList - entries with `labelsMap`
 *     (list of `[key, value]` pairs) and `anomaliesList`
 * @returns {Array<{operationName: string, type: string, isUnhealthy: boolean, timestamp: (number|undefined)}>}
 *     one entry per distinct operation/type, in order of first appearance;
 *     `timestamp` is in milliseconds and undefined when there are no anomalies
 */
function parseOperationAlertsResponse(data) {
    const responses = data.searchanomalyresponseList || [];
    // Indexes already folded into an earlier entry with the same operation/type.
    const alreadyMerged = new Set();
    const alerts = [];

    responses.forEach((anomalyResponse, index) => {
        if (!anomalyResponse || alreadyMerged.has(index)) {
            return;
        }

        const labels = anomalyResponse.labelsMap || [];
        const operationLabel = labels.find(label => label[0] === 'operationName');
        const typeLabel = labels.find(label => label[0] === 'metric_key');
        if (!operationLabel || !typeLabel) {
            return;
        }

        const operationName = operationLabel[1];
        const type = typeLabel[1];
        let anomaliesList = anomalyResponse.anomaliesList || [];

        for (let later = index + 1; later < responses.length; later += 1) {
            if (!alreadyMerged.has(later) && sameOperationAndType(responses[later], operationName, type)) {
                // Append rather than _.merge: merging two arrays combines them index by
                // index and would overwrite anomalies instead of keeping all of them.
                anomaliesList = anomaliesList.concat(responses[later].anomaliesList || []);
                alreadyMerged.add(later);
            }
        }

        const latestUnhealthy = _.maxBy(anomaliesList, anomaly => anomaly.timestamp);
        const timestamp = latestUnhealthy && latestUnhealthy.timestamp * 1000;
        // Unhealthy when the latest anomaly falls within the last alertFreqInSec seconds.
        const isUnhealthy = Boolean(timestamp) && timestamp >= (Date.now() - (alertFreqInSec * 1000));

        alerts.push({
            operationName,
            type,
            isUnhealthy,
            timestamp
        });
    });

    return alerts;
}
6890

69-
function fetchOperationAlerts(serviceName, interval, from) {
91+
/**
 * Requests anomalies for one stat/metric_key combination of a service and
 * parses the response into per-operation alert summaries.
 *
 * The request is labelled with the encoded service name, the interval,
 * mtype 'gauge', product 'haystack', the stat and the metric_key. Start and
 * end times are sent as truncated `value / 1000`, so `from` is presumably
 * epoch milliseconds (TODO confirm against callers).
 *
 * @param {string} serviceName - URI-encoded service name
 * @param {string} interval - value for the 'interval' label
 * @param {number} from - start of the search window
 * @param {string} stat - value for the 'stat' label
 * @param {string} key - value for the 'metric_key' label
 * @returns {Promise<Array>} result of parseOperationAlertsResponse
 */
function fetchAlerts(serviceName, interval, from, stat, key) {
    const request = new messages.SearchAnamoliesRequest();
    const labels = request.getLabelsMap();
    const encodedServiceName = metricpointNameEncoder.encodeMetricpointName(decodeURIComponent(serviceName));

    labels.set('serviceName', encodedServiceName);
    labels.set('interval', interval);
    labels.set('mtype', 'gauge');
    labels.set('product', 'haystack');
    labels.set('stat', stat);
    labels.set('metric_key', key);

    request.setStarttime(Math.trunc(from / 1000));
    request.setEndtime(Math.trunc(Date.now() / 1000));
    request.setSize(-1);

    return getAnomaliesFetcher.fetch(request).then((pbResult) => {
        const response = messages.SearchAnomaliesResponse.toObject(false, pbResult);
        return parseOperationAlertsResponse(response);
    });
}
83108

84-
function mergeOperationsWithAlerts({operationAlerts, operations}) {
85-
return _.flatten(operations.map(operation => alertTypes.map((alertType) => {
86-
const operationAlert = operationAlerts.find(alert => (alert.operationName.toLowerCase() === operation.toLowerCase() && alert.type === alertType));
109+
/**
 * Fetches both supported alert types for a service and returns them as one list.
 *
 * Two searches run in parallel: stat '*_99' with metric_key 'duration', and
 * stat 'count' with metric_key 'failure-span'.
 *
 * @param {string} serviceName - URI-encoded service name
 * @param {string} interval - value for the 'interval' label
 * @param {number} from - start of the search window, passed through to fetchAlerts
 * @returns {Promise<Array>} duration alerts followed by failure-span alerts
 */
function fetchOperationAlerts(serviceName, interval, from) {
    return Q.all([
        fetchAlerts(serviceName, interval, from, '*_99', 'duration'),
        fetchAlerts(serviceName, interval, from, 'count', 'failure-span')
    ])
    // Concatenate instead of _.merge: merging two arrays combines them index by index,
    // so failure-span alerts would overwrite the duration alerts at the same positions.
    .then(([durationAlerts, failureAlerts]) => durationAlerts.concat(failureAlerts));
}
87113

88-
if (operationAlert !== undefined) {
114+
/**
 * Produces the alert rows for a service, one per operation and alert type.
 *
 * With a non-empty operation list, every operation gets one row per entry in
 * `alertTypes`: a copy of the matching alert (operation names compared
 * case-insensitively) or a healthy placeholder with a null timestamp. Without
 * operations, the alerts themselves are returned, grouped in `alertTypes` order.
 *
 * @param {Object} params
 * @param {Array} params.operationAlerts - alert summaries with operationName and type
 * @param {Array<string>} [params.operations] - operation names of the service
 * @returns {Array<Object>} flat list of alert rows
 */
function mergeOperationsWithAlerts({operationAlerts, operations}) {
    if (!operations || !operations.length) {
        const alertsGroupedByType = alertTypes.map(alertType =>
            _.filter(operationAlerts, alert => alert.type === alertType));
        return _.flatten(alertsGroupedByType);
    }

    const rows = [];
    operations.forEach((operation) => {
        alertTypes.forEach((alertType) => {
            const existingAlert = operationAlerts.find(alert =>
                alert.operationName.toLowerCase() === operation.toLowerCase() && alert.type === alertType);

            if (existingAlert === undefined) {
                rows.push({
                    operationName: operation,
                    type: alertType,
                    isUnhealthy: false,
                    timestamp: null
                });
            } else {
                rows.push({...existingAlert});
            }
        });
    });

    return rows;
}
101135

102136
/**
 * Flattens a list of anomaly responses into a single list of anomalies, each
 * tagged with the strength of the response it came from.
 *
 * The strength is the value of the response's 'anomalyLevel' label (read from
 * `labelsMap`, a list of `[key, value]` pairs); it is undefined when the label
 * is absent. Responses without an anomalies list contribute nothing, and an
 * empty first response no longer hides anomalies found in later responses.
 *
 * @param {?Array<Object>} data - anomaly responses with `labelsMap` and `anomaliesList`
 * @returns {Array<Object>} anomalies as `{strength, ...anomaly}`; empty when there are none
 */
function returnAnomalies(data) {
    if (!Array.isArray(data)) {
        return [];
    }

    return data.reduce((allAnomalies, response) => {
        if (!response || !Array.isArray(response.anomaliesList)) {
            return allAnomalies;
        }

        const levelLabel = (response.labelsMap || []).find(label => label[0] === 'anomalyLevel');
        const strength = levelLabel ? levelLabel[1] : undefined;

        return allAnomalies.concat(response.anomaliesList.map(anomaly => ({strength, ...anomaly})));
    }, []);
}
109146

110147
function getActiveAlertCount(operationAlerts) {
@@ -113,7 +150,7 @@ function getActiveAlertCount(operationAlerts) {
113150

114151
connector.getServiceAlerts = (serviceName, interval) => {
115152
// todo: calculate "from" value based on selected interval
116-
const oneDayAgo = Math.trunc(Date.now() - (24 * 60 * 60 * 1000));
153+
const oneDayAgo = Math.trunc((Date.now() - (24 * 60 * 60 * 1000)));
117154
return Q.all([fetchOperations(serviceName), fetchOperationAlerts(serviceName, interval, oneDayAgo)])
118155
.then(stats => mergeOperationsWithAlerts({
119156
operations: stats[0],
@@ -130,20 +167,21 @@ connector.getAnomalies = (serviceName, operationName, alertType, from, interval)
130167
.set('serviceName', metricpointNameEncoder.encodeMetricpointName(decodeURIComponent(serviceName)))
131168
.set('operationName', metricpointNameEncoder.encodeMetricpointName(decodeURIComponent(operationName)))
132169
.set('product', 'haystack')
133-
.set('name', alertType)
170+
.set('metric_key', alertType)
134171
.set('stat', stat)
135172
.set('interval', interval)
136173
.set('mtype', 'gauge');
137-
request.setStarttime(from);
138-
request.setEndtime(Date.now());
174+
request.setStarttime(Math.trunc(from / 1000));
175+
request.setEndtime(Math.trunc(Date.now() / 1000));
176+
request.setSize(-1);
139177

140178
return getAnomaliesFetcher
141179
.fetch(request)
142-
.then(pbResult => returnAnomalies(messages.SearchAnomaliesResponse.toObject(false, pbResult)));
180+
.then(pbResult => returnAnomalies(messages.SearchAnomaliesResponse.toObject(false, pbResult).searchanomalyresponseList));
143181
};
144182

145-
connector.getServiceUnhealthyAlertCount = serviceName =>
146-
fetchOperationAlerts(serviceName, '5m', Math.trunc(Date.now() - (5 * 60 * 1000)))
183+
/**
 * Counts the active alerts of a service, searching anomalies from five minutes
 * before now.
 *
 * @param {string} serviceName - URI-encoded service name
 * @param {string} interval - value for the 'interval' label of the search
 * @returns {Promise<*>} result of getActiveAlertCount for the fetched alerts
 */
connector.getServiceUnhealthyAlertCount = (serviceName, interval) => {
    const fiveMinutesAgo = Math.trunc(Date.now() - (5 * 60 * 1000));

    return fetchOperationAlerts(serviceName, interval, fiveMinutesAgo)
        .then(result => getActiveAlertCount(result));
};
148186

149187
module.exports = connector;

server/connectors/alerts/haystack/subscriptionsConnector.js

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ const putter = require('../../operations/grpcPutter');
2424
const deleter = require('../../operations/grpcDeleter');
2525
const poster = require('../../operations/grpcPoster');
2626

27-
const grpcOptions = {
28-
'grpc.max_receive_message_length': 10485760, // todo: do I need these?
29-
...config.connectors.traces.grpcOptions
30-
};
27+
const grpcOptions = config.grpcOptions || {};
28+
29+
const MetricpointNameEncoder = require('../../utils/encoders/MetricpointNameEncoder');
3130

31+
const metricpointNameEncoder = new MetricpointNameEncoder(config.encoder);
3232

3333
const client = new services.SubscriptionManagementClient(
3434
`${config.connectors.alerts.haystackHost}:${config.connectors.alerts.haystackPort}`,
@@ -84,9 +84,9 @@ connector.searchSubscriptions = (serviceName, operationName, alertType, interval
8484

8585
const request = new messages.SearchSubscriptionRequest();
8686
request.getLabelsMap()
87-
.set('serviceName', decodeURIComponent(serviceName))
88-
.set('operationName', decodeURIComponent(operationName))
89-
.set('type', alertType)
87+
.set('serviceName', metricpointNameEncoder.encodeMetricpointName(decodeURIComponent(serviceName)))
88+
.set('operationName', metricpointNameEncoder.encodeMetricpointName(decodeURIComponent(operationName)))
89+
.set('metric_key', alertType)
9090
.set('stat', stat)
9191
.set('interval', interval)
9292
.set('product', 'haystack')
@@ -96,7 +96,6 @@ connector.searchSubscriptions = (serviceName, operationName, alertType, interval
9696
.fetch(request)
9797
.then((result) => {
9898
const pbResult = messages.SearchSubscriptionResponse.toObject(false, result);
99-
console.log(pbResult.subscriptionresponseList.map(pbSubResponse => converter.toSubscriptionJson(pbSubResponse)));
10099
return pbResult.subscriptionresponseList.map(pbSubResponse => converter.toSubscriptionJson(pbSubResponse));
101100
});
102101
};

server/connectors/alerts/stub/alertsConnector.js

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,75 +17,69 @@
1717
const Q = require('q');
1818

1919
/**
 * Returns the current time in milliseconds minus a random whole offset of
 * less than 5000 * 60 milliseconds.
 *
 * @returns {number} a timestamp at or shortly before the current time
 */
function getRandomTimeStamp() {
    const nowInMillis = new Date().getTime();
    const randomOffset = Math.floor(Math.random() * 5000 * 60);

    return nowInMillis - randomOffset;
}
2323

2424
/**
 * Builds one random stub anomaly.
 *
 * The timestamp is a whole number of epoch seconds, up to 2000 * 60 seconds in
 * the past, in line with the truncated epoch seconds the haystack connector
 * sends in its requests. The strength is 'STRONG' for odd observed values and
 * 'WEAK' for even ones.
 *
 * @returns {{observedvalue: number, expectedvalue: number, timestamp: number, strength: string}}
 */
function generateAnomaly() {
    // Truncate to whole seconds so the stub never produces fractional timestamps.
    const currentTimeInSec = Math.floor(Date.now() / 1000);
    const timestamp = currentTimeInSec - Math.floor(Math.random() * 2000 * 60);
    const expectedvalue = Math.floor(Math.random() * 100000);
    const observedvalue = Math.floor(expectedvalue * (Math.random() * 100));

    return {
        observedvalue,
        expectedvalue,
        timestamp,
        strength: observedvalue % 2 ? 'STRONG' : 'WEAK'
    };
}
3536

3637
function getAlerts() {
3738
return [
3839
{
3940
operationName: 'tarley-1',
40-
type: 'count',
41+
type: 'duration',
4142
isUnhealthy: true,
4243
timestamp: getRandomTimeStamp()
4344
},
4445
{
4546
operationName: 'tarley-1',
46-
type: 'durationTP99',
47+
type: 'failure-span',
4748
isUnhealthy: true,
4849
timestamp: getRandomTimeStamp()
4950
},
5051
{
51-
operationName: 'tarley-1',
52-
type: 'failureCount',
52+
operationName: 'tully-1',
53+
type: 'duration',
5354
isUnhealthy: false,
5455
timestamp: getRandomTimeStamp()
5556
},
5657
{
5758
operationName: 'tully-1',
58-
type: 'count',
59+
type: 'failure-span',
5960
isUnhealthy: false,
6061
timestamp: getRandomTimeStamp()
6162
},
6263
{
6364
operationName: 'tully-1',
64-
type: 'durationTP99',
65+
type: 'duration',
6566
isUnhealthy: false,
6667
timestamp: getRandomTimeStamp()
67-
},
68-
{
68+
}, {
6969
operationName: 'tully-1',
70-
type: 'failureCount',
70+
type: 'failure-span',
7171
isUnhealthy: false,
7272
timestamp: getRandomTimeStamp()
7373
},
7474
{
7575
operationName: 'dondarrion-1',
76-
type: 'count',
77-
isUnhealthy: true,
78-
timestamp: getRandomTimeStamp()
79-
},
80-
{
81-
operationName: 'dondarrion-1',
82-
type: 'durationTP99',
76+
type: 'duration',
8377
isUnhealthy: false,
8478
timestamp: getRandomTimeStamp()
8579
},
8680
{
8781
operationName: 'dondarrion-1',
88-
type: 'failureCount',
82+
type: 'failure-span',
8983
isUnhealthy: false,
9084
timestamp: getRandomTimeStamp()
9185
}

0 commit comments

Comments
 (0)