Anomaly Detection Engine for Linux Logs (ADE)
XML Results from an Analyze Request - details of each interval
The following code illustrates the XML structure that analyze generates for each analysis interval, for each Linux system in the model group, from the collection of logs that it processes. The major element is the interval element, which contains information about a specific analysis interval for a specific system. The interval element also contains one interval_message element for each unique message ID issued during the interval. If the same message ID was issued more than once during the interval, the XML still contains only one interval_message element for that message ID.
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.ibm.com/zAware/MelodyCoreIntervalV2"
xmlns="http://www.ibm.com/zAware/MelodyCoreIntervalV2" elementFormDefault="qualified">
<!-- Root element of the result document: one analysis interval for one system. -->
<!-- NOTE(review): the sample output in this document contains an ade_version element
     and a limited_model attribute on model_info; neither is declared here (this schema
     declares melody_version instead). Confirm which of the two is current. -->
<xs:element name="interval">
  <xs:complexType>
    <xs:sequence>
      <!-- Header fields. Being inside xs:sequence, they must appear in this order. -->
      <xs:element name="version" type="xs:int"/>
      <xs:element name="sys_id" type="xs:string"/>
      <xs:element name="start_time" type="xs:dateTime"/>
      <xs:element name="end_time" type="xs:dateTime"/>
      <xs:element name="anomaly_score" type="xs:double"/>
      <xs:element name="model_internal_id" type="xs:int"/>
      <xs:element name="melody_version" type="xs:int"/>
      <xs:element name="gmt_offset" type="xs:string"/>

      <!-- Empty element; the model details are carried entirely in required attributes. -->
      <xs:element name="model_info">
        <xs:complexType>
          <xs:attribute name="model_creation_date" type="xs:dateTime" use="required"/>
          <xs:attribute name="training_period" type="xs:int" use="required"/>
          <xs:attribute name="interval_size_in_sec" type="xs:long" use="required"/>
          <xs:attribute name="analysis_group" type="xs:string" use="required"/>
        </xs:complexType>
      </xs:element>

      <!-- Empty element; the new-message count is carried in a required attribute. -->
      <xs:element name="msg_summary">
        <xs:complexType>
          <xs:attribute name="num_new_msg" type="xs:int" use="required"/>
        </xs:complexType>
      </xs:element>

      <!-- Zero or more per-message entries, each of type interval_message_type. -->
      <xs:element name="interval_message" type="interval_message_type" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>
  </xs:complexType>
</xs:element>
<!-- Content model of one interval_message entry. The message is identified by the
     required msg_id attribute; the child elements must appear in the order listed. -->
<xs:complexType name="interval_message_type">
  <xs:sequence>
    <xs:element name="num_instances" type="xs:int"/>

    <!-- Double-valued text content plus a required double attribute "frequency". -->
    <xs:element name="bernoulli">
      <xs:complexType>
        <xs:simpleContent>
          <xs:extension base="xs:double">
            <xs:attribute name="frequency" type="xs:double" use="required"/>
          </xs:extension>
        </xs:simpleContent>
      </xs:complexType>
    </xs:element>

    <xs:element name="cluster_id" type="xs:int"/>

    <!-- Empty element; periodicity data is carried in attributes only. -->
    <xs:element name="periodicity">
      <xs:complexType>
        <!-- status is mandatory and restricted to the four values enumerated below. -->
        <xs:attribute name="status" use="required">
          <xs:simpleType>
            <xs:restriction base="xs:string">
              <xs:enumeration value="IN_SYNC"/>
              <xs:enumeration value="NOT_IN_SYNC"/>
              <xs:enumeration value="NOT_PERIODIC"/>
              <xs:enumeration value="NEW"/>
            </xs:restriction>
          </xs:simpleType>
        </xs:attribute>
        <xs:attribute name="last_issued" type="xs:dateTime" use="optional"/>
        <xs:attribute name="score" type="xs:double" use="optional"/>
      </xs:complexType>
    </xs:element>

    <!-- Double-valued text content plus a required double attribute "mean". -->
    <xs:element name="poisson">
      <xs:complexType>
        <xs:simpleContent>
          <xs:extension base="xs:double">
            <xs:attribute name="mean" type="xs:double" use="required"/>
          </xs:extension>
        </xs:simpleContent>
      </xs:complexType>
    </xs:element>

    <!-- Per-message scores and status. -->
    <xs:element name="intCont" type="xs:double"/>
    <xs:element name="normIntCont" type="xs:double"/>
    <xs:element name="anomaly" type="xs:double"/>
    <xs:element name="cluster_status" type="xs:string"/>
    <xs:element name="critical_words" type="xs:double"/>

    <!-- Message text fields. -->
    <xs:element name="text_sum" type="xs:string"/>
    <xs:element name="text_smp" type="xs:string"/>

    <!-- Occurrence vector, followed by an optional (at most one) active_rules element. -->
    <xs:element name="time_vec" type="interval_time_vector_type"/>
    <xs:element name="active_rules" type="active_rules_type" minOccurs="0" maxOccurs="1"/>
  </xs:sequence>
  <xs:attribute name="msg_id" type="xs:string" use="required"/>
</xs:complexType>
<!-- Content of time_vec: a possibly empty list of integer occ elements. -->
<xs:complexType name="interval_time_vector_type">
  <xs:sequence>
    <xs:element name="occ" type="xs:int" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
<!-- Content of active_rules: a possibly empty list of rule elements. -->
<xs:complexType name="active_rules_type">
  <xs:sequence>
    <xs:element name="rule" type="rule_type" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
<!-- Content of a single rule: a name element followed by an action element. -->
<xs:complexType name="rule_type">
  <xs:sequence>
    <!-- String content; affected_score is a boolean attribute with no "use" given,
         so it is optional by XSD default. -->
    <xs:element name="name">
      <xs:complexType>
        <xs:simpleContent>
          <xs:extension base="xs:string">
            <xs:attribute name="affected_score" type="xs:boolean"/>
          </xs:extension>
        </xs:simpleContent>
      </xs:complexType>
    </xs:element>
    <xs:element name="action" type="xs:string"/>
  </xs:sequence>
</xs:complexType>
</xs:schema>
XML element descriptions for INTERVAL output
- version
- An integer that identifies the version of the ADE application programming interface (API).
- sys_id
- A string that provides the name of the system that was specified on the INTERVAL request, and the name of the system group to which the system belongs.
- start_time
- Indicates the beginning of the first
interval for which data is
available for the specified system on the date in the INTERVAL request.
The start time is indicated in the XML dateTime
data
type format in Coordinated Universal Time (UTC).
YYYY-MM-DDThh:mm:ss.tttZ
- end_time
- Indicates the beginning of the first
interval after the
date specified in the INTERVAL request. The end time is indicated
in the XML dateTime
data type format in Coordinated
Universal Time (UTC).
YYYY-MM-DDThh:mm:ss.tttZ
- anomaly_score
- A double value that provides the
anomaly
score for this interval. The interval anomaly score is the percentile
of the sum of each anomaly score for individual message IDs within
an interval. When ADE
uses priming data and current data to create a model of system
behavior,
a process that is called "training",
ADE captures the
distribution of interval anomaly scores for all intervals that are
represented in the training data. ADE then uses the resulting
distribution to establish the range of values for each
percentile.
The possible interval anomaly scores are:
- 0 through 99.4
- The analysis interval contains
messages and message clusters that
match or exhibit relatively insignificant differences in expected
behavior, as defined in the ADE
model. A score of
0 is possible because the server eliminates all expected, in-context
messages from its scoring calculation. A score of 0 indicates intervals
that exhibit no difference in behavior compared to the system or
group model. The analysis
snapshots for these analysis
intervals are colored with the lightest blue shade.
Analysis intervals with scores that are greater than 0 but less than 99.5 contain some messages that are unexpected or issued out of context. Scores in this range indicate intervals that do not vary significantly from the system model. The analysis snapshots for these analysis intervals are colored with varying shades of blue.
- 99.5
- Analysis intervals with this score contain some rarely seen, unexpected, or out-of-context messages. Generally speaking, this score indicates analysis intervals with some differences from the system or group model but do not contain messages of much diagnostic value. The analysis snapshots for these analysis intervals are colored with the darkest blue shade.
- 99.6 - 100
- Analysis intervals with this score contain rarely seen messages (these messages appear in the model only once or twice), or many messages that are unexpected or issued out of context. This score indicates analysis intervals with more differences from the system or group model; these intervals can contain messages that might help you diagnose anomalous system behavior. The analysis snapshots for these analysis intervals are the color gold.
- 101
- Analysis intervals with this score
exhibit the most significant
differences from the system or group
model; these intervals
contain messages that merit investigation. The analysis
assigns this score
to analysis intervals that contain:
- Unusual or unexpected messages.
- A much higher volume of messages than expected.
- model_internal_id
- An integer that ADE uses to identify this system model.
- melody_version
- An integer that represents the version of the analytics engine that ADE is using.
- gmt_offset
- A string that indicates the difference in hours and minutes from Coordinated Universal Time (UTC) for the requested start time (for example, GMT+00:00).
- model_info
- Provides information about the model
associated with the specified
system.
- model_creation_date
- An attribute that provides the date and time when ADE successfully built the most recent model of system behavior.
- training_period
- An integer that indicates the number of consecutive calendar days that ADE uses to identify the instrumentation data to include in training models.
- interval_size_in_sec
- An integer that indicates the number of seconds in an interval.
- analysis_group
- An attribute that provides the name of a Linux model group in the ADE topology.
- msg_summary
- An element that contains summary
information about messages in
the interval.
- num_new_msg
- An integer that provides the total number of new messages issued by the system during this interval.
- interval_message
- The XML response contains one interval_message
element
for each unique message ID that was issued within the interval
specified
on the INTERVAL request. Each interval_message
contains
the following child elements and attributes for the message.
- num_instances
- An integer that specifies the number of times that this message was issued within this analysis interval.
- bernoulli
- A double value that indicates how
frequently the message ID is
issued within a sampled set of 10-minute analysis snapshots in the
system model. Values range from 1 to 101:
- A value of 1 indicates that the message is issued in almost all analysis intervals in the model.
- A value of 100 indicates that the message is issued in almost none of the analysis intervals in the model.
- A value of 101 indicates that this message ID has not been issued in any analysis interval in the model.
- frequency
- A double value that indicates the average number of analysis intervals in which the message is issued each day, according to analysis of the message data that ADE uses for training.
- cluster_id
- An integer that represents the identifier of the cluster to which this message belongs. When the message is not part of a recognized cluster, the cluster ID is -1.
- periodicity status
- An attribute of the periodicity element that indicates whether
or not this message has a tendency
to recur at specific times, and whether the message recurred as
expected
within the analysis interval. Valid values are:
- NEW
- ADE has not previously detected this message.
- IN_SYNC
- ADE expects this message to be issued in a periodic pattern, and the message was issued as expected during the analysis interval.
- NOT_IN_SYNC
- ADE expects this message to be issued in a periodic pattern, but the message was not issued as expected during the analysis interval.
- NOT_PERIODIC
- ADE does not expect this message to be issued in a periodic pattern.
- last_issued
- An optional attribute that provides the UTC date and time when this message was last observed.
- score
- An optional double value that indicates how the periodicity status of this message contributed to the message anomaly score for the analysis interval. Higher scores generally indicate greater contribution to the message anomaly score.
- poisson
- A double value that indicates how closely the message ID distribution in current data matches the Poisson distribution of that message ID in data during the training period for the system model. This value is provided only for message IDs that are not part of a cluster. The higher the poisson value, the greater the difference from expected behavior.
- intCont
- A double value that indicates the relative contribution of this message to the interval anomaly score for the analysis interval. This interval score is a function of the message anomaly score, the number of times that the message appears within this interval, and whether the message appeared in context.
- normIntCont
- A double value that indicates the normalized contribution of this message to the interval anomaly score for the analysis interval.
- anomaly
- A double value that indicates the rarity of this specific message ID within the selected interval. The anomaly score is a combination of the interval contribution score for this message and the rule, if any, that is in effect for this message. Higher scores indicate greater anomaly so messages with high anomaly scores are more likely to indicate a problem.
- cluster_status
- A string that indicates whether or
not this message is part of
an expected pattern of messages associated with a routine system event
(for example, starting a subsystem or workload). ADE
identifies and recognizes
these patterns or groups, which are called "clusters",
and the
specific message IDs that constitute a specific cluster. When analyzing
data, the ADE determines whether a specific
message is expected to be issued within a specific cluster. A message that is issued out
of context (without the other messages in the same cluster) might
indicate a problem.
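Values for cluster_status are listed below. In the XML output they appear in uppercase with underscores (for example, NEW, UNCLUSTERED, OUT_OF_CONTEXT):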
- New
- ADE has not previously detected this message in the model.
- Unclustered
- This message is not part of a defined cluster.
- In context
- ADE expects this message to be issued within a specific cluster, and the message was issued as expected in the analysis interval.
- Out of context
- ADE expects this message to be issued within a specific cluster, but the message was issued in a different context during the analysis interval.
- critical_words
- A double value that indicates whether the message contains specific words that indicate potential problems. Critical words include "abend", "failure", and "warning".
- text_sum
- A string that contains a summary of the common message text that was issued for each occurrence of the same message.
- text_smp
- A string that contains the full message text for the first occurrence of this message within the interval.
- time_vec
- The XML response contains one time_vec
element
for each unique message ID that was issued within the interval
specified
on the INTERVAL request.
- occ
- The XML response contains one occ element for each time that this message ID was issued within the interval specified on the INTERVAL request.
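- active_rules
- An optional element that contains zero or more rule elements for this message.
- rule
- An element that contains one name element followed by one action element.
- name
- A string element within rule. It can carry an optional boolean attribute, affected_score.
- action
- A string element within rule.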
- msg_id
- A string that identifies the unique message ID.
Sample XML Output Messages within an Interval
- The output:
<?xml version='1.0' encoding='UTF-8' ?>
<?xml-stylesheet href='./xslt/AdeCoreIntervalV2.xsl' type='text/xsl' ?>
<!-- Abridged sample, reformatted one element per line for readability.
     Literal ampersands in message text are escaped as &amp;amp; so that the
     document is well-formed. -->
<interval xsi:noNamespaceSchemaLocation="/xml/AdeCoreIntervalV2.xsd"
          xmlns="http://www.openmainframe.org/ade/AdeCoreIntervalV2"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <version>2</version>
  <sys_id>sys1.openmainframe.org</sys_id>
  <start_time>2015-12-11T23:50:00.000Z</start_time>
  <end_time>2015-12-12T00:50:00.000Z</end_time>
  <anomaly_score>99.5</anomaly_score>
  <model_internal_id>1</model_internal_id>
  <ade_version>321</ade_version>
  <gmt_offset>GMT+00:00</gmt_offset>
  <model_info model_creation_date="2016-02-10T16:02:56.840Z" training_period="7" interval_size_in_sec="3600" analysis_group="default" limited_model="Yes"/>
  <msg_summary num_new_msg="8"/>
  <interval_message msg_id="sshd_47">
    <num_instances>24</num_instances>
    <bernoulli frequency="0.07058823529411765">99.70588235294117</bernoulli>
    <cluster_id>14</cluster_id>
    <periodicity status="NOT_PERIODIC" last_issued="2015-12-11T23:35:30.000Z"/>
    <poisson mean="1.4234978142529124">0.9999876239444027</poisson>
    <intCont>6.920749476374009</intCont>
    <normIntCont>6.920749476374009</normIntCont>
    <anomaly>0.9990129101373006</anomaly>
    <cluster_status>OUT_OF_CONTEXT</cluster_status>
    <critical_words>0.0</critical_words>
    <text_sum>Accepted keyboard-interactive/pam for user1 from 5dbf2b6d08477c5fc7774f590b31abd4f39be38a port * ssh2</text_sum>
    <text_smp>Accepted keyboard-interactive/pam for user1 from 5dbf2b6d08477c5fc7774f590b31abd4f39be38a port 1582 ssh2</text_smp>
    <time_vec>
      <occ>1</occ>
      <occ>31</occ>
      <occ>61</occ>
      <occ>62</occ>
      <occ>91</occ>
      <occ>92</occ>
    </time_vec>
  </interval_message>
  <interval_message msg_id="/usr/sbin/cron(notes):/home/notes/getsysdata.ksh_311">
    <num_instances>4</num_instances>
    <bernoulli frequency="0.07100591715976332">101.0</bernoulli>
    <cluster_id>-1</cluster_id>
    <periodicity status="NOT_PERIODIC" last_issued="2015-12-11T23:45:00.000Z"/>
    <poisson mean="2.0">0.0</poisson>
    <intCont>4.656066344281889</intCont>
    <normIntCont>4.656066344281889</normIntCont>
    <anomaly>0.9904962264578541</anomaly>
    <cluster_status>NEW</cluster_status>
    <critical_words>0.0</critical_words>
    <text_sum>(notes) CMD (/home/notes/getsysdata.ksh 1>/home/notes/run.out 2>&amp;1)</text_sum>
    <text_smp>(notes) CMD (/home/notes/getsysdata.ksh 1>/home/notes/run.out 2>&amp;1)</text_smp>
    <time_vec>
      <occ>20</occ>
      <occ>50</occ>
      <occ>80</occ>
      <occ>110</occ>
    </time_vec>
  </interval_message>
  <interval_message msg_id="/usr/sbin/cron(notesa):/home/notes/getdomdata.ksh_309">
    <num_instances>4</num_instances>
    <bernoulli frequency="0.07100591715976332">101.0</bernoulli>
    <cluster_id>-1</cluster_id>
    <periodicity status="NOT_PERIODIC" last_issued="2015-12-11T23:45:00.000Z"/>
    <poisson mean="2.0">0.0</poisson>
    <intCont>4.656066344281889</intCont>
    <normIntCont>4.656066344281889</normIntCont>
    <anomaly>0.9904962264578541</anomaly>
    <cluster_status>NEW</cluster_status>
    <critical_words>0.0</critical_words>
    <text_sum>(notesa) CMD (/home/notes/getdomdata.ksh 1>~/doms.out 2>&amp;1)</text_sum>
    <text_smp>(notesa) CMD (/home/notes/getdomdata.ksh 1>~/doms.out 2>&amp;1)</text_smp>
    <time_vec>
      <occ>20</occ>
      <occ>50</occ>
      <occ>80</occ>
      <occ>110</occ>
    </time_vec>
  </interval_message>
  <!-- .......... (additional interval_message elements omitted from this sample) -->
  <interval_message msg_id="sudo(root):/etc/init.d/syslog_41">
    <num_instances>1</num_instances>
    <bernoulli frequency="23.788235294117648">1.0</bernoulli>
    <cluster_id>-1</cluster_id>
    <periodicity status="NOT_IN_SYNC" last_issued="2015-12-11T23:45:00.000Z" score="0.005970166986503796"/>
    <poisson mean="1.0">0.0</poisson>
    <intCont>-3.777176513347405</intCont>
    <normIntCont>-3.777176513347405</normIntCont>
    <anomaly>0.0</anomaly>
    <cluster_status>UNCLUSTERED</cluster_status>
    <critical_words>0.0</critical_words>
    <text_sum> root : TTY=unknown ; PWD=/root ; USER=root ; COMMAND=/etc/init.d/syslog status</text_sum>
    <text_smp> root : TTY=unknown ; PWD=/root ; USER=root ; COMMAND=/etc/init.d/syslog status</text_smp>
    <time_vec>
      <occ>110</occ>
    </time_vec>
  </interval_message>
</interval>