Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/TraceEvent/EventPipe/EventCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ public void ProcessSequencePointBlockV5OrLess(Block block)

enum SequencePointFlags : uint
{
FlushThreads = 1
FlushThreads = 1,
FlushMetadata = 2
}

public void ProcessSequencePointBlockV6OrGreater(Block block)
Expand All @@ -124,6 +125,11 @@ public void ProcessSequencePointBlockV6OrGreater(Block block)
{
_threads.Flush();
}

if((flags & (uint)SequencePointFlags.FlushMetadata) != 0)
{
_source.FlushMetadataCache();
}
}

/// <summary>
Expand Down
89 changes: 64 additions & 25 deletions src/TraceEvent/EventPipe/EventPipeEventSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ private void ReadAndProcessEvent(ref SpanReader reader, EventPipeEventHeader eve
}
else
{
DispatchEventRecord(ConvertEventHeaderToRecord(ref eventHeader));
DispatchEvent(ref eventHeader);
}
if(eventHeader.PayloadSize < eventHeader.TotalNonHeaderSize)
{
Expand All @@ -416,21 +416,54 @@ private void ReadAndProcessEvent(ref SpanReader reader, EventPipeEventHeader eve
}
}

private void DispatchEventRecord(TraceEventNativeMethods.EVENT_RECORD* eventRecord)
private void DispatchEvent(ref EventPipeEventHeader eventHeader)
{
if (!_eventMetadataDictionary.TryGetValue(eventHeader.MetaDataId, out var metadata))
{
throw new FormatException($"Event is referencing undefined Metadata ID {eventHeader.MetaDataId}");

}

TraceEventNativeMethods.EVENT_RECORD* eventRecord = metadata.GetEventRecordForEventData(eventHeader);
ValidateStackFields(eventHeader, eventRecord);

if (eventRecord != null)
{
// in the code below we set sessionEndTimeQPC to be the timestamp of the last event.
// Thus the new timestamp should be later, and not more than 1 day later.
Debug.Assert(sessionEndTimeQPC <= eventRecord->EventHeader.TimeStamp);
Debug.Assert(sessionEndTimeQPC == 0 || eventRecord->EventHeader.TimeStamp - sessionEndTimeQPC < _QPCFreq * 24 * 3600);

var traceEvent = Lookup(eventRecord);
if (traceEvent.NeedsFixup)
var templateEvent = Lookup(eventRecord);
if (templateEvent.NeedsFixup)
{
templateEvent.FixupData();
}

// TraceEvent templates include a fixed opcode in the template even though it is permitted to vary per event. Ideally
// TraceEvent library would handle this is in a general purpose way but for now we are doing a fix scoped to
// EventPipeEventSource only.
//
byte? eventOpcode = eventHeader.OpCodeOverride ?? metadata.Opcode;
if (!eventOpcode.HasValue || eventOpcode == (byte)templateEvent.Opcode)
{
Dispatch(templateEvent);
}
else
{
traceEvent.FixupData();
// temporarily modify the opcode, opcodeName, and eventName to match the event record.
TraceEventOpcode templateOpcode = templateEvent.opcode;
string templateOpcodeName = templateEvent.opcodeName;
string templateEventName = templateEvent.eventName;
templateEvent.opcode = (TraceEventOpcode)eventOpcode;
templateEvent.opcodeName = null; // gets recalculated on demand
templateEvent.eventName = null; // gets recalculated on demand
Dispatch(templateEvent);
templateEvent.opcode = templateOpcode;
templateEvent.opcodeName = templateOpcodeName;
templateEvent.eventName = templateEventName;
}
Dispatch(traceEvent);

sessionEndTimeQPC = eventRecord->EventHeader.TimeStamp;
}
}
Expand Down Expand Up @@ -464,6 +497,10 @@ internal void ReadEventHeader(ref SpanReader reader, bool useHeaderCompression,
LabelList labelList = LabelListCache.GetLabelList(eventData.LabelListId);
eventData.ActivityID = labelList.ActivityId.HasValue ? labelList.ActivityId.Value : Guid.Empty;
eventData.RelatedActivityID = labelList.RelatedActivityId.HasValue ? labelList.RelatedActivityId.Value : Guid.Empty;
eventData.OpCodeOverride = labelList.OpCode;
eventData.KeywordsOverride = labelList.Keywords;
eventData.LevelOverride = labelList.Level;
eventData.VersionOverride = labelList.Version;
}

// Basic sanity checks. Are the timestamps and sizes sane.
Expand Down Expand Up @@ -498,24 +535,25 @@ private void ReadAndCacheMetadataV6OrGreater(ref SpanReader reader)
private void CacheMetadata(EventPipeMetadata metadata)
{
DynamicTraceEventData eventTemplate = CreateTemplate(metadata);
_eventMetadataDictionary.Add(metadata.MetaDataId, metadata);
_metadataTemplates[eventTemplate] = eventTemplate;
try
{
_eventMetadataDictionary.Add(metadata.MetaDataId, metadata);
_metadataTemplates[eventTemplate] = eventTemplate;
}
catch (ArgumentException)
{
EventPipeMetadata old = _eventMetadataDictionary[metadata.MetaDataId];
throw new FormatException($"Metadata ID {metadata.MetaDataId} already in use. Previously this id defined event {old.ProviderName}\\{old.EventName}. Now redefined for event {metadata.ProviderName}\\{metadata.EventName}.");
}
}

internal bool TryGetMetadata(int metadataId, out EventPipeMetadata metadata) => _eventMetadataDictionary.TryGetValue(metadataId, out metadata);
internal bool TryGetThread(long threadIndex, out EventPipeThread thread) => _threadCache.TryGetValue(threadIndex, out thread);

private TraceEventNativeMethods.EVENT_RECORD* ConvertEventHeaderToRecord(ref EventPipeEventHeader eventData)
internal void FlushMetadataCache()
{
if (_eventMetadataDictionary.TryGetValue(eventData.MetaDataId, out var metaData))
{
return metaData.GetEventRecordForEventData(eventData);
}
else
{
Debug.Assert(false, "Warning can't find metaData for ID " + eventData.MetaDataId);
return null;
}
_eventMetadataDictionary.Clear();
_metadataTemplates.Clear();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once the _metadataTemplates are cleared, would the m_state.m_templates dictionary need to be updated or completely flushed as well? I'm still getting acquainted with this codebase, but I was thinking that known metadata templates would get added to the m_state.m_templates dictionary through the RegisterUnhandledEvent callback

this.source.RegisterUnhandledEvent(delegate (TraceEvent unknown)
{
// See if we already have this definition
DynamicTraceEventData parsedTemplate = null;
if (!m_state.m_templates.TryGetValue(unknown, out parsedTemplate))
{
parsedTemplate = TryLookup(unknown);
if (parsedTemplate == null)
{
m_state.m_templates.Add(unknown.Clone(), null); // add an entry to remember that we tried and failed.
}
}
if (parsedTemplate == null)
{
return false;
}
// registeredWithTraceEventSource is a fail safe. Basically if you added yourself to the table
// (In OnNewEventDefinition) then you should not come back as unknown, however because of dual events
// and just general fragility we don't want to rely on that. So we keep a bit and ensure that we
// only add the event definition once.
if (!parsedTemplate.registeredWithTraceEventSource)
{
parsedTemplate.registeredWithTraceEventSource = true;
bool ret = OnNewEventDefintion(parsedTemplate, false) == EventFilterResponse.AcceptEvent;
// If we have subscribers, notify them as well.
var newEventDefinition = NewEventDefinition;
if (newEventDefinition != null)
{
ret |= (NewEventDefinition(parsedTemplate, false) == EventFilterResponse.AcceptEvent);
}
return ret;
}
return false;
});
}
}
, and once we flush, they're still in that dictionary.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. If the metadata gets flushed and can be re-defined, then yes, m_state.m_templates will likely contain templates that correspond to the old metadata and would need to be flushed. This is likely to be difficult though because the template has also likely been added to the main lookup hash table. Can you remind me what the goal is of flushing the metadata part way through a trace?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remind me what the goal is of flushing the metadata part way through a trace?

Today if you want to read/write a NetTrace file you need to maintain a mapping for MetadataID -> Metadata for every event type in the file. Although typical files don't have large numbers of event types there is no guarantee that will be true for all traces. A pathological trace could have 10M events each with a different metadata definition. If the reader/writer are required to maintain these caches there is a potential for unbounded memory usage. To avoid that we need a mechanism that allows the reader/writer to keep the cache bounded. When the writer is writing out events any time the cache gets too big it can emit a sequence point, delete its cache, and begin accumulating a new cache. This is the same pattern we already do for Stacks, LabelLists, and optionally Threads. On the reader side the reader doesn't have the freedom to enforce its own cache size limit but it can at least benefit from whatever limit the writer put in place.
A 2nd benefit is that flushable metadata is a precursor to being able to use nettrace format as part of a circular event buffer. Today if we wrote a bunch of nettrace data and then deleted the first half of the file we might have deleted the metadata necessary to understand events in the 2nd half of the file. However if deletion is done at sequence point boundaries where metadata is flushed then we can be assured that all metadata necessary to parse the remaining events is still available. It makes regions of the file between sequence points self-contained.

If the metadata gets flushed and can be re-defined, then yes, m_state.m_templates will likely contain templates that correspond to the old metadata and would need to be flushed.

I expect this will be fine, with a few caveats:

  • We've always expected writers of the NetTrace format not to redefine the meaning of the same providerGuid+EventId in the same trace (at least if they want to work properly with TraceEvent). For example even without metadata flushing its possible in NetTrace to define:
    Metadata=1, EventId=1, ProviderGuid=X, EventName=Foo
    Metadata=2, EventId=1, ProviderGuid=X, EventName=Bar

Parsing such a Trace with TraceEvent would treat all events with either of those MetadataIDs as having whichever name was observed first. With flushing the same rule still applies as before. If writers want the trace to be parsed properly we expect Metadata that refers to the same ProviderId+EventId should agree on the same definition. This means it will be fine if TraceEvent's cache retains an old template because we expect it to store the same data as any new template on the same ProviderId+EventId.

NOTE: Looking at TraceEvent's code, it seems to assume that ProviderId+EventId is a sufficient identifier without considering PID. If two processes disagreed on the definition of an event TraceEvent appears to treat all events from all processes using whatever definition was observed first. Nothing specific to NetTrace, this behavior appears to apply for everything sharing the default TraceDispatcher logic.

  • Since TraceEvent isn't bounding its own template cache we may get a memory usage improvement from this change but it still won't be fully bounded memory usage. In the future if we want to do better we'd need to bound/flush that central cache as well knowing that templates can always be reproduced from the Parser if they are observed again after being droppped from the central cache. In order to keep the scope of the change smaller I didn't attempt to change the caching behavior in the common template cache.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @noahfalk. That makes sense. I agree that it's already possible to end up in the situation you describe where the EventID and Provider GUIDs for two metadata entries are the same but the event name is different, and only the first one will be used during lookup. Given that we expect writers not to do this, I think it we're fine for now.

We could certainly build a feature in the future that would allow you to remove a template from the cache. However, this would only be impactful if nothing else rooted the template. Some applications would benefit from this, while others might benefit much less if they hooked all/many of the templates to a delegate in such a way that the template remained rooted during execution. Either way, this is something we can look at going forward.

}

internal override unsafe Guid GetRelatedActivityID(TraceEventNativeMethods.EVENT_RECORD* eventRecord)
Expand Down Expand Up @@ -553,9 +591,7 @@ internal LabelList GetLastLabelList()
private void EventCache_OnEvent(ref EventPipeEventHeader header)
{
_lastLabelListId = header.LabelListId;
TraceEventNativeMethods.EVENT_RECORD* eventRecord = ConvertEventHeaderToRecord(ref header);
ValidateStackFields(header, eventRecord);
DispatchEventRecord(eventRecord);
DispatchEvent(ref header);
}

private void EventCache_OnEventsDropped(int droppedEventCount)
Expand All @@ -578,8 +614,7 @@ private void ValidateStackFields(EventPipeEventHeader header, TraceEventNativeMe
}

if (header.MetaDataId != 0 &&
StackCache.TryGetStack(header.StackId, out int stackBytesSize, out IntPtr stackBytes) &&
stackBytesSize != 0 && stackBytes != IntPtr.Zero) // Sometimes the values in the StackCache are zeros.
header.StackBytesSize != 0 && header.StackBytes != IntPtr.Zero)
{
// .NET Core emits stacks for some events that we don't need and so they are excised out before we get here.
if (_eventMetadataDictionary.TryGetValue(header.MetaDataId, out EventPipeMetadata metadataHeader) &&
Expand All @@ -605,8 +640,8 @@ internal bool TryGetTemplateFromMetadata(TraceEvent unhandledEvent, out DynamicT

private DynamicTraceEventData CreateTemplate(EventPipeMetadata metadata)
{
DynamicTraceEventData template = new DynamicTraceEventData(null, metadata.EventId, 0, metadata.EventName, Guid.Empty, metadata.Opcode, null, metadata.ProviderId, metadata.ProviderName);
template.opcode = (TraceEventOpcode)metadata.Opcode;
int opcode = metadata.Opcode ?? 0;
DynamicTraceEventData template = new DynamicTraceEventData(null, metadata.EventId, 0, metadata.EventName, Guid.Empty, opcode, null, metadata.ProviderId, metadata.ProviderName);
template.opcodeName = template.opcode.ToString();
template.payloadNames = metadata.ParameterNames;
template.payloadFetches = metadata.ParameterTypes;
Expand Down Expand Up @@ -1289,6 +1324,10 @@ public static void ReadFromFormatV6(ref SpanReader reader, bool useHeaderCompres
public int HeaderSize; // The size of the event up to the payload
public int TotalNonHeaderSize; // The size of the payload, stack, and alignment padding

public byte? LevelOverride;
public byte? OpCodeOverride;
public ulong? KeywordsOverride;
public byte? VersionOverride;

public bool IsMetadata() => MetaDataId == 0; // 0 means that it's a metadata Id

Expand Down
22 changes: 15 additions & 7 deletions src/TraceEvent/EventPipe/EventPipeMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,15 @@ private void InitDefaultParameters()
/// </summary>
internal TraceEventNativeMethods.EVENT_RECORD* GetEventRecordForEventData(in EventPipeEventHeader eventData)
{
// We have already initialize all the fields of _eventRecord that do no vary from event to event.
// We have already initialize all the fields of _eventRecord that do not vary from event to event.
// Now we only have to copy over the fields that are specific to particular event.
//

// these events usually come from metadata, but they can be overridden by the label list
_eventRecord->EventHeader.Opcode = eventData.OpCodeOverride ?? Opcode ?? 0;
_eventRecord->EventHeader.Level = eventData.LevelOverride ?? Level;
_eventRecord->EventHeader.Keyword = eventData.KeywordsOverride ?? Keywords;
_eventRecord->EventHeader.Version = eventData.VersionOverride ?? EventVersion;

// Note: ThreadId isn't 32 bit on all of our platforms but ETW EVENT_RECORD* only has room for a 32 bit
// ID. We'll need to refactor up the stack if we want to expose a bigger ID.
_eventRecord->EventHeader.ThreadId = unchecked((int)eventData.ThreadId);
Expand Down Expand Up @@ -257,10 +263,10 @@ private void InitDefaultParameters()
public Dictionary<string, string> Attributes { get; private set; } = new Dictionary<string, string>();
public Guid ProviderId { get { return _eventRecord->EventHeader.ProviderId; } private set { _eventRecord->EventHeader.ProviderId = value; } }
public int EventId { get { return _eventRecord->EventHeader.Id; } private set { _eventRecord->EventHeader.Id = (ushort)value; } }
public int EventVersion { get { return _eventRecord->EventHeader.Version; } private set { _eventRecord->EventHeader.Version = (byte)value; } }
public ulong Keywords { get { return _eventRecord->EventHeader.Keyword; } private set { _eventRecord->EventHeader.Keyword = value; } }
public int Level { get { return _eventRecord->EventHeader.Level; } private set { _eventRecord->EventHeader.Level = (byte)value; } }
public byte Opcode { get { return _eventRecord->EventHeader.Opcode; } internal set { _eventRecord->EventHeader.Opcode = (byte)value; } }
public byte EventVersion { get; private set; }
public ulong Keywords { get; private set; }
public byte Level { get; private set; }
public byte? Opcode { get; private set; }

public DynamicTraceEventData.PayloadFetch[] ParameterTypes { get; internal set; }
public string[] ParameterNames { get; internal set; }
Expand Down Expand Up @@ -784,6 +790,8 @@ private static void ClearMemory(void* buffer, int length)
}

// After metadata has been read we do a set of baked in transforms.
// None of these transforms are part of the NetTrace file format, rather they are a combination of TraceEvent/EventSource specific conventions
// and workarounds for well known events in historic scenarios where data was missing.
public void ApplyTransforms()
{
//TraceEvent expects empty name to be canonicalized as null rather than ""
Expand All @@ -805,7 +813,7 @@ public void ApplyTransforms()
PopulateWellKnownEventParameters();
}

if (Opcode == 0)
if (!Opcode.HasValue)
{
ExtractImpliedOpcode();
}
Expand Down
38 changes: 37 additions & 1 deletion src/TraceEvent/EventPipe/LabelListCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ enum LabelKind : byte
TraceId = 3,
SpanId = 4,
KeyValueString = 5,
KeyValueVarInt = 6
KeyValueVarInt = 6,
OpCode = 7,
Keywords = 8,
Level = 9,
Version = 10
}

struct LabelList
Expand Down Expand Up @@ -59,6 +63,18 @@ internal static LabelList Parse(ref SpanReader reader)
}
otherLabels.Add(new KeyValuePair<string, object>(key, varInt));
break;
case LabelKind.OpCode:
result.OpCode = reader.ReadUInt8();
break;
case LabelKind.Keywords:
result.Keywords = reader.ReadUInt64();
break;
case LabelKind.Level:
result.Level = reader.ReadUInt8();
break;
case LabelKind.Version:
result.Version = reader.ReadUInt8();
break;
default:
throw new FormatException($"Unknown label kind {kind} in label list");
}
Expand All @@ -76,6 +92,10 @@ internal static LabelList Parse(ref SpanReader reader)
public Guid? TraceId { get; private set; }
public ulong? SpanId { get; private set; }
public KeyValuePair<string, object>[] OtherLabels { get; private set; }
public byte? OpCode { get; private set; }
public ulong? Keywords { get; private set; }
public byte? Level { get; private set; }
public byte? Version { get; private set; }

public IEnumerable<KeyValuePair<string, object>> AllLabels
{
Expand Down Expand Up @@ -104,6 +124,22 @@ public IEnumerable<KeyValuePair<string, object>> AllLabels
yield return label;
}
}
if (OpCode.HasValue)
{
yield return new KeyValuePair<string, object>("OpCode", OpCode.Value);
}
if (Keywords.HasValue)
{
yield return new KeyValuePair<string, object>("Keywords", Keywords.Value);
}
if (Level.HasValue)
{
yield return new KeyValuePair<string, object>("Level", Level.Value);
}
if (Version.HasValue)
{
yield return new KeyValuePair<string, object>("Version", Version.Value);
}
}
}
}
Expand Down
14 changes: 13 additions & 1 deletion src/TraceEvent/EventPipe/NetTraceFormat.md
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ A SequencePointBlock payload is encoded:
- SequenceNumber - varuint32

If Flags & 1 == 1 then the sequence point also flushes the thread cache. Logically the flush occurs after doing any thread sequence number checks so the reader can still detect dropped events.
If Flags & 2 == 2 then the sequence point also flushes the metadata cache.

## EndOfStreamBlock

Expand Down Expand Up @@ -465,13 +466,24 @@ The content of a LabelListBlock is:
- if(Kind & 0x7F == 6)
- Key - string
- Value - varint64
- if(Kind & 0x7F == 7)
- OpCode - uint8
- if(Kind & 0x7F == 8)
- Keywords - uint64
- if(Kind & 0x7F == 9)
- Level - uint8
- if(Kind & 0x7F == 10)
- Version - uint8

If the high bit of the Kind field is set that demarcates that this is the last label in a label list and the next label is part of the next list.

Similar to StackBlock, references to a row in the LabelListBlock are only valid in the file after the LabelListBlock that defines it and before the next SequencePoint block.
This prevents the reader from needing a lookup table that grows indefinitely with file length or requiring the reader to search the entire file to resolve a given label list index.

An empty LabelList can't be explicitly encoded in the LabelListBlock, but implicitly the LabelList index 0 refers to an empty LabelList.
An empty LabelList can't be explicitly encoded in the LabelListBlock, but implicitly the LabelList index 0 refers to an empty LabelList.

The OpCode, Keywords, Level, and Version fields are considered to override any value found in the metadata for the event. The trace writer is free to provide these values in either metadata
or in a LabelList, but for the common scenario where they are the same across all events of a given type metadata is probably the more space efficient option.

## Changes relative to older file format versions

Expand Down
Loading