diff --git a/krabs/krabs/parser.hpp b/krabs/krabs/parser.hpp index 2736857..bf49c28 100644 --- a/krabs/krabs/parser.hpp +++ b/krabs/krabs/parser.hpp @@ -78,6 +78,15 @@ namespace krabs { template bool try_parse(std::wstring_view name, T &out); + /** + * + * Attempts to retrieve the given property by name and type, + * starting the name scan at the given hint index. + * + */ + template + bool try_parse(std::wstring_view name, T &out, ULONG hint); + /** * * Attempts to parse the given property by name and type. If the @@ -87,22 +96,36 @@ namespace krabs { template T parse(std::wstring_view name); + /** + * + * Attempts to parse the given property by name and type, + * starting the name scan at the given hint index. + * + */ + template + T parse(std::wstring_view name, ULONG hint); + template auto view_of(std::wstring_view name, Adapter &adapter) -> collection_view; + template + auto view_of(std::wstring_view name, ULONG hint, Adapter &adapter) -> collection_view; + private: property_info find_property(std::wstring_view name); - void cache_property(ULONG index, property_info info); + property_info find_property(std::wstring_view name, ULONG hint); + void ensure_cache_populated(); private: const schema &schema_; const BYTE *pEndBuffer_; - BYTE *pBufferIndex_; - ULONG lastPropertyIndex_; - // Persistent name to index map shared across all events of the same type. - const property_name_map *pPropertyNames_; - // Maintain a mapping from property index to blob data location. + + // Fully populated on first access -- maps property index to its + // location and size in the event's user-data blob. std::vector propertyCache_; + + // Hint for name scan -- start from here on the next lookup. 
+ ULONG nextHint_; }; // Implementation @@ -111,10 +134,7 @@ namespace krabs { inline parser::parser(const schema &s) : schema_(s) , pEndBuffer_((BYTE*)s.record_.UserData + s.record_.UserDataLength) - , pBufferIndex_((BYTE*)s.record_.UserData) - , lastPropertyIndex_(0) - , pPropertyNames_(s.pPropertyNames_) - , propertyCache_(s.pSchema_->PropertyCount) + , nextHint_(0) {} inline property_iterator parser::properties() const @@ -122,101 +142,80 @@ namespace krabs { return property_iterator(schema_); } - inline property_info parser::find_property(std::wstring_view name) + inline void parser::ensure_cache_populated() { - // A schema contains a collection of properties that are keyed by name. - // These properties are stored in a blob of bytes that needs to be - // interpreted according to information that is packaged up in the - // schema and that can be retrieved using the Tdh* APIs. This format - // requires a linear traversal over the blob, incrementing according to - // the contents within it. This is janky, so our strategy is to - // minimize this as much as possible via caching. - - const ULONG totalPropCount = schema_.pSchema_->PropertyCount; - - // Resolve property name to index. - ULONG index = totalPropCount; // sentinel = not found - if (pPropertyNames_) { - // Fast path: use the persistent name to index map shared across - // all events of the same type. - auto it = pPropertyNames_->find(name); - if (it != pPropertyNames_->end()) { - index = it->second; - } - } else { - // Fallback: linear scan of property names in the schema. 
- for (ULONG i = 0; i < totalPropCount; ++i) { - auto &propInfo = schema_.pSchema_->EventPropertyInfoArray[i]; - const wchar_t *pName = reinterpret_cast( - reinterpret_cast(schema_.pSchema_) + - propInfo.NameOffset); - if (name == pName) { - index = i; - break; - } - } - } - - if (index >= totalPropCount) { - return property_info(); + if (!propertyCache_.empty()) { + return; } - // The first step is to use our cache for the property to see if we've - // discovered it already. - if (index < lastPropertyIndex_) { - return propertyCache_[index]; + const ULONG totalPropCount = schema_.pSchema_->PropertyCount; + if (totalPropCount == 0) { + return; } - assert((pBufferIndex_ <= pEndBuffer_ && pBufferIndex_ >= schema_.record_.UserData) && - "invariant: we should've already thrown for falling off the edge"); - - // accept that last property can be omitted from buffer. this happens if last property - // is string but empty and the provider strips the null terminator - assert((pBufferIndex_ == pEndBuffer_ ? ((totalPropCount - lastPropertyIndex_) <= 1) - : true) - && "invariant: if we've exhausted our buffer, then we must've" - "exhausted the properties as well"); + propertyCache_.reserve(totalPropCount); + BYTE *pBuffer = (BYTE*)schema_.record_.UserData; - // We've not looked up this property before, so we have to do the work - // to find it. While we're going through the blob to find it, we'll - // remember what we've seen to save time later. - // - // Note: The name-to-index map is built once per schema type (cheap - // metadata scan). But the blob walk below is lazy per-event -- we - // only walk forward to the requested index, avoiding overhead when - // only a subset of properties are needed. 
- while (lastPropertyIndex_ <= index) { - - auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[lastPropertyIndex_]; + for (ULONG i = 0; i < totalPropCount; ++i) { + auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[i]; const wchar_t *pName = reinterpret_cast( reinterpret_cast(schema_.pSchema_) + currentPropInfo.NameOffset); ULONG propertyLength = size_provider::get_property_size( - pBufferIndex_, + pBuffer, pName, schema_.record_, currentPropInfo); - // verify that the length of the property doesn't exceed the buffer - if (pBufferIndex_ + propertyLength > pEndBuffer_) { + if (pBuffer + propertyLength > pEndBuffer_) { throw std::out_of_range("Property length past end of property buffer"); } - property_info propInfo(pBufferIndex_, currentPropInfo, propertyLength); - cache_property(lastPropertyIndex_, propInfo); - - // advance the buffer index since we've already processed this property - pBufferIndex_ += propertyLength; - lastPropertyIndex_++; + propertyCache_.emplace_back(pBuffer, currentPropInfo, propertyLength); + pBuffer += propertyLength; } + } - return propertyCache_[index]; + inline property_info parser::find_property(std::wstring_view name) + { + return find_property(name, nextHint_); } - inline void parser::cache_property(ULONG index, property_info info) + inline property_info parser::find_property(std::wstring_view name, ULONG hint) { - propertyCache_[index] = info; + ensure_cache_populated(); + + const ULONG totalPropCount = schema_.pSchema_->PropertyCount; + if (totalPropCount == 0) { + return property_info(); + } + + // Hinted linear scan. In the common case (sequential access + // or caller-provided index) this hits on the first comparison. 
+ if (hint >= totalPropCount) { + hint = 0; + } + + ULONG index = totalPropCount; // sentinel = not found + for (ULONG n = 0; n < totalPropCount; ++n) { + ULONG i = (hint + n) % totalPropCount; + auto &propInfo = schema_.pSchema_->EventPropertyInfoArray[i]; + const wchar_t *pName = reinterpret_cast( + reinterpret_cast(schema_.pSchema_) + + propInfo.NameOffset); + if (name == pName) { + index = i; + break; + } + } + + if (index >= totalPropCount) { + return property_info(); + } + + nextHint_ = (index + 1) % totalPropCount; + return propertyCache_[index]; } inline void throw_if_property_not_found(const property_info &propInfo) @@ -244,6 +243,13 @@ namespace krabs { // try_parse // ------------------------------------------------------------------------ + template + bool parser::try_parse(std::wstring_view name, T &out, ULONG hint) + { + nextHint_ = hint; + return try_parse(name, out); + } + template bool parser::try_parse(std::wstring_view name, T &out) { @@ -269,6 +275,13 @@ namespace krabs { // parse // ------------------------------------------------------------------------ + template + T parser::parse(std::wstring_view name, ULONG hint) + { + nextHint_ = hint; + return parse(name); + } + template T parser::parse(std::wstring_view name) { @@ -435,6 +448,14 @@ namespace krabs { // view_of // ------------------------------------------------------------------------ + template + auto parser::view_of(std::wstring_view name, ULONG hint, Adapter &adapter) + -> collection_view + { + nextHint_ = hint; + return view_of(name, adapter); + } + template auto parser::view_of(std::wstring_view name, Adapter &adapter) -> collection_view diff --git a/krabs/krabs/schema.hpp b/krabs/krabs/schema.hpp index 5212e8c..9016f7b 100644 --- a/krabs/krabs/schema.hpp +++ b/krabs/krabs/schema.hpp @@ -308,8 +308,6 @@ namespace krabs { private: const EVENT_RECORD &record_; const TRACE_EVENT_INFO *pSchema_; - // Persistent name to index map, owned by schema_locator. May be nullptr. 
- const property_name_map *pPropertyNames_; private: friend std::wstring event_name(const schema &); @@ -339,13 +337,11 @@ namespace krabs { inline schema::schema(const EVENT_RECORD &record, const krabs::schema_locator &schema_locator) : record_(record) , pSchema_(schema_locator.get_event_schema(record)) - , pPropertyNames_(schema_locator.get_property_names(pSchema_)) { } inline schema::schema(const EVENT_RECORD &record, const PTRACE_EVENT_INFO pSchema) : record_(record) , pSchema_(pSchema) - , pPropertyNames_(nullptr) { } inline bool schema::operator==(const schema &other) const diff --git a/krabs/krabs/schema_locator.hpp b/krabs/krabs/schema_locator.hpp index db00d56..4dffb7d 100644 --- a/krabs/krabs/schema_locator.hpp +++ b/krabs/krabs/schema_locator.hpp @@ -173,14 +173,6 @@ namespace krabs { */ std::string_view get_trace_logger_event_name(const EVENT_RECORD &); - /** - * - * Maps property names to their index in the schema. - * Keys are wstring_views pointing into stable TRACE_EVENT_INFO memory. - * - */ - using property_name_map = std::unordered_map; - /** * * Fetches and caches schemas from TDH. @@ -215,21 +207,8 @@ namespace krabs { */ bool has_event_schema(const EVENT_RECORD& record) const; - /** - * - * Returns the persistent property name to index map for a schema. - * The map is built when the schema is first cached. - * Returns nullptr if pSchema is null or not in the cache. - * - */ - const property_name_map* get_property_names(const TRACE_EVENT_INFO* pSchema) const; - private: - void build_property_names(const TRACE_EVENT_INFO* pSchema) const; - mutable std::unordered_map, TDHSTATUS>> cache_; - // Persistent property name to index maps, keyed by schema pointer. - mutable std::unordered_map property_name_cache_; }; // Implementation @@ -332,10 +311,9 @@ namespace krabs { // Add the new instance to the cache. // NB: key's 'internalize_name' gets called by the cctor here. 
- if (status == ERROR_SUCCESS) { + if (status == ERROR_SUCCESS) cache_.emplace(key, std::move(buffer)); - build_property_names(returnVal); - } else + else cache_.emplace(key, status); return returnVal; @@ -348,29 +326,6 @@ namespace krabs { return status == ERROR_SUCCESS; } - inline void schema_locator::build_property_names(const TRACE_EVENT_INFO* pSchema) const - { - property_name_map names; - for (ULONG i = 0; i < pSchema->PropertyCount; ++i) { - const wchar_t* pName = reinterpret_cast( - reinterpret_cast(pSchema) + - pSchema->EventPropertyInfoArray[i].NameOffset); - names.emplace(std::wstring_view(pName), i); - } - property_name_cache_.emplace(pSchema, std::move(names)); - } - - inline const property_name_map* schema_locator::get_property_names(const TRACE_EVENT_INFO* pSchema) const - { - if (!pSchema) return nullptr; - - auto it = property_name_cache_.find(pSchema); - if (it != property_name_cache_.end()) { - return &it->second; - } - return nullptr; - } - inline std::unique_ptr get_event_schema_from_tdh(const EVENT_RECORD &record) { TDHSTATUS status = ERROR_SUCCESS; diff --git a/krabs/krabs/tdh_helpers.hpp b/krabs/krabs/tdh_helpers.hpp index 2e633f1..d4bf464 100644 --- a/krabs/krabs/tdh_helpers.hpp +++ b/krabs/krabs/tdh_helpers.hpp @@ -126,12 +126,9 @@ namespace krabs { throw_if_invalid(name, info, tdh_type); \ } - // NOTE: don't just blindly add assertions here, some types - // that seem trivial (e.g. bool) are not because of differences - // between the representation in C++ and the representation in ETW. - // Ensure that type sizes match and that the ETW form isn't - // a variant or variable length. A type that requires a specialized - // assertion will also require a specialized parser. + // Types that check a single InType can use the BUILD_ASSERT macro. + // Types that check OutType or accept multiple InTypes need manual + // specializations below. 
// strings BUILD_ASSERT(std::wstring, TDH_INTYPE_UNICODESTRING); @@ -156,6 +153,12 @@ namespace krabs { BUILD_ASSERT(::FILETIME, TDH_INTYPE_FILETIME); BUILD_ASSERT(::SYSTEMTIME, TDH_INTYPE_SYSTEMTIME); + BUILD_ASSERT(pointer, TDH_INTYPE_POINTER); + BUILD_ASSERT(bool, TDH_INTYPE_BOOLEAN); + + // NOTE: No assert for krabs::binary which can be used to read + // any property as raw bytes and so it is valid with any type. + #undef BUILD_ASSERT template <> @@ -195,30 +198,6 @@ namespace krabs { } } - template <> - inline void assert_valid_assignment( - std::wstring_view, const property_info& info) - { - auto inType = info.pEventPropertyInfo_->nonStructType.InType; - - if (inType != TDH_INTYPE_POINTER) { - throw std::runtime_error( - "Requested a POINTER from property that is not one"); - } - } - - template <> - inline void assert_valid_assignment( - std::wstring_view, const property_info& info) - { - auto inType = info.pEventPropertyInfo_->nonStructType.InType; - - if (inType != TDH_INTYPE_BOOLEAN) { - throw std::runtime_error( - "Requested a BOOLEAN from property that is not one"); - } - } - #endif // NDEBUG } /* namespace debug */