From 973dc9c06dcd3d035ebd039cfb9ea457721ec213 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Tue, 9 May 2023 21:31:22 +0000 Subject: [PATCH] Avoid ptrdiff between pointers to different allocations When running both Undefined Behavior Sanitizer (UBSan) and Hardware-Assisted Address Sanitizer (HWASan) on Fuchsia, ubsan complained about a pointer overflow when computing the new token->name pointer. This happens because the initial pointer diff takes the offset between two allocations with different tags, so the arithmetic results in a very large diff that gets added to the original token->name ptr which overflows. Any arithmetic between pointers to two different allocations is undefined behavior, so hwasan+ubsan is catching a bug here. It looks like rapidjson is just attempting to update the name pointers to strings copied into the new nameBuffer_ via this arithmetic, but since these strings and the tokens are in the same buffer, the offset between them should be the same. For each token we can just get this offset and adjust the new name pointers accordingly which avoids the bad arithmetic. --- include/rapidjson/pointer.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index 05b1704..6f4ef38 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -890,10 +890,16 @@ private: std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch)); } - // Adjust pointers to name buffer - std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_; - for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t) - t->name += diff; + // The names of each token point to a string in the nameBuffer_. The + // previous memcpy copied over string pointers into the rhs.nameBuffer_, + // but they should point to the strings in the new nameBuffer_. 
+ for (size_t i = 0; i < rhs.tokenCount_; ++i) { + // The offset between the string address and the name buffer should + // still be constant, so we can just get this offset and set each new + // token name according to the new buffer start + the known offset. + std::ptrdiff_t name_offset = rhs.tokens_[i].name - rhs.nameBuffer_; + tokens_[i].name = nameBuffer_ + name_offset; + } return nameBuffer_ + nameBufferSize; }