Figure out stupid math shit

Signed-off-by: Slendi <slendi@socopon.com>
This commit is contained in:
2026-01-10 16:15:36 +02:00
parent f896ddae74
commit e0ca1f1043
475 changed files with 499637 additions and 14 deletions

View File

@@ -0,0 +1,76 @@
#ifndef __TRACY__CHARUTIL_HPP__
#define __TRACY__CHARUTIL_HPP__
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#define XXH_INLINE_ALL
#include "tracy_xxhash.h"
namespace tracy
{
namespace charutil
{
// Hashes a NUL-terminated string with XXH3_64bits; the byte count comes from strlen().
static inline size_t hash( const char* str )
{
    return XXH3_64bits( str, strlen( str ) );
}
// Hashes exactly `sz` bytes starting at `str` with XXH3_64bits.
static inline size_t hash( const char* str, size_t sz )
{
    const auto digest = XXH3_64bits( str, sz );
    return digest;
}
// Hash functor for NUL-terminated C strings; delegates to hash( const char* ).
struct Hasher
{
    size_t operator()( const char* key ) const
    {
        const size_t digest = hash( key );
        return digest;
    }
};
// Equality functor for C strings: true when strcmp() reports identical contents.
struct Comparator
{
    bool operator()( const char* lhs, const char* rhs ) const
    {
        const int order = strcmp( lhs, rhs );
        return order == 0;
    }
};
// Ordering functor for C strings: true when lhs sorts before rhs according to strcmp().
struct LessComparator
{
    bool operator()( const char* lhs, const char* rhs ) const
    {
        const int order = strcmp( lhs, rhs );
        return order < 0;
    }
};
// Non-owning (pointer, length) string key with matching hash and equality functors.
struct StringKey
{
    const char* ptr;
    size_t sz;

    // Hashes the `sz` bytes that `ptr` refers to.
    struct Hasher
    {
        size_t operator()( const StringKey& key ) const
        {
            const size_t digest = hash( key.ptr, key.sz );
            return digest;
        }
    };

    // Keys are equal when their lengths match and their bytes compare equal.
    struct Comparator
    {
        bool operator()( const StringKey& lhs, const StringKey& rhs ) const
        {
            if( lhs.sz != rhs.sz ) return false;
            return memcmp( lhs.ptr, rhs.ptr, lhs.sz ) == 0;
        }
    };
};
}
}
#endif

View File

@@ -0,0 +1,850 @@
#ifndef __TRACYEVENT_HPP__
#define __TRACYEVENT_HPP__
#include <assert.h>
#include <limits>
#include <stdint.h>
#include <string>
#include <string.h>
#include "TracyCharUtil.hpp"
#include "TracyShortPtr.hpp"
#include "TracySortedVector.hpp"
#include "TracyVector.hpp"
#include "tracy_robin_hood.h"
#include "../public/common/TracyForceInline.hpp"
#include "../public/common/TracyQueue.hpp"
namespace tracy
{
#pragma pack( push, 1 )
// String reference: a 64-bit payload (`str`) followed by one byte of flags.
// Declared under `#pragma pack( push, 1 )`, so members are laid out without padding.
struct StringRef
{
// Tag passed to the constructor; `isidx` is set to ( t == Idx ).
enum Type { Ptr, Idx };
// Default state: payload zero and every flag bit cleared.
tracy_force_inline StringRef() : str( 0 ), __data( 0 ) {}
// Stores `data` as the payload, clears the whole flag byte, then sets `isidx` and `active`.
tracy_force_inline StringRef( Type t, uint64_t data )
: str( data )
, __data( 0 )
{
isidx = t == Idx;
active = 1;
}
uint64_t str;
// `__data` overlays the two flag bits so the constructors can zero them with one store.
union
{
struct
{
uint8_t isidx : 1;
uint8_t active : 1;
};
uint8_t __data;
};
};
// Hash functor for StringRef: hashes the object's raw bytes.
struct StringRefHasher
{
    size_t operator()( const StringRef& key ) const
    {
        const auto bytes = (const char*)&key;
        return charutil::hash( bytes, sizeof( StringRef ) );
    }
};
// Equality functor for StringRef: byte-wise comparison of the two objects.
struct StringRefComparator
{
    bool operator()( const StringRef& lhs, const StringRef& rhs ) const
    {
        const int diff = memcmp( &lhs, &rhs, sizeof( StringRef ) );
        return diff == 0;
    }
};
class StringIdx
{
public:
tracy_force_inline StringIdx() { memset( m_idx, 0, sizeof( m_idx ) ); }
tracy_force_inline StringIdx( uint32_t idx )
{
SetIdx( idx );
}
tracy_force_inline void SetIdx( uint32_t idx )
{
idx++;
memcpy( m_idx, &idx, 3 );
}
tracy_force_inline uint32_t Idx() const
{
uint32_t idx = 0;
memcpy( &idx, m_idx, 3 );
assert( idx != 0 );
return idx - 1;
}
tracy_force_inline bool Active() const
{
uint32_t zero = 0;
return memcmp( m_idx, &zero, 3 ) != 0;
}
private:
uint8_t m_idx[3];
};
// Hash functor for StringIdx: hashes the object's raw bytes.
struct StringIdxHasher
{
    size_t operator()( const StringIdx& key ) const
    {
        const auto bytes = (const char*)&key;
        return charutil::hash( bytes, sizeof( StringIdx ) );
    }
};
// Equality functor for StringIdx: byte-wise comparison of the two objects.
struct StringIdxComparator
{
    bool operator()( const StringIdx& lhs, const StringIdx& rhs ) const
    {
        const int diff = memcmp( &lhs, &rhs, sizeof( StringIdx ) );
        return diff == 0;
    }
};
class Int24
{
public:
tracy_force_inline Int24() { memset( m_val, 0, sizeof( m_val ) ); }
tracy_force_inline Int24( uint32_t val )
{
SetVal( val );
}
tracy_force_inline void SetVal( uint32_t val )
{
memcpy( m_val, &val, 2 );
val >>= 16;
memcpy( m_val+2, &val, 1 );
}
tracy_force_inline uint32_t Val() const
{
uint8_t hi;
memcpy( &hi, m_val+2, 1 );
uint16_t lo;
memcpy( &lo, m_val, 2 );
return ( uint32_t( hi ) << 16 ) | lo;
}
private:
uint8_t m_val[3];
};
// Signed value stored in six bytes.
class Int48
{
public:
    // Leaves the stored bytes uninitialised; call Clear() or SetVal() before reading.
    tracy_force_inline Int48() {}
    tracy_force_inline Int48( int64_t val ) { SetVal( val ); }

    // Zeroes all six bytes.
    tracy_force_inline void Clear()
    {
        memset( m_val, 0, sizeof( m_val ) );
    }

    // Copies the first four bytes of `val`, then the first two bytes of ( val >> 32 ).
    tracy_force_inline void SetVal( int64_t val )
    {
        const int64_t upper = val >> 32;
        memcpy( m_val, &val, 4 );
        memcpy( m_val+4, &upper, 2 );
    }

    // Rebuilds the 64-bit value; the signed 16-bit high part supplies the sign extension.
    tracy_force_inline int64_t Val() const
    {
        uint32_t lo;
        int16_t hi;
        memcpy( &lo, m_val, 4 );
        memcpy( &hi, m_val+4, 2 );
        const uint64_t upper = uint64_t( hi ) << 32;
        return int64_t( upper ) | lo;
    }

    // True when the top bit of the last stored byte is clear.
    tracy_force_inline bool IsNonNegative() const
    {
        return ( m_val[5] & 0x80 ) == 0;
    }

private:
    uint8_t m_val[6];
};
// Ordering functor for Int48: compares the decoded 64-bit values.
// The call operator is const so the functor can also be invoked through a
// const comparator object.
struct Int48Sort
{
    bool operator()( const Int48& lhs, const Int48& rhs ) const
    {
        return lhs.Val() < rhs.Val();
    }
};
// Source location fields: three string references plus numeric `line` and `color` values.
// Declared under `#pragma pack( push, 1 )`, so the members are laid out without padding;
// SourceLocationHasher/SourceLocationComparator operate on exactly sizeof( SourceLocationBase ) bytes.
struct SourceLocationBase
{
StringRef name;
StringRef function;
StringRef file;
uint32_t line;
uint32_t color;
};
// SourceLocationBase extended with a `namehash` value.
struct SourceLocation : public SourceLocationBase
{
// `mutable`, so it can be written through a const SourceLocation.
mutable uint32_t namehash;
};
enum { SourceLocationSize = sizeof( SourceLocation ) };
// Zone record. Declared under `#pragma pack( push, 1 )`, so the fields sit back to back and
// several accessors rely on that exact byte layout:
//   _start_srcloc : bytes 0..1 = source location (int16), bytes 2..7 = start time (48 bits)
//   _child2 + the first two bytes of _end_child1 : one 32-bit child value
//                   (Child()/SetChild() copy 4 bytes starting at &_child2)
//   _end_child1   : bytes 2..7 = end time (48 bits)
// The setters write at byte offsets while the getters use shifts/masks on the 64-bit word;
// the two views agree only on a little-endian host.
struct ZoneEvent
{
// Leaves every field uninitialised.
tracy_force_inline ZoneEvent() {};
tracy_force_inline int64_t Start() const { return int64_t( _start_srcloc ) >> 16; }
tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_srcloc)+2, &start, 4 ); memcpy( ((char*)&_start_srcloc)+6, ((char*)&start)+4, 2 ); }
tracy_force_inline int64_t End() const { return int64_t( _end_child1 ) >> 16; }
tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_end_child1)+2, &end, 4 ); memcpy( ((char*)&_end_child1)+6, ((char*)&end)+4, 2 ); }
// True when the top bit of _end_child1 is clear.
tracy_force_inline bool IsEndValid() const { return ( _end_child1 >> 63 ) == 0; }
tracy_force_inline int16_t SrcLoc() const { return int16_t( _start_srcloc & 0xFFFF ); }
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_start_srcloc, &srcloc, 2 ); }
// Reads 4 bytes starting at _child2; this spans into the first two bytes of _end_child1.
tracy_force_inline int32_t Child() const { int32_t child; memcpy( &child, &_child2, 4 ); return child; }
// Writes 4 bytes starting at _child2; this overwrites the first two bytes of _end_child1.
tracy_force_inline void SetChild( int32_t child ) { memcpy( &_child2, &child, 4 ); }
// Tests bit 7 of the second byte of _end_child1, which is the last of the four bytes used by Child().
tracy_force_inline bool HasChildren() const { uint8_t tmp; memcpy( &tmp, ((char*)&_end_child1)+1, 1 ); return ( tmp >> 7 ) == 0; }
// Writes start time and source location together as one 64-bit store.
tracy_force_inline void SetStartSrcLoc( int64_t start, int16_t srcloc ) { assert( start < (int64_t)( 1ull << 47 ) ); start <<= 16; start |= uint16_t( srcloc ); memcpy( &_start_srcloc, &start, 8 ); }
uint64_t _start_srcloc;
uint16_t _child2;
uint64_t _end_child1;
uint32_t extra;
};
enum { ZoneEventSize = sizeof( ZoneEvent ) };
static_assert( std::is_standard_layout<ZoneEvent>::value, "ZoneEvent is not standard layout" );
// Additional per-zone data: two 24-bit values and two string indices, 12 bytes in total
// under the surrounding `#pragma pack( push, 1 )`.
struct ZoneExtra
{
Int24 callstack;
StringIdx text;
StringIdx name;
Int24 color;
};
enum { ZoneExtraSize = sizeof( ZoneExtra ) };
// This union exploits the fact that the current implementations of x64 and arm64 do not provide
// full 64 bit address space. The high bits must be bit-extended, so 0x80... is an invalid pointer.
// This allows using the highest bit as a selector between a native pointer and a table index here.
//
// `data` overlays the three bit-fields so the whole id can be handled as one 64-bit value
// (operator== compares `data`).
// NOTE(review): which physical bit `sel` and `custom` occupy depends on the compiler's
// bit-field allocation order — confirm it matches the "highest bit" wording above.
union CallstackFrameId
{
struct
{
uint64_t idx : 62;
uint64_t sel : 1;
uint64_t custom : 1;
};
uint64_t data;
};
enum { CallstackFrameIdSize = sizeof( CallstackFrameId ) };
// Two frame ids are equal when their full 64-bit representations match.
static tracy_force_inline bool operator==( const CallstackFrameId& lhs, const CallstackFrameId& rhs )
{
    const bool same = lhs.data == rhs.data;
    return same;
}
// One sample: a 48-bit time value and a 24-bit callstack value (9 bytes when packed).
struct SampleData
{
Int48 time;
Int24 callstack;
};
enum { SampleDataSize = sizeof( SampleData ) };
// Ordering functor for SampleData: compares the decoded `time` values.
// The call operator is const so the functor can also be invoked through a
// const comparator object.
struct SampleDataSort
{
    bool operator()( const SampleData& lhs, const SampleData& rhs ) const
    {
        return lhs.time.Val() < rhs.time.Val();
    }
};
// Sample entry carrying a 48-bit time, a 16-bit thread value and a callstack frame id.
struct SampleDataRange
{
Int48 time;
uint16_t thread;
CallstackFrameId ip;
};
enum { SampleDataRangeSize = sizeof( SampleDataRange ) };
// Six sorted vectors of Int48 values, one per hardware counter named by the member.
struct HwSampleData
{
    SortedVector<Int48, Int48Sort> cycles;
    SortedVector<Int48, Int48Sort> retired;
    SortedVector<Int48, Int48Sort> cacheRef;
    SortedVector<Int48, Int48Sort> cacheMiss;
    SortedVector<Int48, Int48Sort> branchRetired;
    SortedVector<Int48, Int48Sort> branchMiss;

    // True only when every vector reports itself sorted; stops at the first that does not.
    bool is_sorted() const
    {
        if( !cycles.is_sorted() ) return false;
        if( !retired.is_sorted() ) return false;
        if( !cacheRef.is_sorted() ) return false;
        if( !cacheMiss.is_sorted() ) return false;
        if( !branchRetired.is_sorted() ) return false;
        return branchMiss.is_sorted();
    }

    // Sorts each vector that is not already sorted, in declaration order.
    void sort()
    {
        const auto sortIfNeeded = []( auto& vec ) {
            if( !vec.is_sorted() ) vec.sort();
        };
        sortIfNeeded( cycles );
        sortIfNeeded( retired );
        sortIfNeeded( cacheRef );
        sortIfNeeded( cacheMiss );
        sortIfNeeded( branchRetired );
        sortIfNeeded( branchMiss );
    }
};
enum { HwSampleDataSize = sizeof( HwSampleData ) };
// Lock event record (packed). `_time_srcloc` holds the source location in bytes 0..1 and a
// 48-bit time in bytes 2..7; the byte-offset setters and the shift/mask getters agree only
// on a little-endian host.
struct LockEvent
{
// Kind of lock operation, stored in a single byte.
enum class Type : uint8_t
{
Wait,
Obtain,
Release,
WaitShared,
ObtainShared,
ReleaseShared
};
tracy_force_inline int64_t Time() const { return int64_t( _time_srcloc ) >> 16; }
tracy_force_inline void SetTime( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_srcloc)+2, &time, 4 ); memcpy( ((char*)&_time_srcloc)+6, ((char*)&time)+4, 2 ); }
tracy_force_inline int16_t SrcLoc() const { return int16_t( _time_srcloc & 0xFFFF ); }
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_time_srcloc, &srcloc, 2 ); }
uint64_t _time_srcloc;
uint8_t thread;
Type type;
};
// LockEvent extended with two 64-bit values for shared locking.
// NOTE(review): both look like per-thread bit masks (cf. MaxLockThreads) — confirm against the users.
struct LockEventShared : public LockEvent
{
uint64_t waitShared;
uint64_t sharedList;
};
// Timeline entry: a short pointer to a LockEvent plus locking-thread, lock-count and
// wait-list values stored alongside it.
struct LockEventPtr
{
short_ptr<LockEvent> ptr;
uint8_t lockingThread;
uint8_t lockCount;
uint64_t waitList;
};
enum { LockEventSize = sizeof( LockEvent ) };
enum { LockEventSharedSize = sizeof( LockEventShared ) };
enum { LockEventPtrSize = sizeof( LockEventPtr ) };
// Number of bits in `waitList` (64).
enum { MaxLockThreads = sizeof( LockEventPtr::waitList ) * 8 };
// `lockCount` must be able to represent MaxLockThreads.
static_assert( std::numeric_limits<decltype(LockEventPtr::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
// Forward declaration; the enumerators are defined elsewhere.
enum class LockType : uint8_t;
// Per-lock state: naming, source location, event timeline, thread bookkeeping and time ranges.
struct LockMap
{
// Time interval that starts out empty (start = max, end = min).
struct TimeRange
{
int64_t start = std::numeric_limits<int64_t>::max();
int64_t end = std::numeric_limits<int64_t>::min();
};
StringIdx customName;
int16_t srcloc;
Vector<LockEventPtr> timeline;
// Maps a 64-bit thread value to an 8-bit value.
// NOTE(review): presumably the compact index into `threadList`/`range` — confirm.
unordered_flat_map<uint64_t, uint8_t> threadMap;
std::vector<uint64_t> threadList;
LockType type;
int64_t timeAnnounce;
int64_t timeTerminate;
bool valid;
bool isContended;
uint64_t lockingThread;
// 64 entries, the same number as MaxLockThreads.
TimeRange range[64];
};
// Plain record describing a highlighted lock interval: lock id, begin/end times, an 8-bit
// thread value and a `blocked` flag.
struct LockHighlight
{
int64_t id;
int64_t begin;
int64_t end;
uint8_t thread;
bool blocked;
};
// GPU zone record (packed). Each 64-bit word stores a 48-bit time in bytes 2..7 and a
// 16-bit value in bytes 0..1:
//   _cpuStart_srcloc : CPU start time + source location
//   _cpuEnd_thread   : CPU end time   + thread
//   _gpuStart_child1 : GPU start time + low 16 bits of the child value
//   _gpuEnd_child2   : GPU end time   + high 16 bits of the child value
// The byte-offset setters and the shift/mask getters agree only on a little-endian host.
struct GpuEvent
{
tracy_force_inline int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; }
tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); }
tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; }
tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); }
tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; }
// Unlike the other time setters, the range assertion here is commented out.
tracy_force_inline void SetGpuStart( int64_t gpuStart ) { /*assert( gpuStart < (int64_t)( 1ull << 47 ) );*/ memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); }
tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; }
tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); }
tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); }
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); }
tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); }
tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); }
// Combines the two 16-bit halves kept in _gpuStart_child1 (low) and _gpuEnd_child2 (high).
tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); }
tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); }
uint64_t _cpuStart_srcloc;
uint64_t _cpuEnd_thread;
uint64_t _gpuStart_child1;
uint64_t _gpuEnd_child2;
Int24 callstack;
};
enum { GpuEventSize = sizeof( GpuEvent ) };
static_assert( std::is_standard_layout<GpuEvent>::value, "GpuEvent is not standard layout" );
// Memory allocation record (packed). Several values share each 64-bit word:
//   _ptr_csalloc1      : byte 0     = low 8 bits of the 24-bit alloc callstack value,
//                        bytes 1..7 = pointer (56 bits)
//   _size_csalloc2     : bytes 0..1 = upper 16 bits of the alloc callstack value,
//                        bytes 2..7 = size (48 bits)
//   _time_thread_alloc : bytes 0..1 = thread, bytes 2..7 = allocation time (48 bits)
//   _time_thread_free  : bytes 0..1 = thread, bytes 2..7 = free time (48 bits)
// The byte-offset setters and the shift/mask getters agree only on a little-endian host.
struct MemEvent
{
    tracy_force_inline uint64_t Ptr() const { return uint64_t( int64_t( _ptr_csalloc1 ) >> 8 ); }
    tracy_force_inline void SetPtr( uint64_t ptr ) { memcpy( ((char*)&_ptr_csalloc1)+1, &ptr, 4 ); memcpy( ((char*)&_ptr_csalloc1)+5, ((char*)&ptr)+4, 2 ); memcpy( ((char*)&_ptr_csalloc1)+7, ((char*)&ptr)+6, 1 ); }
    tracy_force_inline uint64_t Size() const { return _size_csalloc2 >> 16; }
    tracy_force_inline void SetSize( uint64_t size ) { assert( size < ( 1ull << 47 ) ); memcpy( ((char*)&_size_csalloc2)+2, &size, 4 ); memcpy( ((char*)&_size_csalloc2)+6, ((char*)&size)+4, 2 ); }
    tracy_force_inline uint32_t CsAlloc() const { return uint8_t( _ptr_csalloc1 ) | ( uint16_t( _size_csalloc2 ) << 8 ); }
    tracy_force_inline void SetCsAlloc( uint32_t csAlloc ) { memcpy( &_ptr_csalloc1, &csAlloc, 1 ); memcpy( &_size_csalloc2, ((char*)&csAlloc)+1, 2 ); }
    tracy_force_inline int64_t TimeAlloc() const { return int64_t( _time_thread_alloc ) >> 16; }
    tracy_force_inline void SetTimeAlloc( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_thread_alloc)+2, &time, 4 ); memcpy( ((char*)&_time_thread_alloc)+6, ((char*)&time)+4, 2 ); }
    tracy_force_inline int64_t TimeFree() const { return int64_t( _time_thread_free ) >> 16; }
    tracy_force_inline void SetTimeFree( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_thread_free)+2, &time, 4 ); memcpy( ((char*)&_time_thread_free)+6, ((char*)&time)+4, 2 ); }
    tracy_force_inline uint16_t ThreadAlloc() const { return uint16_t( _time_thread_alloc ); }
    tracy_force_inline void SetThreadAlloc( uint16_t thread ) { memcpy( &_time_thread_alloc, &thread, 2 ); }
    tracy_force_inline uint16_t ThreadFree() const { return uint16_t( _time_thread_free ); }
    tracy_force_inline void SetThreadFree( uint16_t thread ) { memcpy( &_time_thread_free, &thread, 2 ); }
    // Writes time and thread in one 64-bit store. The shift is done on an unsigned copy of the
    // bits (as in SetTimeThreadFree) so that a negative `time` is not left-shifted as a signed value.
    tracy_force_inline void SetTimeThreadAlloc( int64_t time, uint16_t thread ) { uint64_t t; memcpy( &t, &time, 8 ); t <<= 16; t |= thread; memcpy( &_time_thread_alloc, &t, 8 ); }
    tracy_force_inline void SetTimeThreadFree( int64_t time, uint16_t thread ) { uint64_t t; memcpy( &t, &time, 8 ); t <<= 16; t |= thread; memcpy( &_time_thread_free, &t, 8 ); }

    uint64_t _ptr_csalloc1;
    uint64_t _size_csalloc2;
    Int24 csFree;
    uint64_t _time_thread_alloc;
    uint64_t _time_thread_free;
};
enum { MemEventSize = sizeof( MemEvent ) };
static_assert( std::is_standard_layout<MemEvent>::value, "MemEvent is not standard layout" );
// Common callstack frame fields: name and file string indices plus a line number.
struct CallstackFrameBasic
{
StringIdx name;
StringIdx file;
uint32_t line;
};
// CallstackFrameBasic extended with a 64-bit symbol address.
struct CallstackFrame : public CallstackFrameBasic
{
uint64_t symAddr;
};
// CallstackFrameBasic extended with image name, call-site file/line, an inline flag and a
// 24-bit size value.
struct SymbolData : public CallstackFrameBasic
{
StringIdx imageName;
StringIdx callFile;
uint32_t callLine;
uint8_t isInline;
Int24 size;
};
enum { CallstackFrameBasicSize = sizeof( CallstackFrameBasic ) };
enum { CallstackFrameSize = sizeof( CallstackFrame ) };
enum { SymbolDataSize = sizeof( SymbolData ) };
// Address/length pair (12 bytes when packed).
struct SymbolLocation
{
uint64_t addr;
uint32_t len;
};
enum { SymbolLocationSize = sizeof( SymbolLocation ) };
// Short pointer to CallstackFrame data together with an 8-bit `size` and an image name index.
// NOTE(review): `size` presumably counts the frames behind `data` — confirm against the users.
struct CallstackFrameData
{
short_ptr<CallstackFrame> data;
uint8_t size;
StringIdx imageName;
};
enum { CallstackFrameDataSize = sizeof( CallstackFrameData ) };
// Tree node keyed by callstack frame: allocation total, counter, child nodes and a set of
// callstack values. A new node starts with zero `alloc` and `count`.
struct MemCallstackFrameTree
{
    MemCallstackFrameTree( CallstackFrameId id )
        : frame( id )
    {
    }

    CallstackFrameId frame;
    uint64_t alloc = 0;
    uint32_t count = 0;
    unordered_flat_map<uint64_t, MemCallstackFrameTree> children;
    unordered_flat_set<uint32_t> callstacks;
};
enum { MemCallstackFrameTreeSize = sizeof( MemCallstackFrameTree ) };
// Tree node keyed by callstack frame: a counter plus child nodes. A new node starts with
// zero `count`.
struct CallstackFrameTree
{
    CallstackFrameTree( CallstackFrameId id )
        : frame( id )
    {
    }

    CallstackFrameId frame;
    uint32_t count = 0;
    unordered_flat_map<uint64_t, CallstackFrameTree> children;
};
enum { CallstackFrameTreeSize = sizeof( CallstackFrameTree ) };
// Crash record; every field defaults to zero.
struct CrashEvent
{
uint64_t thread = 0;
int64_t time = 0;
uint64_t message = 0;
uint32_t callstack = 0;
};
enum { CrashEventSize = sizeof( CrashEvent ) };
// Context switch record (packed). Values share the 64-bit words as follows:
//   _start_cpu        : byte 0 = cpu, bytes 2..7 = start time (48 bits)
//   _end_reason_state : byte 0 = state, byte 1 = reason, bytes 2..7 = end time (48 bits)
// The byte-offset setters and the shift/mask getters agree only on a little-endian host.
struct ContextSwitchData
{
    enum : int8_t { Fiber = 99 };
    enum : int8_t { NoState = 100 };
    enum : int8_t { Wakeup = -2 };

    tracy_force_inline int64_t Start() const { return int64_t( _start_cpu ) >> 16; }
    tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_cpu)+2, &start, 4 ); memcpy( ((char*)&_start_cpu)+6, ((char*)&start)+4, 2 ); }
    tracy_force_inline int64_t End() const { return int64_t( _end_reason_state ) >> 16; }
    tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_end_reason_state)+2, &end, 4 ); memcpy( ((char*)&_end_reason_state)+6, ((char*)&end)+4, 2 ); }
    // True when the top bit of _end_reason_state is clear.
    tracy_force_inline bool IsEndValid() const { return ( _end_reason_state >> 63 ) == 0; }
    tracy_force_inline uint8_t Cpu() const { return uint8_t( _start_cpu & 0xFF ); }
    tracy_force_inline void SetCpu( uint8_t cpu ) { memcpy( &_start_cpu, &cpu, 1 ); }
    tracy_force_inline int8_t Reason() const { return int8_t( (_end_reason_state >> 8) & 0xFF ); }
    tracy_force_inline void SetReason( int8_t reason ) { memcpy( ((char*)&_end_reason_state)+1, &reason, 1 ); }
    tracy_force_inline int8_t State() const { return int8_t( _end_reason_state & 0xFF ); }
    tracy_force_inline void SetState( int8_t state ) { memcpy( &_end_reason_state, &state, 1 ); }
    tracy_force_inline int64_t WakeupVal() const { return _wakeup.Val(); }
    tracy_force_inline void SetWakeup( int64_t wakeup ) { assert( wakeup < (int64_t)( 1ull << 47 ) ); _wakeup.SetVal( wakeup ); }
    tracy_force_inline uint16_t Thread() const { return _thread; }
    tracy_force_inline void SetThread( uint16_t thread ) { _thread = thread; }
    // Writes start time and cpu in one 64-bit store.
    tracy_force_inline void SetStartCpu( int64_t start, uint8_t cpu ) { assert( start < (int64_t)( 1ull << 47 ) ); _start_cpu = ( uint64_t( start ) << 16 ) | cpu; }
    // Writes end time, reason and state in one 64-bit store. `reason` is narrowed to uint8_t
    // before widening: converting a negative int8_t straight to uint64_t would sign-extend
    // and set every bit of the end-time field.
    tracy_force_inline void SetEndReasonState( int64_t end, int8_t reason, int8_t state ) { assert( end < (int64_t)( 1ull << 47 ) ); _end_reason_state = ( uint64_t( end ) << 16 ) | ( uint64_t( uint8_t( reason ) ) << 8 ) | uint8_t( state ); }

    uint64_t _start_cpu;
    uint64_t _end_reason_state;
    Int48 _wakeup;
    uint16_t _thread;
};
enum { ContextSwitchDataSize = sizeof( ContextSwitchData ) };
// Per-CPU context switch record (packed): `_start_thread` holds the thread in bytes 0..1 and
// a 48-bit start time in bytes 2..7; `_end` is a 48-bit end time stored directly after it.
struct ContextSwitchCpu
{
tracy_force_inline int64_t Start() const { return int64_t( _start_thread ) >> 16; }
tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_thread)+2, &start, 4 ); memcpy( ((char*)&_start_thread)+6, ((char*)&start)+4, 2 ); }
// Copies 8 bytes beginning 2 bytes before `_end` (the last two bytes of `_start_thread`
// followed by the six bytes of `_end`), then shifts right by 16 to discard the borrowed bytes.
// Depends on `_end` being placed immediately after `_start_thread` by the packed layout.
tracy_force_inline int64_t End() const { int64_t v; memcpy( &v, ((char*)&_end)-2, 8 ); return v >> 16; }
tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); _end.SetVal( end ); }
tracy_force_inline bool IsEndValid() const { return _end.IsNonNegative(); }
tracy_force_inline uint16_t Thread() const { return uint16_t( _start_thread ); }
tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_start_thread, &thread, 2 ); }
// Writes start time and thread in one 64-bit store.
tracy_force_inline void SetStartThread( int64_t start, uint16_t thread ) { assert( start < (int64_t)( 1ull << 47 ) ); _start_thread = ( uint64_t( start ) << 16 ) | thread; }
uint64_t _start_thread;
Int48 _end;
};
enum { ContextSwitchCpuSize = sizeof( ContextSwitchCpu ) };
// Usage sample packed into one 64-bit word: byte 0 = `other`, byte 1 = `own`,
// bytes 2..7 = 48-bit time. The byte-offset setters and the shift-based getters agree only
// on a little-endian host.
struct ContextSwitchUsage
{
// Leaves the word uninitialised.
ContextSwitchUsage() {}
ContextSwitchUsage( int64_t time, uint8_t other, uint8_t own ) { SetTime( time ); SetOther( other ); SetOwn( own ); }
tracy_force_inline int64_t Time() const { return int64_t( _time_other_own ) >> 16; }
tracy_force_inline void SetTime( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_other_own)+2, &time, 4 ); memcpy( ((char*)&_time_other_own)+6, ((char*)&time)+4, 2 ); }
tracy_force_inline uint8_t Other() const { return uint8_t( _time_other_own ); }
tracy_force_inline void SetOther( uint8_t other ) { memcpy( &_time_other_own, &other, 1 ); }
tracy_force_inline uint8_t Own() const { uint8_t v; memcpy( &v, ((char*)&_time_other_own)+1, 1 );return v; }
tracy_force_inline void SetOwn( uint8_t own ) { memcpy( ((char*)&_time_other_own)+1, &own, 1 ); }
uint64_t _time_other_own;
};
enum { ContextSwitchUsageSize = sizeof( ContextSwitchUsage ) };
// Message record: time, string reference, 16-bit thread value, color and a 24-bit callstack value.
struct MessageData
{
int64_t time;
StringRef ref;
uint16_t thread;
uint32_t color;
Int24 callstack;
};
enum { MessageDataSize = sizeof( MessageData ) };
// One plot point: 48-bit time and a double value (14 bytes when packed).
struct PlotItem
{
Int48 time;
double val;
};
enum { PlotItemSize = sizeof( PlotItem ) };
// Frame record: start/end times and a 32-bit `frameImage` value.
// NOTE(review): `frameImage` is presumably an index into the frame image list — confirm.
struct FrameEvent
{
int64_t start;
int64_t end;
int32_t frameImage;
};
enum { FrameEventSize = sizeof( FrameEvent ) };
// Frame image descriptor: short pointer to the image bytes, a 32-bit `csz` value,
// width/height, a frame reference and a flip flag.
// NOTE(review): `csz` is presumably the compressed size of the data behind `ptr` — confirm.
struct FrameImage
{
short_ptr<const char> ptr;
uint32_t csz;
uint16_t w, h;
uint32_t frameRef;
uint8_t flip;
};
enum { FrameImageSize = sizeof( FrameImage ) };
// Ghost zone record: 48-bit start/end times, a 24-bit frame value and a 32-bit child value.
struct GhostZone
{
Int48 start, end;
Int24 frame;
int32_t child;
};
enum { GhostZoneSize = sizeof( GhostZone ) };
// Sample entry: 48-bit time and a 64-bit address. Last struct declared before
// `#pragma pack( pop )`, so it is still byte-packed.
struct ChildSample
{
Int48 time;
uint64_t addr;
};
enum { ChildSampleSize = sizeof( ChildSample ) };
#pragma pack( pop )
// Per-thread data: zone timeline and stack, messages, samples and related bookkeeping.
// Declared after `#pragma pack( pop )`, so this struct uses the default alignment.
struct ThreadData
{
uint64_t id;
uint64_t count;
Vector<short_ptr<ZoneEvent>> timeline;
Vector<short_ptr<ZoneEvent>> stack;
Vector<short_ptr<MessageData>> messages;
uint32_t nextZoneId;
Vector<uint32_t> zoneIdStack;
// The following members exist only when statistics are enabled.
#ifndef TRACY_NO_STATISTICS
Vector<int64_t> childTimeStack;
Vector<GhostZone> ghostZones;
uint64_t ghostIdx;
SortedVector<SampleData, SampleDataSort> postponedSamples;
#endif
Vector<SampleData> samples;
SampleData pendingSample;
Vector<SampleData> ctxSwitchSamples;
uint64_t kernelSampleCnt;
uint8_t isFiber;
ThreadData* fiber;
// Array of 8-bit counters indexed by source location reinterpreted as uint16_t.
// NOTE(review): allocation and size are not visible here; the index range implies 65536 entries — confirm.
uint8_t* stackCount;
int32_t groupHint;
// Increments the counter for `srcloc`.
tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t(srcloc)]++; }
// Decrements the counter for `srcloc`; returns true while it is still non-zero afterwards.
tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t(srcloc)] != 0; }
};
// Per-thread GPU zone containers: a timeline and a stack of short pointers to GpuEvent.
struct GpuCtxThreadData
{
Vector<short_ptr<GpuEvent>> timeline;
Vector<short_ptr<GpuEvent>> stack;
};
// Per-GPU-context state: timing and calibration values, per-thread zone data and a
// fixed table of 64 * 1024 short pointers to GpuEvent.
struct GpuCtxData
{
int64_t timeDiff;
uint64_t thread;
uint64_t count;
float period;
GpuContextType type;
bool hasPeriod;
bool hasCalibration;
int64_t calibratedGpuTime;
int64_t calibratedCpuTime;
double calibrationMod;
int64_t lastGpuTime;
uint64_t overflow;
uint32_t overflowMul;
StringIdx name;
unordered_flat_map<uint64_t, GpuCtxThreadData> threadData;
// NOTE(review): 64 * 1024 entries matches the range of a 16-bit query id — confirm.
short_ptr<GpuEvent> query[64*1024];
};
enum { GpuCtxDataSize = sizeof( GpuCtxData ) };
// Kind of plot, stored in one byte.
enum class PlotType : uint8_t
{
User,
Memory,
SysTime,
Power
};
// Keep this in sync with enum in TracyC.h
// Selects how plot values are formatted; stored in one byte.
enum class PlotValueFormatting : uint8_t
{
Number,
Memory,
Percentage,
Watt
};
// Plot state: name, running min/max/sum, the time-sorted data points and display settings.
struct PlotData
{
    // Orders plot items by their decoded time. The call operator is const so the functor
    // can also be invoked through a const comparator object.
    struct PlotItemSort
    {
        bool operator()( const PlotItem& lhs, const PlotItem& rhs ) const
        {
            return lhs.time.Val() < rhs.time.Val();
        }
    };

    uint64_t name;
    double min;
    double max;
    double sum;
    SortedVector<PlotItem, PlotItemSort> data;
    PlotType type;
    PlotValueFormatting format;
    uint8_t showSteps;
    uint8_t fill;
    uint32_t color;
    double rMin, rMax, num;
};
// Memory tracking state: all MemEvent records, a list of free entries, a map from 64-bit keys
// to size_t values for active allocations, address bounds, usage total and an optional plot.
struct MemData
{
Vector<MemEvent> data;
Vector<uint32_t> frees;
unordered_flat_map<uint64_t, size_t> active;
// `high` starts at the smallest and `low` at the largest uint64_t value.
uint64_t high = std::numeric_limits<uint64_t>::min();
uint64_t low = std::numeric_limits<uint64_t>::max();
uint64_t usage = 0;
PlotData* plot = nullptr;
bool reconstruct = false;
uint64_t name = 0;
};
// Frame set: name, the frame events and running statistics.
struct FrameData
{
uint64_t name;
Vector<FrameEvent> frames;
uint8_t continuous;
// `min` starts at the largest and `max` at the smallest int64_t value.
int64_t min = std::numeric_limits<int64_t>::max();
int64_t max = std::numeric_limits<int64_t>::min();
int64_t total = 0;
double sumSq = 0;
};
// Pairs a C string pointer with a 32-bit index.
struct StringLocation
{
const char* ptr;
uint32_t idx;
};
// Hash functor for SourceLocation pointers. Only the first sizeof( SourceLocationBase )
// bytes are hashed, so `namehash` does not contribute.
struct SourceLocationHasher
{
    size_t operator()( const SourceLocation* ptr ) const
    {
        const auto bytes = (const char*)ptr;
        return charutil::hash( bytes, sizeof( SourceLocationBase ) );
    }
};
// Equality functor for SourceLocation pointers. Only the first sizeof( SourceLocationBase )
// bytes are compared, so `namehash` does not contribute.
struct SourceLocationComparator
{
    bool operator()( const SourceLocation* lhs, const SourceLocation* rhs ) const
    {
        const int diff = memcmp( lhs, rhs, sizeof( SourceLocationBase ) );
        return diff == 0;
    }
};
// Context switch records together with a running-time total that starts at zero.
struct ContextSwitch
{
Vector<ContextSwitchData> v;
int64_t runningTime = 0;
};
// Holds the ContextSwitchCpu records for a CPU.
struct CpuData
{
Vector<ContextSwitchCpu> cs;
};
// Per-thread CPU statistics; every counter starts at zero.
struct CpuThreadData
{
int64_t runningTime = 0;
uint32_t runningRegions = 0;
uint32_t migrations = 0;
};
enum { CpuThreadDataSize = sizeof( CpuThreadData ) };
// Named parameter: index, name reference, a flag marking it as boolean, and a 32-bit value.
struct Parameter
{
uint32_t idx;
StringRef name;
bool isBool;
int32_t val;
};
// Per-symbol statistics: `incl`/`excl` counters plus two maps from 32-bit keys to 32-bit counts.
// NOTE(review): presumably inclusive/exclusive sample counts — confirm against the users.
struct SymbolStats
{
uint32_t incl, excl;
unordered_flat_map<uint32_t, uint32_t> parents;
unordered_flat_map<uint32_t, uint32_t> baseParents;
};
enum { SymbolStatsSize = sizeof( SymbolStats ) };
}
#endif

View File

@@ -0,0 +1,22 @@
#ifndef __TRACYFILEHEADER_HPP__
#define __TRACYFILEHEADER_HPP__
#include <stdint.h>
#include "../public/common/TracyForceInline.hpp"
namespace tracy
{
// Four-byte file header constants.
// NOTE(review): the names suggest the generic, LZ4 and Zstd file variants — confirm against
// the reader/writer code.
static const uint8_t TracyHeader[4] = { 't', 'r', 253, 'P' };
static const uint8_t Lz4Header[4] = { 't', 'l', 'Z', 4 };
static const uint8_t ZstdHeader[4] = { 't', 'Z', 's', 't' };
// Packs three bytes into one integer: h5 in bits 16..23, h6 in bits 8..15, h7 in bits 0..7.
static constexpr tracy_force_inline int FileVersion( uint8_t h5, uint8_t h6, uint8_t h7 )
{
    return ( int( h5 ) << 16 ) | ( int( h6 ) << 8 ) | int( h7 );
}
}
#endif

View File

@@ -0,0 +1,18 @@
#ifndef __TRACYFILEMETA_HPP__
#define __TRACYFILEMETA_HPP__
#include <algorithm>
#include <stddef.h>
#include "../public/common/tracy_lz4.hpp"
#include "../zstd/zstd.h"
namespace tracy
{
// Size in bytes of one file buffer block (64 KiB).
constexpr size_t FileBufSize = 64 * 1024;
// The larger of LZ4_COMPRESSBOUND and ZSTD_COMPRESSBOUND evaluated for one FileBufSize block.
constexpr size_t FileBoundSize = std::max( LZ4_COMPRESSBOUND( FileBufSize ), ZSTD_COMPRESSBOUND( FileBufSize ) );
}
#endif

View File

@@ -0,0 +1,601 @@
#ifndef __TRACYFILEREAD_HPP__
#define __TRACYFILEREAD_HPP__
#include <assert.h>
#include <atomic>
#include <algorithm>
#include <condition_variable>
#include <stdexcept>
#include <stdio.h>
#include <string.h>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include <sys/stat.h>
#ifdef _MSC_VER
# define stat64 _stat64
#endif
#if defined __APPLE__ || defined __FreeBSD__
# define stat64 stat
#endif
#include "TracyFileHeader.hpp"
#include "TracyFileMeta.hpp"
#include "TracyMmap.hpp"
#include "../public/common/TracyYield.hpp"
#include "../public/common/tracy_lz4.hpp"
#include "../public/common/TracyForceInline.hpp"
#include "../zstd/zstd.h"
namespace tracy
{
// Exception types derived from std::exception; neither adds state or overrides what().
// NOTE(review): the throw sites are not visible in this part of the file.
struct NotTracyDump : public std::exception {};
struct FileReadError : public std::exception {};
// Streaming decompressor for one stream, using either LZ4 or Zstd.
// Owns two FileBufSize-byte output buffers that are swapped on every Decompress() call,
// so the previously produced block stays in memory while the next one is decoded.
class ReadStream
{
public:
    // `type` selects the decoder: 0 = LZ4, 1 = Zstd. Any other value trips an assert and
    // leaves both decoder handles null.
    ReadStream( uint8_t type )
        : m_stream( nullptr )
        , m_streamZstd( nullptr )
        , m_buf( new char[FileBufSize] )
        , m_second( new char[FileBufSize] )
        , m_size( 0 )
    {
        switch( type )
        {
        case 0:
            m_stream = LZ4_createStreamDecode();
            break;
        case 1:
            m_streamZstd = ZSTD_createDStream();
            break;
        default:
            assert( false );
            break;
        }
    }

    ~ReadStream()
    {
        delete[] m_buf;
        delete[] m_second;
        if( m_stream ) LZ4_freeStreamDecode( m_stream );
        if( m_streamZstd ) ZSTD_freeDStream( m_streamZstd );
    }

    // The object owns raw buffers and decoder handles; an implicit copy would release
    // them twice, so copying is disabled.
    ReadStream( const ReadStream& ) = delete;
    ReadStream& operator=( const ReadStream& ) = delete;

    // Decompresses `size` bytes from `src` into the buffer that was not used by the previous
    // call; the result is available through GetBuffer()/GetSize().
    // NOTE(review): decoder errors are not checked here. A negative LZ4 return value is cast
    // to size_t and the Zstd return code is ignored — confirm that callers validate the input.
    void Decompress( const char* src, uint32_t size )
    {
        std::swap( m_buf, m_second );
        if( m_stream )
        {
            m_size = (size_t)LZ4_decompress_safe_continue( m_stream, src, m_buf, size, FileBufSize );
        }
        else
        {
            ZSTD_outBuffer out = { m_buf, FileBufSize, 0 };
            ZSTD_inBuffer in = { src, size, 0 };
            ZSTD_decompressStream( m_streamZstd, &out, &in );
            m_size = out.pos;
        }
    }

    // Buffer holding the most recently decompressed block.
    const char* GetBuffer() const { return m_buf; }
    // Size recorded by the last Decompress() call; 0 before the first call.
    size_t GetSize() const { return m_size; }

private:
    LZ4_streamDecode_t* m_stream;
    ZSTD_DStream* m_streamZstd;
    char* m_buf;
    char* m_second;
    size_t m_size;
};
class FileRead
{
// State for one decompression stream: the ReadStream, the pending input (`src`, `size`),
// ready/exit flags, and the mutex, condition variable and thread associated with it.
struct StreamHandle
{
// `src` and `size` are left uninitialised by the constructor.
StreamHandle( uint8_t type ) : stream( type ), outputReady( false ) {}
ReadStream stream;
const char* src;
uint32_t size;
bool inputReady = false;
bool exit = false;
// Placed on its own 64-byte boundary.
alignas(64) std::atomic<bool> outputReady;
std::mutex signalLock;
std::condition_variable signal;
std::thread thread;
};
public:
// Opens `fn` for binary reading. Returns a newly allocated FileRead on success,
// or nullptr when fopen() fails.
static FileRead* Open( const char* fn )
{
    auto f = fopen( fn, "rb" );
    if( !f ) return nullptr;
    return new FileRead( f, fn );
}
~FileRead()
{
    // Set every stream's exit flag under its lock and notify its condition variable.
    for( auto& handle : m_streams )
    {
        std::lock_guard lock( handle->signalLock );
        handle->exit = true;
        handle->signal.notify_one();
    }
    // Join all stream threads before releasing the handles.
    for( auto& handle : m_streams )
    {
        handle->thread.join();
    }
    m_streams.clear();
    // Unmap the data region when one is present.
    if( m_data )
    {
        munmap( m_data, m_dataSize );
    }
}
// Reads `size` bytes into `ptr`. Requests that fit into the remainder of the current
// buffer go to ReadSmall(); everything else goes to ReadBig().
tracy_force_inline void Read( void* ptr, size_t size )
{
    if( size > FileBufSize - m_offset )
    {
        ReadBig( ptr, size );
        return;
    }
    ReadSmall( ptr, size );
}
// Skips `size` bytes. When they fit into the remainder of the current buffer only the
// offset is advanced; otherwise SkipBig() handles the request.
tracy_force_inline void Skip( size_t size )
{
    if( size > FileBufSize - m_offset )
    {
        SkipBig( size );
        return;
    }
    m_offset += size;
}
// Reads one value of type T by copying its bytes. Uses the current buffer directly when
// the value fits into its remainder; otherwise reads through ReadBig() into a temporary.
template<class T>
tracy_force_inline void Read( T& v )
{
    constexpr size_t bytes = sizeof( T );
    if( bytes > FileBufSize - m_offset )
    {
        T tmp;
        ReadBig( &tmp, bytes );
        memcpy( &v, &tmp, bytes );
        return;
    }
    memcpy( &v, m_buf + m_offset, bytes );
    m_offset += bytes;
}
// Reads two consecutive values with a single bounds check. Falls back to ReadBig() with a
// stack buffer when they do not fit into the remainder of the current buffer.
template<class T, class U>
tracy_force_inline void Read2( T& v0, U& v1 )
{
    constexpr size_t off1 = sizeof( T );
    constexpr size_t total = off1 + sizeof( U );
    if( total > FileBufSize - m_offset )
    {
        char tmp[total];
        ReadBig( tmp, total );
        memcpy( &v0, tmp, sizeof( T ) );
        memcpy( &v1, tmp + off1, sizeof( U ) );
        return;
    }
    const auto src = m_buf + m_offset;
    memcpy( &v0, src, sizeof( T ) );
    memcpy( &v1, src + off1, sizeof( U ) );
    m_offset += total;
}
// Reads three consecutive values with a single bounds check. Falls back to ReadBig() with a
// stack buffer when they do not fit into the remainder of the current buffer.
template<class T, class U, class V>
tracy_force_inline void Read3( T& v0, U& v1, V& v2 )
{
    constexpr size_t off1 = sizeof( T );
    constexpr size_t off2 = off1 + sizeof( U );
    constexpr size_t total = off2 + sizeof( V );
    if( total > FileBufSize - m_offset )
    {
        char tmp[total];
        ReadBig( tmp, total );
        memcpy( &v0, tmp, sizeof( T ) );
        memcpy( &v1, tmp + off1, sizeof( U ) );
        memcpy( &v2, tmp + off2, sizeof( V ) );
        return;
    }
    const auto src = m_buf + m_offset;
    memcpy( &v0, src, sizeof( T ) );
    memcpy( &v1, src + off1, sizeof( U ) );
    memcpy( &v2, src + off2, sizeof( V ) );
    m_offset += total;
}
// Reads four consecutive values with a single bounds check. Falls back to ReadBig() with a
// stack buffer when they do not fit into the remainder of the current buffer.
template<class T, class U, class V, class W>
tracy_force_inline void Read4( T& v0, U& v1, V& v2, W& v3 )
{
    constexpr size_t off1 = sizeof( T );
    constexpr size_t off2 = off1 + sizeof( U );
    constexpr size_t off3 = off2 + sizeof( V );
    constexpr size_t total = off3 + sizeof( W );
    if( total > FileBufSize - m_offset )
    {
        char tmp[total];
        ReadBig( tmp, total );
        memcpy( &v0, tmp, sizeof( T ) );
        memcpy( &v1, tmp + off1, sizeof( U ) );
        memcpy( &v2, tmp + off2, sizeof( V ) );
        memcpy( &v3, tmp + off3, sizeof( W ) );
        return;
    }
    const auto src = m_buf + m_offset;
    memcpy( &v0, src, sizeof( T ) );
    memcpy( &v1, src + off1, sizeof( U ) );
    memcpy( &v2, src + off2, sizeof( V ) );
    memcpy( &v3, src + off3, sizeof( W ) );
    m_offset += total;
}
// Reads five consecutive values with a single bounds check. Falls back to ReadBig() with a
// stack buffer when they do not fit into the remainder of the current buffer.
template<class T, class U, class V, class W, class X>
tracy_force_inline void Read5( T& v0, U& v1, V& v2, W& v3, X& v4 )
{
    constexpr size_t off1 = sizeof( T );
    constexpr size_t off2 = off1 + sizeof( U );
    constexpr size_t off3 = off2 + sizeof( V );
    constexpr size_t off4 = off3 + sizeof( W );
    constexpr size_t total = off4 + sizeof( X );
    if( total > FileBufSize - m_offset )
    {
        char tmp[total];
        ReadBig( tmp, total );
        memcpy( &v0, tmp, sizeof( T ) );
        memcpy( &v1, tmp + off1, sizeof( U ) );
        memcpy( &v2, tmp + off2, sizeof( V ) );
        memcpy( &v3, tmp + off3, sizeof( W ) );
        memcpy( &v4, tmp + off4, sizeof( X ) );
        return;
    }
    const auto src = m_buf + m_offset;
    memcpy( &v0, src, sizeof( T ) );
    memcpy( &v1, src + off1, sizeof( U ) );
    memcpy( &v2, src + off2, sizeof( V ) );
    memcpy( &v3, src + off3, sizeof( W ) );
    memcpy( &v4, src + off4, sizeof( X ) );
    m_offset += total;
}
// Reads six consecutive values with a single bounds check. Falls back to ReadBig() with a
// stack buffer when they do not fit into the remainder of the current buffer.
template<class T, class U, class V, class W, class X, class Y>
tracy_force_inline void Read6( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5 )
{
    constexpr size_t off1 = sizeof( T );
    constexpr size_t off2 = off1 + sizeof( U );
    constexpr size_t off3 = off2 + sizeof( V );
    constexpr size_t off4 = off3 + sizeof( W );
    constexpr size_t off5 = off4 + sizeof( X );
    constexpr size_t total = off5 + sizeof( Y );
    if( total > FileBufSize - m_offset )
    {
        char tmp[total];
        ReadBig( tmp, total );
        memcpy( &v0, tmp, sizeof( T ) );
        memcpy( &v1, tmp + off1, sizeof( U ) );
        memcpy( &v2, tmp + off2, sizeof( V ) );
        memcpy( &v3, tmp + off3, sizeof( W ) );
        memcpy( &v4, tmp + off4, sizeof( X ) );
        memcpy( &v5, tmp + off5, sizeof( Y ) );
        return;
    }
    const auto src = m_buf + m_offset;
    memcpy( &v0, src, sizeof( T ) );
    memcpy( &v1, src + off1, sizeof( U ) );
    memcpy( &v2, src + off2, sizeof( V ) );
    memcpy( &v3, src + off3, sizeof( W ) );
    memcpy( &v4, src + off4, sizeof( X ) );
    memcpy( &v5, src + off5, sizeof( Y ) );
    m_offset += total;
}
// Reads seven values stored back to back, copying them out byte-wise in argument order.
template<class T, class U, class V, class W, class X, class Y, class Z>
tracy_force_inline void Read7( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6 )
{
    // Combined size of all requested values.
    constexpr size_t total = sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z );
    char tmp[total];
    const char* src;
    if( total <= FileBufSize - m_offset )
    {
        // Everything is available in the current buffer: read in place and advance.
        src = m_buf + m_offset;
        m_offset += total;
    }
    else
    {
        // Not enough bytes left in the current buffer: ReadBig() gathers them into tmp.
        ReadBig( tmp, total );
        src = tmp;
    }
    memcpy( &v0, src, sizeof( T ) );
    src += sizeof( T );
    memcpy( &v1, src, sizeof( U ) );
    src += sizeof( U );
    memcpy( &v2, src, sizeof( V ) );
    src += sizeof( V );
    memcpy( &v3, src, sizeof( W ) );
    src += sizeof( W );
    memcpy( &v4, src, sizeof( X ) );
    src += sizeof( X );
    memcpy( &v5, src, sizeof( Y ) );
    src += sizeof( Y );
    memcpy( &v6, src, sizeof( Z ) );
}
// Reads eight values stored back to back, copying them out byte-wise in argument order.
template<class T, class U, class V, class W, class X, class Y, class Z, class A>
tracy_force_inline void Read8( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7 )
{
    // Combined size of all requested values.
    constexpr size_t total = sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A );
    char tmp[total];
    const char* src;
    if( total <= FileBufSize - m_offset )
    {
        // Everything is available in the current buffer: read in place and advance.
        src = m_buf + m_offset;
        m_offset += total;
    }
    else
    {
        // Not enough bytes left in the current buffer: ReadBig() gathers them into tmp.
        ReadBig( tmp, total );
        src = tmp;
    }
    memcpy( &v0, src, sizeof( T ) );
    src += sizeof( T );
    memcpy( &v1, src, sizeof( U ) );
    src += sizeof( U );
    memcpy( &v2, src, sizeof( V ) );
    src += sizeof( V );
    memcpy( &v3, src, sizeof( W ) );
    src += sizeof( W );
    memcpy( &v4, src, sizeof( X ) );
    src += sizeof( X );
    memcpy( &v5, src, sizeof( Y ) );
    src += sizeof( Y );
    memcpy( &v6, src, sizeof( Z ) );
    src += sizeof( Z );
    memcpy( &v7, src, sizeof( A ) );
}
// Reads nine values stored back to back, copying them out byte-wise in argument order.
template<class T, class U, class V, class W, class X, class Y, class Z, class A, class B>
tracy_force_inline void Read9( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7, B& v8 )
{
    // Combined size of all requested values.
    constexpr size_t total = sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B );
    char tmp[total];
    const char* src;
    if( total <= FileBufSize - m_offset )
    {
        // Everything is available in the current buffer: read in place and advance.
        src = m_buf + m_offset;
        m_offset += total;
    }
    else
    {
        // Not enough bytes left in the current buffer: ReadBig() gathers them into tmp.
        ReadBig( tmp, total );
        src = tmp;
    }
    memcpy( &v0, src, sizeof( T ) );
    src += sizeof( T );
    memcpy( &v1, src, sizeof( U ) );
    src += sizeof( U );
    memcpy( &v2, src, sizeof( V ) );
    src += sizeof( V );
    memcpy( &v3, src, sizeof( W ) );
    src += sizeof( W );
    memcpy( &v4, src, sizeof( X ) );
    src += sizeof( X );
    memcpy( &v5, src, sizeof( Y ) );
    src += sizeof( Y );
    memcpy( &v6, src, sizeof( Z ) );
    src += sizeof( Z );
    memcpy( &v7, src, sizeof( A ) );
    src += sizeof( A );
    memcpy( &v8, src, sizeof( B ) );
}
// Reads ten values stored back to back, copying them out byte-wise in argument order.
template<class T, class U, class V, class W, class X, class Y, class Z, class A, class B, class C>
tracy_force_inline void Read10( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7, B& v8, C& v9 )
{
    // Combined size of all requested values.
    constexpr size_t total = sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) + sizeof( C );
    char tmp[total];
    const char* src;
    if( total <= FileBufSize - m_offset )
    {
        // Everything is available in the current buffer: read in place and advance.
        src = m_buf + m_offset;
        m_offset += total;
    }
    else
    {
        // Not enough bytes left in the current buffer: ReadBig() gathers them into tmp.
        ReadBig( tmp, total );
        src = tmp;
    }
    memcpy( &v0, src, sizeof( T ) );
    src += sizeof( T );
    memcpy( &v1, src, sizeof( U ) );
    src += sizeof( U );
    memcpy( &v2, src, sizeof( V ) );
    src += sizeof( V );
    memcpy( &v3, src, sizeof( W ) );
    src += sizeof( W );
    memcpy( &v4, src, sizeof( X ) );
    src += sizeof( X );
    memcpy( &v5, src, sizeof( Y ) );
    src += sizeof( Y );
    memcpy( &v6, src, sizeof( Z ) );
    src += sizeof( Z );
    memcpy( &v7, src, sizeof( A ) );
    src += sizeof( A );
    memcpy( &v8, src, sizeof( B ) );
    src += sizeof( B );
    memcpy( &v9, src, sizeof( C ) );
}
// Returns the file name that was passed to the constructor (stored in m_filename).
const std::string& GetFilename() const
{
    return m_filename;
}
private:
FileRead( FILE* f, const char* fn )
: m_data( nullptr )
, m_offset( 0 )
, m_streamId( 0 )
, m_filename( fn )
{
char hdr[4];
if( fread( hdr, 1, sizeof( hdr ), f ) != sizeof( hdr ) )
{
fclose( f );
throw NotTracyDump();
}
uint8_t streams = 1;
uint8_t type;
m_dataOffset = sizeof( hdr );
if( memcmp( hdr, TracyHeader, sizeof( hdr ) ) == 0 )
{
if( fread( &type, 1, 1, f ) != 1 || type > 1 )
{
fclose( f );
throw NotTracyDump();
}
if( fread( &streams, 1, 1, f ) != 1 )
{
fclose( f );
throw NotTracyDump();
}
m_dataOffset += 2;
}
else if( memcmp( hdr, Lz4Header, sizeof( hdr ) ) == 0 )
{
type = 0;
}
else if( memcmp( hdr, ZstdHeader, sizeof( hdr ) ) == 0 )
{
type = 1;
}
else
{
fclose( f );
throw NotTracyDump();
}
struct stat64 buf;
if( stat64( fn, &buf ) == 0 )
{
m_dataSize = buf.st_size;
}
else
{
fclose( f );
throw FileReadError();
}
m_data = (char*)mmap( nullptr, m_dataSize, PROT_READ, MAP_SHARED, fileno( f ), 0 );
fclose( f );
if( !m_data )
{
throw FileReadError();
}
for( int i=0; i<(int)streams; i++ )
{
if( m_dataOffset == m_dataSize ) break;
const auto sz = ReadBlockSize();
auto uptr = std::make_unique<StreamHandle>( type );
uptr->src = m_data + m_dataOffset;
uptr->size = sz;
uptr->inputReady = true;
uptr->thread = std::thread( [ptr = uptr.get()] { Worker( ptr ); } );
m_streams.emplace_back( std::move( uptr ) );
m_dataOffset += sz;
}
GetNextDataBlock();
}
// Copies the 32-bit block size stored at the current position of the mapped file
// (byte-for-byte, no byte-order conversion) and steps m_dataOffset past it.
tracy_force_inline uint32_t ReadBlockSize()
{
    uint32_t blockSize;
    const char* const field = m_data + m_dataOffset;
    m_dataOffset += sizeof( blockSize );
    memcpy( &blockSize, field, sizeof( blockSize ) );
    return blockSize;
}
// Decompression thread body, one per StreamHandle. Each iteration waits until the reader
// flags new input (or requests exit), decompresses hnd->src / hnd->size and then publishes
// the result through hnd->outputReady. Returns when hnd->exit is observed.
static void Worker( StreamHandle* hnd )
{
for(;;)
{
std::unique_lock lock( hnd->signalLock );
hnd->signal.wait( lock, [&] { return hnd->inputReady || hnd->exit; } );
if( hnd->exit ) return;
// The lock is released before decompressing, so signalLock is not held during Decompress().
lock.unlock();
hnd->stream.Decompress( hnd->src, hnd->size );
hnd->inputReady = false;
// Release store pairs with the acquire load in GetNextDataBlock(), which polls this flag.
hnd->outputReady.store( true, std::memory_order_release );
}
}
// Copies `size` bytes from the current buffer position into `ptr` and advances the position.
// No bounds check is performed here.
tracy_force_inline void ReadSmall( void* ptr, size_t size )
{
    const char* const src = m_buf + m_offset;
    m_offset += size;
    memcpy( ptr, src, size );
}
// Copies `size` bytes into `ptr`, fetching further data blocks whenever the current
// buffer has been consumed completely. `size` must be greater than zero.
void ReadBig( void* ptr, size_t size )
{
    assert( size > 0 );
    char* out = (char*)ptr;
    do
    {
        // GetNextDataBlock() switches m_buf to the next block and resets m_offset to 0.
        if( m_offset == FileBufSize ) GetNextDataBlock();
        const size_t chunk = std::min<size_t>( size, FileBufSize - m_offset );
        memcpy( out, m_buf + m_offset, chunk );
        m_offset += chunk;
        out += chunk;
        size -= chunk;
    }
    while( size > 0 );
}
// Advances the read position by `size` bytes without copying anything, fetching further
// data blocks whenever the current buffer has been consumed completely.
void SkipBig( size_t size )
{
    for( size_t left = size; left > 0; )
    {
        if( m_offset == FileBufSize ) GetNextDataBlock();
        const auto step = std::min( left, FileBufSize - m_offset );
        left -= step;
        m_offset += step;
    }
}
// Switches m_buf to the output of the stream selected by m_streamId, hands that stream its
// next compressed block (if any file data remains) and advances m_streamId round-robin.
void GetNextDataBlock()
{
auto& hnd = *m_streams[m_streamId];
// Spin (yielding) until the worker has published its output; pairs with the release store in Worker().
while( hnd.outputReady.load( std::memory_order_acquire ) == false ) { YieldThread(); }
hnd.outputReady.store( false, std::memory_order_relaxed );
m_buf = hnd.stream.GetBuffer();
m_offset = 0;
if( m_dataOffset < m_dataSize )
{
// Queue the next block of the mapped file on this same stream and wake its worker.
// NOTE(review): the worker may start decompressing while m_buf is still being read;
// presumably the stream keeps the buffer returned by GetBuffer() intact - TODO confirm.
const auto sz = ReadBlockSize();
std::unique_lock lock( hnd.signalLock );
hnd.src = m_data + m_dataOffset;
hnd.size = sz;
hnd.inputReady = true;
hnd.signal.notify_one();
lock.unlock();
m_dataOffset += sz;
}
m_streamId = ( m_streamId + 1 ) % m_streams.size();
}
// Start of the memory-mapped file (result of mmap() in the constructor).
char* m_data;
// Buffer currently being read from; set from the active stream's GetBuffer() in GetNextDataBlock().
const char* m_buf;
// Size of the file in bytes (st_size reported by stat64()).
uint64_t m_dataSize;
// Offset into m_data of the next byte that has not been handed to a stream yet.
uint64_t m_dataOffset;
// Read position inside m_buf.
size_t m_offset;
// Index into m_streams of the stream that GetNextDataBlock() consumes next.
int m_streamId;
// File name passed to the constructor.
std::string m_filename;
// One handle per decompression stream; each handle holds its worker thread.
std::vector<std::unique_ptr<StreamHandle>> m_streams;
};
}
#endif

View File

@@ -0,0 +1,303 @@
#ifndef __TRACYFILEWRITE_HPP__
#define __TRACYFILEWRITE_HPP__
#ifdef _MSC_VER
# pragma warning( disable: 4267 ) // conversion from don't care to whatever, possible loss of data
#endif
#include <algorithm>
#include <assert.h>
#include <condition_variable>
#include <mutex>
#include <stdio.h>
#include <string.h>
#include <thread>
#include <utility>
#include <vector>
#include "TracyFileHeader.hpp"
#include "TracyFileMeta.hpp"
#include "../public/common/tracy_lz4.hpp"
#include "../public/common/tracy_lz4hc.hpp"
#include "../public/common/TracyForceInline.hpp"
#include "../zstd/zstd.h"
namespace tracy
{
// Compression mode selected when creating a WriteStream / FileWrite.
enum class FileCompression
{
// LZ4 streaming compression (LZ4_createStream).
Fast,
// LZ4 HC stream as created by LZ4_createStreamHC, without changing its level.
Slow,
// LZ4 HC stream reset to LZ4HC_CLEVEL_MAX.
Extreme,
// Zstandard; the only mode that uses the `level` argument.
Zstd
};
// Compressor for a single output stream. Callers fill the buffer returned by
// GetInputBuffer(), call Compress() with the number of valid bytes, and then read the
// result through GetCompressedData() / GetSize().
//
// The object owns three raw buffers and one codec context, all released in the destructor,
// so copying is disabled (a copy would free them twice).
class WriteStream
{
public:
    // `comp` selects the codec. `level` is only used for FileCompression::Zstd, where it is
    // passed as ZSTD_c_compressionLevel.
    WriteStream( FileCompression comp, int level )
        : m_stream( nullptr )
        , m_streamHC( nullptr )
        , m_streamZstd( nullptr )
        , m_buf( new char[FileBufSize] )
        , m_second( new char[FileBufSize] )
        , m_compressed( new char[FileBoundSize] )
        , m_size( 0 )
    {
        switch( comp )
        {
        case FileCompression::Fast:
            m_stream = LZ4_createStream();
            break;
        case FileCompression::Slow:
            m_streamHC = LZ4_createStreamHC();
            break;
        case FileCompression::Extreme:
            m_streamHC = LZ4_createStreamHC();
            LZ4_resetStreamHC( m_streamHC, LZ4HC_CLEVEL_MAX );
            break;
        case FileCompression::Zstd:
            m_streamZstd = ZSTD_createCStream();
            ZSTD_CCtx_setParameter( m_streamZstd, ZSTD_c_compressionLevel, level );
            ZSTD_CCtx_setParameter( m_streamZstd, ZSTD_c_contentSizeFlag, 0 );
            break;
        default:
            assert( false );
            break;
        }
    }

    WriteStream( const WriteStream& ) = delete;
    WriteStream& operator=( const WriteStream& ) = delete;

    ~WriteStream()
    {
        delete[] m_buf;
        delete[] m_second;
        delete[] m_compressed;
        if( m_stream ) LZ4_freeStream( m_stream );
        if( m_streamHC ) LZ4_freeStreamHC( m_streamHC );
        if( m_streamZstd ) ZSTD_freeCStream( m_streamZstd );
    }

    // Buffer (FileBufSize bytes) to be filled before the next Compress() call.
    // The returned pointer changes after every Compress(), see below.
    char* GetInputBuffer() { return m_buf; }
    // Output of the most recent Compress() call.
    const char* GetCompressedData() const { return m_compressed; }
    // Number of bytes produced by the most recent Compress() call; 0 before the first call.
    uint32_t GetSize() const { return m_size; }

    // Compresses the first `sz` bytes of the current input buffer into the output buffer
    // using whichever codec was created in the constructor.
    void Compress( uint32_t sz )
    {
        if( m_stream )
        {
            m_size = LZ4_compress_fast_continue( m_stream, m_buf, m_compressed, sz, FileBoundSize, 1 );
        }
        else if( m_streamZstd )
        {
            ZSTD_outBuffer out = { m_compressed, FileBoundSize, 0 };
            ZSTD_inBuffer in = { m_buf, sz, 0 };
            const auto ret = ZSTD_compressStream2( m_streamZstd, &out, &in, ZSTD_e_flush );
            assert( ret == 0 );
            (void)ret;      // only inspected by the assert above
            m_size = out.pos;
        }
        else
        {
            m_size = LZ4_compress_HC_continue( m_streamHC, m_buf, m_compressed, sz, FileBoundSize );
        }
        // The two input buffers alternate: the one just compressed becomes m_second.
        // NOTE(review): presumably needed because the streaming codecs refer back to the
        // previous input block - confirm against the LZ4 streaming documentation.
        std::swap( m_buf, m_second );
    }

private:
    LZ4_stream_t* m_stream;         // set for FileCompression::Fast, otherwise nullptr
    LZ4_streamHC_t* m_streamHC;     // set for Slow and Extreme, otherwise nullptr
    ZSTD_CStream* m_streamZstd;     // set for Zstd, otherwise nullptr
    char* m_buf;                    // input buffer to be filled next
    char* m_second;                 // the other input buffer
    char* m_compressed;             // output buffer, FileBoundSize bytes
    uint32_t m_size;                // size of the last compressed block
};
// Writer for the multi-stream compressed file format. Data passed to Write() is collected in
// FileBufSize-sized blocks; each full block is handed to one of several worker threads for
// compression, and the compressed blocks are written to the file in the order they were
// submitted, each preceded by its 32-bit size.
class FileWrite
{
// Per-stream state shared between the writing thread and that stream's worker thread.
struct StreamHandle
{
StreamHandle( FileCompression comp, int level ) : stream( comp, level ) {}
WriteStream stream;
// Number of valid bytes in the stream's input buffer for the pending Compress() call.
uint32_t size;
// Set by WriteBlock() when a block is ready to compress; cleared by the worker.
bool inputReady = false;
// Set by the worker when compression has finished; cleared by ProcessPending().
bool outputReady = false;
// Set by Finish() to make the worker return.
bool exit = false;
std::mutex signalLock;
std::condition_variable signal;
std::thread thread;
};
public:
// Creates `fn` for binary writing and returns a heap-allocated writer, or nullptr if the file
// cannot be opened. A non-positive `streams` selects hardware_concurrency() (at least 1);
// the count is capped at 255 because it is stored in a single header byte.
static FileWrite* Open( const char* fn, FileCompression comp = FileCompression::Fast, int level = 1, int streams = -1 )
{
auto f = fopen( fn, "wb" );
if( !f ) return nullptr;
if( streams <= 0 ) streams = std::max<int>( 1, std::thread::hardware_concurrency() );
if( streams > 255 ) streams = 255;
return new FileWrite( f, comp, level, streams );
}
~FileWrite()
{
Finish();
fclose( m_file );
}
// Flushes the partially filled block, writes out all pending compressed blocks, then stops
// and joins every worker thread. After this call m_streams is empty.
void Finish()
{
if( m_offset > 0 ) WriteBlock();
while( m_streamPending > 0 ) ProcessPending();
for( auto& v : m_streams )
{
std::lock_guard lock( v->signalLock );
v->exit = true;
v->signal.notify_one();
}
for( auto& v : m_streams ) v->thread.join();
m_streams.clear();
}
// Appends `size` bytes. Data that fits in the current block takes the inlined fast path.
tracy_force_inline void Write( const void* ptr, size_t size )
{
if( m_offset + size <= FileBufSize )
{
WriteSmall( ptr, size );
}
else
{
WriteBig( ptr, size );
}
}
// Returns (uncompressed bytes submitted, compressed bytes written), excluding headers and size fields.
std::pair<size_t, size_t> GetCompressionStatistics() const { return std::make_pair( m_srcBytes, m_dstBytes ); }
private:
// Writes the file header (TracyHeader, compression type byte, stream count byte) and starts
// one worker thread per stream.
FileWrite( FILE* f, FileCompression comp, int level, int streams )
: m_offset( 0 )
, m_file( f )
, m_srcBytes( 0 )
, m_dstBytes( 0 )
{
assert( streams > 0 );
assert( streams < 256 );
fwrite( TracyHeader, 1, sizeof( TracyHeader ), m_file );
// Type byte: 1 for Zstd, 0 for every LZ4 variant.
uint8_t u8 = comp == FileCompression::Zstd ? 1 : 0;
fwrite( &u8, 1, 1, m_file );
u8 = streams;
fwrite( &u8, 1, 1, m_file );
m_streams.reserve( streams );
for( int i=0; i<streams; i++ )
{
auto uptr = std::make_unique<StreamHandle>( comp, level );
uptr->thread = std::thread( [ptr = uptr.get()]{ Worker( ptr ); } );
m_streams.emplace_back( std::move( uptr ) );
}
m_buf = m_streams[m_streamId]->stream.GetInputBuffer();
}
// Copies into the current block; the caller guarantees that the data fits.
tracy_force_inline void WriteSmall( const void* ptr, size_t size )
{
memcpy( m_buf + m_offset, ptr, size );
m_offset += size;
}
// Copies data that crosses a block boundary, submitting every block that becomes full.
void WriteBig( const void* ptr, size_t size )
{
auto src = (const char*)ptr;
while( size > 0 )
{
const auto sz = std::min( size, FileBufSize - m_offset );
memcpy( m_buf + m_offset, src, sz );
m_offset += sz;
src += sz;
size -= sz;
if( m_offset == FileBufSize )
{
WriteBlock();
}
}
}
// Submits the current block to the current stream's worker and moves on to the next stream.
void WriteBlock()
{
m_srcBytes += m_offset;
auto& hnd = *m_streams[m_streamId];
assert( hnd.stream.GetInputBuffer() == m_buf );
std::unique_lock lock( hnd.signalLock );
hnd.inputReady = true;
hnd.size = m_offset;
hnd.signal.notify_one();
lock.unlock();
m_streamPending++;
m_streamId = ( m_streamId + 1 ) % m_streams.size();
// When every stream has a block in flight, the oldest one (which is the stream about to be
// reused) is written out first, so its input buffer is free again.
if( m_streamPending == m_streams.size() ) ProcessPending();
m_offset = 0;
m_buf = m_streams[m_streamId]->stream.GetInputBuffer();
}
// Waits for the oldest in-flight block to finish compressing and writes it to the file as a
// 32-bit size followed by the compressed bytes.
void ProcessPending()
{
assert( m_streamPending > 0 );
// Index of the stream holding the oldest pending block.
int id = ( m_streamId + m_streams.size() - m_streamPending ) % m_streams.size();
m_streamPending--;
auto& hnd = *m_streams[id];
std::unique_lock lock( hnd.signalLock );
hnd.signal.wait( lock, [&hnd]{ return hnd.outputReady; } );
lock.unlock();
hnd.outputReady = false;
const uint32_t size = hnd.stream.GetSize();
m_dstBytes += size;
fwrite( &size, 1, sizeof( size ), m_file );
fwrite( hnd.stream.GetCompressedData(), 1, size, m_file );
}
// Worker thread body: waits for input, compresses it with the lock released, then reports
// completion under the lock. Returns when hnd->exit is set.
static void Worker( StreamHandle* hnd )
{
std::unique_lock lock( hnd->signalLock );
for(;;)
{
hnd->signal.wait( lock, [&hnd]{ return hnd->inputReady || hnd->exit; } );
if( hnd->exit ) return;
lock.unlock();
hnd->stream.Compress( hnd->size );
hnd->inputReady = false;
lock.lock();
hnd->outputReady = true;
hnd->signal.notify_one();
}
}
// Input buffer of the stream currently being filled.
char* m_buf;
// Number of bytes already stored in m_buf.
size_t m_offset;
// Index of the stream currently being filled.
int m_streamId = 0;
// Number of submitted blocks that have not been written to the file yet.
int m_streamPending = 0;
std::vector<std::unique_ptr<StreamHandle>> m_streams;
FILE* m_file;
// Total uncompressed bytes submitted for compression.
size_t m_srcBytes;
// Total compressed bytes written (size fields not included).
size_t m_dstBytes;
};
}
#endif

View File

@@ -0,0 +1,8 @@
#include "TracyMemory.hpp"
namespace tracy
{
// Definition of the global atomic counter declared in TracyMemory.hpp; starts at zero.
std::atomic<int64_t> memUsage{ 0 };
}

View File

@@ -0,0 +1,14 @@
#ifndef __TRACYMEMORY_HPP__
#define __TRACYMEMORY_HPP__
#include <atomic>
#include <stdint.h>
namespace tracy
{
// Global signed 64-bit atomic counter. This is only the declaration; the definition lives in
// a separate source file. NOTE(review): presumably tracks bytes of memory in use - confirm at the call sites.
extern std::atomic<int64_t> memUsage;
}
#endif

View File

@@ -0,0 +1,38 @@
#include "TracyMmap.hpp"
#if defined _WIN32
# include <io.h>
# include <windows.h>
void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset )
{
HANDLE hnd;
void* map = nullptr;
switch( prot )
{
case PROT_READ:
if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READONLY, 0, 0, nullptr ) )
{
map = MapViewOfFile( hnd, FILE_MAP_READ, 0, 0, length );
CloseHandle( hnd );
}
break;
case PROT_WRITE:
if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READWRITE, 0, 0, nullptr ) )
{
map = MapViewOfFile( hnd, FILE_MAP_WRITE, 0, 0, length );
CloseHandle( hnd );
}
break;
}
return map ? (char*)map + offset : (void*)-1;
}
// Windows stand-in for POSIX munmap(): unmaps the view at `addr`. `length` is ignored.
// Returns 0 on success and -1 on failure.
int munmap( void* addr, size_t length )
{
    if( UnmapViewOfFile( addr ) == 0 ) return -1;
    return 0;
}
#endif

View File

@@ -0,0 +1,19 @@
#ifndef __TRACYMMAP_HPP__
#define __TRACYMMAP_HPP__
#if !defined _WIN32
# include <sys/mman.h>
#else
# include <string.h>
# include <sys/types.h>
// Stand-ins for the <sys/mman.h> names, used when _WIN32 is defined.
// The accompanying mmap() implementation compares `prot` against PROT_READ and PROT_WRITE
// individually and does not look at `flags`.
# define PROT_READ 1
# define PROT_WRITE 2
# define MAP_SHARED 0
// Returns the mapped address, or (void*)-1 on failure.
void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset );
// Returns 0 on success, -1 on failure.
int munmap( void* addr, size_t length );
#endif
#endif

View File

@@ -0,0 +1,40 @@
#ifndef __TRACYPOPCNT_HPP__
#define __TRACYPOPCNT_HPP__
#include <limits.h>
#include <stdint.h>
#if defined _WIN64
# include <intrin.h>
# define TracyCountBits __popcnt64
# define TracyLzcnt __lzcnt64
#elif defined __GNUC__ || defined __clang__
// Number of set bits in `i`, using the compiler builtin.
static inline uint64_t TracyCountBits( uint64_t i )
{
    const int setBits = __builtin_popcountll( i );
    return uint64_t( setBits );
}
// Number of leading zero bits in `i`; returns 64 for zero.
// __builtin_clzll() has an undefined result for 0, so that case is handled explicitly.
// This matches the portable fallback in this header, which also yields 64 for zero.
static inline uint64_t TracyLzcnt( uint64_t i )
{
    if( i == 0 ) return 64;
    return uint64_t( __builtin_clzll( i ) );
}
#else
// Portable population count: sums bits in pairs, then nibbles, then bytes, and finally
// adds all byte sums together with a multiplication.
static inline uint64_t TracyCountBits( uint64_t i )
{
    const uint64_t pairs = i - ( ( i >> 1 ) & 0x5555555555555555 );
    const uint64_t nibbles = ( pairs & 0x3333333333333333 ) + ( ( pairs >> 2 ) & 0x3333333333333333 );
    const uint64_t bytes = ( nibbles + ( nibbles >> 4 ) ) & 0x0F0F0F0F0F0F0F0F;
    // The top byte of the product is the sum of all eight byte counts.
    return ( bytes * 0x0101010101010101 ) >> 56;
}
// Portable leading-zero count: copies the highest set bit into every lower position, so the
// number of set bits equals 64 minus the number of leading zeros. Returns 64 for zero.
static inline uint64_t TracyLzcnt( uint64_t i )
{
    for( int shift = 1; shift <= 32; shift *= 2 )
    {
        i |= i >> shift;
    }
    return 64 - TracyCountBits( i );
}
#endif
#endif

View File

@@ -0,0 +1,458 @@
#ifdef _MSC_VER
# pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data
#endif
#ifdef __MINGW32__
# define __STDC_FORMAT_MACROS
#endif
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> // llabs()
#include <string.h>
#include "TracyPrint.hpp"
namespace tracy
{
// Lookup table holding the two-character decimal strings "00" through "99" back to back:
// the digits for n (0..99) are at IntTable100[2*n] and IntTable100[2*n + 1].
static const char* IntTable100 =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
// Writes v (0..99) as one or two decimal digits, without padding, and advances buf past them.
// No terminator is written.
static inline void PrintTinyInt( char*& buf, uint64_t v )
{
    assert( v < 100 );
    const uint64_t tens = v / 10;
    if( tens != 0 ) *buf++ = '0' + tens;
    *buf++ = '0' + v % 10;
}
// Writes v (0..99) as exactly two decimal digits, zero-padded, and advances buf by two.
// No terminator is written.
static inline void PrintTinyInt0( char*& buf, uint64_t v )
{
    assert( v < 100 );
    // For v < 10 the tens digit is 0, which gives the leading '0'.
    buf[0] = '0' + v / 10;
    buf[1] = '0' + v % 10;
    buf += 2;
}
// Writes v (0..999) as one to three decimal digits, without padding, and advances buf.
// No terminator is written.
static inline void PrintSmallInt( char*& buf, uint64_t v )
{
    assert( v < 1000 );
    const uint64_t leading = v / 10;    // hundreds and tens combined
    if( leading >= 10 )
    {
        // Three-digit value: the first two digits come from the lookup table.
        memcpy( buf, IntTable100 + leading*2, 2 );
        buf += 2;
    }
    else if( leading > 0 )
    {
        *buf++ = '0' + leading;
    }
    *buf++ = '0' + v%10;
}
// Writes v (0..999) as exactly three decimal digits, zero-padded, and advances buf by three.
// No terminator is written.
static inline void PrintSmallInt0( char*& buf, uint64_t v )
{
    assert( v < 1000 );
    // Table entry v/10 is the zero-padded two-digit form of the hundreds and tens,
    // so it covers the padding for values below 100 as well.
    memcpy( buf, IntTable100 + v/10*2, 2 );
    buf[2] = '0' + v%10;
    buf += 3;
}
// Writes '.' followed by v (thousandths) rounded to hundredths. A single digit is written
// when the hundredths digit is zero, otherwise two digits.
static inline void PrintFrac00( char*& buf, uint64_t v )
{
    *buf++ = '.';
    const uint64_t rounded = v + 5;             // round half up at the thousandths digit
    const uint64_t hundredths = rounded / 10;
    if( hundredths % 10 != 0 )
    {
        memcpy( buf, IntTable100 + hundredths*2, 2 );
        buf += 2;
    }
    else
    {
        *buf++ = '0' + rounded/100;
    }
}
// Writes '.' followed by one digit: v (thousandths) rounded to tenths. Advances buf by two.
static inline void PrintFrac0( char*& buf, uint64_t v )
{
    buf[0] = '.';
    buf[1] = '0' + ( v + 50 ) / 100;
    buf += 2;
}
// Writes v / 1000 with up to two decimals. Fractions of 995 thousandths and above round up
// to the next whole number; fractions of 5 thousandths and below are dropped.
static inline void PrintSmallIntFrac( char*& buf, uint64_t v )
{
    const uint64_t whole = v / 1000;
    const uint64_t frac = v % 1000;
    if( frac < 995 )
    {
        PrintSmallInt( buf, whole );
        if( frac > 5 ) PrintFrac00( buf, frac );
        return;
    }
    // Rounding up from 999 needs a fourth digit, which PrintSmallInt() cannot produce.
    if( whole >= 999 )
    {
        memcpy( buf, "1000", 4 );
        buf += 4;
        return;
    }
    PrintSmallInt( buf, whole+1 );
}
// Writes v (milliseconds) as zero-padded two-digit seconds with at most one decimal.
// Fractions of 950 ms and above round up to the next second; 50 ms and below are dropped.
static inline void PrintSecondsFrac( char*& buf, uint64_t v )
{
    const uint64_t secs = v / 1000;
    const uint64_t millis = v % 1000;
    if( millis >= 950 )
    {
        PrintTinyInt0( buf, secs+1 );
        return;
    }
    PrintTinyInt0( buf, secs );
    if( millis > 50 ) PrintFrac0( buf, millis );
}
// Formats a duration given in nanoseconds as a short, rounded, human-readable string. The
// unit is picked from the magnitude: ns, μs, ms and s with up to two decimals, then m:ss,
// h:mm:ss and finally d followed by hh:mm:ss. Negative values are prefixed with '-'.
// Durations of 100 days or more print "100+d" in place of the day count, the same way
// TimeToStringExact() does.
//
// The result points into a rotating pool of 8 static buffers, so it stays valid only until
// the pool wraps around. The pool and its index are not synchronised.
const char* TimeToString( int64_t _ns )
{
    enum { Pool = 8 };
    static char bufpool[Pool][64];
    static int bufsel = 0;
    char* buf = bufpool[bufsel];
    char* bufstart = buf;
    bufsel = ( bufsel + 1 ) % Pool;

    uint64_t ns;
    if( _ns < 0 )
    {
        *buf = '-';
        buf++;
        // Negate in unsigned arithmetic: -_ns overflows for INT64_MIN.
        ns = uint64_t( 0 ) - uint64_t( _ns );
    }
    else
    {
        ns = _ns;
    }

    if( ns < 1000 )
    {
        PrintSmallInt( buf, ns );
        memcpy( buf, " ns", 4 );
    }
    else if( ns < 1000ll * 1000 )
    {
        PrintSmallIntFrac( buf, ns );
        memcpy( buf, " \xce\xbcs", 5 );     // UTF-8 encoded micro sign
    }
    else if( ns < 1000ll * 1000 * 1000 )
    {
        PrintSmallIntFrac( buf, ns / 1000 );
        memcpy( buf, " ms", 4 );
    }
    else if( ns < 1000ll * 1000 * 1000 * 60 )
    {
        PrintSmallIntFrac( buf, ns / ( 1000ll * 1000 ) );
        memcpy( buf, " s", 3 );
    }
    else if( ns < 1000ll * 1000 * 1000 * 60 * 60 )
    {
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
        // Remainder in milliseconds.
        const auto s = int64_t( ns - m * ( 1000ll * 1000 * 1000 * 60 ) ) / ( 1000ll * 1000 );
        PrintTinyInt( buf, m );
        *buf++ = ':';
        // NOTE(review): PrintSecondsFrac() rounds up from 950 ms, so a remainder of 59.95 s
        // or more is printed as "60" here instead of carrying into the minutes.
        PrintSecondsFrac( buf, s );
        *buf++ = '\0';
    }
    else if( ns < 1000ll * 1000 * 1000 * 60 * 60 * 24 )
    {
        const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) );
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - h * 60 );
        const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - h * ( 60 * 60 ) - m * 60 );
        PrintTinyInt( buf, h );
        *buf++ = ':';
        PrintTinyInt0( buf, m );
        *buf++ = ':';
        PrintTinyInt0( buf, s );
        *buf++ = '\0';
    }
    else
    {
        const auto d = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 * 24 ) );
        const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) - d * 24 );
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - d * ( 60 * 24 ) - h * 60 );
        const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - d * ( 60 * 60 * 24 ) - h * ( 60 * 60 ) - m * 60 );
        if( d < 100 )
        {
            PrintTinyInt( buf, d );
            *buf++ = 'd';
        }
        else
        {
            // PrintTinyInt() handles at most two digits.
            memcpy( buf, "100+d", 5 );
            buf += 5;
        }
        PrintTinyInt0( buf, h );
        *buf++ = ':';
        PrintTinyInt0( buf, m );
        *buf++ = ':';
        PrintTinyInt0( buf, s );
        *buf++ = '\0';
    }
    return bufstart;
}
// Formats a duration given in nanoseconds without rounding: an optional coarse part
// (days/hours/minutes/seconds, chosen by magnitude) followed by the sub-second remainder as
// "mmm,uuu,nnnns". The two parts are separated by a single space. Negative values are
// prefixed with '-'. Durations of 100 days or more print "100+d" in place of the day count.
//
// The result points into a rotating pool of 8 static buffers, so it stays valid only until
// the pool wraps around. The pool and its index are not synchronised.
const char* TimeToStringExact( int64_t _ns )
{
    enum { Pool = 8 };
    static char bufpool[Pool][64];
    static int bufsel = 0;
    char* buf = bufpool[bufsel];
    char* bufstart = buf;
    bufsel = ( bufsel + 1 ) % Pool;

    uint64_t ns;
    if( _ns < 0 )
    {
        *buf = '-';
        buf++;
        // Negate in unsigned arithmetic: -_ns overflows for INT64_MIN.
        ns = uint64_t( 0 ) - uint64_t( _ns );
    }
    else
    {
        ns = _ns;
    }

    // Start of the numeric text, used to tell whether a coarse part was written.
    const char* numStart = buf;

    if( ns >= 1000ll * 1000 * 1000 * 60 * 60 * 24 )
    {
        const auto d = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 * 24 ) );
        const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) - d * 24 );
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - d * ( 60 * 24 ) - h * 60 );
        const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - d * ( 60 * 60 * 24 ) - h * ( 60 * 60 ) - m * 60 );
        if( d < 100 )
        {
            PrintTinyInt( buf, d );
            *buf++ = 'd';
        }
        else
        {
            memcpy( buf, "100+d", 5 );
            buf += 5;
        }
        PrintTinyInt0( buf, h );
        *buf++ = ':';
        PrintTinyInt0( buf, m );
        *buf++ = ':';
        PrintTinyInt0( buf, s );
        ns %= 1000ll * 1000 * 1000;
    }
    else if( ns >= 1000ll * 1000 * 1000 * 60 * 60 )
    {
        const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) );
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - h * 60 );
        const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - h * ( 60 * 60 ) - m * 60 );
        PrintTinyInt( buf, h );
        *buf++ = ':';
        PrintTinyInt0( buf, m );
        *buf++ = ':';
        PrintTinyInt0( buf, s );
        ns %= 1000ll * 1000 * 1000;
    }
    else if( ns >= 1000ll * 1000 * 1000 * 60 )
    {
        const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
        const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - m * 60 );
        PrintTinyInt( buf, m );
        *buf++ = ':';
        PrintTinyInt0( buf, s );
        ns %= 1000ll * 1000 * 1000;
    }
    else if( ns >= 1000ll * 1000 * 1000 )
    {
        PrintTinyInt( buf, int64_t( ns / ( 1000ll * 1000 * 1000 ) ) );
        *buf++ = 's';
        ns %= 1000ll * 1000 * 1000;
    }

    // Separate the coarse part from the sub-second part. This is done for a zero remainder
    // as well, so "5s 000,000,000ns" is laid out the same way as "5s 000,000,001ns".
    if( buf != numStart ) *buf++ = ' ';

    if( ns > 0 )
    {
        if( ns >= 1000ll * 1000 )
        {
            PrintSmallInt0( buf, int64_t( ns / ( 1000ll * 1000 ) ) );
            *buf++ = ',';
            ns %= 1000ll * 1000;
        }
        else
        {
            memcpy( buf, "000,", 4 );
            buf += 4;
        }
        if( ns >= 1000ll )
        {
            PrintSmallInt0( buf, int64_t( ns / 1000ll ) );
            *buf++ = ',';
            ns %= 1000ll;
        }
        else
        {
            memcpy( buf, "000,", 4 );
            buf += 4;
        }
        PrintSmallInt0( buf, ns );
        *buf++ = 'n';
        *buf++ = 's';
    }
    else
    {
        memcpy( buf, "000,000,000ns", 13 );
        buf += 13;
    }

    *buf++ = '\0';
    return bufstart;
}
// Formats a byte count. Magnitudes below 10000 are printed as "<n> bytes"; larger ones are
// divided by a power of 1024 and printed with up to two decimals (trailing zeros removed)
// followed by " KB", " MB", " GB" or " TB". The unit changes once the magnitude reaches
// 10000 of the previous unit.
//
// The result points into a rotating pool of 8 static buffers.
const char* MemSizeToString( int64_t val )
{
    enum { Pool = 8 };
    static char bufpool[Pool][64];
    static int bufsel = 0;
    char* const out = bufpool[bufsel];
    bufsel = ( bufsel + 1 ) % Pool;

    const auto magnitude = llabs( val );
    if( magnitude < 10000ll )
    {
        sprintf( out, "%" PRIi64 " bytes", val );
        return out;
    }

    // Pick the divisor and the unit prefix letter together.
    double divisor;
    char prefix;
    if( magnitude < 10000ll * 1024 )
    {
        divisor = 1024.;
        prefix = 'K';
    }
    else if( magnitude < 10000ll * 1024 * 1024 )
    {
        divisor = 1024. * 1024;
        prefix = 'M';
    }
    else if( magnitude < 10000ll * 1024 * 1024 * 1024 )
    {
        divisor = 1024. * 1024 * 1024;
        prefix = 'G';
    }
    else
    {
        divisor = 1024. * 1024 * 1024 * 1024;
        prefix = 'T';
    }

    char* ptr = PrintFloat( out, out+64, val / divisor, 2 );

    // Drop trailing zeros, and the decimal point too if nothing is left after it.
    ptr--;
    while( ptr >= out && *ptr == '0' ) ptr--;
    if( *ptr != '.' ) ptr++;

    *ptr++ = ' ';
    *ptr++ = prefix;
    *ptr++ = 'B';
    *ptr++ = '\0';
    return out;
}
// Returns "<fn>:<line>", or `fn` itself (no copy) when `line` is 0.
//
// The formatted text is stored in a rotating pool of 8 static buffers of 4096 bytes each.
// Text that does not fit is truncated to 4095 characters rather than overflowing the buffer.
const char* LocationToString( const char* fn, uint32_t line )
{
    if( line == 0 ) return fn;
    enum { Pool = 8, Size = 4096 };
    static char bufpool[Pool][Size];
    static int bufsel = 0;
    char* buf = bufpool[bufsel];
    bufsel = ( bufsel + 1 ) % Pool;
    // PRIu32 matches the unsigned 32-bit line number; "%i" would print large values as negative.
    snprintf( buf, Size, "%s:%" PRIu32, fn, line );
    return buf;
}
namespace detail
{
// Hands out the next 64-byte buffer from a rotating pool of 8 static buffers.
// The pool and its index are not synchronised.
char* RealToStringGetBuffer()
{
    constexpr int PoolSize = 8;
    static char pool[PoolSize][64];
    static int next = 0;
    char* const slot = pool[next];
    if( ++next == PoolSize ) next = 0;
    return slot;
}
}
}

View File

@@ -0,0 +1,152 @@
#ifndef __TRACYPRINT_HPP__
#define __TRACYPRINT_HPP__
#if ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L
# if __has_include(<charconv>) && __has_include(<type_traits>)
# include <charconv>
# include <type_traits>
# else
# define NO_CHARCONV
# endif
#else
# define NO_CHARCONV
#endif
#if defined _MSC_VER && _MSC_VER < 1924
# define NO_CHARCONV
#endif
#ifdef __GNUC__
# define NO_CHARCONV
#endif
#ifdef NO_CHARCONV
# include <stdio.h>
#endif
#include <stdint.h>
#include <string.h>
#include "../public/common/TracyForceInline.hpp"
namespace tracy
{
namespace detail
{
// Returns a scratch buffer for RealToString(); callers in this header treat it as 64 bytes long.
char* RealToStringGetBuffer();
// Post-processes a NUL-terminated decimal number in place: inserts ',' between groups of
// three integer digits and, if there is a fractional part, strips its trailing zeros (and
// the '.' itself when nothing remains after it). `ptr` is the start of the text and `end`
// points at its terminator. The buffer must have room for the inserted separators.
static tracy_force_inline void RealToStringFloating( char* ptr, char* end )
{
    if( *ptr == '-' ) ++ptr;
    char* const firstDigit = ptr;
    while( *ptr != '\0' && *ptr != '.' ) ++ptr;

    // Bytes from ptr up to and including the terminator; grows as separators are inserted.
    auto tail = end - ptr + 1;
    for( ; ptr - firstDigit > 3; tail += 4 )
    {
        ptr -= 3;
        memmove( ptr+1, ptr, tail+3 );
        *ptr = ',';
    }

    // Find the decimal point again; the text may have shifted.
    while( *ptr != '\0' && *ptr != '.' ) ++ptr;
    if( *ptr == '\0' ) return;

    char* last = ptr + strlen( ptr ) - 1;
    while( *last == '0' ) --last;
    if( *last != '.' && *last != ',' ) ++last;
    *last = '\0';
}
// Inserts ',' between groups of three digits of a NUL-terminated integer, in place.
// `buf` is the start of the text (a leading '-' is skipped) and `end` points at its
// terminator. The buffer must have room for the inserted separators.
static tracy_force_inline void RealToStringInteger( char* buf, char* end )
{
    char* const firstDigit = ( *buf == '-' ) ? buf + 1 : buf;
    // Bytes from the insertion point up to and including the terminator.
    int tail = 1;
    for( char* pos = end; pos - firstDigit > 3; tail += 4 )
    {
        pos -= 3;
        memmove( pos+1, pos, tail+3 );
        *pos = ',';
    }
}
}
// Writes `value` in fixed notation with `precision` decimals into [begin, end) and returns
// a pointer just past the last character written.
//
// With <charconv> the text is not NUL-terminated. The stdio fallback writes a terminator and
// never writes past `end`: if the text does not fit it is truncated and the returned pointer
// is `end - 1`, where the terminator sits.
template<typename T>
static inline char* PrintFloat( char* begin, char* end, T value, int precision )
{
#ifndef NO_CHARCONV
    return std::to_chars( begin, end, value, std::chars_format::fixed, precision ).ptr;
#else
    const auto capacity = end - begin;
    if( capacity <= 0 ) return begin;
    const int written = snprintf( begin, size_t( capacity ), "%.*f", precision, value );
    if( written < 0 ) return begin;
    return written < capacity ? begin + written : end - 1;
#endif
}
// Writes `value` in fixed notation into [begin, end) and returns a pointer just past the
// last character written. The number of decimals is chosen by the backend: std::to_chars
// emits the shortest exact representation, the stdio fallback uses "%f".
//
// With <charconv> the text is not NUL-terminated. The stdio fallback writes a terminator and
// never writes past `end`: if the text does not fit it is truncated and the returned pointer
// is `end - 1`, where the terminator sits.
template<typename T>
static inline char* PrintFloat( char* begin, char* end, T value )
{
#ifndef NO_CHARCONV
    return std::to_chars( begin, end, value, std::chars_format::fixed ).ptr;
#else
    const auto capacity = end - begin;
    if( capacity <= 0 ) return begin;
    const int written = snprintf( begin, size_t( capacity ), "%f", value );
    if( written < 0 ) return begin;
    return written < capacity ? begin + written : end - 1;
#endif
}
#ifndef NO_CHARCONV
// Converts a number to text with ',' thousands separators, using a buffer
// obtained from detail::RealToStringGetBuffer(). Integral types get only the
// separators; other types additionally have trailing fractional zeros
// trimmed.
template<typename T>
static inline const char* RealToString( T val )
{
    char* const text = detail::RealToStringGetBuffer();
    char* const terminator = std::to_chars( text, text+64, val ).ptr;
    *terminator = '\0';
    if constexpr ( !std::is_integral_v<T> )
    {
        detail::RealToStringFloating( text, terminator );
    }
    else
    {
        detail::RealToStringInteger( text, terminator );
    }
    return text;
}
#else
// Converts a double to text with ',' thousands separators and without
// trailing fractional zeros, using a 64-byte buffer obtained from
// detail::RealToStringGetBuffer().
// "%f" of a very large value needs hundreds of digits, so the plain form is
// only used when it fits in 47 characters; that leaves room for the
// separators added afterwards (at most 13) plus the terminator. Larger values
// are printed in "%e" notation and returned as-is.
static inline const char* RealToString( double val )
{
    constexpr int BufSize = 64;
    constexpr int MaxPlain = 48;
    auto buf = detail::RealToStringGetBuffer();
    const auto sz = snprintf( buf, MaxPlain, "%f", val );
    if( sz < 0 )
    {
        // Formatting error: hand back an empty string rather than garbage.
        buf[0] = '\0';
        return buf;
    }
    if( sz >= MaxPlain )
    {
        // Exponent digits must not be mistaken for trailing fractional zeros,
        // so the post-processing step is skipped for this form.
        snprintf( buf, BufSize, "%e", val );
        return buf;
    }
    detail::RealToStringFloating( buf, buf+sz );
    return buf;
}
#endif
const char* TimeToString( int64_t ns );
const char* TimeToStringExact( int64_t ns );
const char* MemSizeToString( int64_t val );
const char* LocationToString( const char* fn, uint32_t line );
// Writes "<string> (<percent with two decimals>%)" followed by a NUL into
// `buf`. The number is formatted with an upper bound of buf+128.
static tracy_force_inline void PrintStringPercent( char* buf, const char* string, double percent )
{
    const auto len = strlen( string );
    char* out = buf;
    memcpy( out, string, len );
    out += len;
    memcpy( out, " (", 2 );
    out += 2;
    out = PrintFloat( out, buf+128, percent, 2 );
    // Three bytes: '%', ')' and the terminating NUL.
    memcpy( out, "%)", 3 );
}
// Writes "(<percent with two decimals>%)" followed by a NUL into `buf`.
// The number is formatted with an upper bound of buf+64.
static tracy_force_inline void PrintStringPercent( char* buf, double percent )
{
    // Copies '(' together with its NUL; the NUL is overwritten by the number.
    memcpy( buf, "(", 2 );
    char* const tail = PrintFloat( buf+1, buf+64, percent, 2 );
    // Three bytes: '%', ')' and the terminating NUL.
    memcpy( tail, "%)", 3 );
}
}
#endif

View File

@@ -0,0 +1,82 @@
#ifndef __TRACYSHORTPTR_HPP__
#define __TRACYSHORTPTR_HPP__
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "../public/common/TracyForceInline.hpp"
namespace tracy
{
#if UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF
// Pointer stored in six bytes instead of eight. Only valid for addresses
// whose upper 16 bits are zero (asserted in set()).
// NOTE(review): the byte copies take the first six bytes of the pointer
// object, which are its low 48 bits only on little-endian hosts - confirm
// that no big-endian target is supported.
template<typename T>
class short_ptr
{
public:
    // Leaves the stored bytes uninitialised.
    tracy_force_inline short_ptr() {}
    tracy_force_inline short_ptr( const T* ptr ) { set( ptr ); }

    tracy_force_inline operator T*() { return get(); }
    tracy_force_inline operator const T*() const { return get(); }
    tracy_force_inline T& operator*() { return *get(); }
    tracy_force_inline const T& operator*() const { return *get(); }
    tracy_force_inline T* operator->() { return get(); }
    tracy_force_inline const T* operator->() const { return get(); }

    // Stores the first six bytes of `ptr`.
    tracy_force_inline void set( const T* ptr )
    {
        assert( ( uint64_t( ptr ) & 0xFFFF000000000000 ) == 0 );
        const char* src = (const char*)&ptr;
        memcpy( m_ptr, src, 4 );
        memcpy( m_ptr+4, src+4, 2 );
    }

    tracy_force_inline T* get() { return unpack(); }
    tracy_force_inline const T* get() const { return unpack(); }

private:
    // Reassembles the pointer from a 32-bit low part and a 16-bit high part.
    tracy_force_inline T* unpack() const
    {
        uint32_t lo;
        uint16_t hi;
        memcpy( &lo, m_ptr, 4 );
        memcpy( &hi, m_ptr+4, 2 );
        const uint64_t bits = ( uint64_t( hi ) << 32 ) | uint64_t( lo );
        return (T*)bits;
    }

    uint8_t m_ptr[6];
};
#else
// Variant used when pointers are not 64 bits wide: a plain pointer wrapper
// exposing the same interface as the six-byte variant.
template<typename T>
class short_ptr
{
public:
    // Leaves the stored pointer uninitialised.
    tracy_force_inline short_ptr() {}
    // Accepts a pointer-to-const like the six-byte variant; constness is
    // dropped for storage.
    tracy_force_inline short_ptr( const T* ptr ) : m_ptr( const_cast<T*>( ptr ) ) {}

    tracy_force_inline operator T*() { return m_ptr; }
    tracy_force_inline operator const T*() const { return m_ptr; }
    tracy_force_inline T& operator*() { return *m_ptr; }
    tracy_force_inline const T& operator*() const { return *m_ptr; }
    tracy_force_inline T* operator->() { return m_ptr; }
    tracy_force_inline const T* operator->() const { return m_ptr; }

    // A direct `m_ptr = ptr` is ill-formed (const T* to T*) once this member
    // is instantiated, so the qualifier is removed explicitly.
    tracy_force_inline void set( const T* ptr ) { m_ptr = const_cast<T*>( ptr ); }
    tracy_force_inline T* get() { return m_ptr; }
    tracy_force_inline const T* get() const { return m_ptr; }

private:
    T* m_ptr;
};
#endif
}
#endif

View File

@@ -0,0 +1,159 @@
#ifndef __TRACYSLAB_HPP__
#define __TRACYSLAB_HPP__
#include <assert.h>
#include <stdint.h>
#include <vector>
#include "TracyMemory.hpp"
#include "../public/common/TracyForceInline.hpp"
namespace tracy
{
// Bump allocator that hands out memory from blocks of BlockSize bytes.
// Individual allocations are never freed; every buffer is released together
// by Reset() or the destructor. Bytes held are mirrored in the global
// `memUsage` counter. Returned memory is not aligned beyond what the running
// offset happens to provide.
template<size_t BlockSize>
class Slab
{
public:
    Slab()
        : m_ptr( new char[BlockSize] )
        , m_offset( 0 )
        , m_buffer( { m_ptr } )
        , m_usage( BlockSize )
    {
        memUsage.fetch_add( BlockSize, std::memory_order_relaxed );
    }

    ~Slab()
    {
        memUsage.fetch_sub( m_usage, std::memory_order_relaxed );
        for( auto& v : m_buffer )
        {
            delete[] v;
        }
    }

    // Returns `size` uninitialised bytes (size must not exceed BlockSize).
    // A new block is started when the current one cannot hold the request.
    tracy_force_inline void* AllocRaw( size_t size )
    {
        assert( size <= BlockSize );
        const auto offset = m_offset;
        if( offset + size > BlockSize )
        {
            return DoAlloc( size );
        }
        else
        {
            void* ret = m_ptr + offset;
            m_offset += size;
            return ret;
        }
    }

    // Allocates and default-initialises one T.
    template<typename T>
    tracy_force_inline T* AllocInit()
    {
        const auto size = sizeof( T );
        auto ret = AllocRaw( size );
        new( ret ) T;
        return (T*)ret;
    }

    // Allocates and default-initialises an array of `sz` T objects.
    template<typename T>
    tracy_force_inline T* AllocInit( size_t sz )
    {
        const auto size = sizeof( T ) * sz;
        auto ret = AllocRaw( size );
        T* ptr = (T*)ret;
        for( size_t i=0; i<sz; i++ )
        {
            new( ptr ) T;
            ptr++;
        }
        return (T*)ret;
    }

    // Allocates storage for one T without constructing it.
    template<typename T>
    tracy_force_inline T* Alloc()
    {
        return (T*)AllocRaw( sizeof( T ) );
    }

    // Allocates storage for `size` T objects without constructing them.
    template<typename T>
    tracy_force_inline T* Alloc( size_t size )
    {
        return (T*)AllocRaw( sizeof( T ) * size );
    }

    // Gives back the most recently allocated `size` bytes of the current block.
    tracy_force_inline void Unalloc( size_t size )
    {
        assert( size <= m_offset );
        m_offset -= size;
    }

    // Like AllocRaw, but also accepts requests larger than BlockSize.
    // A request that does not fit the current block gets a dedicated buffer,
    // unless it fits a block and at most 1024 bytes of the current block
    // would be wasted, in which case a fresh block is started.
    tracy_force_inline void* AllocBig( size_t size )
    {
        const auto offset = m_offset;
        if( offset + size <= BlockSize )
        {
            void* ret = m_ptr + offset;
            m_offset += size;
            return ret;
        }
        else if( size <= BlockSize && BlockSize - offset <= 1024 )
        {
            return DoAlloc( size );
        }
        else
        {
            memUsage.fetch_add( size, std::memory_order_relaxed );
            m_usage += size;
            auto ret = new char[size];
            m_buffer.emplace_back( ret );
            return ret;
        }
    }

    // Frees every buffer except the first one and rewinds to its start.
    void Reset()
    {
        if( m_buffer.size() > 1 )
        {
            memUsage.fetch_sub( m_usage - BlockSize, std::memory_order_relaxed );
            m_usage = BlockSize;
            // size_t index: avoids comparing a signed int against size().
            for( size_t i=1; i<m_buffer.size(); i++ )
            {
                delete[] m_buffer[i];
            }
            m_ptr = m_buffer[0];
            m_buffer.clear();
            m_buffer.emplace_back( m_ptr );
        }
        m_offset = 0;
    }

    Slab( const Slab& ) = delete;
    Slab( Slab&& ) = delete;

    Slab& operator=( const Slab& ) = delete;
    Slab& operator=( Slab&& ) = delete;

private:
    // Starts a new block and reserves its first `willUseBytes` bytes.
    void* DoAlloc( uint32_t willUseBytes )
    {
        auto ptr = new char[BlockSize];
        m_ptr = ptr;
        m_offset = willUseBytes;
        m_buffer.emplace_back( m_ptr );
        memUsage.fetch_add( BlockSize, std::memory_order_relaxed );
        m_usage += BlockSize;
        return ptr;
    }

    char* m_ptr;                    // block currently being carved up
    uint32_t m_offset;              // bytes already used in m_ptr
    std::vector<char*> m_buffer;    // every buffer owned by this slab
    size_t m_usage;                 // total bytes owned, as reported to memUsage
};
}
#endif

View File

@@ -0,0 +1,19 @@
#ifndef __TRACYSORT_HPP__
#define __TRACYSORT_HPP__
#ifndef NO_PARALLEL_SORT
# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L )
# if __has_include(<execution>)
# include <algorithm>
# include <execution>
# else
# define NO_PARALLEL_SORT
# endif
# else
# define NO_PARALLEL_SORT
# endif
#endif
#include "tracy_pdqsort.h"
#endif

View File

@@ -0,0 +1,127 @@
#ifndef __TRACYSORTEDVECTOR_HPP__
#define __TRACYSORTEDVECTOR_HPP__
#include "TracySort.hpp"
#include "TracyVector.hpp"
namespace tracy
{
#pragma pack( push, 1 )
// Vector that tolerates out-of-order appends and is sorted lazily.
// `sortedEnd` is 0 while every element is in order; otherwise it is the
// number of leading elements known to be sorted (the index at which the
// first out-of-order element was appended).
template<typename T, class CompareDefault = std::less<T>>
class SortedVector
{
public:
    using iterator = T*;
    using const_iterator = const T*;

    tracy_force_inline SortedVector()
        : sortedEnd( 0 )
    {}

    SortedVector( const SortedVector& ) = delete;
    tracy_force_inline SortedVector( SortedVector&& src ) noexcept
        : v( std::move( src.v ) )
        , sortedEnd( src.sortedEnd )
    {
        // The source no longer has elements, so it must not claim to hold an
        // unsorted tail.
        src.sortedEnd = 0;
    }
    tracy_force_inline SortedVector( const T& value )
        : v( value )
        , sortedEnd( 0 )
    {
    }

    SortedVector& operator=( const SortedVector& ) = delete;
    tracy_force_inline SortedVector& operator=( SortedVector&& src ) noexcept
    {
        v = std::move( src.v );
        sortedEnd = src.sortedEnd;
        // Keep the moved-from object consistent with its now-empty storage.
        src.sortedEnd = 0;
        return *this;
    }

    tracy_force_inline void swap( SortedVector& other )
    {
        v.swap( other.v );
        std::swap( sortedEnd, other.sortedEnd );
    }

    tracy_force_inline bool empty() const { return v.empty(); }
    tracy_force_inline size_t size() const { return v.size(); }
    tracy_force_inline bool is_sorted() const { return sortedEnd == 0; }

    tracy_force_inline T* data() { return v.data(); }
    tracy_force_inline const T* data() const { return v.data(); }

    tracy_force_inline T* begin() { return v.begin(); }
    tracy_force_inline const T* begin() const { return v.begin(); }
    tracy_force_inline T* end() { return v.end(); }
    tracy_force_inline const T* end() const { return v.end(); }

    tracy_force_inline T& front() { return v.front(); }
    tracy_force_inline const T& front() const { return v.front(); }

    tracy_force_inline T& back() { return v.back(); }
    tracy_force_inline const T& back() const { return v.back(); }

    tracy_force_inline T& operator[]( size_t idx ) { return v[idx]; }
    tracy_force_inline const T& operator[]( size_t idx ) const { return v[idx]; }

    tracy_force_inline void push_back( const T& val ) { push_back( val, CompareDefault() ); }

    // Appends `val`. The first element that does not compare after the
    // current last element marks the start of the unsorted tail.
    template<class Compare>
    tracy_force_inline void push_back( const T& val, Compare comp )
    {
        if( sortedEnd == 0 && !v.empty() && !comp( v.back(), val ) )
        {
            sortedEnd = (uint32_t)v.size();
        }
        v.push_back( val );
    }

    tracy_force_inline void reserve( size_t cap ) { v.reserve( cap ); }
    template<size_t U>
    tracy_force_inline void reserve_exact( uint32_t sz, Slab<U>& slab ) { v.reserve_exact( sz, slab ); }

    tracy_force_inline void clear() { v.clear(); sortedEnd = 0; }

    tracy_force_inline T* erase( T* begin, T* end )
    {
        assert( is_sorted() );
        return v.erase( begin, end );
    }

    tracy_force_inline void sort() { sort( CompareDefault() ); }
    tracy_force_inline void ensure_sorted() { if( !is_sorted() ) sort(); }

    // Sorts the unsorted tail, then merges it with the sorted prefix.
    // Must only be called while the container is not sorted.
    template<class Compare>
    void sort( Compare comp )
    {
        assert( !is_sorted() );
        const auto sb = v.begin();
        const auto se = sb + sortedEnd;
        const auto sl = se - 1;
        const auto ue = v.end();
#ifdef NO_PARALLEL_SORT
        pdqsort_branchless( se, ue, comp );
#else
        std::sort( std::execution::par_unseq, se, ue, comp );
#endif
        // Only the overlapping part of the two runs needs merging: prefix
        // elements below the tail's minimum and tail elements above the
        // prefix's maximum are already in their final positions.
        const auto ss = std::lower_bound( sb, se, *se, comp );
        const auto uu = std::lower_bound( se, ue, *sl, comp );
        std::inplace_merge( ss, se, uu, comp );
        sortedEnd = 0;
    }

private:
    Vector<T> v;
    uint32_t sortedEnd;
};
#pragma pack( pop )
enum { SortedVectorSize = sizeof( SortedVector<int> ) };
}
#endif

View File

@@ -0,0 +1,88 @@
#ifndef __TRACYSTRINGDISCOVERY_HPP__
#define __TRACYSTRINGDISCOVERY_HPP__
#include "../public/common/TracyForceInline.hpp"
#include "tracy_robin_hood.h"
#include "TracyCharUtil.hpp"
#include "TracyEvent.hpp"
#include "TracyVector.hpp"
namespace tracy
{
// Tracks items keyed by a 64-bit name whose string is resolved later.
// Items wait in `m_pending` until StringDiscovered() is called for their
// name; items whose resolved strings compare equal are merged into one.
template<typename T>
class StringDiscovery
{
public:
    tracy_force_inline Vector<T>& Data() { return m_data; }
    tracy_force_inline const Vector<T>& Data() const { return m_data; }

    tracy_force_inline bool IsPending() const { return !m_pending.empty(); }

    // Resolves the pending item registered under `name` to the string `sl`.
    // A string seen for the first time publishes the item and records
    // name -> sl.ptr in `stringMap`; otherwise the item already known for
    // that string is reused and the callback is invoked as
    // Merge( destination, postponed )
    template<typename U>
    tracy_force_inline void StringDiscovered( uint64_t name, const StringLocation& sl, U& stringMap, std::function<void(T,T)> Merge )
    {
        auto waiting = m_pending.find( name );
        assert( waiting != m_pending.end() );

        auto known = m_rev.find( sl.ptr );
        if( known != m_rev.end() )
        {
            auto destination = known->second;
            m_map.emplace( name, destination );
            Merge( destination, waiting->second );
        }
        else
        {
            m_map.emplace( name, waiting->second );
            m_rev.emplace( sl.ptr, waiting->second );
            m_data.push_back( waiting->second );
            stringMap.emplace( name, sl.ptr );
        }

        m_pending.erase( waiting );
    }

    // Returns the item for `name`: the resolved one, else the pending one,
    // else a new item from Create(). A newly created item that converts to
    // true is registered as pending and Query( name ) is invoked for it.
    tracy_force_inline T Retrieve( uint64_t name, const std::function<T(uint64_t)>& Create, const std::function<void(uint64_t)>& Query )
    {
        auto resolved = m_map.find( name );
        if( resolved != m_map.end() ) return resolved->second;

        auto waiting = m_pending.find( name );
        if( waiting != m_pending.end() ) return waiting->second;

        T item = Create( name );
        if( item )
        {
            m_pending.emplace( name, item );
            Query( name );
        }
        return item;
    }

    // Appends an item directly to the data list without touching the maps.
    tracy_force_inline void AddExternal( const T& val )
    {
        m_data.push_back( val );
    }

private:
    Vector<T> m_data;
    unordered_flat_map<uint64_t, T> m_pending;
    unordered_flat_map<uint64_t, T> m_map;
    unordered_flat_map<const char*, T, charutil::Hasher, charutil::Comparator> m_rev;
};
}
#endif

View File

@@ -0,0 +1,43 @@
#include "TracySysUtil.hpp"
#ifdef _WIN32
# include <windows.h>
#elif defined __linux__
# include <sys/sysinfo.h>
#elif defined __APPLE__ || defined BSD
# include <sys/types.h>
# include <sys/sysctl.h>
#endif
namespace tracy
{
// Returns the amount of physical memory in bytes, or 0 when the platform is
// not supported or the query fails.
size_t GetPhysicalMemorySize()
{
#ifdef _WIN32
    MEMORYSTATUSEX statex;
    statex.dwLength = sizeof( statex );
    if( !GlobalMemoryStatusEx( &statex ) ) return 0;
    return statex.ullTotalPhys;
#elif defined __linux__
    struct sysinfo sysInfo;
    if( sysinfo( &sysInfo ) != 0 ) return 0;
    // totalram is expressed in units of mem_unit bytes, not necessarily bytes.
    return size_t( sysInfo.totalram ) * sysInfo.mem_unit;
#elif defined __APPLE__
    size_t memSize = 0;
    size_t sz = sizeof( memSize );
    if( sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ) != 0 ) return 0;
    return memSize;
#elif defined BSD
    size_t memSize = 0;
    size_t sz = sizeof( memSize );
    if( sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ) != 0 ) return 0;
    return memSize;
#else
    return 0;
#endif
}
}

View File

@@ -0,0 +1,13 @@
#ifndef __TRACYSYSUTIL_HPP__
#define __TRACYSYSUTIL_HPP__
#include <stdlib.h>
namespace tracy
{
size_t GetPhysicalMemorySize();
}
#endif

View File

@@ -0,0 +1,88 @@
#include <assert.h>
#include <stdio.h>
#include "../public/common/TracySystem.hpp"
#include "TracyTaskDispatch.hpp"
namespace tracy
{
// Spawns `workers` threads. Each one names itself "<name> #<index>" and then
// runs the worker loop.
// NOTE(review): `name` is captured as a raw pointer and read on the new
// thread, so the string has to outlive thread start-up - confirm callers
// pass storage that lives long enough.
TaskDispatch::TaskDispatch( size_t workers, const char* name )
    : m_exit( false )
    , m_jobs( 0 )
{
    m_workers.reserve( workers );
    size_t idx = 0;
    while( idx < workers )
    {
        m_workers.emplace_back( [this, name, idx]{ SetName( name, idx ); Worker(); } );
        idx++;
    }
}
// Requests shutdown, wakes every worker and joins them.
TaskDispatch::~TaskDispatch()
{
    m_exit.store( true, std::memory_order_release );
    {
        // Notify while holding the queue lock so that a worker cannot miss
        // the wake-up between evaluating its predicate and going to sleep.
        std::lock_guard<std::mutex> guard( m_queueLock );
        m_cvWork.notify_all();
    }

    for( auto it = m_workers.begin(); it != m_workers.end(); ++it )
    {
        it->join();
    }
}
// Appends a copy of `f` to the task queue and wakes one worker.
void TaskDispatch::Queue( const std::function<void(void)>& f )
{
    std::lock_guard<std::mutex> guard( m_queueLock );
    m_queue.push_back( f );
    m_cvWork.notify_one();
}
// Moves `f` into the task queue and wakes one worker.
void TaskDispatch::Queue( std::function<void(void)>&& f )
{
    std::lock_guard<std::mutex> guard( m_queueLock );
    m_queue.push_back( std::move( f ) );
    m_cvWork.notify_one();
}
// Blocks until all queued work is finished. The calling thread helps by
// running queued tasks itself, then waits for the tasks still executing on
// worker threads.
void TaskDispatch::Sync()
{
    std::unique_lock<std::mutex> lock( m_queueLock );
    while( !m_queue.empty() )
    {
        // Move the task out: copying a std::function may allocate, and this
        // happens while the queue mutex is held.
        auto f = std::move( m_queue.back() );
        m_queue.pop_back();
        lock.unlock();
        f();
        lock.lock();
    }
    m_cvJobs.wait( lock, [this]{ return m_jobs == 0; } );
}
// Worker thread loop: sleeps until a task is queued or shutdown is requested,
// runs one task at a time from the back of the queue, and signals m_cvJobs
// when the last running job finishes with an empty queue. Tasks still queued
// at shutdown are not executed.
void TaskDispatch::Worker()
{
    for(;;)
    {
        std::unique_lock<std::mutex> lock( m_queueLock );
        m_cvWork.wait( lock, [this]{ return !m_queue.empty() || m_exit.load( std::memory_order_acquire ); } );
        if( m_exit.load( std::memory_order_acquire ) ) return;
        // Move the task out: copying a std::function may allocate, and this
        // happens while the queue mutex is held.
        auto f = std::move( m_queue.back() );
        m_queue.pop_back();
        m_jobs++;
        lock.unlock();
        f();
        lock.lock();
        m_jobs--;
        if( m_jobs == 0 && m_queue.empty() ) m_cvJobs.notify_one();
        // Unlock explicitly so that the task object is destroyed outside the
        // critical section.
        lock.unlock();
    }
}
// Names the calling thread "<name> #<num>" (truncated to 127 characters).
void TaskDispatch::SetName( const char* name, size_t num )
{
    char label[128];
    snprintf( label, sizeof( label ), "%s #%zu", name, num );
    SetThreadName( label );
}
}

View File

@@ -0,0 +1,40 @@
#ifndef __TRACYTASKDISPATCH_HPP__
#define __TRACYTASKDISPATCH_HPP__
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
#include <vector>
namespace tracy
{
// Pool of worker threads executing queued std::function tasks.
class TaskDispatch
{
public:
// Starts `workers` threads; `name` is used to label them.
TaskDispatch( size_t workers, const char* name );
~TaskDispatch();
// Enqueue a task (copying or moving the callable).
void Queue( const std::function<void(void)>& f );
void Queue( std::function<void(void)>&& f );
// Wait until the queue is drained and no task is running.
void Sync();
private:
void Worker();
void SetName( const char* name, size_t num );
std::vector<std::function<void(void)>> m_queue;
// Guards m_queue and m_jobs.
std::mutex m_queueLock;
// m_cvWork wakes workers; m_cvJobs wakes Sync().
std::condition_variable m_cvWork, m_cvJobs;
std::atomic<bool> m_exit;
// Number of tasks currently executing on worker threads.
size_t m_jobs;
std::vector<std::thread> m_workers;
};
}
#endif

View File

@@ -0,0 +1,211 @@
#include "../zstd/zstd.h"
#include "TracyEvent.hpp"
#include "TracyTextureCompression.hpp"
namespace tracy
{
// Creates the zstd compression and decompression contexts. The scratch
// buffer starts out empty and no dictionary is set.
TextureCompression::TextureCompression()
    : m_buf( nullptr )
    , m_bufSize( 0 )
    , m_cctx( ZSTD_createCCtx() )
    , m_dctx( ZSTD_createDCtx() )
    , m_dict( nullptr )
{}
// Releases the scratch buffer, both zstd contexts and the dictionary handed
// over through SetDict().
TextureCompression::~TextureCompression()
{
    delete[] m_buf;

    ZSTD_freeCCtx( m_cctx );
    ZSTD_freeDCtx( m_dctx );
    ZSTD_freeDDict( m_dict );
}
// Compresses `inBytes` bytes of `image` at zstd level 3 into `buf`, growing
// the caller-owned buffer (and updating `bufsz`) when it is smaller than the
// worst-case compressed size. Returns the value reported by zstd, cast to
// 32 bits.
// NOTE(review): the zstd result is not checked for an error code - confirm
// that callers can tolerate that.
uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes )
{
    const auto bound = ZSTD_COMPRESSBOUND( inBytes );
    if( bufsz < bound )
    {
        bufsz = bound;
        delete[] buf;
        buf = new char[bound];
    }
    assert( ctx );
    const auto packed = (uint32_t)ZSTD_compressCCtx( ctx, buf, bound, image, inBytes, 3 );
#ifndef TRACY_NO_STATISTICS
    m_inputBytes.fetch_add( inBytes, std::memory_order_relaxed );
    m_outputBytes.fetch_add( packed, std::memory_order_relaxed );
#endif
    return packed;
}
// Same as the dictionary-less overload, but compresses with the prepared
// dictionary `dict`. Returns the value reported by zstd, cast to 32 bits.
// NOTE(review): the zstd result is not checked for an error code - confirm
// that callers can tolerate that.
uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes )
{
    const auto bound = ZSTD_COMPRESSBOUND( inBytes );
    if( bufsz < bound )
    {
        bufsz = bound;
        delete[] buf;
        buf = new char[bound];
    }
    assert( ctx );
    const auto packed = (uint32_t)ZSTD_compress_usingCDict( ctx, buf, bound, image, inBytes, dict );
#ifndef TRACY_NO_STATISTICS
    m_inputBytes.fetch_add( inBytes, std::memory_order_relaxed );
    m_outputBytes.fetch_add( packed, std::memory_order_relaxed );
#endif
    return packed;
}
// Decompresses a frame image into the internal scratch buffer and returns
// that buffer; the result is only valid until the buffer is next reused.
// The output size is w*h/2 bytes. The dictionary is used when one was set.
// NOTE(review): the zstd return value is ignored, so a failed decompression
// goes unnoticed - confirm this is acceptable.
const char* TextureCompression::Unpack( const FrameImage& image )
{
    const auto outsz = size_t( image.w ) * size_t( image.h ) / 2;
    if( outsz > m_bufSize )
    {
        m_bufSize = outsz;
        delete[] m_buf;
        m_buf = new char[outsz];
    }
    assert( m_dctx );
    if( !m_dict )
    {
        ZSTD_decompressDCtx( m_dctx, m_buf, outsz, image.ptr, image.csz );
    }
    else
    {
        ZSTD_decompress_usingDDict( m_dctx, m_buf, outsz, image.ptr, image.csz, m_dict );
    }
    return m_buf;
}
// Byte-to-byte remapping table used by Rdo(): each byte of a block's 32-bit
// index word (four 2-bit indices) is looked up here when the block's two
// colors are swapped.
// NOTE(review): the name suggests a 4-color to 3-color index conversion -
// confirm against the block format specification.
static constexpr uint8_t Dxtc4To3Table[256] = {
85, 84, 86, 86, 81, 80, 82, 82, 89, 88, 90, 90, 89, 88, 90, 90,
69, 68, 70, 70, 65, 64, 66, 66, 73, 72, 74, 74, 73, 72, 74, 74,
101, 100, 102, 102, 97, 96, 98, 98, 105, 104, 106, 106, 105, 104, 106, 106,
101, 100, 102, 102, 97, 96, 98, 98, 105, 104, 106, 106, 105, 104, 106, 106,
21, 20, 22, 22, 17, 16, 18, 18, 25, 24, 26, 26, 25, 24, 26, 26,
5, 4, 6, 6, 1, 0, 2, 2, 9, 8, 10, 10, 9, 8, 10, 10,
37, 36, 38, 38, 33, 32, 34, 34, 41, 40, 42, 42, 41, 40, 42, 42,
37, 36, 38, 38, 33, 32, 34, 34, 41, 40, 42, 42, 41, 40, 42, 42,
149, 148, 150, 150, 145, 144, 146, 146, 153, 152, 154, 154, 153, 152, 154, 154,
133, 132, 134, 134, 129, 128, 130, 130, 137, 136, 138, 138, 137, 136, 138, 138,
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
149, 148, 150, 150, 145, 144, 146, 146, 153, 152, 154, 154, 153, 152, 154, 154,
133, 132, 134, 134, 129, 128, 130, 130, 137, 136, 138, 138, 137, 136, 138, 138,
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170
};
// Returns the largest of three integers.
static tracy_force_inline int max3( int a, int b, int c )
{
    const int ab = a > b ? a : b;
    return ab > c ? ab : c;
}
// Thresholds used by Rdo(), compared against the largest per-channel color
// difference of a block. TrTbl1 has 64 entries and is indexed by the
// brightest adjusted channel divided by 4; TrTbl2 and TrTbl3 have 16 entries
// and are indexed by the brightest adjusted channel divided by 16.
static constexpr int TrTbl1[] = { 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
static constexpr int TrTbl2[] = { 12, 12, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
static constexpr int TrTbl3[] = { 48, 48, 48, 32, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 };
// Rewrites `blocks` consecutive 8-byte blocks in place, simplifying blocks
// whose two colors are close to each other. Each block is read as a 64-bit
// word: two 16-bit colors in the low half and a 32-bit index word in the
// high half. `blocks` must be greater than zero.
// NOTE(review): presumably this makes the data more compressible at a small
// quality cost (the name suggests rate-distortion optimisation) - confirm.
void TextureCompression::Rdo( char* data, size_t blocks )
{
assert( blocks > 0 );
do
{
uint64_t blk;
memcpy( &blk, data, 8 );
uint32_t idx = blk >> 32;
// Blocks whose index word is 0x55555555 are left untouched.
if( idx == 0x55555555 )
{
data += 8;
continue;
}
uint16_t c0 = blk & 0xFFFF;
uint16_t c1 = ( blk >> 16 ) & 0xFFFF;
// Split both colors into their 5/6/5-bit fields (kept at their bit positions).
const int r0b = c0 & 0xF800;
const int g0b = c0 & 0x07E0;
const int b0b = c0 & 0x001F;
const int r1b = c1 & 0xF800;
const int g1b = c1 & 0x07E0;
const int b1b = c1 & 0x001F;
// Expand every field to 8 bits by replicating its top bits into the low bits.
const int r0 = ( r0b >> 8 ) | ( r0b >> 13 );
const int g0 = ( g0b >> 3 ) | ( g0b >> 9 );
const int b0 = ( b0b << 3 ) | ( b0b >> 2 );
const int r1 = ( r1b >> 8 ) | ( r1b >> 13 );
const int g1 = ( g1b >> 3 ) | ( g1b >> 9 );
const int b1 = ( b1b << 3 ) | ( b1b >> 2 );
const int dr = abs( r0 - r1 );
const int dg = abs( g0 - g1 );
const int db = abs( b0 - b1 );
// Red and blue are biased downwards relative to green before taking the maximum.
const int maxChan1 = max3( r0-1, g0, b0-2 );
const int maxDelta1 = max3( dr-1, dg, db-2 );
const int tr1 = TrTbl1[maxChan1 / 4];
if( maxDelta1 <= tr1 )
{
// Colors nearly identical: replace the whole block with the per-field
// average color in the low 16 bits; the second color and all indices become 0.
uint64_t blk =
( ( ( r0b + r1b ) >> 1 ) & 0xF800 ) |
( ( ( g0b + g1b ) >> 1 ) & 0x07E0 ) |
( ( ( b0b + b1b ) >> 1 ) );
memcpy( data, &blk, 8 );
}
else
{
const int maxChan23 = max3( r0-2, g0, b0-5 );
const int maxDelta23 = max3( dr-2, dg, db-5 );
const int tr2 = TrTbl2[maxChan23 / 16];
if( maxDelta23 <= tr2 )
{
// Keep only the low bit of every 2-bit index; the colors stay as they are.
idx &= 0x55555555;
memcpy( data+4, &idx, 4 );
}
else
{
const int tr3 = TrTbl3[maxChan23 / 16];
if( maxDelta23 <= tr3 )
{
// Swap the two colors and remap each index byte through Dxtc4To3Table.
uint64_t c = c1 | ( uint64_t( c0 ) << 16 );
for( int k=0; k<4; k++ ) c |= uint64_t( Dxtc4To3Table[(idx >> (k*8)) & 0xFF] ) << ( 32 + k*8 );
memcpy( data, &c, 8 );
}
}
}
data += 8;
}
while( --blocks );
}
// Remaps the 2-bit indices stored in the upper four bytes of every 8-byte
// block, in place. The index word is inverted, the two bits of each pair are
// swapped, and the result is XOR-ed with the inverted high bits.
// `blocks` must be greater than zero.
void TextureCompression::FixOrder( char* data, size_t blocks )
{
    assert( blocks > 0 );
    size_t offset = 4;
    do
    {
        char* const idxPtr = data + offset;
        uint32_t bits;
        memcpy( &bits, idxPtr, 4 );
        bits = ~bits;
        const uint32_t lowBits = bits & 0x55555555;
        const uint32_t highBits = bits & 0xAAAAAAAA;
        bits = ( ( lowBits << 1 ) | ( highBits >> 1 ) ) ^ highBits;
        memcpy( idxPtr, &bits, 4 );
        offset += 8;
    }
    while( --blocks );
}
}

View File

@@ -0,0 +1,64 @@
#ifndef __TRACY__TEXTURECOMPRESSION_HPP__
#define __TRACY__TEXTURECOMPRESSION_HPP__
#include <atomic>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "TracySlab.hpp"
struct ZSTD_CCtx_s;
struct ZSTD_DCtx_s;
struct ZSTD_CDict_s;
struct ZSTD_DDict_s;
namespace tracy
{
struct FrameImage;
// zstd-based compression and decompression of frame image data, plus in-place
// block rewriting helpers (Rdo, FixOrder). Owns a reusable scratch buffer.
class TextureCompression
{
public:
TextureCompression();
~TextureCompression();
// Sets the decompression dictionary; the destructor frees it.
void SetDict( struct ZSTD_DDict_s* dict ) { m_dict = dict; }
// Compress into a caller-owned, growable buffer; return the compressed size.
uint32_t Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
uint32_t Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
// Compresses with the internal context and scratch buffer, then copies the
// result into memory obtained from `slab`. The compressed size is returned
// through `csz`.
template<size_t Size>
const char* Pack( const char* image, uint32_t inBytes, uint32_t& csz, Slab<Size>& slab )
{
const auto outsz = Pack( m_cctx, m_buf, m_bufSize, image, inBytes );
auto ptr = (char*)slab.AllocBig( outsz );
memcpy( ptr, m_buf, outsz );
csz = outsz;
return ptr;
}
// Decompresses into the internal scratch buffer and returns it.
const char* Unpack( const FrameImage& image );
void Rdo( char* data, size_t blocks );
void FixOrder( char* data, size_t blocks );
// Running totals of bytes given to and produced by Pack().
uint64_t GetInputBytesCount() const { return m_inputBytes.load( std::memory_order_relaxed ); }
uint64_t GetOutputBytesCount() const { return m_outputBytes.load( std::memory_order_relaxed ); }
private:
// Scratch buffer and its capacity in bytes.
char* m_buf;
size_t m_bufSize;
struct ZSTD_CCtx_s* m_cctx;
struct ZSTD_DCtx_s* m_dctx;
struct ZSTD_DDict_s* m_dict;
std::atomic<uint64_t> m_inputBytes { 0 };
std::atomic<uint64_t> m_outputBytes { 0 };
};
}
#endif

View File

@@ -0,0 +1,72 @@
#include <limits>
#include "TracyFileRead.hpp"
#include "TracyFileWrite.hpp"
#include "TracyThreadCompress.hpp"
namespace tracy
{
// Seeds the one-entry lookup cache with the largest 64-bit value as its
// thread id, mapped to index 0.
ThreadCompress::ThreadCompress()
    : m_threadLast( std::numeric_limits<uint64_t>::max(), 0 )
{}
// Makes compressed index 0 expand to thread id 0. Must be called while the
// expansion table is still empty. The id-to-index map is not updated here.
void ThreadCompress::InitZero()
{
    assert( m_threadExpand.empty() );
    const uint64_t zeroThread = 0;
    m_threadExpand.push_back( zeroThread );
}
// Reads the expansion table (a 64-bit count followed by that many 64-bit
// thread ids) and rebuilds the id-to-index map. Both containers must be
// empty on entry.
void ThreadCompress::Load( FileRead& f )
{
    assert( m_threadExpand.empty() );
    assert( m_threadMap.empty() );

    uint64_t count;
    f.Read( count );
    if( count == 0 ) return;

    m_threadExpand.reserve_and_use( count );
    f.Read( m_threadExpand.data(), sizeof( uint64_t ) * count );

    m_threadMap.reserve( count );
    size_t idx = 0;
    while( idx < count )
    {
        m_threadMap.emplace( m_threadExpand[idx], idx );
        idx++;
    }
}
// Writes the expansion table: a 64-bit count followed by the thread ids
// (the ids are omitted when the table is empty).
void ThreadCompress::Save( FileWrite& f ) const
{
    uint64_t count = m_threadExpand.size();
    f.Write( &count, sizeof( count ) );
    if( count == 0 ) return;
    f.Write( m_threadExpand.data(), count * sizeof( uint64_t ) );
}
// Path taken when the one-entry cache misses: looks the thread id up in the
// map, registering it as a new entry when unknown, and refreshes the cache.
uint16_t ThreadCompress::CompressThreadReal( uint64_t thread )
{
    const auto found = m_threadMap.find( thread );
    if( found == m_threadMap.end() ) return CompressThreadNew( thread );

    m_threadLast.first = thread;
    m_threadLast.second = found->second;
    return found->second;
}
// Registers a thread id that has not been seen before: appends it to the
// expansion table, maps it to its index and stores it in the lookup cache.
// Returns the new 16-bit index.
uint16_t ThreadCompress::CompressThreadNew( uint64_t thread )
{
    auto sz = m_threadExpand.size();
    // Indices are 16-bit; going past that would silently alias an existing
    // thread after truncation.
    assert( sz <= std::numeric_limits<uint16_t>::max() );
    m_threadExpand.push_back( thread );
    m_threadMap.emplace( thread, sz );
    m_threadLast.first = thread;
    m_threadLast.second = sz;
    return sz;
}
}

View File

@@ -0,0 +1,61 @@
#ifndef __TRACY__THREADCOMPRESS_HPP__
#define __TRACY__THREADCOMPRESS_HPP__
#include <assert.h>
#include <stdint.h>
#include "../public/common/TracyForceInline.hpp"
#include "tracy_robin_hood.h"
#include "TracyVector.hpp"
namespace tracy
{
class FileRead;
class FileWrite;
// Maps 64-bit thread ids to compact 16-bit indices and back. The most
// recently resolved id is cached to skip the map lookup.
class ThreadCompress
{
public:
    ThreadCompress();

    void InitZero();
    void Load( FileRead& f );
    void Save( FileWrite& f ) const;

    // Returns the index for `thread`, registering the id when it is new.
    tracy_force_inline uint16_t CompressThread( uint64_t thread )
    {
        return m_threadLast.first == thread ? m_threadLast.second : CompressThreadReal( thread );
    }

    // Returns the thread id stored under a compressed index.
    tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const
    {
        assert( thread < m_threadExpand.size() );
        return m_threadExpand[thread];
    }

    // Returns the index of an id that must already be registered.
    tracy_force_inline uint16_t DecompressMustRaw( uint64_t thread ) const
    {
        const auto found = m_threadMap.find( thread );
        assert( found != m_threadMap.end() );
        return found->second;
    }

    // Tells whether the id is present in the map.
    tracy_force_inline bool Exists( uint64_t thread ) const
    {
        const auto found = m_threadMap.find( thread );
        return found != m_threadMap.end();
    }

private:
    uint16_t CompressThreadReal( uint64_t thread );
    uint16_t CompressThreadNew( uint64_t thread );

    unordered_flat_map<uint64_t, uint16_t> m_threadMap;
    Vector<uint64_t> m_threadExpand;
    std::pair<uint64_t, uint16_t> m_threadLast;
};
}
#endif

View File

@@ -0,0 +1,97 @@
#ifndef __TRACYVARARRAY_HPP__
#define __TRACYVARARRAY_HPP__
#include <stdint.h>
#include <string.h>
#define XXH_INLINE_ALL
#include "tracy_xxhash.h"
#include "../public/common/TracyForceInline.hpp"
#include "TracyCharUtil.hpp"
#include "TracyEvent.hpp"
#include "TracyMemory.hpp"
#include "TracyShortPtr.hpp"
namespace tracy
{
#pragma pack( push, 1 )
// Non-owning, immutable view of an array of up to 65535 elements, together
// with a 32-bit hash of its contents computed at construction. Neither
// copyable nor movable.
template<typename T>
class VarArray
{
public:
    VarArray( uint16_t size, const T* data )
        : m_size( size )
        , m_ptr( data )
    {
        CalcHash();
    }

    VarArray( const VarArray& ) = delete;
    VarArray( VarArray&& ) = delete;

    VarArray& operator=( const VarArray& ) = delete;
    VarArray& operator=( VarArray&& ) = delete;

    tracy_force_inline uint32_t get_hash() const { return m_hash; }

    tracy_force_inline bool empty() const { return m_size == 0; }
    tracy_force_inline uint16_t size() const { return m_size; }

    tracy_force_inline const T* data() const { return m_ptr; }

    tracy_force_inline const T* begin() const { return m_ptr; }
    tracy_force_inline const T* end() const { return m_ptr + m_size; }

    tracy_force_inline const T& front() const { assert( m_size > 0 ); return m_ptr[0]; }
    tracy_force_inline const T& back() const { assert( m_size > 0 ); return m_ptr[m_size - 1]; }

    tracy_force_inline const T& operator[]( size_t idx ) const { return m_ptr[idx]; }

private:
    // Hashes the raw bytes of the referenced elements into m_hash.
    tracy_force_inline void CalcHash();

    uint16_t m_size;
    uint32_t m_hash;
    const short_ptr<T> m_ptr;
};
#pragma pack( pop )
enum { VarArraySize = sizeof( VarArray<int> ) };
// Stores the low 32 bits of the XXH3 64-bit hash of the element bytes.
template<typename T>
inline void VarArray<T>::CalcHash()
{
    const auto full = XXH3_64bits( m_ptr.get(), m_size * sizeof( T ) );
    m_hash = uint32_t( full );
}
// Returns true when both arrays have the same size, hash and bytes. Size and
// hash are compared first so that most mismatches skip the memcmp.
template<typename T>
static inline bool Compare( const VarArray<T>& lhs, const VarArray<T>& rhs )
{
    const bool sameShape = lhs.size() == rhs.size() && lhs.get_hash() == rhs.get_hash();
    if( !sameShape ) return false;
    return memcmp( lhs.data(), rhs.data(), lhs.size() * sizeof( T ) ) == 0;
}
// Hash functor for containers keyed by VarArray pointers; returns the hash
// already stored in the array.
template<typename T>
struct VarArrayHasher
{
    size_t operator()( const VarArray<T>* arr ) const
    {
        const size_t stored = arr->get_hash();
        return stored;
    }
};
template<typename T>
struct VarArrayComparator
{
bool operator()( const VarArray<T>* lhs, const VarArray<T>* rhs ) const
{
return Compare( *lhs, *rhs );
}
};
}
#endif

View File

@@ -0,0 +1,357 @@
#ifndef __TRACYVECTOR_HPP__
#define __TRACYVECTOR_HPP__
#include <algorithm>
#include <assert.h>
#include <limits>
#include <stdint.h>
#include <stdlib.h>
#include <type_traits>
#include "../public/common/TracyForceInline.hpp"
#include "TracyMemory.hpp"
#include "TracyPopcnt.hpp"
#include "TracyShortPtr.hpp"
#include "TracySlab.hpp"
//#define TRACY_VECTOR_DEBUG
namespace tracy
{
#pragma pack( push, 1 )
template<typename T>
class Vector
{
// Sentinel value of m_capacity. reserve_exact() stores it for slab-backed
// storage, and the destructor does not free storage marked this way.
constexpr uint8_t MaxCapacity() { return 0x7F; }
public:
using iterator = T*;
using const_iterator = const T*;
// Creates an empty vector by zeroing the whole object.
tracy_force_inline Vector()
{
memset( (char*)this, 0, sizeof( Vector<T> ) );
}
Vector( const Vector& ) = delete;
// Move constructor: takes over the source's bytes and zeroes the source.
tracy_force_inline Vector( Vector&& src ) noexcept
{
memcpy( (char*)this, &src, sizeof( Vector<T> ) );
memset( (char*)&src, 0, sizeof( Vector<T> ) );
}
// Creates a vector holding one copy of `value` in storage for exactly one
// element.
// NOTE(review): the malloc result is not checked - confirm that allocation
// failure is out of scope here.
tracy_force_inline Vector( const T& value )
: m_ptr( (T*)malloc( sizeof( T ) ) )
, m_size( 1 )
, m_capacity( 0 )
, m_magic( 0 )
{
memUsage.fetch_add( sizeof( T ), std::memory_order_relaxed );
new(m_ptr) T( value );
}
// Frees the storage unless it is marked with MaxCapacity() or was never
// allocated. Element destructors are not run.
tracy_force_inline ~Vector()
{
if( m_capacity != MaxCapacity() && m_ptr )
{
memUsage.fetch_sub( Capacity() * sizeof( T ), std::memory_order_relaxed );
free( m_ptr );
}
}
Vector& operator=( const Vector& ) = delete;
// Move assignment: releases the current storage (same rule as the
// destructor), takes over the source's bytes and zeroes the source.
// NOTE(review): there is no self-assignment guard; moving a vector into
// itself frees its storage and leaves it empty.
tracy_force_inline Vector& operator=( Vector&& src ) noexcept
{
if( m_capacity != MaxCapacity() && m_ptr )
{
memUsage.fetch_sub( Capacity() * sizeof( T ), std::memory_order_relaxed );
free( m_ptr );
}
memcpy( (char*)this, &src, sizeof( Vector<T> ) );
memset( (char*)&src, 0, sizeof( Vector<T> ) );
return *this;
}
// Exchanges the two vectors by swapping their raw bytes through a temporary.
tracy_force_inline void swap( Vector& other )
{
uint8_t tmp[sizeof( Vector<T> )];
memcpy( (char*)tmp, &other, sizeof( Vector<T> ) );
memcpy( (char*)&other, this, sizeof( Vector<T> ) );
memcpy( (char*)this, tmp, sizeof( Vector<T> ) );
}
// Size queries.
tracy_force_inline bool empty() const { return m_size == 0; }
tracy_force_inline size_t size() const { return m_size; }
// Overrides the element count without touching the storage; not allowed on
// storage marked with MaxCapacity().
tracy_force_inline void set_size( size_t sz ) { assert( m_capacity != MaxCapacity() ); m_size = sz; }
// Raw access to the element storage.
tracy_force_inline T* data() { return m_ptr; }
tracy_force_inline const T* data() const { return m_ptr; };
// Iteration uses plain pointers.
tracy_force_inline T* begin() { return m_ptr; }
tracy_force_inline const T* begin() const { return m_ptr; }
tracy_force_inline T* end() { return m_ptr + m_size; }
tracy_force_inline const T* end() const { return m_ptr + m_size; }
// First and last element; the vector must not be empty.
tracy_force_inline T& front() { assert( m_size > 0 ); return m_ptr[0]; }
tracy_force_inline const T& front() const { assert( m_size > 0 ); return m_ptr[0]; }
tracy_force_inline T& back() { assert( m_size > 0 ); return m_ptr[m_size - 1]; }
tracy_force_inline const T& back() const { assert( m_size > 0 ); return m_ptr[m_size - 1]; }
// Unchecked element access.
tracy_force_inline T& operator[]( size_t idx ) { return m_ptr[idx]; }
tracy_force_inline const T& operator[]( size_t idx ) const { return m_ptr[idx]; }
// Appends a copy of `v`, growing the storage when it is full.
tracy_force_inline void push_back( const T& v )
{
assert( m_capacity != MaxCapacity() );
if( m_size == Capacity() ) AllocMore();
new(m_ptr+m_size) T( v );
m_size++;
}
// Same as push_back( const T& ), for vectors known to have storage already:
// uses the capacity query that skips the null-pointer check.
tracy_force_inline void push_back_non_empty( const T& v )
{
assert( m_capacity != MaxCapacity() );
assert( m_ptr );
if( m_size == CapacityNoNullptrCheck() ) AllocMore();
new(m_ptr+m_size) T( v );
m_size++;
}
// Appends a copy of `v` without growing; the caller guarantees free space
// (only asserted).
tracy_force_inline void push_back_no_space_check( const T& v )
{
assert( m_capacity != MaxCapacity() );
assert( m_size < Capacity() );
new(m_ptr+m_size) T( v );
m_size++;
}
// Appends `v` by move, growing the storage when it is full.
tracy_force_inline void push_back( T&& v )
{
assert( m_capacity != MaxCapacity() );
if( m_size == Capacity() ) AllocMore();
new(m_ptr+m_size) T( std::move( v ) );
m_size++;
}
// Appends a value-initialised element and returns a reference to it,
// growing the storage when it is full.
tracy_force_inline T& push_next()
{
assert( m_capacity != MaxCapacity() );
if( m_size == Capacity() ) AllocMore();
new(m_ptr+m_size) T();
return m_ptr[m_size++];
}
// Same as push_next(), for vectors known to have storage already: uses the
// capacity query that skips the null-pointer check.
tracy_force_inline T& push_next_non_empty()
{
assert( m_capacity != MaxCapacity() );
assert( m_ptr );
if( m_size == CapacityNoNullptrCheck() ) AllocMore();
new(m_ptr+m_size) T();
return m_ptr[m_size++];
}
// Appends a value-initialised element without growing; the caller guarantees
// free space (only asserted).
tracy_force_inline T& push_next_no_space_check()
{
assert( m_capacity != MaxCapacity() );
assert( m_size < Capacity() );
new(m_ptr+m_size) T();
return m_ptr[m_size++];
}
// Inserts a copy of `v` before `it` (which may be end()) and returns a
// pointer to the new element. Later elements are shifted with memmove, so T
// must tolerate byte-wise relocation. Pointers into the vector, including
// `it`, may be invalidated when the storage grows.
T* insert( T* it, const T& v )
{
    assert( m_capacity != MaxCapacity() );
    assert( it >= m_ptr && it <= m_ptr + m_size );
    // Remember the position as an index: AllocMore() may move the storage.
    const auto dist = it - m_ptr;
    if( m_size == Capacity() ) AllocMore();
    if( dist != m_size ) memmove( m_ptr + dist + 1, m_ptr + dist, ( m_size - dist ) * sizeof( T ) );
    m_size++;
    // Copy-constructing the element is enough; assigning `v` again on top of
    // it would only repeat the copy.
    new(m_ptr+dist) T( v );
    return m_ptr + dist;
}
// Inserts `v` by move before `it` (which may be end()) and returns a pointer
// to the new element. Later elements are shifted with memmove.
T* insert( T* it, T&& v )
{
assert( m_capacity != MaxCapacity() );
assert( it >= m_ptr && it <= m_ptr + m_size );
// Keep the position as an index: AllocMore() may move the storage.
const auto dist = it - m_ptr;
if( m_size == Capacity() ) AllocMore();
if( dist != m_size ) memmove( m_ptr + dist + 1, m_ptr + dist, ( m_size - dist ) * sizeof( T ) );
m_size++;
new(m_ptr+dist) T( std::move( v ) );
return m_ptr + dist;
}
// Inserts the range [begin, end) before `it`. Elements are copied with
// memcpy, i.e. byte-wise, without running copy constructors.
void insert( T* it, T* begin, T* end )
{
assert( m_capacity != MaxCapacity() );
assert( it >= m_ptr && it <= m_ptr + m_size );
const auto sz = end - begin;
const auto dist = it - m_ptr;
// Grow repeatedly until the whole range fits.
while( m_size + sz > Capacity() ) AllocMore();
if( dist != m_size ) memmove( m_ptr + dist + sz, m_ptr + dist, ( m_size - dist ) * sizeof( T ) );
m_size += sz;
memcpy( m_ptr + dist, begin, sz * sizeof( T ) );
}
// Removes the element at `it` and returns `it`, which then refers to the element that
// followed the removed one (or to the new end).
//
// `it` must point at an existing element, i.e. into [m_ptr, m_ptr + m_size). The end
// position is not a valid argument: it would make the tail length computed below negative
// and turn the memmove() size into a huge unsigned value.
// The tail is closed up bitwise with memmove(); no destructor is run for the removed element.
T* erase( T* it )
{
    assert( m_capacity != MaxCapacity() );
    assert( it >= m_ptr && it < m_ptr + m_size );
    m_size--;
    // m_size is already decremented, so this is the number of elements after `it`.
    memmove( it, it+1, ( m_size - ( it - m_ptr ) ) * sizeof( T ) );
    return it;
}
// Removes the elements in [begin, end) and returns `begin`.
//
// Both pointers must lie within [m_ptr, m_ptr + m_size] with begin <= end. An empty range is
// a no-op. The tail is closed up bitwise with memmove(); no destructors are run.
T* erase( T* begin, T* end )
{
    assert( m_capacity != MaxCapacity() );
    assert( begin >= m_ptr && begin <= m_ptr + m_size );
    assert( end >= m_ptr && end <= m_ptr + m_size );
    assert( begin <= end );
    const auto count = end - begin;
    if( count <= 0 )
    {
        return begin;
    }
    // Slide everything that follows the erased range down over it.
    memmove( begin, end, ( m_size - ( end - m_ptr ) ) * sizeof( T ) );
    m_size -= count;
    return begin;
}
// Drops the last element by shrinking the size. The vector must not be empty.
// No destructor is run and the storage is kept.
tracy_force_inline void pop_back()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size > 0 );
    --m_size;
}
// Removes the last element and returns a reference to it. The vector must not be empty.
// The returned reference points at a slot that is now past the end of the vector, so it
// is only usable until that slot is overwritten or the buffer is reallocated.
tracy_force_inline T& back_and_pop()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size > 0 );
    return m_ptr[--m_size];
}
// Makes sure the buffer can hold at least `cap` elements. Does nothing when `cap` is zero
// or already covered by the current capacity.
tracy_force_inline void reserve( size_t cap )
{
    if( cap != 0 && cap > Capacity() )
    {
        reserve_non_zero( cap );
    }
}
// Reallocates the buffer to a power-of-two capacity derived from `cap`, which must be
// non-zero. Unlike reserve(), this does not check whether the current capacity already
// suffices.
void reserve_non_zero( size_t cap )
{
    assert( m_capacity != MaxCapacity() );
    // Smear the highest set bit of (cap - 1) into every lower position, for shifts of
    // 1, 2, 4, 8 and 16 bits.
    size_t bits = cap - 1;
    for( unsigned shift = 1; shift <= 16; shift <<= 1 )
    {
        bits |= bits >> shift;
    }
    // TracyCountBits is defined elsewhere; presumably a population count, which would make
    // this the exponent of the smallest power of two >= cap -- confirm against its definition.
    const size_t exponent = TracyCountBits( bits );
    // Account for the growth relative to the capacity in effect before the change.
    const auto grownBytes = ( ( 1 << exponent ) - Capacity() ) * sizeof( T );
    memUsage.fetch_add( grownBytes, std::memory_order_relaxed );
    m_capacity = exponent;
    Realloc();
}
// Reserves room for `sz` elements and sets the size to `sz`. No constructors are run here,
// so the caller is responsible for the contents of any newly exposed slots.
tracy_force_inline void reserve_and_use( size_t sz )
{
    assert( m_capacity != MaxCapacity() );
    reserve( sz );
    m_size = static_cast<uint32_t>( sz );
}
// Gives a still-unallocated vector exactly `sz` elements of storage taken from `slab`.
//
// m_capacity is set to MaxCapacity(), the marker value that every growing or shrinking
// method of this class asserts against. No constructors are run for the `sz` slots.
template<size_t U>
tracy_force_inline void reserve_exact( uint32_t sz, Slab<U>& slab )
{
    assert( !m_ptr );
    m_ptr = (T*)slab.AllocBig( sizeof( T ) * sz );
    m_size = sz;
    m_capacity = MaxCapacity();
}
// Empties the vector by resetting the size to zero. The buffer stays allocated and no
// destructors are run.
tracy_force_inline void clear()
{
    assert( m_capacity != MaxCapacity() );
    m_size = 0u;
}
// Reports whether the one-bit m_magic flag has been set via set_magic().
tracy_force_inline bool is_magic() const
{
    return m_magic != 0;
}
// Sets the one-bit m_magic flag. The flag must not be set already, and nothing in the
// visible part of the class clears it again.
tracy_force_inline void set_magic()
{
    assert( !m_magic );
    m_magic = 1;
}
private:
// Grows the buffer by one step: the first call allocates room for a single element, every
// later call doubles the capacity. The added bytes are recorded in memUsage.
tracy_no_inline void AllocMore()
{
    assert( m_capacity != MaxCapacity() );
    if( m_ptr == nullptr )
    {
        // First allocation: one element, i.e. capacity exponent 0.
        memUsage.fetch_add( sizeof( T ), std::memory_order_relaxed );
        m_ptr = (T*)malloc( sizeof( T ) );
        m_capacity = 0;
        return;
    }
    // Doubling adds as many elements as the buffer currently holds.
    memUsage.fetch_add( Capacity() * sizeof( T ), std::memory_order_relaxed );
    ++m_capacity;
    Realloc();
}
// Replaces the buffer with a freshly malloc'ed one sized for the current m_capacity
// (1 << m_capacity elements) and carries the m_size live elements over.
//
// Trivially copyable element types are transferred with a single memcpy(); other types are
// move-constructed one by one (the moved-from originals are not destroyed).
// The previous buffer is always released, including when it holds no live elements:
// otherwise a vector that was emptied (e.g. by clear()) and then asked to reserve more
// room would leak its old allocation.
void Realloc()
{
    T* ptr = (T*)malloc( sizeof( T ) * CapacityNoNullptrCheck() );
    if( m_size != 0 )
    {
        if( std::is_trivially_copyable<T>() )
        {
            memcpy( (char*)ptr, m_ptr, m_size * sizeof( T ) );
        }
        else
        {
            for( uint32_t i=0; i<m_size; i++ )
            {
                new(ptr+i) T( std::move( m_ptr[i] ) );
            }
        }
    }
    // free() accepts a null pointer, so this also covers a vector that never allocated.
    free( m_ptr );
    m_ptr = ptr;
}
// Number of elements the current buffer can hold: zero while nothing is allocated,
// otherwise two to the power of m_capacity.
tracy_force_inline uint32_t Capacity() const
{
    if( m_ptr == nullptr )
    {
        return 0;
    }
    return 1 << m_capacity;
}
// Same as Capacity() but without the nullptr test: returns two to the power of m_capacity
// even when no buffer is allocated, so it is only meaningful once m_ptr is set (or when
// computing the size of a buffer about to be allocated).
tracy_force_inline uint32_t CapacityNoNullptrCheck() const
{
    const uint32_t elements = 1 << m_capacity;
    return elements;
}
// Pointer to the element storage (null until the first allocation). Defining
// TRACY_VECTOR_DEBUG replaces the short_ptr<T> wrapper with a plain T*
// (presumably to make the contents easier to inspect in a debugger -- confirm).
#ifdef TRACY_VECTOR_DEBUG
T* m_ptr;
#else
short_ptr<T> m_ptr;
#endif
// Number of elements currently in use.
uint32_t m_size;
// Capacity exponent: Capacity() returns 1 << m_capacity. The value MaxCapacity() is used
// as a marker by reserve_exact(); the growing and shrinking methods assert against it.
uint8_t m_capacity : 7;
// One-bit flag written by set_magic() and read by is_magic(). Its meaning is not evident
// from this class.
uint8_t m_magic : 1;
};
// Identity adapter: hands back the element it is given, by const reference.
template<typename T>
struct VectorAdapterDirect
{
    const T& operator()( const T& it ) const
    {
        return it;
    }
};
// Dereferencing adapter: given a short_ptr<T> element, returns the pointed-to object by
// const reference.
template<typename T>
struct VectorAdapterPointer
{
    const T& operator()( const short_ptr<T>& it ) const
    {
        return *it;
    }
};
#pragma pack( pop )
// Size in bytes of a Vector<int> object, exposed as a compile-time constant.
enum { VectorSize = sizeof( Vector<int> ) };
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,524 @@
/*
pdqsort.h - Pattern-defeating quicksort.
Copyright (c) 2015 Orson Peters
This software is provided 'as-is', without any express or implied warranty. In no event will the
authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial
applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
original software. If you use this software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef TRACY_PDQSORT_H
#define TRACY_PDQSORT_H
#include "../public/common/TracyForceInline.hpp"
#include <algorithm>
#include <cstddef>
#include <functional>
#include <utility>
#include <iterator>
#include <cstdint>
#include <type_traits>
#define PDQSORT_PREFER_MOVE(x) std::move(x)
namespace tracy{
namespace pdqsort_detail {
// Tuning constants used throughout pdqsort_detail. They are enumerators of an unnamed
// enum, so they take part in arithmetic as ordinary integer constants.
enum {
// Partitions below this size are sorted using insertion sort.
insertion_sort_threshold = 24,
// Partitions above this size use Tukey's ninther to select the pivot.
ninther_threshold = 128,
// When we detect an already sorted partition, attempt an insertion sort that allows this
// number of element moves before giving up.
partial_insertion_sort_limit = 8,
// Number of elements examined per offset block in the branchless partition.
// Must be a multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
block_size = 64,
// Cacheline size in bytes; assumed to be a power of two (align_cacheline masks with it).
cacheline_size = 64
};
// Trait that is true only for the standard comparators std::less<T> and std::greater<T>.
// pdqsort() uses it, together with an arithmetic value type, to pick the branchless
// partition scheme.
template<class T>
struct is_default_compare : std::false_type {};

template<class T>
struct is_default_compare<std::less<T>> : std::true_type {};

template<class T>
struct is_default_compare<std::greater<T>> : std::true_type {};
// Integer base-2 logarithm, rounded down. Requires n > 0.
// Counts how many times n can be shifted right by one before it becomes zero.
template<class T>
tracy_force_inline int log2(T n) {
    int shifts = 0;
    for (n >>= 1; n; n >>= 1) {
        ++shifts;
    }
    return shifts;
}
// Insertion sort over [begin, end), ordering elements with comp. Every backward scan is
// bounded by `begin`, so nothing outside the range is ever read.
template<class Iter, class Compare>
tracy_force_inline void insertion_sort(Iter begin, Iter end, Compare comp) {
    typedef typename std::iterator_traits<Iter>::value_type T;
    if (begin == end) return;

    for (Iter cur = begin + 1; cur != end; ++cur) {
        Iter hole = cur;
        Iter prev = cur - 1;

        // Test before touching anything: an element that is already in place costs one
        // comparison and no moves.
        if (!comp(*hole, *prev)) continue;

        T tmp = PDQSORT_PREFER_MOVE(*hole);
        for (;;) {
            // Shift the predecessor up into the hole and step the hole down.
            *hole = PDQSORT_PREFER_MOVE(*prev);
            --hole;
            // Stop at the front of the range, or at the first element tmp does not precede.
            if (hole == begin) break;
            --prev;
            if (!comp(tmp, *prev)) break;
        }
        *hole = PDQSORT_PREFER_MOVE(tmp);
    }
}
// Insertion sort over [begin, end), ordering elements with comp, without a bounds check on
// the backward scan. Precondition: *(begin - 1) exists and is smaller than or equal to
// every element of [begin, end), so it acts as a sentinel that stops the scan.
template<class Iter, class Compare>
tracy_force_inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
    typedef typename std::iterator_traits<Iter>::value_type T;
    if (begin == end) return;

    for (Iter cur = begin + 1; cur != end; ++cur) {
        Iter hole = cur;
        Iter prev = cur - 1;

        // Test before touching anything: an element that is already in place costs one
        // comparison and no moves.
        if (!comp(*hole, *prev)) continue;

        T tmp = PDQSORT_PREFER_MOVE(*hole);
        do {
            // Shift the predecessor up into the hole and step both cursors down.
            *hole = PDQSORT_PREFER_MOVE(*prev);
            --hole;
            --prev;
        } while (comp(tmp, *prev));
        *hole = PDQSORT_PREFER_MOVE(tmp);
    }
}
// Insertion sort over [begin, end) with a budget. The total distance elements have been
// moved is accumulated; once it exceeds partial_insertion_sort_limit the function stops and
// returns false, leaving the range partially sorted. Returns true when the whole range was
// sorted within the budget.
template<class Iter, class Compare>
tracy_force_inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
    typedef typename std::iterator_traits<Iter>::value_type T;
    if (begin == end) return true;

    std::size_t moved = 0;
    for (Iter cur = begin + 1; cur != end; ++cur) {
        Iter hole = cur;
        Iter prev = cur - 1;

        // Test before touching anything: an element that is already in place costs one
        // comparison and no moves.
        if (comp(*hole, *prev)) {
            T tmp = PDQSORT_PREFER_MOVE(*hole);
            for (;;) {
                // Shift the predecessor up into the hole and step the hole down.
                *hole = PDQSORT_PREFER_MOVE(*prev);
                --hole;
                // Stop at the front of the range, or at the first element tmp does not precede.
                if (hole == begin) break;
                --prev;
                if (!comp(tmp, *prev)) break;
            }
            *hole = PDQSORT_PREFER_MOVE(tmp);
            // Charge the number of positions this element travelled.
            moved += cur - hole;
        }

        if (moved > partial_insertion_sort_limit) return false;
    }
    return true;
}
// Puts *a and *b in order according to comp, swapping them when *b precedes *a.
template<class Iter, class Compare>
tracy_force_inline void sort2(Iter a, Iter b, Compare comp) {
    if (!comp(*b, *a)) return;
    std::iter_swap(a, b);
}
// Orders the three elements *a, *b and *c according to comp with three compare-and-swap
// steps on the pairs (a, b), (b, c) and (a, b) again.
template<class Iter, class Compare>
tracy_force_inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
    if (comp(*b, *a)) std::iter_swap(a, b);
    if (comp(*c, *b)) std::iter_swap(b, c);
    if (comp(*b, *a)) std::iter_swap(a, b);
}
// Rounds the pointer `p` up to the next multiple of cacheline_size (returns `p` itself when
// it is already aligned). Relies on cacheline_size being a power of two.
template<class T>
tracy_force_inline T* align_cacheline(T* p) {
#if defined(UINTPTR_MAX)
    typedef std::uintptr_t address_t;
#else
    typedef std::size_t address_t;
#endif
    const address_t address = reinterpret_cast<address_t>(p);
    // Add (size - 1), then clear the low bits; -cacheline_size converts to a mask with
    // exactly those low bits zero.
    const address_t aligned = (address + cacheline_size - 1) & -cacheline_size;
    return reinterpret_cast<T*>(aligned);
}
// Exchanges `num` pairs of elements identified by offset tables: pair i consists of
// *(first + offsets_l[i]) and *(last - offsets_r[i]).
//
// With use_swaps set, each pair is exchanged with std::iter_swap. Otherwise the pairs are
// processed as one chain of moves through a single temporary, which moves each element once
// instead of performing a full swap per pair.
template<class Iter>
tracy_force_inline void swap_offsets(Iter first, Iter last,
unsigned char* offsets_l, unsigned char* offsets_r,
size_t num, bool use_swaps) {
typedef typename std::iterator_traits<Iter>::value_type T;
if (use_swaps) {
// This case is needed for the descending distribution, where we need
// to have proper swapping for pdqsort to remain O(n).
for (size_t i = 0; i < num; ++i) {
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
}
} else if (num > 0) {
// Lift the first left element out, which leaves a hole at l that is filled from r.
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
// Each step fills the hole at the previous r from the next l, then the hole at that
// l from the next r.
for (size_t i = 1; i < num; ++i) {
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
}
// The last hole is on the right side; the element lifted out at the start goes there.
*r = PDQSORT_PREFER_MOVE(tmp);
}
}
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
// pivot is a median of at least 3 elements and that [begin, end) is at least
// insertion_sort_threshold long. Uses branchless partitioning.
//
// Rather than swapping as soon as a misplaced pair is found, the main loop scans blocks of
// up to block_size elements from each side, records the offsets of the misplaced ones in
// two byte buffers, and exchanges them in batches through swap_offsets().
template<class Iter, class Compare>
tracy_force_inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
// Move pivot into local for speed.
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
// Find the first element greater than or equal to the pivot (the median of 3 guarantees
// this exists).
while (comp(*++first, pivot));
// Scanning from the back, find the first element strictly smaller than the pivot. We have
// to guard this search if there was no element before *first.
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
else while ( !comp(*--last, pivot));
// If the first pair of elements that should be swapped to partition are the same element,
// the passed in sequence already was correctly partitioned.
bool already_partitioned = first >= last;
if (!already_partitioned) {
std::iter_swap(first, last);
++first;
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
// Mispredictions don't affect Quicksort" by Stefan Edelkamp and Armin Weiss, but
// heavily micro-optimized.
// The buffers are over-allocated by cacheline_size bytes so that align_cacheline() can
// round the start up and still leave block_size usable entries.
unsigned char offsets_l_storage[block_size + cacheline_size];
unsigned char offsets_r_storage[block_size + cacheline_size];
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
// Offsets in offsets_l are counted upward from offsets_l_base; offsets in offsets_r are
// counted downward from offsets_r_base.
Iter offsets_l_base = first;
Iter offsets_r_base = last;
// num_l / num_r: recorded offsets not yet swapped; start_l / start_r: index of the first
// such offset within its buffer.
size_t num_l, num_r, start_l, start_r;
num_l = num_r = start_l = start_r = 0;
while (first < last) {
// Fill up offset blocks with elements that are on the wrong side.
// First we determine how many elements are considered for each offset block.
// A side is only scanned when its buffer is empty; if both are empty the unknown
// range is split between them.
size_t num_unknown = last - first;
size_t left_split = num_l == 0 ? (num_r == 0 ? num_unknown / 2 : num_unknown) : 0;
size_t right_split = num_r == 0 ? (num_unknown - left_split) : 0;
// Fill the offset blocks.
// The offset is always written; the count only advances when the element is misplaced,
// so the slot is simply overwritten otherwise. This avoids a branch per element.
if (left_split >= block_size) {
for (size_t i = 0; i < block_size;) {
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
}
} else {
for (size_t i = 0; i < left_split;) {
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
}
}
// Right side: offsets start at 1 because `last` is decremented before it is read.
if (right_split >= block_size) {
for (size_t i = 0; i < block_size;) {
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
}
} else {
for (size_t i = 0; i < right_split;) {
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
}
}
// Swap elements and update block sizes and first/last boundaries.
// Only as many pairs as both sides can supply are exchanged; pairwise swaps are
// requested when both sides recorded the same number of offsets.
size_t num = std::min(num_l, num_r);
swap_offsets(offsets_l_base, offsets_r_base,
offsets_l + start_l, offsets_r + start_r,
num, num_l == num_r);
num_l -= num; num_r -= num;
start_l += num; start_r += num;
// A fully consumed buffer is reset so that the next scan starts a fresh block.
if (num_l == 0) {
start_l = 0;
offsets_l_base = first;
}
if (num_r == 0) {
start_r = 0;
offsets_r_base = last;
}
}
// We have now fully identified [first, last)'s proper position. Swap the last elements.
// At most one side still has recorded offsets; those elements are moved next to the
// boundary, highest offset first.
if (num_l) {
offsets_l += start_l;
while (num_l--) std::iter_swap(offsets_l_base + offsets_l[num_l], --last);
first = last;
}
if (num_r) {
offsets_r += start_r;
while (num_r--) std::iter_swap(offsets_r_base - offsets_r[num_r], first), ++first;
last = first;
}
}
// Put the pivot in the right place.
Iter pivot_pos = first - 1;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return std::make_pair(pivot_pos, already_partitioned);
}
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
// pivot is a median of at least 3 elements and that [begin, end) is at least
// insertion_sort_threshold long.
//
// This is the classic two-cursor scheme: `first` moves up, `last` moves down, and misplaced
// pairs are swapped until the cursors cross.
template<class Iter, class Compare>
tracy_force_inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
// Move pivot into local for speed.
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
// Find the first element greater than or equal to the pivot (the median of 3 guarantees
// this exists).
while (comp(*++first, pivot));
// Scanning from the back, find the first element strictly smaller than the pivot. We have
// to guard this search if there was no element before *first.
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
else while ( !comp(*--last, pivot));
// If the first pair of elements that should be swapped to partition are the same element,
// the passed in sequence already was correctly partitioned.
bool already_partitioned = first >= last;
// Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
// swapped pairs guard the searches, which is why the first iteration is special-cased
// above.
while (first < last) {
std::iter_swap(first, last);
while (comp(*++first, pivot));
while (!comp(*--last, pivot));
}
// Put the pivot in the right place: the element just before `first` is the last one of
// the left partition, so it trades places with the pivot slot at *begin.
Iter pivot_pos = first - 1;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return std::make_pair(pivot_pos, already_partitioned);
}
// Similar to partition_right, except elements equal to the pivot are put to the left of
// the pivot and it doesn't check or return if the passed sequence already was partitioned.
// Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
// performance, no block quicksort is applied here for simplicity.
// Returns the final position of the pivot.
template<class Iter, class Compare>
tracy_force_inline Iter partition_left(Iter begin, Iter end, Compare comp) {
typedef typename std::iterator_traits<Iter>::value_type T;
// Move the pivot into a local; *begin is refilled when the pivot is placed at the end.
T pivot(PDQSORT_PREFER_MOVE(*begin));
Iter first = begin;
Iter last = end;
// Scanning from the back, find the first element that is not greater than the pivot.
while (comp(pivot, *--last));
// Scanning from the front, find the first element greater than the pivot. The search has
// to be bounded by `last` if the backward scan stopped on the very last element, because
// then nothing is known to stop it.
if (last + 1 == end) while (first < last && !comp(pivot, *++first));
else while ( !comp(pivot, *++first));
// Swap misplaced pairs until the cursors cross; previously swapped pairs stop the scans.
while (first < last) {
std::iter_swap(first, last);
while (comp(pivot, *--last));
while (!comp(pivot, *++first));
}
// `last` is the final element of the left (<= pivot) partition; it trades places with
// the pivot slot at *begin.
Iter pivot_pos = last;
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
return pivot_pos;
}
// Main pdqsort driver: sorts [begin, end) with comp.
//
// Template parameter Branchless selects partition_right_branchless over partition_right.
// bad_allowed is the number of highly unbalanced partitions that are tolerated before the
// current range is handed to heapsort. leftmost tells whether [begin, end) starts at the
// beginning of the range being sorted; when it is false, *(begin - 1) is read and relied
// upon as a lower bound for the range.
template<class Iter, class Compare, bool Branchless>
inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
// Use a while loop for tail recursion elimination.
while (true) {
diff_t size = end - begin;
// Insertion sort is faster for small arrays.
// The unguarded variant may be used when an element precedes `begin`.
if (size < insertion_sort_threshold) {
if (leftmost) insertion_sort(begin, end, comp);
else unguarded_insertion_sort(begin, end, comp);
return;
}
// Choose pivot as median of 3 or pseudomedian of 9.
// Either way the chosen pivot ends up in *begin, where the partition functions expect it.
diff_t s2 = size / 2;
if (size > ninther_threshold) {
sort3(begin, begin + s2, end - 1, comp);
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
std::iter_swap(begin, begin + s2);
} else sort3(begin + s2, begin, end - 1, comp);
// If *(begin - 1) is the end of the right partition of a previous partition operation
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
// the left partition, greater elements in the right partition. We do not have to
// recurse on the left partition, since it's sorted (all equal).
if (!leftmost && !comp(*(begin - 1), *begin)) {
begin = partition_left(begin, end, comp) + 1;
continue;
}
// Partition and get results.
std::pair<Iter, bool> part_result =
Branchless ? partition_right_branchless(begin, end, comp)
: partition_right(begin, end, comp);
Iter pivot_pos = part_result.first;
bool already_partitioned = part_result.second;
// Check for a highly unbalanced partition: one side holds less than an eighth of the range.
diff_t l_size = pivot_pos - begin;
diff_t r_size = end - (pivot_pos + 1);
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
if (highly_unbalanced) {
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
if (--bad_allowed == 0) {
std::make_heap(begin, end, comp);
std::sort_heap(begin, end, comp);
return;
}
// Swap a few elements near the ends of each large enough side with elements about a
// quarter of the way in.
if (l_size >= insertion_sort_threshold) {
std::iter_swap(begin, begin + l_size / 4);
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
if (l_size > ninther_threshold) {
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
}
}
if (r_size >= insertion_sort_threshold) {
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
std::iter_swap(end - 1, end - r_size / 4);
if (r_size > ninther_threshold) {
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
std::iter_swap(end - 2, end - (1 + r_size / 4));
std::iter_swap(end - 3, end - (2 + r_size / 4));
}
}
} else {
// If we were decently balanced and we tried to sort an already partitioned
// sequence try to use insertion sort.
// Both halves must finish within the move budget for the early return to be taken.
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
&& partial_insertion_sort(pivot_pos + 1, end, comp)) return;
}
// Sort the left partition first using recursion and do tail recursion elimination for
// the right-hand partition.
pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
begin = pivot_pos + 1;
// The pivot now sits directly in front of the remaining range.
leftmost = false;
}
}
}
template<class Iter, class Compare>
inline void pdqsort(Iter begin, Iter end, Compare comp) {
if (begin == end) return;
pdqsort_detail::pdqsort_loop<Iter, Compare,
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
begin, end, comp, pdqsort_detail::log2(end - begin));
}
// Sorts [begin, end) in ascending order, comparing elements with std::less.
template<class Iter>
inline void pdqsort(Iter begin, Iter end) {
    typedef std::less<typename std::iterator_traits<Iter>::value_type> less_t;
    pdqsort(begin, end, less_t());
}
// Sorts [begin, end) with comp, always using the branchless partition scheme regardless
// of the comparator and value types.
template<class Iter, class Compare>
inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) {
    if (begin != end) {
        // Allow floor(log2(n)) bad partitions before falling back to heapsort.
        pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
            begin, end, comp, pdqsort_detail::log2(end - begin));
    }
}
// Sorts [begin, end) in ascending order with std::less, always using the branchless
// partition scheme.
template<class Iter>
tracy_force_inline void pdqsort_branchless(Iter begin, Iter end) {
    typedef std::less<typename std::iterator_traits<Iter>::value_type> less_t;
    pdqsort_branchless(begin, end, less_t());
}
}
#undef PDQSORT_PREFER_MOVE
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff