mirror of
https://github.com/slendidev/lunar.git
synced 2026-01-30 16:28:58 +02:00
76
subprojects/tracy/server/TracyCharUtil.hpp
Normal file
76
subprojects/tracy/server/TracyCharUtil.hpp
Normal file
@@ -0,0 +1,76 @@
|
||||
#ifndef __TRACY__CHARUTIL_HPP__
|
||||
#define __TRACY__CHARUTIL_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
#include "tracy_xxhash.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
namespace charutil
|
||||
{
|
||||
|
||||
static inline size_t hash( const char* str )
|
||||
{
|
||||
const auto sz = strlen( str );
|
||||
return XXH3_64bits( str, sz );
|
||||
}
|
||||
|
||||
static inline size_t hash( const char* str, size_t sz )
|
||||
{
|
||||
return XXH3_64bits( str, sz );
|
||||
}
|
||||
|
||||
struct Hasher
|
||||
{
|
||||
size_t operator()( const char* key ) const
|
||||
{
|
||||
return hash( key );
|
||||
}
|
||||
};
|
||||
|
||||
// Equality functor for NUL-terminated C strings: true when strcmp() reports
// both strings as identical. Neither argument may be null (strcmp is called directly).
struct Comparator
{
    bool operator()( const char* lhs, const char* rhs ) const
    {
        const int order = strcmp( lhs, rhs );
        return order == 0;
    }
};
|
||||
|
||||
// Strict-weak-ordering functor for NUL-terminated C strings: true when `lhs`
// sorts before `rhs` according to strcmp().
struct LessComparator
{
    bool operator()( const char* lhs, const char* rhs ) const
    {
        const int order = strcmp( lhs, rhs );
        return order < 0;
    }
};
|
||||
|
||||
struct StringKey
|
||||
{
|
||||
const char* ptr;
|
||||
size_t sz;
|
||||
|
||||
struct Hasher
|
||||
{
|
||||
size_t operator()( const StringKey& key ) const
|
||||
{
|
||||
return hash( key.ptr, key.sz );
|
||||
}
|
||||
};
|
||||
|
||||
struct Comparator
|
||||
{
|
||||
bool operator()( const StringKey& lhs, const StringKey& rhs ) const
|
||||
{
|
||||
return lhs.sz == rhs.sz && memcmp( lhs.ptr, rhs.ptr, lhs.sz ) == 0;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
850
subprojects/tracy/server/TracyEvent.hpp
Normal file
850
subprojects/tracy/server/TracyEvent.hpp
Normal file
@@ -0,0 +1,850 @@
|
||||
#ifndef __TRACYEVENT_HPP__
|
||||
#define __TRACYEVENT_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
|
||||
#include "TracyCharUtil.hpp"
|
||||
#include "TracyShortPtr.hpp"
|
||||
#include "TracySortedVector.hpp"
|
||||
#include "TracyVector.hpp"
|
||||
#include "tracy_robin_hood.h"
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "../public/common/TracyQueue.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#pragma pack( push, 1 )
|
||||
|
||||
struct StringRef
|
||||
{
|
||||
enum Type { Ptr, Idx };
|
||||
|
||||
tracy_force_inline StringRef() : str( 0 ), __data( 0 ) {}
|
||||
tracy_force_inline StringRef( Type t, uint64_t data )
|
||||
: str( data )
|
||||
, __data( 0 )
|
||||
{
|
||||
isidx = t == Idx;
|
||||
active = 1;
|
||||
}
|
||||
|
||||
uint64_t str;
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t isidx : 1;
|
||||
uint8_t active : 1;
|
||||
};
|
||||
uint8_t __data;
|
||||
};
|
||||
};
|
||||
|
||||
struct StringRefHasher
|
||||
{
|
||||
size_t operator()( const StringRef& key ) const
|
||||
{
|
||||
return charutil::hash( (const char*)&key, sizeof( StringRef ) );
|
||||
}
|
||||
};
|
||||
|
||||
struct StringRefComparator
|
||||
{
|
||||
bool operator()( const StringRef& lhs, const StringRef& rhs ) const
|
||||
{
|
||||
return memcmp( &lhs, &rhs, sizeof( StringRef ) ) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
class StringIdx
|
||||
{
|
||||
public:
|
||||
tracy_force_inline StringIdx() { memset( m_idx, 0, sizeof( m_idx ) ); }
|
||||
tracy_force_inline StringIdx( uint32_t idx )
|
||||
{
|
||||
SetIdx( idx );
|
||||
}
|
||||
|
||||
tracy_force_inline void SetIdx( uint32_t idx )
|
||||
{
|
||||
idx++;
|
||||
memcpy( m_idx, &idx, 3 );
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t Idx() const
|
||||
{
|
||||
uint32_t idx = 0;
|
||||
memcpy( &idx, m_idx, 3 );
|
||||
assert( idx != 0 );
|
||||
return idx - 1;
|
||||
}
|
||||
|
||||
tracy_force_inline bool Active() const
|
||||
{
|
||||
uint32_t zero = 0;
|
||||
return memcmp( m_idx, &zero, 3 ) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t m_idx[3];
|
||||
};
|
||||
|
||||
struct StringIdxHasher
|
||||
{
|
||||
size_t operator()( const StringIdx& key ) const
|
||||
{
|
||||
return charutil::hash( (const char*)&key, sizeof( StringIdx ) );
|
||||
}
|
||||
};
|
||||
|
||||
struct StringIdxComparator
|
||||
{
|
||||
bool operator()( const StringIdx& lhs, const StringIdx& rhs ) const
|
||||
{
|
||||
return memcmp( &lhs, &rhs, sizeof( StringIdx ) ) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
class Int24
|
||||
{
|
||||
public:
|
||||
tracy_force_inline Int24() { memset( m_val, 0, sizeof( m_val ) ); }
|
||||
tracy_force_inline Int24( uint32_t val )
|
||||
{
|
||||
SetVal( val );
|
||||
}
|
||||
|
||||
tracy_force_inline void SetVal( uint32_t val )
|
||||
{
|
||||
memcpy( m_val, &val, 2 );
|
||||
val >>= 16;
|
||||
memcpy( m_val+2, &val, 1 );
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t Val() const
|
||||
{
|
||||
uint8_t hi;
|
||||
memcpy( &hi, m_val+2, 1 );
|
||||
uint16_t lo;
|
||||
memcpy( &lo, m_val, 2 );
|
||||
return ( uint32_t( hi ) << 16 ) | lo;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t m_val[3];
|
||||
};
|
||||
|
||||
class Int48
|
||||
{
|
||||
public:
|
||||
tracy_force_inline Int48() {}
|
||||
tracy_force_inline Int48( int64_t val )
|
||||
{
|
||||
SetVal( val );
|
||||
}
|
||||
|
||||
tracy_force_inline void Clear()
|
||||
{
|
||||
memset( m_val, 0, 6 );
|
||||
}
|
||||
|
||||
tracy_force_inline void SetVal( int64_t val )
|
||||
{
|
||||
memcpy( m_val, &val, 4 );
|
||||
val >>= 32;
|
||||
memcpy( m_val+4, &val, 2 );
|
||||
}
|
||||
|
||||
tracy_force_inline int64_t Val() const
|
||||
{
|
||||
int16_t hi;
|
||||
memcpy( &hi, m_val+4, 2 );
|
||||
uint32_t lo;
|
||||
memcpy( &lo, m_val, 4 );
|
||||
return ( int64_t( uint64_t( hi ) << 32 ) ) | lo;
|
||||
}
|
||||
|
||||
tracy_force_inline bool IsNonNegative() const
|
||||
{
|
||||
return ( m_val[5] >> 7 ) == 0;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t m_val[6];
|
||||
};
|
||||
|
||||
// Ordering functor for Int48: ascending by decoded value.
// operator() is const so the comparator can be invoked through a const object
// or reference, as the standard Compare requirements expect.
struct Int48Sort
{
    bool operator()( const Int48& lhs, const Int48& rhs ) const
    {
        return lhs.Val() < rhs.Val();
    }
};
|
||||
|
||||
|
||||
struct SourceLocationBase
|
||||
{
|
||||
StringRef name;
|
||||
StringRef function;
|
||||
StringRef file;
|
||||
uint32_t line;
|
||||
uint32_t color;
|
||||
};
|
||||
|
||||
struct SourceLocation : public SourceLocationBase
|
||||
{
|
||||
mutable uint32_t namehash;
|
||||
};
|
||||
|
||||
enum { SourceLocationSize = sizeof( SourceLocation ) };
|
||||
|
||||
|
||||
struct ZoneEvent
|
||||
{
|
||||
tracy_force_inline ZoneEvent() {};
|
||||
|
||||
tracy_force_inline int64_t Start() const { return int64_t( _start_srcloc ) >> 16; }
|
||||
tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_srcloc)+2, &start, 4 ); memcpy( ((char*)&_start_srcloc)+6, ((char*)&start)+4, 2 ); }
|
||||
tracy_force_inline int64_t End() const { return int64_t( _end_child1 ) >> 16; }
|
||||
tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_end_child1)+2, &end, 4 ); memcpy( ((char*)&_end_child1)+6, ((char*)&end)+4, 2 ); }
|
||||
tracy_force_inline bool IsEndValid() const { return ( _end_child1 >> 63 ) == 0; }
|
||||
tracy_force_inline int16_t SrcLoc() const { return int16_t( _start_srcloc & 0xFFFF ); }
|
||||
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_start_srcloc, &srcloc, 2 ); }
|
||||
tracy_force_inline int32_t Child() const { int32_t child; memcpy( &child, &_child2, 4 ); return child; }
|
||||
tracy_force_inline void SetChild( int32_t child ) { memcpy( &_child2, &child, 4 ); }
|
||||
tracy_force_inline bool HasChildren() const { uint8_t tmp; memcpy( &tmp, ((char*)&_end_child1)+1, 1 ); return ( tmp >> 7 ) == 0; }
|
||||
|
||||
tracy_force_inline void SetStartSrcLoc( int64_t start, int16_t srcloc ) { assert( start < (int64_t)( 1ull << 47 ) ); start <<= 16; start |= uint16_t( srcloc ); memcpy( &_start_srcloc, &start, 8 ); }
|
||||
|
||||
uint64_t _start_srcloc;
|
||||
uint16_t _child2;
|
||||
uint64_t _end_child1;
|
||||
uint32_t extra;
|
||||
};
|
||||
|
||||
enum { ZoneEventSize = sizeof( ZoneEvent ) };
|
||||
static_assert( std::is_standard_layout<ZoneEvent>::value, "ZoneEvent is not standard layout" );
|
||||
|
||||
|
||||
struct ZoneExtra
|
||||
{
|
||||
Int24 callstack;
|
||||
StringIdx text;
|
||||
StringIdx name;
|
||||
Int24 color;
|
||||
};
|
||||
|
||||
enum { ZoneExtraSize = sizeof( ZoneExtra ) };
|
||||
|
||||
|
||||
// Current x64 and arm64 implementations do not provide a full 64-bit address
// space: the high bits of a pointer must be a sign extension, which makes
// 0x80... an invalid pointer. This union takes advantage of that to use the
// top of the word as a selector between a native pointer and a table index.
// Two single-bit flags (`sel`, `custom`) are declared after the 62-bit `idx`;
// `data` gives access to the whole 64-bit word.
union CallstackFrameId
{
    struct
    {
        uint64_t idx : 62;
        uint64_t sel : 1;
        uint64_t custom : 1;
    };
    uint64_t data;
};

// Size of CallstackFrameId in bytes, exposed as an enumerator.
enum { CallstackFrameIdSize = sizeof( CallstackFrameId ) };
|
||||
|
||||
// Two frame ids are equal when their whole 64-bit words match.
static tracy_force_inline bool operator==( const CallstackFrameId& lhs, const CallstackFrameId& rhs )
{
    return lhs.data == rhs.data;
}
|
||||
|
||||
|
||||
struct SampleData
|
||||
{
|
||||
Int48 time;
|
||||
Int24 callstack;
|
||||
};
|
||||
|
||||
enum { SampleDataSize = sizeof( SampleData ) };
|
||||
|
||||
// Ordering functor for SampleData: ascending by decoded time.
// operator() is const so the comparator can be invoked through a const object
// or reference, as the standard Compare requirements expect.
struct SampleDataSort
{
    bool operator()( const SampleData& lhs, const SampleData& rhs ) const
    {
        return lhs.time.Val() < rhs.time.Val();
    }
};
|
||||
|
||||
|
||||
struct SampleDataRange
|
||||
{
|
||||
Int48 time;
|
||||
uint16_t thread;
|
||||
CallstackFrameId ip;
|
||||
};
|
||||
|
||||
enum { SampleDataRangeSize = sizeof( SampleDataRange ) };
|
||||
|
||||
|
||||
struct HwSampleData
|
||||
{
|
||||
SortedVector<Int48, Int48Sort> cycles;
|
||||
SortedVector<Int48, Int48Sort> retired;
|
||||
SortedVector<Int48, Int48Sort> cacheRef;
|
||||
SortedVector<Int48, Int48Sort> cacheMiss;
|
||||
SortedVector<Int48, Int48Sort> branchRetired;
|
||||
SortedVector<Int48, Int48Sort> branchMiss;
|
||||
|
||||
bool is_sorted() const
|
||||
{
|
||||
return
|
||||
cycles.is_sorted() &&
|
||||
retired.is_sorted() &&
|
||||
cacheRef.is_sorted() &&
|
||||
cacheMiss.is_sorted() &&
|
||||
branchRetired.is_sorted() &&
|
||||
branchMiss.is_sorted();
|
||||
}
|
||||
|
||||
void sort()
|
||||
{
|
||||
if( !cycles.is_sorted() ) cycles.sort();
|
||||
if( !retired.is_sorted() ) retired.sort();
|
||||
if( !cacheRef.is_sorted() ) cacheRef.sort();
|
||||
if( !cacheMiss.is_sorted() ) cacheMiss.sort();
|
||||
if( !branchRetired.is_sorted() ) branchRetired.sort();
|
||||
if( !branchMiss.is_sorted() ) branchMiss.sort();
|
||||
}
|
||||
};
|
||||
|
||||
enum { HwSampleDataSize = sizeof( HwSampleData ) };
|
||||
|
||||
|
||||
struct LockEvent
|
||||
{
|
||||
enum class Type : uint8_t
|
||||
{
|
||||
Wait,
|
||||
Obtain,
|
||||
Release,
|
||||
WaitShared,
|
||||
ObtainShared,
|
||||
ReleaseShared
|
||||
};
|
||||
|
||||
tracy_force_inline int64_t Time() const { return int64_t( _time_srcloc ) >> 16; }
|
||||
tracy_force_inline void SetTime( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_srcloc)+2, &time, 4 ); memcpy( ((char*)&_time_srcloc)+6, ((char*)&time)+4, 2 ); }
|
||||
tracy_force_inline int16_t SrcLoc() const { return int16_t( _time_srcloc & 0xFFFF ); }
|
||||
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_time_srcloc, &srcloc, 2 ); }
|
||||
|
||||
uint64_t _time_srcloc;
|
||||
uint8_t thread;
|
||||
Type type;
|
||||
};
|
||||
|
||||
struct LockEventShared : public LockEvent
|
||||
{
|
||||
uint64_t waitShared;
|
||||
uint64_t sharedList;
|
||||
};
|
||||
|
||||
struct LockEventPtr
|
||||
{
|
||||
short_ptr<LockEvent> ptr;
|
||||
uint8_t lockingThread;
|
||||
uint8_t lockCount;
|
||||
uint64_t waitList;
|
||||
};
|
||||
|
||||
enum { LockEventSize = sizeof( LockEvent ) };
|
||||
enum { LockEventSharedSize = sizeof( LockEventShared ) };
|
||||
enum { LockEventPtrSize = sizeof( LockEventPtr ) };
|
||||
|
||||
enum { MaxLockThreads = sizeof( LockEventPtr::waitList ) * 8 };
|
||||
static_assert( std::numeric_limits<decltype(LockEventPtr::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
|
||||
|
||||
|
||||
enum class LockType : uint8_t;
|
||||
|
||||
struct LockMap
|
||||
{
|
||||
struct TimeRange
|
||||
{
|
||||
int64_t start = std::numeric_limits<int64_t>::max();
|
||||
int64_t end = std::numeric_limits<int64_t>::min();
|
||||
};
|
||||
|
||||
StringIdx customName;
|
||||
int16_t srcloc;
|
||||
Vector<LockEventPtr> timeline;
|
||||
unordered_flat_map<uint64_t, uint8_t> threadMap;
|
||||
std::vector<uint64_t> threadList;
|
||||
LockType type;
|
||||
int64_t timeAnnounce;
|
||||
int64_t timeTerminate;
|
||||
bool valid;
|
||||
bool isContended;
|
||||
uint64_t lockingThread;
|
||||
|
||||
TimeRange range[64];
|
||||
};
|
||||
|
||||
// Plain record describing a highlighted lock interval.
struct LockHighlight
{
    int64_t id;         // lock identifier
    int64_t begin;      // interval begin
    int64_t end;        // interval end
    uint8_t thread;     // 8-bit thread value
    bool blocked;
};
|
||||
|
||||
|
||||
struct GpuEvent
|
||||
{
|
||||
tracy_force_inline int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; }
|
||||
tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); }
|
||||
tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; }
|
||||
tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); }
|
||||
tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; }
|
||||
tracy_force_inline void SetGpuStart( int64_t gpuStart ) { /*assert( gpuStart < (int64_t)( 1ull << 47 ) );*/ memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); }
|
||||
tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; }
|
||||
tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); }
|
||||
tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); }
|
||||
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); }
|
||||
tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); }
|
||||
tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); }
|
||||
tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); }
|
||||
tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); }
|
||||
|
||||
uint64_t _cpuStart_srcloc;
|
||||
uint64_t _cpuEnd_thread;
|
||||
uint64_t _gpuStart_child1;
|
||||
uint64_t _gpuEnd_child2;
|
||||
Int24 callstack;
|
||||
};
|
||||
|
||||
enum { GpuEventSize = sizeof( GpuEvent ) };
|
||||
static_assert( std::is_standard_layout<GpuEvent>::value, "GpuEvent is not standard layout" );
|
||||
|
||||
|
||||
struct MemEvent
|
||||
{
|
||||
tracy_force_inline uint64_t Ptr() const { return uint64_t( int64_t( _ptr_csalloc1 ) >> 8 ); }
|
||||
tracy_force_inline void SetPtr( uint64_t ptr ) { memcpy( ((char*)&_ptr_csalloc1)+1, &ptr, 4 ); memcpy( ((char*)&_ptr_csalloc1)+5, ((char*)&ptr)+4, 2 ); memcpy( ((char*)&_ptr_csalloc1)+7, ((char*)&ptr)+6, 1 ); }
|
||||
tracy_force_inline uint64_t Size() const { return _size_csalloc2 >> 16; }
|
||||
tracy_force_inline void SetSize( uint64_t size ) { assert( size < ( 1ull << 47 ) ); memcpy( ((char*)&_size_csalloc2)+2, &size, 4 ); memcpy( ((char*)&_size_csalloc2)+6, ((char*)&size)+4, 2 ); }
|
||||
tracy_force_inline uint32_t CsAlloc() const { return uint8_t( _ptr_csalloc1 ) | ( uint16_t( _size_csalloc2 ) << 8 ); }
|
||||
tracy_force_inline void SetCsAlloc( uint32_t csAlloc ) { memcpy( &_ptr_csalloc1, &csAlloc, 1 ); memcpy( &_size_csalloc2, ((char*)&csAlloc)+1, 2 ); }
|
||||
tracy_force_inline int64_t TimeAlloc() const { return int64_t( _time_thread_alloc ) >> 16; }
|
||||
tracy_force_inline void SetTimeAlloc( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_thread_alloc)+2, &time, 4 ); memcpy( ((char*)&_time_thread_alloc)+6, ((char*)&time)+4, 2 ); }
|
||||
tracy_force_inline int64_t TimeFree() const { return int64_t( _time_thread_free ) >> 16; }
|
||||
tracy_force_inline void SetTimeFree( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_thread_free)+2, &time, 4 ); memcpy( ((char*)&_time_thread_free)+6, ((char*)&time)+4, 2 ); }
|
||||
tracy_force_inline uint16_t ThreadAlloc() const { return uint16_t( _time_thread_alloc ); }
|
||||
tracy_force_inline void SetThreadAlloc( uint16_t thread ) { memcpy( &_time_thread_alloc, &thread, 2 ); }
|
||||
tracy_force_inline uint16_t ThreadFree() const { return uint16_t( _time_thread_free ); }
|
||||
tracy_force_inline void SetThreadFree( uint16_t thread ) { memcpy( &_time_thread_free, &thread, 2 ); }
|
||||
|
||||
tracy_force_inline void SetTimeThreadAlloc( int64_t time, uint16_t thread ) { time <<= 16; time |= thread; memcpy( &_time_thread_alloc, &time, 8 ); }
|
||||
tracy_force_inline void SetTimeThreadFree( int64_t time, uint16_t thread ) { uint64_t t; memcpy( &t, &time, 8 ); t <<= 16; t |= thread; memcpy( &_time_thread_free, &t, 8 ); }
|
||||
|
||||
uint64_t _ptr_csalloc1;
|
||||
uint64_t _size_csalloc2;
|
||||
Int24 csFree;
|
||||
uint64_t _time_thread_alloc;
|
||||
uint64_t _time_thread_free;
|
||||
};
|
||||
|
||||
enum { MemEventSize = sizeof( MemEvent ) };
|
||||
static_assert( std::is_standard_layout<MemEvent>::value, "MemEvent is not standard layout" );
|
||||
|
||||
|
||||
struct CallstackFrameBasic
|
||||
{
|
||||
StringIdx name;
|
||||
StringIdx file;
|
||||
uint32_t line;
|
||||
};
|
||||
|
||||
struct CallstackFrame : public CallstackFrameBasic
|
||||
{
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct SymbolData : public CallstackFrameBasic
|
||||
{
|
||||
StringIdx imageName;
|
||||
StringIdx callFile;
|
||||
uint32_t callLine;
|
||||
uint8_t isInline;
|
||||
Int24 size;
|
||||
};
|
||||
|
||||
enum { CallstackFrameBasicSize = sizeof( CallstackFrameBasic ) };
|
||||
enum { CallstackFrameSize = sizeof( CallstackFrame ) };
|
||||
enum { SymbolDataSize = sizeof( SymbolData ) };
|
||||
|
||||
|
||||
// Address range of a symbol: 64-bit start address and 32-bit length.
struct SymbolLocation
{
    uint64_t addr;
    uint32_t len;
};

// Size of SymbolLocation in bytes, exposed as an enumerator.
enum { SymbolLocationSize = sizeof( SymbolLocation ) };
|
||||
|
||||
|
||||
struct CallstackFrameData
|
||||
{
|
||||
short_ptr<CallstackFrame> data;
|
||||
uint8_t size;
|
||||
StringIdx imageName;
|
||||
};
|
||||
|
||||
enum { CallstackFrameDataSize = sizeof( CallstackFrameData ) };
|
||||
|
||||
|
||||
struct MemCallstackFrameTree
|
||||
{
|
||||
MemCallstackFrameTree( CallstackFrameId id ) : frame( id ), alloc( 0 ), count( 0 ) {}
|
||||
|
||||
CallstackFrameId frame;
|
||||
uint64_t alloc;
|
||||
uint32_t count;
|
||||
unordered_flat_map<uint64_t, MemCallstackFrameTree> children;
|
||||
unordered_flat_set<uint32_t> callstacks;
|
||||
};
|
||||
|
||||
enum { MemCallstackFrameTreeSize = sizeof( MemCallstackFrameTree ) };
|
||||
|
||||
|
||||
struct CallstackFrameTree
|
||||
{
|
||||
CallstackFrameTree( CallstackFrameId id ) : frame( id ), count( 0 ) {}
|
||||
|
||||
CallstackFrameId frame;
|
||||
uint32_t count;
|
||||
unordered_flat_map<uint64_t, CallstackFrameTree> children;
|
||||
};
|
||||
|
||||
enum { CallstackFrameTreeSize = sizeof( CallstackFrameTree ) };
|
||||
|
||||
|
||||
// Crash record; every field defaults to zero.
struct CrashEvent
{
    uint64_t thread = 0;
    int64_t time = 0;
    uint64_t message = 0;
    uint32_t callstack = 0;
};

// Size of CrashEvent in bytes, exposed as an enumerator.
enum { CrashEventSize = sizeof( CrashEvent ) };
|
||||
|
||||
|
||||
struct ContextSwitchData
|
||||
{
|
||||
enum : int8_t { Fiber = 99 };
|
||||
enum : int8_t { NoState = 100 };
|
||||
enum : int8_t { Wakeup = -2 };
|
||||
|
||||
tracy_force_inline int64_t Start() const { return int64_t( _start_cpu ) >> 16; }
|
||||
tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_cpu)+2, &start, 4 ); memcpy( ((char*)&_start_cpu)+6, ((char*)&start)+4, 2 ); }
|
||||
tracy_force_inline int64_t End() const { return int64_t( _end_reason_state ) >> 16; }
|
||||
tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_end_reason_state)+2, &end, 4 ); memcpy( ((char*)&_end_reason_state)+6, ((char*)&end)+4, 2 ); }
|
||||
tracy_force_inline bool IsEndValid() const { return ( _end_reason_state >> 63 ) == 0; }
|
||||
tracy_force_inline uint8_t Cpu() const { return uint8_t( _start_cpu & 0xFF ); }
|
||||
tracy_force_inline void SetCpu( uint8_t cpu ) { memcpy( &_start_cpu, &cpu, 1 ); }
|
||||
tracy_force_inline int8_t Reason() const { return int8_t( (_end_reason_state >> 8) & 0xFF ); }
|
||||
tracy_force_inline void SetReason( int8_t reason ) { memcpy( ((char*)&_end_reason_state)+1, &reason, 1 ); }
|
||||
tracy_force_inline int8_t State() const { return int8_t( _end_reason_state & 0xFF ); }
|
||||
tracy_force_inline void SetState( int8_t state ) { memcpy( &_end_reason_state, &state, 1 ); }
|
||||
tracy_force_inline int64_t WakeupVal() const { return _wakeup.Val(); }
|
||||
tracy_force_inline void SetWakeup( int64_t wakeup ) { assert( wakeup < (int64_t)( 1ull << 47 ) ); _wakeup.SetVal( wakeup ); }
|
||||
tracy_force_inline uint16_t Thread() const { return _thread; }
|
||||
tracy_force_inline void SetThread( uint16_t thread ) { _thread = thread; }
|
||||
|
||||
tracy_force_inline void SetStartCpu( int64_t start, uint8_t cpu ) { assert( start < (int64_t)( 1ull << 47 ) ); _start_cpu = ( uint64_t( start ) << 16 ) | cpu; }
|
||||
tracy_force_inline void SetEndReasonState( int64_t end, int8_t reason, int8_t state ) { assert( end < (int64_t)( 1ull << 47 ) ); _end_reason_state = ( uint64_t( end ) << 16 ) | ( uint64_t( reason ) << 8 ) | uint8_t( state ); }
|
||||
|
||||
uint64_t _start_cpu;
|
||||
uint64_t _end_reason_state;
|
||||
Int48 _wakeup;
|
||||
uint16_t _thread;
|
||||
};
|
||||
|
||||
enum { ContextSwitchDataSize = sizeof( ContextSwitchData ) };
|
||||
|
||||
|
||||
struct ContextSwitchCpu
|
||||
{
|
||||
tracy_force_inline int64_t Start() const { return int64_t( _start_thread ) >> 16; }
|
||||
tracy_force_inline void SetStart( int64_t start ) { assert( start < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_start_thread)+2, &start, 4 ); memcpy( ((char*)&_start_thread)+6, ((char*)&start)+4, 2 ); }
|
||||
tracy_force_inline int64_t End() const { int64_t v; memcpy( &v, ((char*)&_end)-2, 8 ); return v >> 16; }
|
||||
tracy_force_inline void SetEnd( int64_t end ) { assert( end < (int64_t)( 1ull << 47 ) ); _end.SetVal( end ); }
|
||||
tracy_force_inline bool IsEndValid() const { return _end.IsNonNegative(); }
|
||||
tracy_force_inline uint16_t Thread() const { return uint16_t( _start_thread ); }
|
||||
tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_start_thread, &thread, 2 ); }
|
||||
|
||||
tracy_force_inline void SetStartThread( int64_t start, uint16_t thread ) { assert( start < (int64_t)( 1ull << 47 ) ); _start_thread = ( uint64_t( start ) << 16 ) | thread; }
|
||||
|
||||
uint64_t _start_thread;
|
||||
Int48 _end;
|
||||
};
|
||||
|
||||
enum { ContextSwitchCpuSize = sizeof( ContextSwitchCpu ) };
|
||||
|
||||
|
||||
struct ContextSwitchUsage
|
||||
{
|
||||
ContextSwitchUsage() {}
|
||||
ContextSwitchUsage( int64_t time, uint8_t other, uint8_t own ) { SetTime( time ); SetOther( other ); SetOwn( own ); }
|
||||
|
||||
tracy_force_inline int64_t Time() const { return int64_t( _time_other_own ) >> 16; }
|
||||
tracy_force_inline void SetTime( int64_t time ) { assert( time < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_time_other_own)+2, &time, 4 ); memcpy( ((char*)&_time_other_own)+6, ((char*)&time)+4, 2 ); }
|
||||
tracy_force_inline uint8_t Other() const { return uint8_t( _time_other_own ); }
|
||||
tracy_force_inline void SetOther( uint8_t other ) { memcpy( &_time_other_own, &other, 1 ); }
|
||||
tracy_force_inline uint8_t Own() const { uint8_t v; memcpy( &v, ((char*)&_time_other_own)+1, 1 );return v; }
|
||||
tracy_force_inline void SetOwn( uint8_t own ) { memcpy( ((char*)&_time_other_own)+1, &own, 1 ); }
|
||||
|
||||
uint64_t _time_other_own;
|
||||
};
|
||||
|
||||
enum { ContextSwitchUsageSize = sizeof( ContextSwitchUsage ) };
|
||||
|
||||
|
||||
struct MessageData
|
||||
{
|
||||
int64_t time;
|
||||
StringRef ref;
|
||||
uint16_t thread;
|
||||
uint32_t color;
|
||||
Int24 callstack;
|
||||
};
|
||||
|
||||
enum { MessageDataSize = sizeof( MessageData ) };
|
||||
|
||||
|
||||
struct PlotItem
|
||||
{
|
||||
Int48 time;
|
||||
double val;
|
||||
};
|
||||
|
||||
enum { PlotItemSize = sizeof( PlotItem ) };
|
||||
|
||||
|
||||
// One frame: start and end times plus a 32-bit frame image value.
// NOTE(review): frameImage is presumably an index, with a negative value meaning "none" — confirm.
struct FrameEvent
{
    int64_t start;
    int64_t end;
    int32_t frameImage;
};

// Size of FrameEvent in bytes, exposed as an enumerator.
enum { FrameEventSize = sizeof( FrameEvent ) };
|
||||
|
||||
|
||||
struct FrameImage
|
||||
{
|
||||
short_ptr<const char> ptr;
|
||||
uint32_t csz;
|
||||
uint16_t w, h;
|
||||
uint32_t frameRef;
|
||||
uint8_t flip;
|
||||
};
|
||||
|
||||
enum { FrameImageSize = sizeof( FrameImage ) };
|
||||
|
||||
|
||||
struct GhostZone
|
||||
{
|
||||
Int48 start, end;
|
||||
Int24 frame;
|
||||
int32_t child;
|
||||
};
|
||||
|
||||
enum { GhostZoneSize = sizeof( GhostZone ) };
|
||||
|
||||
|
||||
struct ChildSample
|
||||
{
|
||||
Int48 time;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
enum { ChildSampleSize = sizeof( ChildSample ) };
|
||||
|
||||
#pragma pack( pop )
|
||||
|
||||
|
||||
struct ThreadData
|
||||
{
|
||||
uint64_t id;
|
||||
uint64_t count;
|
||||
Vector<short_ptr<ZoneEvent>> timeline;
|
||||
Vector<short_ptr<ZoneEvent>> stack;
|
||||
Vector<short_ptr<MessageData>> messages;
|
||||
uint32_t nextZoneId;
|
||||
Vector<uint32_t> zoneIdStack;
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
Vector<int64_t> childTimeStack;
|
||||
Vector<GhostZone> ghostZones;
|
||||
uint64_t ghostIdx;
|
||||
SortedVector<SampleData, SampleDataSort> postponedSamples;
|
||||
#endif
|
||||
Vector<SampleData> samples;
|
||||
SampleData pendingSample;
|
||||
Vector<SampleData> ctxSwitchSamples;
|
||||
uint64_t kernelSampleCnt;
|
||||
uint8_t isFiber;
|
||||
ThreadData* fiber;
|
||||
uint8_t* stackCount;
|
||||
int32_t groupHint;
|
||||
|
||||
tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t(srcloc)]++; }
|
||||
tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t(srcloc)] != 0; }
|
||||
};
|
||||
|
||||
struct GpuCtxThreadData
|
||||
{
|
||||
Vector<short_ptr<GpuEvent>> timeline;
|
||||
Vector<short_ptr<GpuEvent>> stack;
|
||||
};
|
||||
|
||||
struct GpuCtxData
|
||||
{
|
||||
int64_t timeDiff;
|
||||
uint64_t thread;
|
||||
uint64_t count;
|
||||
float period;
|
||||
GpuContextType type;
|
||||
bool hasPeriod;
|
||||
bool hasCalibration;
|
||||
int64_t calibratedGpuTime;
|
||||
int64_t calibratedCpuTime;
|
||||
double calibrationMod;
|
||||
int64_t lastGpuTime;
|
||||
uint64_t overflow;
|
||||
uint32_t overflowMul;
|
||||
StringIdx name;
|
||||
unordered_flat_map<uint64_t, GpuCtxThreadData> threadData;
|
||||
short_ptr<GpuEvent> query[64*1024];
|
||||
};
|
||||
|
||||
enum { GpuCtxDataSize = sizeof( GpuCtxData ) };
|
||||
|
||||
|
||||
// Kind of plot. Enumerators take consecutive values starting at 0 in the
// order listed.
enum class PlotType : uint8_t
{
    User,
    Memory,
    SysTime,
    Power
};
|
||||
|
||||
// How plot values are formatted for display.
// Keep this in sync with the enum in TracyC.h: the enumerators (consecutive
// from 0 in the order listed) must match.
enum class PlotValueFormatting : uint8_t
{
    Number,
    Memory,
    Percentage,
    Watt
};
|
||||
|
||||
struct PlotData
|
||||
{
|
||||
struct PlotItemSort { bool operator()( const PlotItem& lhs, const PlotItem& rhs ) { return lhs.time.Val() < rhs.time.Val(); }; };
|
||||
|
||||
uint64_t name;
|
||||
double min;
|
||||
double max;
|
||||
double sum;
|
||||
SortedVector<PlotItem, PlotItemSort> data;
|
||||
PlotType type;
|
||||
PlotValueFormatting format;
|
||||
uint8_t showSteps;
|
||||
uint8_t fill;
|
||||
uint32_t color;
|
||||
|
||||
double rMin, rMax, num;
|
||||
};
|
||||
|
||||
struct MemData
|
||||
{
|
||||
Vector<MemEvent> data;
|
||||
Vector<uint32_t> frees;
|
||||
unordered_flat_map<uint64_t, size_t> active;
|
||||
uint64_t high = std::numeric_limits<uint64_t>::min();
|
||||
uint64_t low = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t usage = 0;
|
||||
PlotData* plot = nullptr;
|
||||
bool reconstruct = false;
|
||||
uint64_t name = 0;
|
||||
};
|
||||
|
||||
struct FrameData
|
||||
{
|
||||
uint64_t name;
|
||||
Vector<FrameEvent> frames;
|
||||
uint8_t continuous;
|
||||
|
||||
int64_t min = std::numeric_limits<int64_t>::max();
|
||||
int64_t max = std::numeric_limits<int64_t>::min();
|
||||
int64_t total = 0;
|
||||
double sumSq = 0;
|
||||
};
|
||||
|
||||
// Pairs a C string pointer with a 32-bit index.
struct StringLocation
{
    const char* ptr;
    uint32_t idx;
};
|
||||
|
||||
struct SourceLocationHasher
|
||||
{
|
||||
size_t operator()( const SourceLocation* ptr ) const
|
||||
{
|
||||
return charutil::hash( (const char*)ptr, sizeof( SourceLocationBase ) );
|
||||
}
|
||||
};
|
||||
|
||||
struct SourceLocationComparator
|
||||
{
|
||||
bool operator()( const SourceLocation* lhs, const SourceLocation* rhs ) const
|
||||
{
|
||||
return memcmp( lhs, rhs, sizeof( SourceLocationBase ) ) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct ContextSwitch
|
||||
{
|
||||
Vector<ContextSwitchData> v;
|
||||
int64_t runningTime = 0;
|
||||
};
|
||||
|
||||
struct CpuData
|
||||
{
|
||||
Vector<ContextSwitchCpu> cs;
|
||||
};
|
||||
|
||||
// Per-thread CPU statistics; every counter starts at zero.
struct CpuThreadData
{
    int64_t runningTime = 0;
    uint32_t runningRegions = 0;
    uint32_t migrations = 0;
};

// Size of CpuThreadData in bytes, exposed as an enumerator.
enum { CpuThreadDataSize = sizeof( CpuThreadData ) };
|
||||
|
||||
|
||||
struct Parameter
|
||||
{
|
||||
uint32_t idx;
|
||||
StringRef name;
|
||||
bool isBool;
|
||||
int32_t val;
|
||||
};
|
||||
|
||||
|
||||
struct SymbolStats
|
||||
{
|
||||
uint32_t incl, excl;
|
||||
unordered_flat_map<uint32_t, uint32_t> parents;
|
||||
unordered_flat_map<uint32_t, uint32_t> baseParents;
|
||||
};
|
||||
|
||||
enum { SymbolStatsSize = sizeof( SymbolStats ) };
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
22
subprojects/tracy/server/TracyFileHeader.hpp
Normal file
22
subprojects/tracy/server/TracyFileHeader.hpp
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef __TRACYFILEHEADER_HPP__
|
||||
#define __TRACYFILEHEADER_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Four-byte file signatures.
// NOTE(review): judging by the names, one for the native Tracy format and one
// each for LZ4- and Zstd-compressed streams — confirm against the reader.
static const uint8_t TracyHeader[4] = { 't', 'r', 253, 'P' };
static const uint8_t Lz4Header[4] = { 't', 'l', 'Z', 4 };
static const uint8_t ZstdHeader[4] = { 't', 'Z', 's', 't' };
|
||||
|
||||
// Packs three version bytes into one integer: h5 in bits 16-23, h6 in bits
// 8-15, h7 in bits 0-7. Usable in constant expressions.
static constexpr tracy_force_inline int FileVersion( uint8_t h5, uint8_t h6, uint8_t h7 )
{
    return ( int( h5 ) << 16 ) | ( int( h6 ) << 8 ) | int( h7 );
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
18
subprojects/tracy/server/TracyFileMeta.hpp
Normal file
18
subprojects/tracy/server/TracyFileMeta.hpp
Normal file
@@ -0,0 +1,18 @@
|
||||
#ifndef __TRACYFILEMETA_HPP__
|
||||
#define __TRACYFILEMETA_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../public/common/tracy_lz4.hpp"
|
||||
#include "../zstd/zstd.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Size in bytes (64 KiB) of one uncompressed block buffer; the read and write
// stream classes allocate their working buffers with this size.
constexpr size_t FileBufSize = 64 * 1024;
|
||||
// Size of a compressed-output buffer: the larger of the LZ4 and Zstd
// compress-bound macros evaluated for a FileBufSize-byte input.
constexpr size_t FileBoundSize = std::max( LZ4_COMPRESSBOUND( FileBufSize ), ZSTD_COMPRESSBOUND( FileBufSize ) );
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
601
subprojects/tracy/server/TracyFileRead.hpp
Normal file
601
subprojects/tracy/server/TracyFileRead.hpp
Normal file
@@ -0,0 +1,601 @@
|
||||
#ifndef __TRACYFILEREAD_HPP__
|
||||
#define __TRACYFILEREAD_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <atomic>
|
||||
#include <algorithm>
|
||||
#include <condition_variable>
|
||||
#include <stdexcept>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define stat64 _stat64
|
||||
#endif
|
||||
#if defined __APPLE__ || defined __FreeBSD__
|
||||
# define stat64 stat
|
||||
#endif
|
||||
|
||||
#include "TracyFileHeader.hpp"
|
||||
#include "TracyFileMeta.hpp"
|
||||
#include "TracyMmap.hpp"
|
||||
#include "../public/common/TracyYield.hpp"
|
||||
#include "../public/common/tracy_lz4.hpp"
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "../zstd/zstd.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Thrown by FileRead when the file header cannot be read or is not recognised.
struct NotTracyDump : std::exception {};
|
||||
// Thrown by FileRead when the file cannot be stat'ed or memory-mapped.
struct FileReadError : std::exception {};
|
||||
|
||||
class ReadStream
|
||||
{
|
||||
public:
|
||||
ReadStream( uint8_t type )
|
||||
: m_stream( nullptr )
|
||||
, m_streamZstd( nullptr )
|
||||
, m_buf( new char[FileBufSize] )
|
||||
, m_second( new char[FileBufSize] )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case 0:
|
||||
m_stream = LZ4_createStreamDecode();
|
||||
break;
|
||||
case 1:
|
||||
m_streamZstd = ZSTD_createDStream();
|
||||
break;
|
||||
default:
|
||||
assert( false );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
~ReadStream()
|
||||
{
|
||||
delete[] m_buf;
|
||||
delete[] m_second;
|
||||
|
||||
if( m_stream ) LZ4_freeStreamDecode( m_stream );
|
||||
if( m_streamZstd ) ZSTD_freeDStream( m_streamZstd );
|
||||
}
|
||||
|
||||
void Decompress( const char* src, uint32_t size )
|
||||
{
|
||||
std::swap( m_buf, m_second );
|
||||
if( m_stream )
|
||||
{
|
||||
m_size = (size_t)LZ4_decompress_safe_continue( m_stream, src, m_buf, size, FileBufSize );
|
||||
}
|
||||
else
|
||||
{
|
||||
ZSTD_outBuffer out = { m_buf, FileBufSize, 0 };
|
||||
ZSTD_inBuffer in = { src, size, 0 };
|
||||
ZSTD_decompressStream( m_streamZstd, &out, &in );
|
||||
m_size = out.pos;
|
||||
}
|
||||
}
|
||||
|
||||
const char* GetBuffer() const { return m_buf; }
|
||||
size_t GetSize() const { return m_size; }
|
||||
|
||||
private:
|
||||
LZ4_streamDecode_t* m_stream;
|
||||
ZSTD_DStream* m_streamZstd;
|
||||
|
||||
char* m_buf;
|
||||
char* m_second;
|
||||
|
||||
size_t m_size;
|
||||
};
|
||||
|
||||
class FileRead
|
||||
{
|
||||
struct StreamHandle
|
||||
{
|
||||
StreamHandle( uint8_t type ) : stream( type ), outputReady( false ) {}
|
||||
|
||||
ReadStream stream;
|
||||
const char* src;
|
||||
uint32_t size;
|
||||
|
||||
bool inputReady = false;
|
||||
bool exit = false;
|
||||
alignas(64) std::atomic<bool> outputReady;
|
||||
|
||||
std::mutex signalLock;
|
||||
std::condition_variable signal;
|
||||
|
||||
std::thread thread;
|
||||
};
|
||||
|
||||
public:
|
||||
static FileRead* Open( const char* fn )
|
||||
{
|
||||
auto f = fopen( fn, "rb" );
|
||||
return f ? new FileRead( f, fn ) : nullptr;
|
||||
}
|
||||
|
||||
~FileRead()
|
||||
{
|
||||
for( auto& v : m_streams )
|
||||
{
|
||||
std::lock_guard lock( v->signalLock );
|
||||
v->exit = true;
|
||||
v->signal.notify_one();
|
||||
}
|
||||
for( auto& v : m_streams ) v->thread.join();
|
||||
m_streams.clear();
|
||||
if( m_data ) munmap( m_data, m_dataSize );
|
||||
}
|
||||
|
||||
tracy_force_inline void Read( void* ptr, size_t size )
|
||||
{
|
||||
if( size <= FileBufSize - m_offset )
|
||||
{
|
||||
ReadSmall( ptr, size );
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadBig( ptr, size );
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void Skip( size_t size )
|
||||
{
|
||||
if( size <= FileBufSize - m_offset )
|
||||
{
|
||||
m_offset += size;
|
||||
}
|
||||
else
|
||||
{
|
||||
SkipBig( size );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
tracy_force_inline void Read( T& v )
|
||||
{
|
||||
if( sizeof( T ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v, m_buf + m_offset, sizeof( T ) );
|
||||
m_offset += sizeof( T );
|
||||
}
|
||||
else
|
||||
{
|
||||
T tmp;
|
||||
ReadBig( &tmp, sizeof( T ) );
|
||||
memcpy( &v, &tmp, sizeof( T ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U>
|
||||
tracy_force_inline void Read2( T& v0, U& v1 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
m_offset += sizeof( T ) + sizeof( U );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V>
|
||||
tracy_force_inline void Read3( T& v0, U& v1, V& v2 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W>
|
||||
tracy_force_inline void Read4( T& v0, U& v1, V& v2, W& v3 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X>
|
||||
tracy_force_inline void Read5( T& v0, U& v1, V& v2, W& v3, X& v4 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X, class Y>
|
||||
tracy_force_inline void Read6( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X, class Y, class Z>
|
||||
tracy_force_inline void Read7( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X, class Y, class Z, class A>
|
||||
tracy_force_inline void Read8( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X, class Y, class Z, class A, class B>
|
||||
tracy_force_inline void Read9( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7, B& v8 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
memcpy( &v8, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ), sizeof( B ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
memcpy( &v8, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ), sizeof( B ) );
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class U, class V, class W, class X, class Y, class Z, class A, class B, class C>
|
||||
tracy_force_inline void Read10( T& v0, U& v1, V& v2, W& v3, X& v4, Y& v5, Z& v6, A& v7, B& v8, C& v9 )
|
||||
{
|
||||
if( sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) + sizeof( C ) <= FileBufSize - m_offset )
|
||||
{
|
||||
memcpy( &v0, m_buf + m_offset, sizeof( T ) );
|
||||
memcpy( &v1, m_buf + m_offset + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, m_buf + m_offset + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
memcpy( &v8, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ), sizeof( B ) );
|
||||
memcpy( &v9, m_buf + m_offset + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ), sizeof( C ) );
|
||||
m_offset += sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) + sizeof( C );
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp[sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) + sizeof( C )];
|
||||
ReadBig( tmp, sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ) + sizeof( C ) );
|
||||
memcpy( &v0, tmp, sizeof( T ) );
|
||||
memcpy( &v1, tmp + sizeof( T ), sizeof( U ) );
|
||||
memcpy( &v2, tmp + sizeof( T ) + sizeof( U ), sizeof( V ) );
|
||||
memcpy( &v3, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ), sizeof( W ) );
|
||||
memcpy( &v4, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ), sizeof( X ) );
|
||||
memcpy( &v5, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ), sizeof( Y ) );
|
||||
memcpy( &v6, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ), sizeof( Z ) );
|
||||
memcpy( &v7, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ), sizeof( A ) );
|
||||
memcpy( &v8, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ), sizeof( B ) );
|
||||
memcpy( &v9, tmp + sizeof( T ) + sizeof( U ) + sizeof( V ) + sizeof( W ) + sizeof( X ) + sizeof( Y ) + sizeof( Z ) + sizeof( A ) + sizeof( B ), sizeof( C ) );
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& GetFilename() const { return m_filename; }
|
||||
|
||||
private:
|
||||
FileRead( FILE* f, const char* fn )
|
||||
: m_data( nullptr )
|
||||
, m_offset( 0 )
|
||||
, m_streamId( 0 )
|
||||
, m_filename( fn )
|
||||
{
|
||||
char hdr[4];
|
||||
if( fread( hdr, 1, sizeof( hdr ), f ) != sizeof( hdr ) )
|
||||
{
|
||||
fclose( f );
|
||||
throw NotTracyDump();
|
||||
}
|
||||
|
||||
uint8_t streams = 1;
|
||||
uint8_t type;
|
||||
m_dataOffset = sizeof( hdr );
|
||||
|
||||
if( memcmp( hdr, TracyHeader, sizeof( hdr ) ) == 0 )
|
||||
{
|
||||
if( fread( &type, 1, 1, f ) != 1 || type > 1 )
|
||||
{
|
||||
fclose( f );
|
||||
throw NotTracyDump();
|
||||
}
|
||||
if( fread( &streams, 1, 1, f ) != 1 )
|
||||
{
|
||||
fclose( f );
|
||||
throw NotTracyDump();
|
||||
}
|
||||
m_dataOffset += 2;
|
||||
}
|
||||
else if( memcmp( hdr, Lz4Header, sizeof( hdr ) ) == 0 )
|
||||
{
|
||||
type = 0;
|
||||
}
|
||||
else if( memcmp( hdr, ZstdHeader, sizeof( hdr ) ) == 0 )
|
||||
{
|
||||
type = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fclose( f );
|
||||
throw NotTracyDump();
|
||||
}
|
||||
|
||||
struct stat64 buf;
|
||||
if( stat64( fn, &buf ) == 0 )
|
||||
{
|
||||
m_dataSize = buf.st_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
fclose( f );
|
||||
throw FileReadError();
|
||||
}
|
||||
|
||||
m_data = (char*)mmap( nullptr, m_dataSize, PROT_READ, MAP_SHARED, fileno( f ), 0 );
|
||||
fclose( f );
|
||||
if( !m_data )
|
||||
{
|
||||
throw FileReadError();
|
||||
}
|
||||
|
||||
for( int i=0; i<(int)streams; i++ )
|
||||
{
|
||||
if( m_dataOffset == m_dataSize ) break;
|
||||
|
||||
const auto sz = ReadBlockSize();
|
||||
auto uptr = std::make_unique<StreamHandle>( type );
|
||||
uptr->src = m_data + m_dataOffset;
|
||||
uptr->size = sz;
|
||||
uptr->inputReady = true;
|
||||
uptr->thread = std::thread( [ptr = uptr.get()] { Worker( ptr ); } );
|
||||
m_streams.emplace_back( std::move( uptr ) );
|
||||
m_dataOffset += sz;
|
||||
}
|
||||
|
||||
GetNextDataBlock();
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t ReadBlockSize()
|
||||
{
|
||||
uint32_t sz;
|
||||
memcpy( &sz, m_data + m_dataOffset, sizeof( sz ) );
|
||||
m_dataOffset += sizeof( sz );
|
||||
return sz;
|
||||
}
|
||||
|
||||
static void Worker( StreamHandle* hnd )
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
std::unique_lock lock( hnd->signalLock );
|
||||
hnd->signal.wait( lock, [&] { return hnd->inputReady || hnd->exit; } );
|
||||
if( hnd->exit ) return;
|
||||
lock.unlock();
|
||||
|
||||
hnd->stream.Decompress( hnd->src, hnd->size );
|
||||
hnd->inputReady = false;
|
||||
hnd->outputReady.store( true, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void ReadSmall( void* ptr, size_t size )
|
||||
{
|
||||
memcpy( ptr, m_buf + m_offset, size );
|
||||
m_offset += size;
|
||||
}
|
||||
|
||||
void ReadBig( void* ptr, size_t size )
|
||||
{
|
||||
assert( size > 0 );
|
||||
auto dst = (char*)ptr;
|
||||
do
|
||||
{
|
||||
size_t sz;
|
||||
if( m_offset == FileBufSize )
|
||||
{
|
||||
sz = std::min<size_t>( size, FileBufSize );
|
||||
GetNextDataBlock();
|
||||
memcpy( dst, m_buf, sz );
|
||||
m_offset = sz;
|
||||
}
|
||||
else
|
||||
{
|
||||
sz = std::min( size, FileBufSize - m_offset );
|
||||
memcpy( dst, m_buf + m_offset, sz );
|
||||
m_offset += sz;
|
||||
}
|
||||
|
||||
dst += sz;
|
||||
size -= sz;
|
||||
}
|
||||
while( size > 0 );
|
||||
}
|
||||
|
||||
void SkipBig( size_t size )
|
||||
{
|
||||
while( size > 0 )
|
||||
{
|
||||
if( m_offset == FileBufSize ) GetNextDataBlock();
|
||||
const auto sz = std::min( size, FileBufSize - m_offset );
|
||||
m_offset += sz;
|
||||
size -= sz;
|
||||
}
|
||||
}
|
||||
|
||||
void GetNextDataBlock()
|
||||
{
|
||||
auto& hnd = *m_streams[m_streamId];
|
||||
while( hnd.outputReady.load( std::memory_order_acquire ) == false ) { YieldThread(); }
|
||||
hnd.outputReady.store( false, std::memory_order_relaxed );
|
||||
m_buf = hnd.stream.GetBuffer();
|
||||
m_offset = 0;
|
||||
|
||||
if( m_dataOffset < m_dataSize )
|
||||
{
|
||||
const auto sz = ReadBlockSize();
|
||||
std::unique_lock lock( hnd.signalLock );
|
||||
hnd.src = m_data + m_dataOffset;
|
||||
hnd.size = sz;
|
||||
hnd.inputReady = true;
|
||||
hnd.signal.notify_one();
|
||||
lock.unlock();
|
||||
m_dataOffset += sz;
|
||||
}
|
||||
|
||||
m_streamId = ( m_streamId + 1 ) % m_streams.size();
|
||||
}
|
||||
|
||||
char* m_data;
|
||||
const char* m_buf;
|
||||
uint64_t m_dataSize;
|
||||
uint64_t m_dataOffset;
|
||||
size_t m_offset;
|
||||
int m_streamId;
|
||||
|
||||
std::string m_filename;
|
||||
|
||||
std::vector<std::unique_ptr<StreamHandle>> m_streams;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
303
subprojects/tracy/server/TracyFileWrite.hpp
Normal file
303
subprojects/tracy/server/TracyFileWrite.hpp
Normal file
@@ -0,0 +1,303 @@
|
||||
#ifndef __TRACYFILEWRITE_HPP__
|
||||
#define __TRACYFILEWRITE_HPP__
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning( disable: 4267 ) // conversion from don't care to whatever, possible loss of data
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "TracyFileHeader.hpp"
|
||||
#include "TracyFileMeta.hpp"
|
||||
#include "../public/common/tracy_lz4.hpp"
|
||||
#include "../public/common/tracy_lz4hc.hpp"
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "../zstd/zstd.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Compression back-end selected when a trace file is written. WriteStream
// maps the values as noted on each enumerator.
enum class FileCompression
{
    Fast = 0,       // LZ4 stream
    Slow = 1,       // LZ4 HC stream, as created
    Extreme = 2,    // LZ4 HC stream reset to LZ4HC_CLEVEL_MAX
    Zstd = 3        // Zstd stream using the caller-supplied level
};
|
||||
|
||||
class WriteStream
|
||||
{
|
||||
public:
|
||||
WriteStream( FileCompression comp, int level )
|
||||
: m_stream( nullptr )
|
||||
, m_streamHC( nullptr )
|
||||
, m_streamZstd( nullptr )
|
||||
, m_buf( new char[FileBufSize] )
|
||||
, m_second( new char[FileBufSize] )
|
||||
, m_compressed( new char[FileBoundSize] )
|
||||
{
|
||||
switch( comp )
|
||||
{
|
||||
case FileCompression::Fast:
|
||||
m_stream = LZ4_createStream();
|
||||
break;
|
||||
case FileCompression::Slow:
|
||||
m_streamHC = LZ4_createStreamHC();
|
||||
break;
|
||||
case FileCompression::Extreme:
|
||||
m_streamHC = LZ4_createStreamHC();
|
||||
LZ4_resetStreamHC( m_streamHC, LZ4HC_CLEVEL_MAX );
|
||||
break;
|
||||
case FileCompression::Zstd:
|
||||
m_streamZstd = ZSTD_createCStream();
|
||||
ZSTD_CCtx_setParameter( m_streamZstd, ZSTD_c_compressionLevel, level );
|
||||
ZSTD_CCtx_setParameter( m_streamZstd, ZSTD_c_contentSizeFlag, 0 );
|
||||
break;
|
||||
default:
|
||||
assert( false );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
~WriteStream()
|
||||
{
|
||||
delete[] m_buf;
|
||||
delete[] m_second;
|
||||
delete[] m_compressed;
|
||||
|
||||
if( m_stream ) LZ4_freeStream( m_stream );
|
||||
if( m_streamHC ) LZ4_freeStreamHC( m_streamHC );
|
||||
if( m_streamZstd ) ZSTD_freeCStream( m_streamZstd );
|
||||
}
|
||||
|
||||
char* GetInputBuffer() { return m_buf; }
|
||||
const char* GetCompressedData() const { return m_compressed; }
|
||||
uint32_t GetSize() const { return m_size; }
|
||||
|
||||
void Compress( uint32_t sz )
|
||||
{
|
||||
if( m_stream )
|
||||
{
|
||||
m_size = LZ4_compress_fast_continue( m_stream, m_buf, m_compressed, sz, FileBoundSize, 1 );
|
||||
}
|
||||
else if( m_streamZstd )
|
||||
{
|
||||
ZSTD_outBuffer out = { m_compressed, FileBoundSize, 0 };
|
||||
ZSTD_inBuffer in = { m_buf, sz, 0 };
|
||||
const auto ret = ZSTD_compressStream2( m_streamZstd, &out, &in, ZSTD_e_flush );
|
||||
assert( ret == 0 );
|
||||
m_size = out.pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_size = LZ4_compress_HC_continue( m_streamHC, m_buf, m_compressed, sz, FileBoundSize );
|
||||
}
|
||||
|
||||
std::swap( m_buf, m_second );
|
||||
}
|
||||
|
||||
private:
|
||||
LZ4_stream_t* m_stream;
|
||||
LZ4_streamHC_t* m_streamHC;
|
||||
ZSTD_CStream* m_streamZstd;
|
||||
|
||||
char* m_buf;
|
||||
char* m_second;
|
||||
char* m_compressed;
|
||||
uint32_t m_size;
|
||||
};
|
||||
|
||||
class FileWrite
|
||||
{
|
||||
struct StreamHandle
|
||||
{
|
||||
StreamHandle( FileCompression comp, int level ) : stream( comp, level ) {}
|
||||
|
||||
WriteStream stream;
|
||||
uint32_t size;
|
||||
|
||||
bool inputReady = false;
|
||||
bool outputReady = false;
|
||||
bool exit = false;
|
||||
|
||||
std::mutex signalLock;
|
||||
std::condition_variable signal;
|
||||
|
||||
std::thread thread;
|
||||
};
|
||||
|
||||
public:
|
||||
static FileWrite* Open( const char* fn, FileCompression comp = FileCompression::Fast, int level = 1, int streams = -1 )
|
||||
{
|
||||
auto f = fopen( fn, "wb" );
|
||||
if( !f ) return nullptr;
|
||||
if( streams <= 0 ) streams = std::max<int>( 1, std::thread::hardware_concurrency() );
|
||||
if( streams > 255 ) streams = 255;
|
||||
return new FileWrite( f, comp, level, streams );
|
||||
}
|
||||
|
||||
~FileWrite()
|
||||
{
|
||||
Finish();
|
||||
fclose( m_file );
|
||||
}
|
||||
|
||||
void Finish()
|
||||
{
|
||||
if( m_offset > 0 ) WriteBlock();
|
||||
while( m_streamPending > 0 ) ProcessPending();
|
||||
for( auto& v : m_streams )
|
||||
{
|
||||
std::lock_guard lock( v->signalLock );
|
||||
v->exit = true;
|
||||
v->signal.notify_one();
|
||||
}
|
||||
for( auto& v : m_streams ) v->thread.join();
|
||||
m_streams.clear();
|
||||
}
|
||||
|
||||
tracy_force_inline void Write( const void* ptr, size_t size )
|
||||
{
|
||||
if( m_offset + size <= FileBufSize )
|
||||
{
|
||||
WriteSmall( ptr, size );
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteBig( ptr, size );
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> GetCompressionStatistics() const { return std::make_pair( m_srcBytes, m_dstBytes ); }
|
||||
|
||||
private:
|
||||
FileWrite( FILE* f, FileCompression comp, int level, int streams )
|
||||
: m_offset( 0 )
|
||||
, m_file( f )
|
||||
, m_srcBytes( 0 )
|
||||
, m_dstBytes( 0 )
|
||||
{
|
||||
assert( streams > 0 );
|
||||
assert( streams < 256 );
|
||||
|
||||
fwrite( TracyHeader, 1, sizeof( TracyHeader ), m_file );
|
||||
uint8_t u8 = comp == FileCompression::Zstd ? 1 : 0;
|
||||
fwrite( &u8, 1, 1, m_file );
|
||||
u8 = streams;
|
||||
fwrite( &u8, 1, 1, m_file );
|
||||
|
||||
m_streams.reserve( streams );
|
||||
for( int i=0; i<streams; i++ )
|
||||
{
|
||||
auto uptr = std::make_unique<StreamHandle>( comp, level );
|
||||
uptr->thread = std::thread( [ptr = uptr.get()]{ Worker( ptr ); } );
|
||||
m_streams.emplace_back( std::move( uptr ) );
|
||||
}
|
||||
|
||||
m_buf = m_streams[m_streamId]->stream.GetInputBuffer();
|
||||
}
|
||||
|
||||
tracy_force_inline void WriteSmall( const void* ptr, size_t size )
|
||||
{
|
||||
memcpy( m_buf + m_offset, ptr, size );
|
||||
m_offset += size;
|
||||
}
|
||||
|
||||
void WriteBig( const void* ptr, size_t size )
|
||||
{
|
||||
auto src = (const char*)ptr;
|
||||
while( size > 0 )
|
||||
{
|
||||
const auto sz = std::min( size, FileBufSize - m_offset );
|
||||
memcpy( m_buf + m_offset, src, sz );
|
||||
m_offset += sz;
|
||||
src += sz;
|
||||
size -= sz;
|
||||
|
||||
if( m_offset == FileBufSize )
|
||||
{
|
||||
WriteBlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBlock()
|
||||
{
|
||||
m_srcBytes += m_offset;
|
||||
|
||||
auto& hnd = *m_streams[m_streamId];
|
||||
assert( hnd.stream.GetInputBuffer() == m_buf );
|
||||
|
||||
std::unique_lock lock( hnd.signalLock );
|
||||
hnd.inputReady = true;
|
||||
hnd.size = m_offset;
|
||||
hnd.signal.notify_one();
|
||||
lock.unlock();
|
||||
|
||||
m_streamPending++;
|
||||
m_streamId = ( m_streamId + 1 ) % m_streams.size();
|
||||
if( m_streamPending == m_streams.size() ) ProcessPending();
|
||||
|
||||
m_offset = 0;
|
||||
m_buf = m_streams[m_streamId]->stream.GetInputBuffer();
|
||||
}
|
||||
|
||||
void ProcessPending()
|
||||
{
|
||||
assert( m_streamPending > 0 );
|
||||
int id = ( m_streamId + m_streams.size() - m_streamPending ) % m_streams.size();
|
||||
m_streamPending--;
|
||||
auto& hnd = *m_streams[id];
|
||||
|
||||
std::unique_lock lock( hnd.signalLock );
|
||||
hnd.signal.wait( lock, [&hnd]{ return hnd.outputReady; } );
|
||||
lock.unlock();
|
||||
|
||||
hnd.outputReady = false;
|
||||
const uint32_t size = hnd.stream.GetSize();
|
||||
m_dstBytes += size;
|
||||
fwrite( &size, 1, sizeof( size ), m_file );
|
||||
fwrite( hnd.stream.GetCompressedData(), 1, size, m_file );
|
||||
}
|
||||
|
||||
// Thread body for one stream.  Sleeps until input is flagged ready (or exit is
// requested), compresses hnd->size bytes with the lock released, then flags the
// output as ready and signals the condition variable.
static void Worker( StreamHandle* hnd )
{
    std::unique_lock lock( hnd->signalLock );
    while( true )
    {
        hnd->signal.wait( lock, [hnd]{ return hnd->inputReady || hnd->exit; } );
        if( hnd->exit ) break;
        lock.unlock();

        // Compression runs without the lock held.
        hnd->stream.Compress( hnd->size );
        hnd->inputReady = false;

        lock.lock();
        hnd->outputReady = true;
        hnd->signal.notify_one();
    }
}
|
||||
|
||||
char* m_buf;
|
||||
size_t m_offset;
|
||||
|
||||
int m_streamId = 0;
|
||||
int m_streamPending = 0;
|
||||
std::vector<std::unique_ptr<StreamHandle>> m_streams;
|
||||
FILE* m_file;
|
||||
|
||||
size_t m_srcBytes;
|
||||
size_t m_dstBytes;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
8
subprojects/tracy/server/TracyMemory.cpp
Normal file
8
subprojects/tracy/server/TracyMemory.cpp
Normal file
@@ -0,0 +1,8 @@
|
||||
#include "TracyMemory.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Definition of the counter declared extern in TracyMemory.hpp; starts at zero.
std::atomic<int64_t> memUsage( 0 );
|
||||
|
||||
}
|
||||
14
subprojects/tracy/server/TracyMemory.hpp
Normal file
14
subprojects/tracy/server/TracyMemory.hpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __TRACYMEMORY_HPP__
|
||||
#define __TRACYMEMORY_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Global byte counter; Slab adds to it when it allocates blocks and subtracts when it frees them.
extern std::atomic<int64_t> memUsage;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
38
subprojects/tracy/server/TracyMmap.cpp
Normal file
38
subprojects/tracy/server/TracyMmap.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "TracyMmap.hpp"
|
||||
|
||||
#if defined _WIN32
|
||||
# include <io.h>
|
||||
# include <windows.h>
|
||||
|
||||
void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset )
|
||||
{
|
||||
HANDLE hnd;
|
||||
void* map = nullptr;
|
||||
|
||||
switch( prot )
|
||||
{
|
||||
case PROT_READ:
|
||||
if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READONLY, 0, 0, nullptr ) )
|
||||
{
|
||||
map = MapViewOfFile( hnd, FILE_MAP_READ, 0, 0, length );
|
||||
CloseHandle( hnd );
|
||||
}
|
||||
break;
|
||||
case PROT_WRITE:
|
||||
if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READWRITE, 0, 0, nullptr ) )
|
||||
{
|
||||
map = MapViewOfFile( hnd, FILE_MAP_WRITE, 0, 0, length );
|
||||
CloseHandle( hnd );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return map ? (char*)map + offset : (void*)-1;
|
||||
}
|
||||
|
||||
int munmap( void* addr, size_t length )
|
||||
{
|
||||
return UnmapViewOfFile( addr ) != 0 ? 0 : -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
19
subprojects/tracy/server/TracyMmap.hpp
Normal file
19
subprojects/tracy/server/TracyMmap.hpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef __TRACYMMAP_HPP__
|
||||
#define __TRACYMMAP_HPP__
|
||||
|
||||
#if !defined _WIN32
|
||||
# include <sys/mman.h>
|
||||
#else
|
||||
# include <string.h>
|
||||
# include <sys/types.h>
|
||||
|
||||
# define PROT_READ 1
|
||||
# define PROT_WRITE 2
|
||||
# define MAP_SHARED 0
|
||||
|
||||
void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset );
|
||||
int munmap( void* addr, size_t length );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
40
subprojects/tracy/server/TracyPopcnt.hpp
Normal file
40
subprojects/tracy/server/TracyPopcnt.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef __TRACYPOPCNT_HPP__
|
||||
#define __TRACYPOPCNT_HPP__
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined _WIN64
|
||||
# include <intrin.h>
|
||||
# define TracyCountBits __popcnt64
|
||||
# define TracyLzcnt __lzcnt64
|
||||
#elif defined __GNUC__ || defined __clang__
|
||||
// Returns the number of set bits in `i`, using the compiler builtin.
static inline uint64_t TracyCountBits( uint64_t i )
{
    const int bits = __builtin_popcountll( i );
    return uint64_t( bits );
}
|
||||
// Returns the number of leading zero bits in `i`.
// __builtin_clzll() has an undefined result for 0, so that input is handled
// explicitly and yields 64, the same value the portable fallback in this
// header computes.
static inline uint64_t TracyLzcnt( uint64_t i )
{
    if( i == 0 ) return 64;
    return uint64_t( __builtin_clzll( i ) );
}
|
||||
#else
|
||||
// Portable population count: sums bits in 2-bit, 4-bit and 8-bit groups, then
// uses a multiply to add the eight byte counts into the top byte.
static inline uint64_t TracyCountBits( uint64_t i )
{
    const uint64_t pairs = i - ( ( i >> 1 ) & 0x5555555555555555 );
    const uint64_t nibbles = ( pairs & 0x3333333333333333 ) + ( ( pairs >> 2 ) & 0x3333333333333333 );
    const uint64_t bytes = ( nibbles + ( nibbles >> 4 ) ) & 0x0F0F0F0F0F0F0F0F;
    return ( bytes * 0x0101010101010101 ) >> 56;
}
|
||||
static inline uint64_t TracyLzcnt( uint64_t i )
|
||||
{
|
||||
i |= i >> 1;
|
||||
i |= i >> 2;
|
||||
i |= i >> 4;
|
||||
i |= i >> 8;
|
||||
i |= i >> 16;
|
||||
i |= i >> 32;
|
||||
return 64 - TracyCountBits( i );
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
458
subprojects/tracy/server/TracyPrint.cpp
Normal file
458
subprojects/tracy/server/TracyPrint.cpp
Normal file
@@ -0,0 +1,458 @@
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning( disable: 4244 ) // conversion from don't care to whatever, possible loss of data
|
||||
#endif
|
||||
#ifdef __MINGW32__
|
||||
# define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> // llabs()
|
||||
#include <string.h>
|
||||
|
||||
#include "TracyPrint.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Two-character decimal text for every value 0..99; the text for n starts at offset n*2.
static const char* IntTable100 =
    "00010203040506070809"
    "10111213141516171819"
    "20212223242526272829"
    "30313233343536373839"
    "40414243444546474849"
    "50515253545556575859"
    "60616263646566676869"
    "70717273747576777879"
    "80818283848586878889"
    "90919293949596979899";
|
||||
|
||||
// Writes v (expected to be below 100) as one or two decimal digits without
// padding, advancing `buf` past the written characters.  No terminator is added.
static inline void PrintTinyInt( char*& buf, uint64_t v )
{
    assert( v < 100 );
    const uint64_t tens = v / 10;
    if( tens != 0 ) *buf++ = '0' + tens;
    *buf++ = '0' + v % 10;
}
|
||||
|
||||
// Writes v (expected to be below 100) as exactly two decimal digits, padding
// with a leading '0', and advances `buf` by two.  No terminator is added.
static inline void PrintTinyInt0( char*& buf, uint64_t v )
{
    assert( v < 100 );
    *buf++ = ( v >= 10 ) ? char( '0' + v/10 ) : '0';
    *buf++ = '0' + v%10;
}
|
||||
|
||||
static inline void PrintSmallInt( char*& buf, uint64_t v )
|
||||
{
|
||||
assert( v < 1000 );
|
||||
if( v >= 100 )
|
||||
{
|
||||
memcpy( buf, IntTable100 + v/10*2, 2 );
|
||||
buf += 2;
|
||||
}
|
||||
else if( v >= 10 )
|
||||
{
|
||||
*buf++ = '0' + v/10;
|
||||
}
|
||||
*buf++ = '0' + v%10;
|
||||
}
|
||||
|
||||
static inline void PrintSmallInt0( char*& buf, uint64_t v )
|
||||
{
|
||||
assert( v < 1000 );
|
||||
if( v >= 100 )
|
||||
{
|
||||
memcpy( buf, IntTable100 + v/10*2, 2 );
|
||||
buf += 2;
|
||||
}
|
||||
else if( v >= 10 )
|
||||
{
|
||||
*buf++ = '0';
|
||||
*buf++ = '0' + v/10;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "00", 2 );
|
||||
buf += 2;
|
||||
}
|
||||
*buf++ = '0' + v%10;
|
||||
}
|
||||
|
||||
static inline void PrintFrac00( char*& buf, uint64_t v )
|
||||
{
|
||||
*buf++ = '.';
|
||||
v += 5;
|
||||
if( v/10%10 == 0 )
|
||||
{
|
||||
*buf++ = '0' + v/100;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, IntTable100 + v/10*2, 2 );
|
||||
buf += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes '.' followed by a single digit: (v+50)/100.  Advances `buf` by two.
static inline void PrintFrac0( char*& buf, uint64_t v )
{
    buf[0] = '.';
    buf[1] = '0' + (v+50)/100;
    buf += 2;
}
|
||||
|
||||
static inline void PrintSmallIntFrac( char*& buf, uint64_t v )
|
||||
{
|
||||
uint64_t in = v / 1000;
|
||||
uint64_t fr = v % 1000;
|
||||
if( fr >= 995 )
|
||||
{
|
||||
if( in < 999 )
|
||||
{
|
||||
PrintSmallInt( buf, in+1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "1000", 4 );
|
||||
buf += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PrintSmallInt( buf, in );
|
||||
if( fr > 5 )
|
||||
{
|
||||
PrintFrac00( buf, fr );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PrintSecondsFrac( char*& buf, uint64_t v )
|
||||
{
|
||||
uint64_t in = v / 1000;
|
||||
uint64_t fr = v % 1000;
|
||||
if( fr >= 950 )
|
||||
{
|
||||
PrintTinyInt0( buf, in+1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
PrintTinyInt0( buf, in );
|
||||
if( fr > 50 )
|
||||
{
|
||||
PrintFrac0( buf, fr );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char* TimeToString( int64_t _ns )
|
||||
{
|
||||
enum { Pool = 8 };
|
||||
static char bufpool[Pool][64];
|
||||
static int bufsel = 0;
|
||||
char* buf = bufpool[bufsel];
|
||||
char* bufstart = buf;
|
||||
bufsel = ( bufsel + 1 ) % Pool;
|
||||
|
||||
uint64_t ns;
|
||||
if( _ns < 0 )
|
||||
{
|
||||
*buf = '-';
|
||||
buf++;
|
||||
ns = -_ns;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns = _ns;
|
||||
}
|
||||
|
||||
if( ns < 1000 )
|
||||
{
|
||||
PrintSmallInt( buf, ns );
|
||||
memcpy( buf, " ns", 4 );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 )
|
||||
{
|
||||
PrintSmallIntFrac( buf, ns );
|
||||
memcpy( buf, " \xce\xbcs", 5 );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 )
|
||||
{
|
||||
PrintSmallIntFrac( buf, ns / 1000 );
|
||||
memcpy( buf, " ms", 4 );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 * 60 )
|
||||
{
|
||||
PrintSmallIntFrac( buf, ns / ( 1000ll * 1000 ) );
|
||||
memcpy( buf, " s", 3 );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 * 60 * 60 )
|
||||
{
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
|
||||
const auto s = int64_t( ns - m * ( 1000ll * 1000 * 1000 * 60 ) ) / ( 1000ll * 1000 );
|
||||
PrintTinyInt( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintSecondsFrac( buf, s );
|
||||
*buf++ = '\0';
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 * 60 * 60 * 24 )
|
||||
{
|
||||
const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) );
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - h * 60 );
|
||||
const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - h * ( 60 * 60 ) - m * 60 );
|
||||
PrintTinyInt( buf, h );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, s );
|
||||
*buf++ = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto d = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 * 24 ) );
|
||||
const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) - d * 24 );
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - d * ( 60 * 24 ) - h * 60 );
|
||||
const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - d * ( 60 * 60 * 24 ) - h * ( 60 * 60 ) - m * 60 );
|
||||
assert( d < 100 );
|
||||
PrintTinyInt( buf, d );
|
||||
*buf++ = 'd';
|
||||
PrintTinyInt0( buf, h );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, s );
|
||||
*buf++ = '\0';
|
||||
}
|
||||
return bufstart;
|
||||
}
|
||||
|
||||
const char* TimeToStringExact( int64_t _ns )
|
||||
{
|
||||
enum { Pool = 8 };
|
||||
static char bufpool[Pool][64];
|
||||
static int bufsel = 0;
|
||||
char* buf = bufpool[bufsel];
|
||||
char* bufstart = buf;
|
||||
bufsel = ( bufsel + 1 ) % Pool;
|
||||
|
||||
uint64_t ns;
|
||||
if( _ns < 0 )
|
||||
{
|
||||
*buf = '-';
|
||||
buf++;
|
||||
ns = -_ns;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns = _ns;
|
||||
}
|
||||
|
||||
const char* numStart = buf;
|
||||
|
||||
if( ns >= 1000ll * 1000 * 1000 * 60 * 60 * 24 )
|
||||
{
|
||||
const auto d = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 * 24 ) );
|
||||
const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) - d * 24 );
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - d * ( 60 * 24 ) - h * 60 );
|
||||
const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - d * ( 60 * 60 * 24 ) - h * ( 60 * 60 ) - m * 60 );
|
||||
if( d < 100 )
|
||||
{
|
||||
PrintTinyInt( buf, d );
|
||||
*buf++ = 'd';
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "100+d", 5 );
|
||||
buf += 5;
|
||||
}
|
||||
PrintTinyInt0( buf, h );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, s );
|
||||
ns %= 1000ll * 1000 * 1000;
|
||||
}
|
||||
else if( ns >= 1000ll * 1000 * 1000 * 60 * 60 )
|
||||
{
|
||||
const auto h = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 * 60 ) );
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) - h * 60 );
|
||||
const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - h * ( 60 * 60 ) - m * 60 );
|
||||
PrintTinyInt( buf, h );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, s );
|
||||
ns %= 1000ll * 1000 * 1000;
|
||||
}
|
||||
else if( ns >= 1000ll * 1000 * 1000 * 60 )
|
||||
{
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
|
||||
const auto s = int64_t( ns / ( 1000ll * 1000 * 1000 ) - m * 60 );
|
||||
PrintTinyInt( buf, m );
|
||||
*buf++ = ':';
|
||||
PrintTinyInt0( buf, s );
|
||||
ns %= 1000ll * 1000 * 1000;
|
||||
}
|
||||
else if( ns >= 1000ll * 1000 * 1000 )
|
||||
{
|
||||
PrintTinyInt( buf, int64_t( ns / ( 1000ll * 1000 * 1000 ) ) );
|
||||
*buf++ = 's';
|
||||
ns %= 1000ll * 1000 * 1000;
|
||||
}
|
||||
|
||||
if( ns > 0 )
|
||||
{
|
||||
if( buf != numStart ) *buf++ = ' ';
|
||||
if( ns >= 1000ll * 1000 )
|
||||
{
|
||||
PrintSmallInt0( buf, int64_t( ns / ( 1000ll * 1000 ) ) );
|
||||
*buf++ = ',';
|
||||
ns %= 1000ll * 1000;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "000,", 4 );
|
||||
buf += 4;
|
||||
}
|
||||
if( ns >= 1000ll )
|
||||
{
|
||||
PrintSmallInt0( buf, int64_t( ns / 1000ll ) );
|
||||
*buf++ = ',';
|
||||
ns %= 1000ll;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "000,", 4 );
|
||||
buf += 4;
|
||||
}
|
||||
PrintSmallInt0( buf, ns );
|
||||
*buf++ = 'n';
|
||||
*buf++ = 's';
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( buf, "000,000,000ns", 13 );
|
||||
buf += 13;
|
||||
}
|
||||
|
||||
*buf++ = '\0';
|
||||
|
||||
return bufstart;
|
||||
}
|
||||
|
||||
const char* MemSizeToString( int64_t val )
|
||||
{
|
||||
enum { Pool = 8 };
|
||||
static char bufpool[Pool][64];
|
||||
static int bufsel = 0;
|
||||
char* buf = bufpool[bufsel];
|
||||
bufsel = ( bufsel + 1 ) % Pool;
|
||||
|
||||
const auto aval = llabs( val );
|
||||
|
||||
if( aval < 10000ll )
|
||||
{
|
||||
sprintf( buf, "%" PRIi64 " bytes", val );
|
||||
return buf;
|
||||
}
|
||||
|
||||
enum class Unit
|
||||
{
|
||||
Kilobyte,
|
||||
Megabyte,
|
||||
Gigabyte,
|
||||
Terabyte
|
||||
};
|
||||
Unit unit;
|
||||
|
||||
char* ptr;
|
||||
if( aval < 10000ll * 1024 )
|
||||
{
|
||||
ptr = PrintFloat( buf, buf+64, val / 1024., 2 );
|
||||
unit = Unit::Kilobyte;
|
||||
}
|
||||
else if( aval < 10000ll * 1024 * 1024 )
|
||||
{
|
||||
ptr = PrintFloat( buf, buf+64, val / ( 1024. * 1024 ), 2 );
|
||||
unit = Unit::Megabyte;
|
||||
}
|
||||
else if( aval < 10000ll * 1024 * 1024 * 1024 )
|
||||
{
|
||||
ptr = PrintFloat( buf, buf+64, val / ( 1024. * 1024 * 1024 ), 2 );
|
||||
unit = Unit::Gigabyte;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = PrintFloat( buf, buf+64, val / ( 1024. * 1024 * 1024 * 1024 ), 2 );
|
||||
unit = Unit::Terabyte;
|
||||
}
|
||||
|
||||
ptr--;
|
||||
while( ptr >= buf && *ptr == '0' ) ptr--;
|
||||
if( *ptr != '.' ) ptr++;
|
||||
|
||||
*ptr++ = ' ';
|
||||
switch( unit )
|
||||
{
|
||||
case Unit::Kilobyte:
|
||||
*ptr++ = 'K';
|
||||
break;
|
||||
case Unit::Megabyte:
|
||||
*ptr++ = 'M';
|
||||
break;
|
||||
case Unit::Gigabyte:
|
||||
*ptr++ = 'G';
|
||||
break;
|
||||
case Unit::Terabyte:
|
||||
*ptr++ = 'T';
|
||||
break;
|
||||
default:
|
||||
assert( false );
|
||||
break;
|
||||
}
|
||||
*ptr++ = 'B';
|
||||
*ptr++ = '\0';
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Formats a source location as "<fn>:<line>".
//
// When `line` is 0 the file name pointer is returned unchanged.  Otherwise the
// text is written to one of 8 static buffers used in rotation, so the result
// is overwritten after 8 further formatting calls.  Output longer than the
// buffer is truncated rather than written past its end.
const char* LocationToString( const char* fn, uint32_t line )
{
    if( line == 0 ) return fn;

    enum { Pool = 8, BufSize = 4096 };
    static char bufpool[Pool][BufSize];
    static int bufsel = 0;
    char* buf = bufpool[bufsel];
    bufsel = ( bufsel + 1 ) % Pool;

    // PRIu32 matches the unsigned 32-bit argument; "%i" would print large
    // line numbers as negative values.
    snprintf( buf, BufSize, "%s:%" PRIu32, fn, line );
    return buf;
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Returns the next of 8 static 64-byte buffers, cycling through them in order.
// A buffer handed out here is reused after 8 further calls.
char* RealToStringGetBuffer()
{
    enum { Pool = 8 };
    static char bufpool[Pool][64];
    static int bufsel = 0;
    const int current = bufsel;
    bufsel = ( current + 1 ) % Pool;
    return bufpool[current];
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
152
subprojects/tracy/server/TracyPrint.hpp
Normal file
152
subprojects/tracy/server/TracyPrint.hpp
Normal file
@@ -0,0 +1,152 @@
|
||||
#ifndef __TRACYPRINT_HPP__
|
||||
#define __TRACYPRINT_HPP__
|
||||
|
||||
#if ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L
|
||||
# if __has_include(<charconv>) && __has_include(<type_traits>)
|
||||
# include <charconv>
|
||||
# include <type_traits>
|
||||
# else
|
||||
# define NO_CHARCONV
|
||||
# endif
|
||||
#else
|
||||
# define NO_CHARCONV
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER && _MSC_VER < 1924
|
||||
# define NO_CHARCONV
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define NO_CHARCONV
|
||||
#endif
|
||||
|
||||
#ifdef NO_CHARCONV
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
char* RealToStringGetBuffer();
|
||||
|
||||
// Post-processes a NUL-terminated number in place: inserts ',' between groups
// of three digits in the part before the '.', then removes trailing zeros (and
// a then-trailing '.') from the fractional part.
//
// `ptr` is the start of the text and `end` points at its terminating NUL.
// The buffer must have room for the inserted separators.
static tracy_force_inline void RealToStringFloating( char* ptr, char* end )
{
    if( *ptr == '-' ) ptr++;
    char* const digits = ptr;

    // Advance to the '.' (or to the terminator when there is none).
    ptr += strcspn( ptr, "." );
    // Bytes from here through the terminator; this tail moves with each insertion.
    auto tail = end - ptr + 1;

    while( ptr - digits > 3 )
    {
        ptr -= 3;
        memmove( ptr+1, ptr, tail+3 );
        *ptr = ',';
        tail += 4;
    }

    ptr += strcspn( ptr, "." );
    if( *ptr == '\0' ) return;

    // Walk back from the last character over any trailing zeros.
    ptr += strlen( ptr );
    ptr--;
    while( *ptr == '0' ) ptr--;
    if( *ptr != '.' && *ptr != ',' ) ptr++;
    *ptr = '\0';
}
|
||||
|
||||
// Inserts ',' between groups of three digits of a NUL-terminated integer, in
// place.  `buf` is the start of the text (a leading '-' is skipped) and `end`
// points at its terminating NUL.  The buffer must have room for the separators.
static tracy_force_inline void RealToStringInteger( char* buf, char* end )
{
    char* const first = ( *buf == '-' ) ? buf + 1 : buf;
    // Bytes from the insertion point through the terminator, minus the three
    // digits just stepped over.
    int tail = 1;
    for( char* pos = end; pos - first > 3; tail += 4 )
    {
        pos -= 3;
        memmove( pos+1, pos, tail+3 );
        *pos = ',';
    }
}
|
||||
|
||||
}
|
||||
|
||||
// Writes `value` in fixed notation with `precision` decimals into [begin, end)
// and returns a pointer just past the last character written.
//
// With <charconv> available this is std::to_chars(); no terminator is written.
// Otherwise snprintf() is used, bounded by `end`: the text is NUL-terminated,
// the returned pointer addresses that terminator, and output that does not fit
// is truncated instead of being written past `end`.
template<typename T>
static inline char* PrintFloat( char* begin, char* end, T value, int precision )
{
#ifndef NO_CHARCONV
    return std::to_chars( begin, end, value, std::chars_format::fixed, precision ).ptr;
#else
    const auto cap = end - begin;
    if( cap <= 0 ) return begin;
    // "%f" expects a double; convert explicitly so every T is passed correctly.
    const int len = snprintf( begin, size_t( cap ), "%.*f", precision, double( value ) );
    if( len < 0 ) return begin;
    return begin + ( len < cap ? len : cap - 1 );
#endif
}
|
||||
|
||||
// Writes `value` in fixed notation into [begin, end) and returns a pointer
// just past the last character written.
//
// With <charconv> available this is std::to_chars(); no terminator is written.
// Otherwise snprintf( "%f" ) is used, bounded by `end`: the text is
// NUL-terminated, the returned pointer addresses that terminator, and output
// that does not fit is truncated instead of being written past `end`.
template<typename T>
static inline char* PrintFloat( char* begin, char* end, T value )
{
#ifndef NO_CHARCONV
    return std::to_chars( begin, end, value, std::chars_format::fixed ).ptr;
#else
    const auto cap = end - begin;
    if( cap <= 0 ) return begin;
    // "%f" expects a double; convert explicitly so every T is passed correctly.
    const int len = snprintf( begin, size_t( cap ), "%f", double( value ) );
    if( len < 0 ) return begin;
    return begin + ( len < cap ? len : cap - 1 );
#endif
}
|
||||
|
||||
#ifndef NO_CHARCONV
|
||||
template<typename T>
|
||||
static inline const char* RealToString( T val )
|
||||
{
|
||||
auto buf = detail::RealToStringGetBuffer();
|
||||
auto end = std::to_chars( buf, buf+64, val ).ptr;
|
||||
*end = '\0';
|
||||
if constexpr ( std::is_integral_v<T> )
|
||||
{
|
||||
detail::RealToStringInteger( buf, end );
|
||||
}
|
||||
else
|
||||
{
|
||||
detail::RealToStringFloating( buf, end );
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
#else
|
||||
static inline const char* RealToString( double val )
|
||||
{
|
||||
auto buf = detail::RealToStringGetBuffer();
|
||||
const auto sz = sprintf( buf, "%f", val );
|
||||
detail::RealToStringFloating( buf, buf+sz );
|
||||
return buf;
|
||||
}
|
||||
#endif
|
||||
|
||||
const char* TimeToString( int64_t ns );
|
||||
const char* TimeToStringExact( int64_t ns );
|
||||
const char* MemSizeToString( int64_t val );
|
||||
const char* LocationToString( const char* fn, uint32_t line );
|
||||
|
||||
// Writes "<string> (<percent>%)" into `buf`, with two decimals and a
// terminating NUL.  buf+128 is passed to PrintFloat() as the end of the output.
// NOTE(review): presumably callers provide at least 128 bytes — confirm.
static tracy_force_inline void PrintStringPercent( char* buf, const char* string, double percent )
{
    const auto len = strlen( string );
    memcpy( buf, string, len );
    char* cursor = buf + len;
    memcpy( cursor, " (", 2 );
    cursor = PrintFloat( cursor+2, buf+128, percent, 2 );
    memcpy( cursor, "%)", 3 );
}
|
||||
|
||||
// Writes "(<percent>%)" into `buf`, with two decimals and a terminating NUL.
// buf+64 is passed to PrintFloat() as the end of the output.
// NOTE(review): presumably callers provide at least 64 bytes — confirm.
static tracy_force_inline void PrintStringPercent( char* buf, double percent )
{
    memcpy( buf, "(", 2 );
    char* const tail = PrintFloat( buf+1, buf+64, percent, 2 );
    memcpy( tail, "%)", 3 );
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
82
subprojects/tracy/server/TracyShortPtr.hpp
Normal file
82
subprojects/tracy/server/TracyShortPtr.hpp
Normal file
@@ -0,0 +1,82 @@
|
||||
#ifndef __TRACYSHORTPTR_HPP__
|
||||
#define __TRACYSHORTPTR_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF
|
||||
template<typename T>
|
||||
class short_ptr
|
||||
{
|
||||
public:
|
||||
tracy_force_inline short_ptr() {}
|
||||
tracy_force_inline short_ptr( const T* ptr ) { set( ptr ); }
|
||||
|
||||
tracy_force_inline operator T*() { return get(); }
|
||||
tracy_force_inline operator const T*() const { return get(); }
|
||||
tracy_force_inline T& operator*() { return *get(); }
|
||||
tracy_force_inline const T& operator*() const { return *get(); }
|
||||
tracy_force_inline T* operator->() { return get(); }
|
||||
tracy_force_inline const T* operator->() const { return get(); }
|
||||
|
||||
tracy_force_inline void set( const T* ptr )
|
||||
{
|
||||
assert( ( uint64_t( ptr ) & 0xFFFF000000000000 ) == 0 );
|
||||
memcpy( m_ptr, &ptr, 4 );
|
||||
memcpy( m_ptr+4, ((char*)&ptr)+4, 2 );
|
||||
}
|
||||
|
||||
tracy_force_inline T* get()
|
||||
{
|
||||
uint32_t lo;
|
||||
uint16_t hi;
|
||||
memcpy( &lo, m_ptr, 4 );
|
||||
memcpy( &hi, m_ptr+4, 2 );
|
||||
return (T*)( uint64_t( lo ) | ( ( uint64_t( hi ) << 32 ) ) );
|
||||
}
|
||||
|
||||
tracy_force_inline const T* get() const
|
||||
{
|
||||
uint32_t lo;
|
||||
uint16_t hi;
|
||||
memcpy( &lo, m_ptr, 4 );
|
||||
memcpy( &hi, m_ptr+4, 2 );
|
||||
return (T*)( uint64_t( lo ) | ( ( uint64_t( hi ) << 32 ) ) );
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t m_ptr[6];
|
||||
};
|
||||
#else
|
||||
template<typename T>
|
||||
class short_ptr
|
||||
{
|
||||
public:
|
||||
tracy_force_inline short_ptr() {}
|
||||
tracy_force_inline short_ptr( const T* ptr ) { memcpy( &m_ptr, &ptr, sizeof( T* ) ); }
|
||||
|
||||
tracy_force_inline operator T*() { return m_ptr; }
|
||||
tracy_force_inline operator const T*() const { return m_ptr; }
|
||||
tracy_force_inline T& operator*() { return *m_ptr; }
|
||||
tracy_force_inline const T& operator*() const { return *m_ptr; }
|
||||
tracy_force_inline T* operator->() { return m_ptr; }
|
||||
tracy_force_inline const T* operator->() const { return m_ptr; }
|
||||
|
||||
tracy_force_inline void set( const T* ptr ) { m_ptr = ptr; }
|
||||
tracy_force_inline T* get() { return m_ptr; }
|
||||
tracy_force_inline const T* get() const { return m_ptr; }
|
||||
|
||||
private:
|
||||
T* m_ptr;
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
159
subprojects/tracy/server/TracySlab.hpp
Normal file
159
subprojects/tracy/server/TracySlab.hpp
Normal file
@@ -0,0 +1,159 @@
|
||||
#ifndef __TRACYSLAB_HPP__
|
||||
#define __TRACYSLAB_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "TracyMemory.hpp"
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
template<size_t BlockSize>
|
||||
class Slab
|
||||
{
|
||||
public:
|
||||
Slab()
|
||||
: m_ptr( new char[BlockSize] )
|
||||
, m_offset( 0 )
|
||||
, m_buffer( { m_ptr } )
|
||||
, m_usage( BlockSize )
|
||||
{
|
||||
memUsage.fetch_add( BlockSize, std::memory_order_relaxed );
|
||||
}
|
||||
|
||||
~Slab()
|
||||
{
|
||||
memUsage.fetch_sub( m_usage, std::memory_order_relaxed );
|
||||
for( auto& v : m_buffer )
|
||||
{
|
||||
delete[] v;
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void* AllocRaw( size_t size )
|
||||
{
|
||||
assert( size <= BlockSize );
|
||||
const auto offset = m_offset;
|
||||
if( offset + size > BlockSize )
|
||||
{
|
||||
return DoAlloc( size );
|
||||
}
|
||||
else
|
||||
{
|
||||
void* ret = m_ptr + offset;
|
||||
m_offset += size;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline T* AllocInit()
|
||||
{
|
||||
const auto size = sizeof( T );
|
||||
auto ret = AllocRaw( size );
|
||||
new( ret ) T;
|
||||
return (T*)ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline T* AllocInit( size_t sz )
|
||||
{
|
||||
const auto size = sizeof( T ) * sz;
|
||||
auto ret = AllocRaw( size );
|
||||
T* ptr = (T*)ret;
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
new( ptr ) T;
|
||||
ptr++;
|
||||
}
|
||||
return (T*)ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline T* Alloc()
|
||||
{
|
||||
return (T*)AllocRaw( sizeof( T ) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline T* Alloc( size_t size )
|
||||
{
|
||||
return (T*)AllocRaw( sizeof( T ) * size );
|
||||
}
|
||||
|
||||
tracy_force_inline void Unalloc( size_t size )
|
||||
{
|
||||
assert( size <= m_offset );
|
||||
m_offset -= size;
|
||||
}
|
||||
|
||||
tracy_force_inline void* AllocBig( size_t size )
|
||||
{
|
||||
const auto offset = m_offset;
|
||||
if( offset + size <= BlockSize )
|
||||
{
|
||||
void* ret = m_ptr + offset;
|
||||
m_offset += size;
|
||||
return ret;
|
||||
}
|
||||
else if( size <= BlockSize && BlockSize - offset <= 1024 )
|
||||
{
|
||||
return DoAlloc( size );
|
||||
}
|
||||
else
|
||||
{
|
||||
memUsage.fetch_add( size, std::memory_order_relaxed );
|
||||
m_usage += size;
|
||||
auto ret = new char[size];
|
||||
m_buffer.emplace_back( ret );
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
if( m_buffer.size() > 1 )
|
||||
{
|
||||
memUsage.fetch_sub( m_usage - BlockSize, std::memory_order_relaxed );
|
||||
m_usage = BlockSize;
|
||||
for( int i=1; i<m_buffer.size(); i++ )
|
||||
{
|
||||
delete[] m_buffer[i];
|
||||
}
|
||||
m_ptr = m_buffer[0];
|
||||
m_buffer.clear();
|
||||
m_buffer.emplace_back( m_ptr );
|
||||
}
|
||||
m_offset = 0;
|
||||
}
|
||||
|
||||
Slab( const Slab& ) = delete;
|
||||
Slab( Slab&& ) = delete;
|
||||
|
||||
Slab& operator=( const Slab& ) = delete;
|
||||
Slab& operator=( Slab&& ) = delete;
|
||||
|
||||
private:
|
||||
void* DoAlloc( uint32_t willUseBytes )
|
||||
{
|
||||
auto ptr = new char[BlockSize];
|
||||
m_ptr = ptr;
|
||||
m_offset = willUseBytes;
|
||||
m_buffer.emplace_back( m_ptr );
|
||||
memUsage.fetch_add( BlockSize, std::memory_order_relaxed );
|
||||
m_usage += BlockSize;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
char* m_ptr;
|
||||
uint32_t m_offset;
|
||||
std::vector<char*> m_buffer;
|
||||
size_t m_usage;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
19
subprojects/tracy/server/TracySort.hpp
Normal file
19
subprojects/tracy/server/TracySort.hpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef __TRACYSORT_HPP__
|
||||
#define __TRACYSORT_HPP__
|
||||
|
||||
#ifndef NO_PARALLEL_SORT
|
||||
# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L )
|
||||
# if __has_include(<execution>)
|
||||
# include <algorithm>
|
||||
# include <execution>
|
||||
# else
|
||||
# define NO_PARALLEL_SORT
|
||||
# endif
|
||||
# else
|
||||
# define NO_PARALLEL_SORT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
#endif
|
||||
127
subprojects/tracy/server/TracySortedVector.hpp
Normal file
127
subprojects/tracy/server/TracySortedVector.hpp
Normal file
@@ -0,0 +1,127 @@
|
||||
#ifndef __TRACYSORTEDVECTOR_HPP__
|
||||
#define __TRACYSORTEDVECTOR_HPP__
|
||||
|
||||
#include "TracySort.hpp"
|
||||
#include "TracyVector.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#pragma pack( push, 1 )
|
||||
template<typename T, class CompareDefault = std::less<T>>
|
||||
class SortedVector
|
||||
{
|
||||
public:
|
||||
using iterator = T*;
|
||||
using const_iterator = const T*;
|
||||
|
||||
tracy_force_inline SortedVector()
|
||||
: sortedEnd( 0 )
|
||||
{}
|
||||
|
||||
SortedVector( const SortedVector& ) = delete;
|
||||
tracy_force_inline SortedVector( SortedVector&& src ) noexcept
|
||||
: v( std::move( src.v ) )
|
||||
, sortedEnd( src.sortedEnd )
|
||||
{
|
||||
}
|
||||
|
||||
tracy_force_inline SortedVector( const T& value )
|
||||
: v( value )
|
||||
, sortedEnd( 0 )
|
||||
{
|
||||
}
|
||||
|
||||
SortedVector& operator=( const SortedVector& ) = delete;
|
||||
tracy_force_inline SortedVector& operator=( SortedVector&& src ) noexcept
|
||||
{
|
||||
v = std::move( src.v );
|
||||
sortedEnd = src.sortedEnd;
|
||||
return *this;
|
||||
}
|
||||
|
||||
tracy_force_inline void swap( SortedVector& other )
|
||||
{
|
||||
v.swap( other.v );
|
||||
std::swap( sortedEnd, other.sortedEnd );
|
||||
}
|
||||
|
||||
tracy_force_inline bool empty() const { return v.empty(); }
|
||||
tracy_force_inline size_t size() const { return v.size(); }
|
||||
tracy_force_inline bool is_sorted() const { return sortedEnd == 0; }
|
||||
|
||||
tracy_force_inline T* data() { return v.data(); }
|
||||
tracy_force_inline const T* data() const { return v.data(); };
|
||||
|
||||
tracy_force_inline T* begin() { return v.begin(); }
|
||||
tracy_force_inline const T* begin() const { return v.begin(); }
|
||||
tracy_force_inline T* end() { return v.end(); }
|
||||
tracy_force_inline const T* end() const { return v.end(); }
|
||||
|
||||
tracy_force_inline T& front() { return v.front(); }
|
||||
tracy_force_inline const T& front() const { return v.front(); }
|
||||
|
||||
tracy_force_inline T& back() { return v.back(); }
|
||||
tracy_force_inline const T& back() const { return v.back(); }
|
||||
|
||||
tracy_force_inline T& operator[]( size_t idx ) { return v[idx]; }
|
||||
tracy_force_inline const T& operator[]( size_t idx ) const { return v[idx]; }
|
||||
|
||||
tracy_force_inline void push_back( const T& val ) { push_back( val, CompareDefault() ); }
|
||||
|
||||
template<class Compare>
|
||||
tracy_force_inline void push_back( const T& val, Compare comp )
|
||||
{
|
||||
if( sortedEnd == 0 && !v.empty() && !comp( v.back(), val ) )
|
||||
{
|
||||
sortedEnd = (uint32_t)v.size();
|
||||
}
|
||||
v.push_back( val );
|
||||
}
|
||||
|
||||
tracy_force_inline void reserve( size_t cap ) { v.reserve( cap ); }
|
||||
template<size_t U>
|
||||
tracy_force_inline void reserve_exact( uint32_t sz, Slab<U>& slab ) { v.reserve_exact( sz, slab ); }
|
||||
|
||||
tracy_force_inline void clear() { v.clear(); sortedEnd = 0; }
|
||||
|
||||
tracy_force_inline T* erase( T* begin, T* end )
|
||||
{
|
||||
assert( is_sorted() );
|
||||
return v.erase( begin, end );
|
||||
}
|
||||
|
||||
tracy_force_inline void sort() { sort( CompareDefault() ); }
|
||||
tracy_force_inline void ensure_sorted() { if( !is_sorted() ) sort(); }
|
||||
|
||||
template<class Compare>
|
||||
void sort( Compare comp )
|
||||
{
|
||||
assert( !is_sorted() );
|
||||
const auto sb = v.begin();
|
||||
const auto se = sb + sortedEnd;
|
||||
const auto sl = se - 1;
|
||||
const auto ue = v.end();
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
pdqsort_branchless( se, ue, comp );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, se, ue, comp );
|
||||
#endif
|
||||
const auto ss = std::lower_bound( sb, se, *se, comp );
|
||||
const auto uu = std::lower_bound( se, ue, *sl, comp );
|
||||
std::inplace_merge( ss, se, uu, comp );
|
||||
sortedEnd = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
Vector<T> v;
|
||||
uint32_t sortedEnd;
|
||||
};
|
||||
|
||||
#pragma pack( pop )
|
||||
|
||||
enum { SortedVectorSize = sizeof( SortedVector<int> ) };
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
88
subprojects/tracy/server/TracyStringDiscovery.hpp
Normal file
88
subprojects/tracy/server/TracyStringDiscovery.hpp
Normal file
@@ -0,0 +1,88 @@
|
||||
#ifndef __TRACYSTRINGDISCOVERY_HPP__
|
||||
#define __TRACYSTRINGDISCOVERY_HPP__
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "tracy_robin_hood.h"
|
||||
#include "TracyCharUtil.hpp"
|
||||
#include "TracyEvent.hpp"
|
||||
#include "TracyVector.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
template<typename T>
|
||||
class StringDiscovery
|
||||
{
|
||||
public:
|
||||
tracy_force_inline Vector<T>& Data() { return m_data; }
|
||||
tracy_force_inline const Vector<T>& Data() const { return m_data; }
|
||||
|
||||
tracy_force_inline bool IsPending() const { return !m_pending.empty(); }
|
||||
|
||||
// Merge( destination, postponed )
|
||||
template<typename U>
|
||||
tracy_force_inline void StringDiscovered( uint64_t name, const StringLocation& sl, U& stringMap, std::function<void(T,T)> Merge )
|
||||
{
|
||||
auto pit = m_pending.find( name );
|
||||
assert( pit != m_pending.end() );
|
||||
|
||||
auto it = m_rev.find( sl.ptr );
|
||||
if( it == m_rev.end() )
|
||||
{
|
||||
m_map.emplace( name, pit->second );
|
||||
m_rev.emplace( sl.ptr, pit->second );
|
||||
m_data.push_back( pit->second );
|
||||
stringMap.emplace( name, sl.ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
auto item = it->second;
|
||||
m_map.emplace( name, item );
|
||||
Merge( item, pit->second );
|
||||
}
|
||||
|
||||
m_pending.erase( pit );
|
||||
}
|
||||
|
||||
tracy_force_inline T Retrieve( uint64_t name, const std::function<T(uint64_t)>& Create, const std::function<void(uint64_t)>& Query )
|
||||
{
|
||||
auto it = m_map.find( name );
|
||||
if( it == m_map.end() )
|
||||
{
|
||||
auto pit = m_pending.find( name );
|
||||
if( pit == m_pending.end() )
|
||||
{
|
||||
T item = Create( name );
|
||||
if( item )
|
||||
{
|
||||
m_pending.emplace( name, item );
|
||||
Query( name );
|
||||
}
|
||||
return item;
|
||||
}
|
||||
else
|
||||
{
|
||||
return pit->second;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void AddExternal( const T& val )
|
||||
{
|
||||
m_data.push_back( val );
|
||||
}
|
||||
|
||||
private:
|
||||
Vector<T> m_data;
|
||||
unordered_flat_map<uint64_t, T> m_pending;
|
||||
unordered_flat_map<uint64_t, T> m_map;
|
||||
unordered_flat_map<const char*, T, charutil::Hasher, charutil::Comparator> m_rev;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
43
subprojects/tracy/server/TracySysUtil.cpp
Normal file
43
subprojects/tracy/server/TracySysUtil.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
|
||||
|
||||
#include "TracySysUtil.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#elif defined __linux__
|
||||
# include <sys/sysinfo.h>
|
||||
#elif defined __APPLE__ || defined BSD
|
||||
# include <sys/types.h>
|
||||
# include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Returns the total amount of physical memory in bytes.
// Returns 0 when the platform is not recognised or when the operating system query fails
// (previously a failed query returned whatever happened to be on the stack).
size_t GetPhysicalMemorySize()
{
#ifdef _WIN32
    MEMORYSTATUSEX statex;
    statex.dwLength = sizeof( statex );
    if( !GlobalMemoryStatusEx( &statex ) ) return 0;
    return statex.ullTotalPhys;
#elif defined __linux__
    struct sysinfo sysInfo;
    if( sysinfo( &sysInfo ) != 0 ) return 0;
    // sysinfo() reports memory sizes as multiples of mem_unit bytes; the unit is not
    // guaranteed to be 1, so it has to be applied to obtain a byte count.
    const size_t unit = sysInfo.mem_unit != 0 ? sysInfo.mem_unit : 1;
    return size_t( sysInfo.totalram ) * unit;
#elif defined __APPLE__
    size_t memSize = 0;
    size_t sz = sizeof( memSize );
    if( sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ) != 0 ) return 0;
    return memSize;
#elif defined BSD
    size_t memSize = 0;
    size_t sz = sizeof( memSize );
    if( sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ) != 0 ) return 0;
    return memSize;
#else
    return 0;
#endif
}
|
||||
|
||||
}
|
||||
13
subprojects/tracy/server/TracySysUtil.hpp
Normal file
13
subprojects/tracy/server/TracySysUtil.hpp
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef __TRACYSYSUTIL_HPP__
|
||||
#define __TRACYSYSUTIL_HPP__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Total physical memory of the machine (nominally in bytes); 0 on platforms the implementation has no branch for.
size_t GetPhysicalMemorySize();
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
88
subprojects/tracy/server/TracyTaskDispatch.cpp
Normal file
88
subprojects/tracy/server/TracyTaskDispatch.cpp
Normal file
@@ -0,0 +1,88 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../public/common/TracySystem.hpp"
|
||||
#include "TracyTaskDispatch.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Starts `workers` threads. Each thread first names itself "<name> #<index>" and then runs
// the Worker() loop until the dispatcher is destroyed.
// NOTE(review): `name` is captured as a raw pointer and read on the new thread, so it has to
// stay valid until every worker has started -- presumably callers pass string literals; confirm.
TaskDispatch::TaskDispatch( size_t workers, const char* name )
    : m_exit( false )
    , m_jobs( 0 )
{
    m_workers.reserve( workers );
    size_t idx = 0;
    while( idx < workers )
    {
        m_workers.emplace_back( [this, name, idx] {
            SetName( name, idx );
            Worker();
        } );
        idx++;
    }
}
|
||||
|
||||
// Raises the exit flag, wakes every worker while holding the queue mutex, then joins them.
// Workers return as soon as they observe the flag, so tasks still sitting in the queue at
// that point are not executed.
TaskDispatch::~TaskDispatch()
{
    m_exit.store( true, std::memory_order_release );
    {
        std::lock_guard<std::mutex> guard( m_queueLock );
        m_cvWork.notify_all();
    }

    for( size_t i=0; i<m_workers.size(); i++ )
    {
        m_workers[i].join();
    }
}
|
||||
|
||||
void TaskDispatch::Queue( const std::function<void(void)>& f )
|
||||
{
|
||||
std::lock_guard<std::mutex> lock( m_queueLock );
|
||||
m_queue.emplace_back( f );
|
||||
m_cvWork.notify_one();
|
||||
}
|
||||
|
||||
void TaskDispatch::Queue( std::function<void(void)>&& f )
|
||||
{
|
||||
std::lock_guard<std::mutex> lock( m_queueLock );
|
||||
m_queue.emplace_back( std::move( f ) );
|
||||
m_cvWork.notify_one();
|
||||
}
|
||||
|
||||
void TaskDispatch::Sync()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock( m_queueLock );
|
||||
while( !m_queue.empty() )
|
||||
{
|
||||
auto f = m_queue.back();
|
||||
m_queue.pop_back();
|
||||
lock.unlock();
|
||||
f();
|
||||
lock.lock();
|
||||
}
|
||||
m_cvJobs.wait( lock, [this]{ return m_jobs == 0; } );
|
||||
}
|
||||
|
||||
void TaskDispatch::Worker()
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock( m_queueLock );
|
||||
m_cvWork.wait( lock, [this]{ return !m_queue.empty() || m_exit.load( std::memory_order_acquire ); } );
|
||||
if( m_exit.load( std::memory_order_acquire ) ) return;
|
||||
auto f = m_queue.back();
|
||||
m_queue.pop_back();
|
||||
m_jobs++;
|
||||
lock.unlock();
|
||||
f();
|
||||
lock.lock();
|
||||
m_jobs--;
|
||||
if( m_jobs == 0 && m_queue.empty() ) m_cvJobs.notify_one();
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
void TaskDispatch::SetName( const char* name, size_t num )
|
||||
{
|
||||
char tmp[128];
|
||||
snprintf( tmp, sizeof( tmp ), "%s #%zu", name, num );
|
||||
SetThreadName( tmp );
|
||||
}
|
||||
|
||||
}
|
||||
40
subprojects/tracy/server/TracyTaskDispatch.hpp
Normal file
40
subprojects/tracy/server/TracyTaskDispatch.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef __TRACYTASKDISPATCH_HPP__
|
||||
#define __TRACYTASKDISPATCH_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Small thread pool. Tasks are std::function<void(void)> objects kept in a vector that is
// used as a stack (the most recently queued task is taken first).
class TaskDispatch
{
public:
    // Spawns `workers` threads, each named "<name> #<index>".
    TaskDispatch( size_t workers, const char* name );
    // Signals the workers to exit and joins them.
    ~TaskDispatch();

    // Adds a task (copied or moved in) and wakes one worker.
    void Queue( const std::function<void(void)>& f );
    void Queue( std::function<void(void)>&& f );

    // Runs queued tasks on the calling thread until the queue is empty, then waits for the
    // tasks the workers are still executing.
    void Sync();

private:
    void Worker();
    void SetName( const char* name, size_t num );

    std::vector<std::function<void(void)>> m_queue;
    std::mutex m_queueLock;                 // guards m_queue and m_jobs
    std::condition_variable m_cvWork;       // signalled when work is queued or on shutdown
    std::condition_variable m_cvJobs;       // signalled when the last running task completes
    std::atomic<bool> m_exit;
    size_t m_jobs;                          // number of tasks currently executing on workers

    std::vector<std::thread> m_workers;
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
211
subprojects/tracy/server/TracyTextureCompression.cpp
Normal file
211
subprojects/tracy/server/TracyTextureCompression.cpp
Normal file
@@ -0,0 +1,211 @@
|
||||
#include "../zstd/zstd.h"
|
||||
|
||||
#include "TracyEvent.hpp"
|
||||
#include "TracyTextureCompression.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Creates one zstd compression context and one decompression context up front. The scratch
// buffer starts out empty and no decompression dictionary is attached.
TextureCompression::TextureCompression()
|
||||
: m_buf( nullptr )
|
||||
, m_bufSize( 0 )
|
||||
, m_cctx( ZSTD_createCCtx() )
|
||||
, m_dctx( ZSTD_createDCtx() )
|
||||
, m_dict( nullptr )
|
||||
{
|
||||
}
|
||||
|
||||
// Releases everything the object holds: the dictionary handed over through SetDict() (if
// any), both zstd contexts and the scratch buffer. The four releases are independent.
TextureCompression::~TextureCompression()
{
    ZSTD_freeDDict( m_dict );
    ZSTD_freeDCtx( m_dctx );
    ZSTD_freeCCtx( m_cctx );
    delete[] m_buf;
}
|
||||
|
||||
uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes )
|
||||
{
|
||||
const auto maxout = ZSTD_COMPRESSBOUND( inBytes );
|
||||
if( bufsz < maxout )
|
||||
{
|
||||
bufsz = maxout;
|
||||
delete[] buf;
|
||||
buf = new char[maxout];
|
||||
}
|
||||
assert( ctx );
|
||||
auto ret = (uint32_t)ZSTD_compressCCtx( ctx, buf, maxout, image, inBytes, 3 );
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
m_inputBytes.fetch_add( inBytes, std::memory_order_relaxed );
|
||||
m_outputBytes.fetch_add( ret, std::memory_order_relaxed );
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes )
|
||||
{
|
||||
const auto maxout = ZSTD_COMPRESSBOUND( inBytes );
|
||||
if( bufsz < maxout )
|
||||
{
|
||||
bufsz = maxout;
|
||||
delete[] buf;
|
||||
buf = new char[maxout];
|
||||
}
|
||||
assert( ctx );
|
||||
auto ret = (uint32_t)ZSTD_compress_usingCDict( ctx, buf, maxout, image, inBytes, dict );
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
m_inputBytes.fetch_add( inBytes, std::memory_order_relaxed );
|
||||
m_outputBytes.fetch_add( ret, std::memory_order_relaxed );
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
const char* TextureCompression::Unpack( const FrameImage& image )
|
||||
{
|
||||
const auto outsz = size_t( image.w ) * size_t( image.h ) / 2;
|
||||
if( m_bufSize < outsz )
|
||||
{
|
||||
m_bufSize = outsz;
|
||||
delete[] m_buf;
|
||||
m_buf = new char[outsz];
|
||||
}
|
||||
assert( m_dctx );
|
||||
if( m_dict )
|
||||
{
|
||||
ZSTD_decompress_usingDDict( m_dctx, m_buf, outsz, image.ptr, image.csz, m_dict );
|
||||
}
|
||||
else
|
||||
{
|
||||
ZSTD_decompressDCtx( m_dctx, m_buf, outsz, image.ptr, image.csz );
|
||||
}
|
||||
return m_buf;
|
||||
}
|
||||
|
||||
static constexpr uint8_t Dxtc4To3Table[256] = {
|
||||
85, 84, 86, 86, 81, 80, 82, 82, 89, 88, 90, 90, 89, 88, 90, 90,
|
||||
69, 68, 70, 70, 65, 64, 66, 66, 73, 72, 74, 74, 73, 72, 74, 74,
|
||||
101, 100, 102, 102, 97, 96, 98, 98, 105, 104, 106, 106, 105, 104, 106, 106,
|
||||
101, 100, 102, 102, 97, 96, 98, 98, 105, 104, 106, 106, 105, 104, 106, 106,
|
||||
21, 20, 22, 22, 17, 16, 18, 18, 25, 24, 26, 26, 25, 24, 26, 26,
|
||||
5, 4, 6, 6, 1, 0, 2, 2, 9, 8, 10, 10, 9, 8, 10, 10,
|
||||
37, 36, 38, 38, 33, 32, 34, 34, 41, 40, 42, 42, 41, 40, 42, 42,
|
||||
37, 36, 38, 38, 33, 32, 34, 34, 41, 40, 42, 42, 41, 40, 42, 42,
|
||||
149, 148, 150, 150, 145, 144, 146, 146, 153, 152, 154, 154, 153, 152, 154, 154,
|
||||
133, 132, 134, 134, 129, 128, 130, 130, 137, 136, 138, 138, 137, 136, 138, 138,
|
||||
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
|
||||
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
|
||||
149, 148, 150, 150, 145, 144, 146, 146, 153, 152, 154, 154, 153, 152, 154, 154,
|
||||
133, 132, 134, 134, 129, 128, 130, 130, 137, 136, 138, 138, 137, 136, 138, 138,
|
||||
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170,
|
||||
165, 164, 166, 166, 161, 160, 162, 162, 169, 168, 170, 170, 169, 168, 170, 170
|
||||
};
|
||||
|
||||
// Largest of three ints.
static tracy_force_inline int max3( int a, int b, int c )
{
    const int ab = a > b ? a : b;
    return ab > c ? ab : c;
}
|
||||
|
||||
static constexpr int TrTbl1[] = { 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
|
||||
static constexpr int TrTbl2[] = { 12, 12, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
|
||||
static constexpr int TrTbl3[] = { 48, 48, 48, 32, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 };
|
||||
|
||||
void TextureCompression::Rdo( char* data, size_t blocks )
|
||||
{
|
||||
assert( blocks > 0 );
|
||||
do
|
||||
{
|
||||
uint64_t blk;
|
||||
memcpy( &blk, data, 8 );
|
||||
|
||||
uint32_t idx = blk >> 32;
|
||||
if( idx == 0x55555555 )
|
||||
{
|
||||
data += 8;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t c0 = blk & 0xFFFF;
|
||||
uint16_t c1 = ( blk >> 16 ) & 0xFFFF;
|
||||
|
||||
const int r0b = c0 & 0xF800;
|
||||
const int g0b = c0 & 0x07E0;
|
||||
const int b0b = c0 & 0x001F;
|
||||
|
||||
const int r1b = c1 & 0xF800;
|
||||
const int g1b = c1 & 0x07E0;
|
||||
const int b1b = c1 & 0x001F;
|
||||
|
||||
const int r0 = ( r0b >> 8 ) | ( r0b >> 13 );
|
||||
const int g0 = ( g0b >> 3 ) | ( g0b >> 9 );
|
||||
const int b0 = ( b0b << 3 ) | ( b0b >> 2 );
|
||||
|
||||
const int r1 = ( r1b >> 8 ) | ( r1b >> 13 );
|
||||
const int g1 = ( g1b >> 3 ) | ( g1b >> 9 );
|
||||
const int b1 = ( b1b << 3 ) | ( b1b >> 2 );
|
||||
|
||||
const int dr = abs( r0 - r1 );
|
||||
const int dg = abs( g0 - g1 );
|
||||
const int db = abs( b0 - b1 );
|
||||
|
||||
const int maxChan1 = max3( r0-1, g0, b0-2 );
|
||||
const int maxDelta1 = max3( dr-1, dg, db-2 );
|
||||
const int tr1 = TrTbl1[maxChan1 / 4];
|
||||
if( maxDelta1 <= tr1 )
|
||||
{
|
||||
uint64_t blk =
|
||||
( ( ( r0b + r1b ) >> 1 ) & 0xF800 ) |
|
||||
( ( ( g0b + g1b ) >> 1 ) & 0x07E0 ) |
|
||||
( ( ( b0b + b1b ) >> 1 ) );
|
||||
memcpy( data, &blk, 8 );
|
||||
}
|
||||
else
|
||||
{
|
||||
const int maxChan23 = max3( r0-2, g0, b0-5 );
|
||||
const int maxDelta23 = max3( dr-2, dg, db-5 );
|
||||
const int tr2 = TrTbl2[maxChan23 / 16];
|
||||
if( maxDelta23 <= tr2 )
|
||||
{
|
||||
idx &= 0x55555555;
|
||||
memcpy( data+4, &idx, 4 );
|
||||
}
|
||||
else
|
||||
{
|
||||
const int tr3 = TrTbl3[maxChan23 / 16];
|
||||
if( maxDelta23 <= tr3 )
|
||||
{
|
||||
uint64_t c = c1 | ( uint64_t( c0 ) << 16 );
|
||||
for( int k=0; k<4; k++ ) c |= uint64_t( Dxtc4To3Table[(idx >> (k*8)) & 0xFF] ) << ( 32 + k*8 );
|
||||
memcpy( data, &c, 8 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
data += 8;
|
||||
}
|
||||
while( --blocks );
|
||||
}
|
||||
|
||||
// Renumbers the sixteen 2-bit selectors stored in bytes 4..7 of each of `blocks` consecutive
// 8-byte blocks. The bit operations below map every selector value as
//   0 -> 1,  1 -> 3,  2 -> 2,  3 -> 0
// (new high bit = old high XOR old low, new low bit = NOT old high). The first four bytes of
// a block are not touched. `blocks` must be greater than zero.
void TextureCompression::FixOrder( char* data, size_t blocks )
{
    assert( blocks > 0 );
    do
    {
        char* const selPtr = data + 4;
        uint32_t sel;
        memcpy( &sel, selPtr, 4 );
        sel = ~sel;
        const uint32_t lowBits = sel & 0x55555555;
        const uint32_t highBits = sel & 0xAAAAAAAA;
        sel = ( ( lowBits << 1 ) | ( highBits >> 1 ) ) ^ highBits;
        memcpy( selPtr, &sel, 4 );
        data += 8;
    }
    while( --blocks );
}
|
||||
|
||||
}
|
||||
64
subprojects/tracy/server/TracyTextureCompression.hpp
Normal file
64
subprojects/tracy/server/TracyTextureCompression.hpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifndef __TRACY__TEXTURECOMPRESSION_HPP__
|
||||
#define __TRACY__TEXTURECOMPRESSION_HPP__
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "TracySlab.hpp"
|
||||
|
||||
struct ZSTD_CCtx_s;
|
||||
struct ZSTD_DCtx_s;
|
||||
struct ZSTD_CDict_s;
|
||||
struct ZSTD_DDict_s;
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct FrameImage;
|
||||
|
||||
class TextureCompression
|
||||
{
|
||||
public:
|
||||
TextureCompression();
|
||||
~TextureCompression();
|
||||
|
||||
void SetDict( struct ZSTD_DDict_s* dict ) { m_dict = dict; }
|
||||
|
||||
uint32_t Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
|
||||
uint32_t Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
|
||||
|
||||
template<size_t Size>
|
||||
const char* Pack( const char* image, uint32_t inBytes, uint32_t& csz, Slab<Size>& slab )
|
||||
{
|
||||
const auto outsz = Pack( m_cctx, m_buf, m_bufSize, image, inBytes );
|
||||
auto ptr = (char*)slab.AllocBig( outsz );
|
||||
memcpy( ptr, m_buf, outsz );
|
||||
csz = outsz;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
const char* Unpack( const FrameImage& image );
|
||||
|
||||
void Rdo( char* data, size_t blocks );
|
||||
void FixOrder( char* data, size_t blocks );
|
||||
|
||||
uint64_t GetInputBytesCount() const { return m_inputBytes.load( std::memory_order_relaxed ); }
|
||||
uint64_t GetOutputBytesCount() const { return m_outputBytes.load( std::memory_order_relaxed ); }
|
||||
|
||||
private:
|
||||
char* m_buf;
|
||||
size_t m_bufSize;
|
||||
struct ZSTD_CCtx_s* m_cctx;
|
||||
struct ZSTD_DCtx_s* m_dctx;
|
||||
struct ZSTD_DDict_s* m_dict;
|
||||
|
||||
std::atomic<uint64_t> m_inputBytes { 0 };
|
||||
std::atomic<uint64_t> m_outputBytes { 0 };
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
72
subprojects/tracy/server/TracyThreadCompress.cpp
Normal file
72
subprojects/tracy/server/TracyThreadCompress.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
#include <limits>
|
||||
|
||||
#include "TracyFileRead.hpp"
|
||||
#include "TracyFileWrite.hpp"
|
||||
#include "TracyThreadCompress.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Seeds the one-entry lookup cache with thread id UINT64_MAX mapped to index 0 -- presumably
// an id that never occurs, so that the first CompressThread() call misses the cache; confirm.
ThreadCompress::ThreadCompress()
|
||||
: m_threadLast( std::numeric_limits<uint64_t>::max(), 0 )
|
||||
{
|
||||
}
|
||||
|
||||
// Registers thread id 0 as the first entry of the expansion table, which must still be empty.
// Only m_threadExpand is modified; m_threadMap and the lookup cache are left as they are.
void ThreadCompress::InitZero()
|
||||
{
|
||||
assert( m_threadExpand.empty() );
|
||||
m_threadExpand.push_back( 0 );
|
||||
}
|
||||
|
||||
// Restores the tables from `f`: a 64-bit entry count followed by that many 64-bit thread ids.
// The ids become the expansion table and the reverse map is rebuilt from it, with each id
// mapped to its position. Both tables must be empty on entry (asserted).
// NOTE(review): the count comes straight from the file and positions are narrowed to
// uint16_t when stored in the map -- confirm that the writer never exceeds that range.
void ThreadCompress::Load( FileRead& f )
{
    assert( m_threadExpand.empty() );
    assert( m_threadMap.empty() );

    uint64_t count;
    f.Read( count );
    if( count == 0 ) return;

    m_threadExpand.reserve_and_use( count );
    f.Read( m_threadExpand.data(), sizeof( uint64_t ) * count );
    m_threadMap.reserve( count );
    for( size_t idx=0; idx<count; idx++ )
    {
        m_threadMap.emplace( m_threadExpand[idx], idx );
    }
}
|
||||
|
||||
// Writes the expansion table to `f`: a 64-bit entry count, followed by the raw 64-bit thread
// ids when there are any. This is the layout Load() reads back.
void ThreadCompress::Save( FileWrite& f ) const
{
    uint64_t count = m_threadExpand.size();
    f.Write( &count, sizeof( count ) );
    if( count == 0 ) return;
    f.Write( m_threadExpand.data(), count * sizeof( uint64_t ) );
}
|
||||
|
||||
uint16_t ThreadCompress::CompressThreadReal( uint64_t thread )
|
||||
{
|
||||
auto it = m_threadMap.find( thread );
|
||||
if( it != m_threadMap.end() )
|
||||
{
|
||||
m_threadLast.first = thread;
|
||||
m_threadLast.second = it->second;
|
||||
return it->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
return CompressThreadNew( thread );
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t ThreadCompress::CompressThreadNew( uint64_t thread )
|
||||
{
|
||||
auto sz = m_threadExpand.size();
|
||||
m_threadExpand.push_back( thread );
|
||||
m_threadMap.emplace( thread, sz );
|
||||
m_threadLast.first = thread;
|
||||
m_threadLast.second = sz;
|
||||
return sz;
|
||||
}
|
||||
|
||||
}
|
||||
61
subprojects/tracy/server/TracyThreadCompress.hpp
Normal file
61
subprojects/tracy/server/TracyThreadCompress.hpp
Normal file
@@ -0,0 +1,61 @@
|
||||
#ifndef __TRACY__THREADCOMPRESS_HPP__
|
||||
#define __TRACY__THREADCOMPRESS_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "tracy_robin_hood.h"
|
||||
#include "TracyVector.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class FileRead;
|
||||
class FileWrite;
|
||||
|
||||
class ThreadCompress
|
||||
{
|
||||
public:
|
||||
ThreadCompress();
|
||||
|
||||
void InitZero();
|
||||
void Load( FileRead& f );
|
||||
void Save( FileWrite& f ) const;
|
||||
|
||||
tracy_force_inline uint16_t CompressThread( uint64_t thread )
|
||||
{
|
||||
if( m_threadLast.first == thread ) return m_threadLast.second;
|
||||
return CompressThreadReal( thread );
|
||||
}
|
||||
|
||||
tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const
|
||||
{
|
||||
assert( thread < m_threadExpand.size() );
|
||||
return m_threadExpand[thread];
|
||||
}
|
||||
|
||||
tracy_force_inline uint16_t DecompressMustRaw( uint64_t thread ) const
|
||||
{
|
||||
auto it = m_threadMap.find( thread );
|
||||
assert( it != m_threadMap.end() );
|
||||
return it->second;
|
||||
}
|
||||
|
||||
tracy_force_inline bool Exists( uint64_t thread ) const
|
||||
{
|
||||
return m_threadMap.find( thread ) != m_threadMap.end();
|
||||
}
|
||||
|
||||
private:
|
||||
uint16_t CompressThreadReal( uint64_t thread );
|
||||
uint16_t CompressThreadNew( uint64_t thread );
|
||||
|
||||
unordered_flat_map<uint64_t, uint16_t> m_threadMap;
|
||||
Vector<uint64_t> m_threadExpand;
|
||||
std::pair<uint64_t, uint16_t> m_threadLast;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
97
subprojects/tracy/server/TracyVarArray.hpp
Normal file
97
subprojects/tracy/server/TracyVarArray.hpp
Normal file
@@ -0,0 +1,97 @@
|
||||
#ifndef __TRACYVARARRAY_HPP__
|
||||
#define __TRACYVARARRAY_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
#include "tracy_xxhash.h"
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "TracyCharUtil.hpp"
|
||||
#include "TracyEvent.hpp"
|
||||
#include "TracyMemory.hpp"
|
||||
#include "TracyShortPtr.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#pragma pack( push, 1 )
|
||||
template<typename T>
|
||||
class VarArray
|
||||
{
|
||||
public:
|
||||
VarArray( uint16_t size, const T* data )
|
||||
: m_size( size )
|
||||
, m_ptr( data )
|
||||
{
|
||||
CalcHash();
|
||||
}
|
||||
|
||||
VarArray( const VarArray& ) = delete;
|
||||
VarArray( VarArray&& ) = delete;
|
||||
|
||||
VarArray& operator=( const VarArray& ) = delete;
|
||||
VarArray& operator=( VarArray&& ) = delete;
|
||||
|
||||
tracy_force_inline uint32_t get_hash() const { return m_hash; }
|
||||
|
||||
tracy_force_inline bool empty() const { return m_size == 0; }
|
||||
tracy_force_inline uint16_t size() const { return m_size; }
|
||||
|
||||
tracy_force_inline const T* data() const { return m_ptr; };
|
||||
|
||||
tracy_force_inline const T* begin() const { return m_ptr; }
|
||||
tracy_force_inline const T* end() const { return m_ptr + m_size; }
|
||||
|
||||
tracy_force_inline const T& front() const { assert( m_size > 0 ); return m_ptr[0]; }
|
||||
tracy_force_inline const T& back() const { assert( m_size > 0 ); return m_ptr[m_size - 1]; }
|
||||
|
||||
tracy_force_inline const T& operator[]( size_t idx ) const { return m_ptr[idx]; }
|
||||
|
||||
private:
|
||||
tracy_force_inline void CalcHash();
|
||||
|
||||
uint16_t m_size;
|
||||
uint32_t m_hash;
|
||||
const short_ptr<T> m_ptr;
|
||||
};
|
||||
#pragma pack( pop )
|
||||
|
||||
enum { VarArraySize = sizeof( VarArray<int> ) };
|
||||
|
||||
|
||||
template<typename T>
|
||||
inline void VarArray<T>::CalcHash()
|
||||
{
|
||||
m_hash = uint32_t( XXH3_64bits( m_ptr.get(), m_size * sizeof( T ) ) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static inline bool Compare( const VarArray<T>& lhs, const VarArray<T>& rhs )
|
||||
{
|
||||
if( lhs.size() != rhs.size() || lhs.get_hash() != rhs.get_hash() ) return false;
|
||||
return memcmp( lhs.data(), rhs.data(), lhs.size() * sizeof( T ) ) == 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
struct VarArrayHasher
|
||||
{
|
||||
size_t operator()( const VarArray<T>* arr ) const
|
||||
{
|
||||
return arr->get_hash();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct VarArrayComparator
|
||||
{
|
||||
bool operator()( const VarArray<T>* lhs, const VarArray<T>* rhs ) const
|
||||
{
|
||||
return Compare( *lhs, *rhs );
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
357
subprojects/tracy/server/TracyVector.hpp
Normal file
357
subprojects/tracy/server/TracyVector.hpp
Normal file
@@ -0,0 +1,357 @@
|
||||
#ifndef __TRACYVECTOR_HPP__
|
||||
#define __TRACYVECTOR_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "TracyMemory.hpp"
|
||||
#include "TracyPopcnt.hpp"
|
||||
#include "TracyShortPtr.hpp"
|
||||
#include "TracySlab.hpp"
|
||||
|
||||
//#define TRACY_VECTOR_DEBUG
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#pragma pack( push, 1 )
|
||||
template<typename T>
|
||||
class Vector
|
||||
{
|
||||
// Sentinel that reserve_exact() stores in m_capacity for slab-provided storage: mutating operations assert against it and the destructor does not free such storage.
constexpr uint8_t MaxCapacity() { return 0x7F; }
|
||||
|
||||
public:
|
||||
using iterator = T*;
|
||||
using const_iterator = const T*;
|
||||
|
||||
// Empty vector: every byte of the object is zeroed (null storage, size 0, flags cleared).
tracy_force_inline Vector()
{
    memset( (char*)this, 0, sizeof( *this ) );
}
|
||||
|
||||
Vector( const Vector& ) = delete;
|
||||
// Move construction: takes over the source's raw representation byte for byte and zeroes
// the source, which leaves it in the same state as a default-constructed vector.
tracy_force_inline Vector( Vector&& src ) noexcept
{
    constexpr size_t bytes = sizeof( Vector<T> );
    memcpy( (char*)this, &src, bytes );
    memset( (char*)&src, 0, bytes );
}
|
||||
|
||||
// Builds a one-element vector: allocates room for exactly one T with malloc, accounts for it
// in memUsage and copy-constructs `value` into it. m_capacity = 0 is the same state
// AllocMore() sets up for its first single-slot allocation.
tracy_force_inline Vector( const T& value )
|
||||
: m_ptr( (T*)malloc( sizeof( T ) ) )
|
||||
, m_size( 1 )
|
||||
, m_capacity( 0 )
|
||||
, m_magic( 0 )
|
||||
{
|
||||
memUsage.fetch_add( sizeof( T ), std::memory_order_relaxed );
|
||||
new(m_ptr) T( value );
|
||||
}
|
||||
|
||||
// Frees heap storage and removes it from the memUsage counter. Slab-backed vectors
// (capacity == MaxCapacity()) and vectors without storage are left alone. Element
// destructors are not run here.
tracy_force_inline ~Vector()
{
    const bool ownsHeap = m_capacity != MaxCapacity() && m_ptr;
    if( !ownsHeap ) return;
    memUsage.fetch_sub( Capacity() * sizeof( T ), std::memory_order_relaxed );
    free( m_ptr );
}
|
||||
|
||||
Vector& operator=( const Vector& ) = delete;
|
||||
// Move assignment: releases the storage currently owned (unless it is slab-backed), takes
// over the source's raw representation and zeroes the source.
tracy_force_inline Vector& operator=( Vector&& src ) noexcept
{
    // Self-move must be a no-op. Without this guard the live buffer would be freed and
    // memcpy would be called with identical source and destination, which is undefined.
    if( this == &src ) return *this;

    if( m_capacity != MaxCapacity() && m_ptr )
    {
        memUsage.fetch_sub( Capacity() * sizeof( T ), std::memory_order_relaxed );
        free( m_ptr );
    }
    memcpy( (char*)this, &src, sizeof( Vector<T> ) );
    memset( (char*)&src, 0, sizeof( Vector<T> ) );
    return *this;
}
|
||||
|
||||
// Exchanges the raw representations of the two vectors through a byte-sized scratch copy.
tracy_force_inline void swap( Vector& other )
{
    constexpr size_t bytes = sizeof( Vector<T> );
    uint8_t scratch[bytes];
    memcpy( (char*)scratch, &other, bytes );
    memcpy( (char*)&other, this, bytes );
    memcpy( (char*)this, scratch, bytes );
}
|
||||
|
||||
// Element count queries; both read m_size only.
tracy_force_inline bool empty() const { return m_size == 0; }
|
||||
tracy_force_inline size_t size() const { return m_size; }
|
||||
|
||||
// Overrides the element count without constructing or destroying anything; not allowed on slab-backed vectors (asserted).
tracy_force_inline void set_size( size_t sz ) { assert( m_capacity != MaxCapacity() ); m_size = sz; }
|
||||
|
||||
// Pointer to the first element of the storage (null when nothing has been allocated yet).
tracy_force_inline T* data() { return m_ptr; }
|
||||
tracy_force_inline const T* data() const { return m_ptr; };
|
||||
|
||||
// Raw-pointer iteration over [m_ptr, m_ptr + m_size).
tracy_force_inline T* begin() { return m_ptr; }
|
||||
tracy_force_inline const T* begin() const { return m_ptr; }
|
||||
tracy_force_inline T* end() { return m_ptr + m_size; }
|
||||
tracy_force_inline const T* end() const { return m_ptr + m_size; }
|
||||
|
||||
// First element; the vector must not be empty (asserted).
tracy_force_inline T& front() { assert( m_size > 0 ); return m_ptr[0]; }
|
||||
tracy_force_inline const T& front() const { assert( m_size > 0 ); return m_ptr[0]; }
|
||||
|
||||
// Last element; the vector must not be empty (asserted).
tracy_force_inline T& back() { assert( m_size > 0 ); return m_ptr[m_size - 1]; }
|
||||
tracy_force_inline const T& back() const { assert( m_size > 0 ); return m_ptr[m_size - 1]; }
|
||||
|
||||
// Unchecked element access; `idx` is not validated against the size.
tracy_force_inline T& operator[]( size_t idx ) { return m_ptr[idx]; }
|
||||
tracy_force_inline const T& operator[]( size_t idx ) const { return m_ptr[idx]; }
|
||||
|
||||
// Appends a copy of `v`, growing the storage first when it is full.
// Not allowed on slab-backed vectors (asserted).
// NOTE(review): `v` is read after a possible reallocation, so it presumably must not refer
// to an element of this vector -- confirm.
tracy_force_inline void push_back( const T& v )
{
    assert( m_capacity != MaxCapacity() );
    if( m_size == Capacity() ) AllocMore();
    T* const slot = m_ptr + m_size;
    new(slot) T( v );
    m_size++;
}
|
||||
|
||||
// push_back() variant for vectors that already have storage (asserted): the fullness test
// uses CapacityNoNullptrCheck() instead of Capacity().
tracy_force_inline void push_back_non_empty( const T& v )
{
    assert( m_capacity != MaxCapacity() );
    assert( m_ptr );
    if( m_size == CapacityNoNullptrCheck() ) AllocMore();
    T* const slot = m_ptr + m_size;
    new(slot) T( v );
    m_size++;
}
|
||||
|
||||
// push_back() variant that never grows the storage: the caller guarantees a free slot
// (asserted in debug builds only).
tracy_force_inline void push_back_no_space_check( const T& v )
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size < Capacity() );
    T* const slot = m_ptr + m_size;
    new(slot) T( v );
    m_size++;
}
|
||||
|
||||
// Appends `v` by move-constructing it into the next free slot, growing the storage first
// when it is full. Not allowed on slab-backed vectors (asserted).
tracy_force_inline void push_back( T&& v )
{
    assert( m_capacity != MaxCapacity() );
    if( m_size == Capacity() ) AllocMore();
    T* const slot = m_ptr + m_size;
    new(slot) T( std::move( v ) );
    m_size++;
}
|
||||
|
||||
// Appends a value-initialized element and returns a reference to it, growing the storage
// first when it is full. Not allowed on slab-backed vectors (asserted).
tracy_force_inline T& push_next()
{
    assert( m_capacity != MaxCapacity() );
    if( m_size == Capacity() ) AllocMore();
    T* const slot = m_ptr + m_size;
    new(slot) T();
    m_size++;
    return *slot;
}
|
||||
|
||||
// push_next() variant for vectors that already have storage (asserted): the fullness test
// uses CapacityNoNullptrCheck() instead of Capacity().
tracy_force_inline T& push_next_non_empty()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_ptr );
    if( m_size == CapacityNoNullptrCheck() ) AllocMore();
    T* const slot = m_ptr + m_size;
    new(slot) T();
    m_size++;
    return *slot;
}
|
||||
|
||||
// push_next() variant that never grows the storage: the caller guarantees a free slot
// (asserted in debug builds only).
tracy_force_inline T& push_next_no_space_check()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size < Capacity() );
    T* const slot = m_ptr + m_size;
    new(slot) T();
    m_size++;
    return *slot;
}
|
||||
|
||||
// Inserts a copy of `v` before `it` and returns a pointer to the new element.
// `it` must lie in [begin(), end()]. Existing elements are shifted with memmove, i.e. they
// are relocated bytewise. Not allowed on slab-backed vectors (asserted).
// NOTE(review): `v` is read after a possible reallocation, so it presumably must not refer
// to an element of this vector -- confirm.
T* insert( T* it, const T& v )
{
    assert( m_capacity != MaxCapacity() );
    assert( it >= m_ptr && it <= m_ptr + m_size );
    // Remember the position as an index: AllocMore() may move the storage.
    const auto dist = it - m_ptr;
    if( m_size == Capacity() ) AllocMore();
    if( dist != m_size ) memmove( m_ptr + dist + 1, m_ptr + dist, ( m_size - dist ) * sizeof( T ) );
    m_size++;
    // The slot is constructed exactly once; the former follow-up copy-assignment of the
    // same value was redundant and has been dropped.
    new(m_ptr+dist) T( v );
    return m_ptr + dist;
}
|
||||
|
||||
// Inserts `v` before `it` by move construction and returns a pointer to the new element.
// `it` must lie in [begin(), end()]. Existing elements are shifted with memmove.
// Not allowed on slab-backed vectors (asserted).
T* insert( T* it, T&& v )
{
    assert( m_capacity != MaxCapacity() );
    assert( it >= m_ptr && it <= m_ptr + m_size );
    // Keep the position as an index: AllocMore() may move the storage.
    const auto index = it - m_ptr;
    if( m_size == Capacity() ) AllocMore();
    T* const slot = m_ptr + index;
    if( index != m_size ) memmove( slot + 1, slot, ( m_size - index ) * sizeof( T ) );
    m_size++;
    new(slot) T( std::move( v ) );
    return slot;
}
|
||||
|
||||
// Inserts the elements of [begin, end) before `it`. The storage is grown until everything
// fits, the tail is shifted with memmove and the new elements are copied in with memcpy, so
// elements are treated as plain bytes. Not allowed on slab-backed vectors (asserted).
void insert( T* it, T* begin, T* end )
{
    assert( m_capacity != MaxCapacity() );
    assert( it >= m_ptr && it <= m_ptr + m_size );
    const auto count = end - begin;
    // Keep the position as an index: AllocMore() may move the storage.
    const auto offset = it - m_ptr;
    while( m_size + count > Capacity() ) AllocMore();
    T* const gap = m_ptr + offset;
    if( offset != m_size ) memmove( gap + count, gap, ( m_size - offset ) * sizeof( T ) );
    m_size += count;
    memcpy( gap, begin, count * sizeof( T ) );
}
|
||||
|
||||
// Removes the element at `it` by shifting the tail down with memmove and returns `it`, which
// then designates the element that followed the removed one (or end()).
// Not allowed on slab-backed vectors (asserted). No destructor is run for the removed element.
T* erase( T* it )
{
    assert( m_capacity != MaxCapacity() );
    // `it` has to designate a live element. The previous check also accepted end(), for which
    // the tail length below becomes -1 and memmove is asked to move a huge byte count.
    assert( it >= m_ptr && it < m_ptr + m_size );
    m_size--;
    memmove( it, it+1, ( m_size - ( it - m_ptr ) ) * sizeof( T ) );
    return it;
}
|
||||
|
||||
// Removes the elements of [begin, end) by shifting the tail down with memmove and returns
// `begin`. An empty range is a no-op. Both pointers must lie inside [begin(), end()] with
// begin <= end (asserted). Not allowed on slab-backed vectors (asserted).
T* erase( T* begin, T* end )
{
    assert( m_capacity != MaxCapacity() );
    assert( begin >= m_ptr && begin <= m_ptr + m_size );
    assert( end >= m_ptr && end <= m_ptr + m_size );
    assert( begin <= end );

    const auto removed = end - begin;
    if( removed <= 0 ) return begin;

    memmove( begin, end, ( m_size - ( end - m_ptr ) ) * sizeof( T ) );
    m_size -= removed;
    return begin;
}
|
||||
|
||||
// Drops the last element by shrinking the size; the element is not destroyed.
// Requires a non-empty, non-slab-backed vector (both asserted).
tracy_force_inline void pop_back()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size > 0 );
    --m_size;
}
|
||||
|
||||
// Shrinks the size by one and returns a reference to the element that was last. The
// reference points just past the new end, so it is only good until that slot is reused.
// Requires a non-empty, non-slab-backed vector (both asserted).
tracy_force_inline T& back_and_pop()
{
    assert( m_capacity != MaxCapacity() );
    assert( m_size > 0 );
    return m_ptr[--m_size];
}
|
||||
|
||||
// Makes room for at least `cap` elements. Does nothing for cap == 0 or when the current
// capacity is already sufficient.
tracy_force_inline void reserve( size_t cap )
{
    if( cap != 0 && cap > Capacity() ) reserve_non_zero( cap );
}
|
||||
|
||||
void reserve_non_zero( size_t cap )
|
||||
{
|
||||
assert( m_capacity != MaxCapacity() );
|
||||
cap--;
|
||||
cap |= cap >> 1;
|
||||
cap |= cap >> 2;
|
||||
cap |= cap >> 4;
|
||||
cap |= cap >> 8;
|
||||
cap |= cap >> 16;
|
||||
cap = TracyCountBits( cap );
|
||||
memUsage.fetch_add( ( ( 1 << cap ) - Capacity() ) * sizeof( T ), std::memory_order_relaxed );
|
||||
m_capacity = cap;
|
||||
Realloc();
|
||||
}
|
||||
|
||||
// Makes sure the storage can hold `sz` elements and then sets the logical size to `sz`.
// The elements themselves are not constructed or initialised here.
tracy_force_inline void reserve_and_use( size_t sz )
{
    assert( m_capacity != MaxCapacity() );
    // Same condition reserve() applies: grow only for a non-zero request that exceeds
    // the current capacity.
    if( sz != 0 && sz > Capacity() )
    {
        reserve_non_zero( sz );
    }
    m_size = sz;
}
|
||||
|
||||
template<size_t U>
|
||||
tracy_force_inline void reserve_exact( uint32_t sz, Slab<U>& slab )
|
||||
{
|
||||
assert( !m_ptr );
|
||||
m_capacity = MaxCapacity();
|
||||
m_size = sz;
|
||||
m_ptr = (T*)slab.AllocBig( sizeof( T ) * sz );
|
||||
}
|
||||
|
||||
// Resets the logical size to zero. Storage and m_capacity are left as they are, and no
// element destructors are run.
tracy_force_inline void clear()
{
    assert( m_capacity != MaxCapacity() );
    m_size = 0u;
}
|
||||
|
||||
// Reports whether the one-bit m_magic flag is set.
tracy_force_inline bool is_magic() const
{
    return m_magic != 0;
}
|
||||
// Sets the one-bit m_magic flag; asserts that it was not set before.
tracy_force_inline void set_magic()
{
    assert( !m_magic );
    m_magic = 1;
}
|
||||
|
||||
private:
|
||||
// Grows the storage by one step: a vector without storage gets room for a single
// element, any other vector has its capacity exponent bumped and is reallocated.
// The added bytes are recorded in memUsage.
tracy_no_inline void AllocMore()
{
    assert( m_capacity != MaxCapacity() );

    if( m_ptr == nullptr )
    {
        // First allocation: exponent 0, i.e. 1 << 0 == 1 slot.
        m_capacity = 0;
        m_ptr = (T*)malloc( sizeof( T ) );
        memUsage.fetch_add( sizeof( T ), std::memory_order_relaxed );
        return;
    }

    // Doubling adds as many bytes as are currently allocated, so read Capacity()
    // before the exponent is incremented.
    memUsage.fetch_add( Capacity() * sizeof( T ), std::memory_order_relaxed );
    m_capacity++;
    Realloc();
}
|
||||
|
||||
void Realloc()
|
||||
{
|
||||
T* ptr = (T*)malloc( sizeof( T ) * CapacityNoNullptrCheck() );
|
||||
if( m_size != 0 )
|
||||
{
|
||||
if( std::is_trivially_copyable<T>() )
|
||||
{
|
||||
memcpy( (char*)ptr, m_ptr, m_size * sizeof( T ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
for( uint32_t i=0; i<m_size; i++ )
|
||||
{
|
||||
new(ptr+i) T( std::move( m_ptr[i] ) );
|
||||
}
|
||||
}
|
||||
free( m_ptr );
|
||||
}
|
||||
m_ptr = ptr;
|
||||
}
|
||||
|
||||
// Number of elements the current storage can hold: zero when there is no storage,
// otherwise 1 << m_capacity.
tracy_force_inline uint32_t Capacity() const
{
    if( m_ptr == nullptr ) return 0;
    return 1 << m_capacity;
}
|
||||
|
||||
// Capacity derived from the exponent alone (1 << m_capacity), without looking at
// whether m_ptr is set.
tracy_force_inline uint32_t CapacityNoNullptrCheck() const
{
    const uint32_t slots = 1 << m_capacity;
    return slots;
}
|
||||
|
||||
// Element storage. A plain pointer is used when TRACY_VECTOR_DEBUG is defined,
// otherwise the pointer is held in a short_ptr<T>.
#ifdef TRACY_VECTOR_DEBUG
|
||||
T* m_ptr;
|
||||
#else
|
||||
short_ptr<T> m_ptr;
|
||||
#endif
|
||||
// Number of elements currently in use.
uint32_t m_size;
|
||||
// Capacity exponent: Capacity() evaluates to 1 << m_capacity. The value MaxCapacity()
// is assigned by reserve_exact() and asserted against by the mutating members.
uint8_t m_capacity : 7;
|
||||
// One-bit flag read by is_magic() and set by set_magic().
uint8_t m_magic : 1;
|
||||
};
|
||||
|
||||
|
||||
// Identity adapter: the call operator hands back the element it was given.
template<typename T>
struct VectorAdapterDirect
{
    const T& operator()( const T& it ) const
    {
        return it;
    }
};
|
||||
// Dereferencing adapter: the call operator takes a short_ptr<T> and returns the
// element it points at.
template<typename T>
struct VectorAdapterPointer
{
    const T& operator()( const short_ptr<T>& it ) const
    {
        return *it;
    }
};
|
||||
|
||||
#pragma pack( pop )
|
||||
|
||||
// Size in bytes of a Vector<int> object, exposed as a compile-time constant.
enum { VectorSize = sizeof( Vector<int> ) };
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
8516
subprojects/tracy/server/TracyWorker.cpp
Normal file
8516
subprojects/tracy/server/TracyWorker.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1109
subprojects/tracy/server/TracyWorker.hpp
Normal file
1109
subprojects/tracy/server/TracyWorker.hpp
Normal file
File diff suppressed because it is too large
Load Diff
524
subprojects/tracy/server/tracy_pdqsort.h
Normal file
524
subprojects/tracy/server/tracy_pdqsort.h
Normal file
@@ -0,0 +1,524 @@
|
||||
/*
|
||||
pdqsort.h - Pattern-defeating quicksort.
|
||||
|
||||
Copyright (c) 2015 Orson Peters
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty. In no event will the
|
||||
authors be held liable for any damages arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose, including commercial
|
||||
applications, and to alter it and redistribute it freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
|
||||
original software. If you use this software in a product, an acknowledgment in the product
|
||||
documentation would be appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
|
||||
being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TRACY_PDQSORT_H
|
||||
#define TRACY_PDQSORT_H
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <iterator>
|
||||
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
#define PDQSORT_PREFER_MOVE(x) std::move(x)
|
||||
|
||||
namespace tracy{
|
||||
|
||||
namespace pdqsort_detail {
|
||||
enum {
    // Ranges shorter than this are handed straight to insertion sort.
    insertion_sort_threshold = 24,

    // Ranges longer than this pick their pivot with Tukey's ninther instead of a plain
    // median of three.
    ninther_threshold = 128,

    // Budget of element moves for the optimistic insertion sort that is tried on a
    // partition which looked already sorted; exceeding it aborts the attempt.
    partial_insertion_sort_limit = 8,

    // Offsets per block in the branchless partition. Has to be a multiple of 8 (the
    // fill loops are unrolled eight times) and below 256 (offsets are unsigned char).
    block_size = 64,

    // Cache line size in bytes; must be a power of two because it is used as an
    // alignment mask.
    cacheline_size = 64
};
|
||||
|
||||
// Trait that is true only for the standard ordering functors std::less<T> and
// std::greater<T>; every other comparator type yields false.
template<class Compare>
struct is_default_compare : std::integral_constant<bool, false> {};

template<class Value>
struct is_default_compare<std::less<Value>> : std::integral_constant<bool, true> {};

template<class Value>
struct is_default_compare<std::greater<Value>> : std::integral_constant<bool, true> {};
|
||||
|
||||
// Returns floor(log2(n)), assumes n > 0.
|
||||
template<class T>
|
||||
tracy_force_inline int log2(T n) {
|
||||
int log = 0;
|
||||
while (n >>= 1) ++log;
|
||||
return log;
|
||||
}
|
||||
|
||||
// Sorts [begin, end) using insertion sort with the given comparison function.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline void insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
if (begin == end) return;
|
||||
|
||||
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||
Iter sift = cur;
|
||||
Iter sift_1 = cur - 1;
|
||||
|
||||
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||
if (comp(*sift, *sift_1)) {
|
||||
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||
|
||||
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||
while (sift != begin && comp(tmp, *--sift_1));
|
||||
|
||||
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sorts [begin, end) using insertion sort with the given comparison function. Assumes
|
||||
// *(begin - 1) is an element smaller than or equal to any element in [begin, end).
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
if (begin == end) return;
|
||||
|
||||
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||
Iter sift = cur;
|
||||
Iter sift_1 = cur - 1;
|
||||
|
||||
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||
if (comp(*sift, *sift_1)) {
|
||||
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||
|
||||
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||
while (comp(tmp, *--sift_1));
|
||||
|
||||
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to use insertion sort on [begin, end). Will return false if more than
|
||||
// partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
|
||||
// successfully sort and return true.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
if (begin == end) return true;
|
||||
|
||||
std::size_t limit = 0;
|
||||
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||
Iter sift = cur;
|
||||
Iter sift_1 = cur - 1;
|
||||
|
||||
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||
if (comp(*sift, *sift_1)) {
|
||||
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||
|
||||
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||
while (sift != begin && comp(tmp, *--sift_1));
|
||||
|
||||
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||
limit += cur - sift;
|
||||
}
|
||||
|
||||
if (limit > partial_insertion_sort_limit) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline void sort2(Iter a, Iter b, Compare comp) {
|
||||
if (comp(*b, *a)) std::iter_swap(a, b);
|
||||
}
|
||||
|
||||
// Sorts the elements *a, *b and *c using comparison function comp.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
|
||||
sort2(a, b, comp);
|
||||
sort2(b, c, comp);
|
||||
sort2(a, b, comp);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
tracy_force_inline T* align_cacheline(T* p) {
|
||||
#if defined(UINTPTR_MAX)
|
||||
std::uintptr_t ip = reinterpret_cast<std::uintptr_t>(p);
|
||||
#else
|
||||
std::size_t ip = reinterpret_cast<std::size_t>(p);
|
||||
#endif
|
||||
ip = (ip + cacheline_size - 1) & -cacheline_size;
|
||||
return reinterpret_cast<T*>(ip);
|
||||
}
|
||||
|
||||
template<class Iter>
|
||||
tracy_force_inline void swap_offsets(Iter first, Iter last,
|
||||
unsigned char* offsets_l, unsigned char* offsets_r,
|
||||
size_t num, bool use_swaps) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
if (use_swaps) {
|
||||
// This case is needed for the descending distribution, where we need
|
||||
// to have proper swapping for pdqsort to remain O(n).
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
|
||||
}
|
||||
} else if (num > 0) {
|
||||
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
|
||||
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
|
||||
for (size_t i = 1; i < num; ++i) {
|
||||
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
|
||||
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
|
||||
}
|
||||
*r = PDQSORT_PREFER_MOVE(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
||||
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
||||
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
||||
// pivot is a median of at least 3 elements and that [begin, end) is at least
|
||||
// insertion_sort_threshold long. Uses branchless partitioning.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
|
||||
// Move pivot into local for speed.
|
||||
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||
Iter first = begin;
|
||||
Iter last = end;
|
||||
|
||||
// Find the first element greater than or equal than the pivot (the median of 3 guarantees
|
||||
// this exists).
|
||||
while (comp(*++first, pivot));
|
||||
|
||||
// Find the first element strictly smaller than the pivot. We have to guard this search if
|
||||
// there was no element before *first.
|
||||
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
|
||||
else while ( !comp(*--last, pivot));
|
||||
|
||||
// If the first pair of elements that should be swapped to partition are the same element,
|
||||
// the passed in sequence already was correctly partitioned.
|
||||
bool already_partitioned = first >= last;
|
||||
if (!already_partitioned) {
|
||||
std::iter_swap(first, last);
|
||||
++first;
|
||||
|
||||
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
|
||||
// Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss, but
|
||||
// heavily micro-optimized.
|
||||
unsigned char offsets_l_storage[block_size + cacheline_size];
|
||||
unsigned char offsets_r_storage[block_size + cacheline_size];
|
||||
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
|
||||
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
|
||||
|
||||
Iter offsets_l_base = first;
|
||||
Iter offsets_r_base = last;
|
||||
size_t num_l, num_r, start_l, start_r;
|
||||
num_l = num_r = start_l = start_r = 0;
|
||||
|
||||
while (first < last) {
|
||||
// Fill up offset blocks with elements that are on the wrong side.
|
||||
// First we determine how much elements are considered for each offset block.
|
||||
size_t num_unknown = last - first;
|
||||
size_t left_split = num_l == 0 ? (num_r == 0 ? num_unknown / 2 : num_unknown) : 0;
|
||||
size_t right_split = num_r == 0 ? (num_unknown - left_split) : 0;
|
||||
|
||||
// Fill the offset blocks.
|
||||
if (left_split >= block_size) {
|
||||
for (size_t i = 0; i < block_size;) {
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < left_split;) {
|
||||
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||
}
|
||||
}
|
||||
|
||||
if (right_split >= block_size) {
|
||||
for (size_t i = 0; i < block_size;) {
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < right_split;) {
|
||||
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||
}
|
||||
}
|
||||
|
||||
// Swap elements and update block sizes and first/last boundaries.
|
||||
size_t num = std::min(num_l, num_r);
|
||||
swap_offsets(offsets_l_base, offsets_r_base,
|
||||
offsets_l + start_l, offsets_r + start_r,
|
||||
num, num_l == num_r);
|
||||
num_l -= num; num_r -= num;
|
||||
start_l += num; start_r += num;
|
||||
|
||||
if (num_l == 0) {
|
||||
start_l = 0;
|
||||
offsets_l_base = first;
|
||||
}
|
||||
|
||||
if (num_r == 0) {
|
||||
start_r = 0;
|
||||
offsets_r_base = last;
|
||||
}
|
||||
}
|
||||
|
||||
// We have now fully identified [first, last)'s proper position. Swap the last elements.
|
||||
if (num_l) {
|
||||
offsets_l += start_l;
|
||||
while (num_l--) std::iter_swap(offsets_l_base + offsets_l[num_l], --last);
|
||||
first = last;
|
||||
}
|
||||
if (num_r) {
|
||||
offsets_r += start_r;
|
||||
while (num_r--) std::iter_swap(offsets_r_base - offsets_r[num_r], first), ++first;
|
||||
last = first;
|
||||
}
|
||||
}
|
||||
|
||||
// Put the pivot in the right place.
|
||||
Iter pivot_pos = first - 1;
|
||||
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||
|
||||
return std::make_pair(pivot_pos, already_partitioned);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
||||
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
||||
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
||||
// pivot is a median of at least 3 elements and that [begin, end) is at least
|
||||
// insertion_sort_threshold long.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
|
||||
// Move pivot into local for speed.
|
||||
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||
|
||||
Iter first = begin;
|
||||
Iter last = end;
|
||||
|
||||
// Find the first element greater than or equal than the pivot (the median of 3 guarantees
|
||||
// this exists).
|
||||
while (comp(*++first, pivot));
|
||||
|
||||
// Find the first element strictly smaller than the pivot. We have to guard this search if
|
||||
// there was no element before *first.
|
||||
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
|
||||
else while ( !comp(*--last, pivot));
|
||||
|
||||
// If the first pair of elements that should be swapped to partition are the same element,
|
||||
// the passed in sequence already was correctly partitioned.
|
||||
bool already_partitioned = first >= last;
|
||||
|
||||
// Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
|
||||
// swapped pairs guard the searches, which is why the first iteration is special-cased
|
||||
// above.
|
||||
while (first < last) {
|
||||
std::iter_swap(first, last);
|
||||
while (comp(*++first, pivot));
|
||||
while (!comp(*--last, pivot));
|
||||
}
|
||||
|
||||
// Put the pivot in the right place.
|
||||
Iter pivot_pos = first - 1;
|
||||
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||
|
||||
return std::make_pair(pivot_pos, already_partitioned);
|
||||
}
|
||||
|
||||
// Similar function to the one above, except elements equal to the pivot are put to the left of
|
||||
// the pivot and it doesn't check or return if the passed sequence already was partitioned.
|
||||
// Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
|
||||
// performance, no block quicksort is applied here for simplicity.
|
||||
template<class Iter, class Compare>
|
||||
tracy_force_inline Iter partition_left(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
|
||||
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||
Iter first = begin;
|
||||
Iter last = end;
|
||||
|
||||
while (comp(pivot, *--last));
|
||||
|
||||
if (last + 1 == end) while (first < last && !comp(pivot, *++first));
|
||||
else while ( !comp(pivot, *++first));
|
||||
|
||||
while (first < last) {
|
||||
std::iter_swap(first, last);
|
||||
while (comp(pivot, *--last));
|
||||
while (!comp(pivot, *++first));
|
||||
}
|
||||
|
||||
Iter pivot_pos = last;
|
||||
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||
|
||||
return pivot_pos;
|
||||
}
|
||||
|
||||
|
||||
template<class Iter, class Compare, bool Branchless>
|
||||
inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
|
||||
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
|
||||
|
||||
// Use a while loop for tail recursion elimination.
|
||||
while (true) {
|
||||
diff_t size = end - begin;
|
||||
|
||||
// Insertion sort is faster for small arrays.
|
||||
if (size < insertion_sort_threshold) {
|
||||
if (leftmost) insertion_sort(begin, end, comp);
|
||||
else unguarded_insertion_sort(begin, end, comp);
|
||||
return;
|
||||
}
|
||||
|
||||
// Choose pivot as median of 3 or pseudomedian of 9.
|
||||
diff_t s2 = size / 2;
|
||||
if (size > ninther_threshold) {
|
||||
sort3(begin, begin + s2, end - 1, comp);
|
||||
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
|
||||
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
|
||||
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
|
||||
std::iter_swap(begin, begin + s2);
|
||||
} else sort3(begin + s2, begin, end - 1, comp);
|
||||
|
||||
// If *(begin - 1) is the end of the right partition of a previous partition operation
|
||||
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
|
||||
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
|
||||
// the left partition, greater elements in the right partition. We do not have to
|
||||
// recurse on the left partition, since it's sorted (all equal).
|
||||
if (!leftmost && !comp(*(begin - 1), *begin)) {
|
||||
begin = partition_left(begin, end, comp) + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Partition and get results.
|
||||
std::pair<Iter, bool> part_result =
|
||||
Branchless ? partition_right_branchless(begin, end, comp)
|
||||
: partition_right(begin, end, comp);
|
||||
Iter pivot_pos = part_result.first;
|
||||
bool already_partitioned = part_result.second;
|
||||
|
||||
// Check for a highly unbalanced partition.
|
||||
diff_t l_size = pivot_pos - begin;
|
||||
diff_t r_size = end - (pivot_pos + 1);
|
||||
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
|
||||
|
||||
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
|
||||
if (highly_unbalanced) {
|
||||
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
|
||||
if (--bad_allowed == 0) {
|
||||
std::make_heap(begin, end, comp);
|
||||
std::sort_heap(begin, end, comp);
|
||||
return;
|
||||
}
|
||||
|
||||
if (l_size >= insertion_sort_threshold) {
|
||||
std::iter_swap(begin, begin + l_size / 4);
|
||||
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
|
||||
|
||||
if (l_size > ninther_threshold) {
|
||||
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
|
||||
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
|
||||
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
|
||||
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
|
||||
}
|
||||
}
|
||||
|
||||
if (r_size >= insertion_sort_threshold) {
|
||||
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
|
||||
std::iter_swap(end - 1, end - r_size / 4);
|
||||
|
||||
if (r_size > ninther_threshold) {
|
||||
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
|
||||
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
|
||||
std::iter_swap(end - 2, end - (1 + r_size / 4));
|
||||
std::iter_swap(end - 3, end - (2 + r_size / 4));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If we were decently balanced and we tried to sort an already partitioned
|
||||
// sequence try to use insertion sort.
|
||||
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
|
||||
&& partial_insertion_sort(pivot_pos + 1, end, comp)) return;
|
||||
}
|
||||
|
||||
// Sort the left partition first using recursion and do tail recursion elimination for
|
||||
// the right-hand partition.
|
||||
pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
|
||||
begin = pivot_pos + 1;
|
||||
leftmost = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Iter, class Compare>
|
||||
inline void pdqsort(Iter begin, Iter end, Compare comp) {
|
||||
if (begin == end) return;
|
||||
pdqsort_detail::pdqsort_loop<Iter, Compare,
|
||||
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
|
||||
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
|
||||
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||
}
|
||||
|
||||
// Sorts [begin, end) in ascending order using std::less on the element type.
template<class Iter>
inline void pdqsort(Iter begin, Iter end) {
    using value_t = typename std::iterator_traits<Iter>::value_type;
    pdqsort(begin, end, std::less<value_t>());
}
|
||||
|
||||
template<class Iter, class Compare>
|
||||
inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) {
|
||||
if (begin == end) return;
|
||||
pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
|
||||
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||
}
|
||||
|
||||
template<class Iter>
|
||||
tracy_force_inline void pdqsort_branchless(Iter begin, Iter end) {
|
||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||
pdqsort_branchless(begin, end, std::less<T>());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#undef PDQSORT_PREFER_MOVE
|
||||
|
||||
#endif
|
||||
2538
subprojects/tracy/server/tracy_robin_hood.h
Normal file
2538
subprojects/tracy/server/tracy_robin_hood.h
Normal file
File diff suppressed because it is too large
Load Diff
6773
subprojects/tracy/server/tracy_xxhash.h
Normal file
6773
subprojects/tracy/server/tracy_xxhash.h
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user