Bulk copy of the latest physfs into our misc/libphysfs, since this seems to fix an off-by-one error that is reliably hit when readln reads 1 byte; the error was probably introduced when the buffered read was added. Whether this is excessive, or whether libphysfs should even be maintained by us, is another matter. But at least we shouldn't crash.
// LzmaBench.cpp
#include "StdAfx.h"
#include "LzmaBench.h"
#ifndef _WIN32
#define USE_POSIX_TIME
#define USE_POSIX_TIME2
#endif
#ifdef USE_POSIX_TIME
#include <time.h>
#ifdef USE_POSIX_TIME2
#include <sys/time.h>
#endif
#endif
#ifdef _WIN32
#define USE_ALLOCA
#endif
#ifdef USE_ALLOCA
#ifdef _WIN32
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#endif
extern "C"
{
#include "../../../../C/Alloc.h"
#include "../../../../C/7zCrc.h"
}
#include "../../../Common/MyCom.h"
#include "../../ICoder.h"
#ifdef BENCH_MT
#include "../../../Windows/Thread.h"
#include "../../../Windows/Synchronization.h"
#endif
#ifdef EXTERNAL_LZMA
#include "../../../Windows/PropVariant.h"
#else
#include "../LZMA/LZMADecoder.h"
#include "../LZMA/LZMAEncoder.h"
#endif
// Numerator used when LzmaBench derives the number of decode iterations
// (2 + kUncompressMinBlockSize / kBufferSize): 1 << 26 bytes.
static const UInt32 kUncompressMinBlockSize = 1 << 26;
// Added to the dictionary size to get the size of the generated input buffer.
static const UInt32 kAdditionalSize = (1 << 16);
// Added to half of the input buffer size to get the compressed output buffer size.
static const UInt32 kCompressedAdditionalSize = (1 << 10);
// Size of the buffer that receives the properties written by the encoder.
static const UInt32 kMaxLzmaPropSize = 5;
// Small deterministic pseudo-random generator made of two independent
// 32-bit state words; every instance starts from the same fixed seeds.
class CBaseRandomGenerator
{
  UInt32 A1;
  UInt32 A2;
public:
  CBaseRandomGenerator() { Init(); }

  // Restores both state words to their fixed seed values.
  void Init()
  {
    A1 = 362436069;
    A2 = 521288629;
  }

  // Advances both state words and combines them into the next 32-bit value.
  UInt32 GetRnd()
  {
    A1 = 36969 * (A1 & 0xffff) + (A1 >> 16);
    A2 = 18000 * (A2 & 0xffff) + (A2 >> 16);
    return (A1 << 16) + A2;
  }
};
// Owns one memory block obtained from ::MidAlloc and released with ::MidFree.
// Invariant: BufferSize is the size of the block Buffer points to, and is 0
// whenever Buffer is 0.
class CBenchBuffer
{
public:
  size_t BufferSize;  // size of Buffer in bytes; 0 while nothing is allocated
  Byte *Buffer;       // allocated block, or 0
  CBenchBuffer(): BufferSize(0), Buffer(0) {}
  virtual ~CBenchBuffer() { Free(); }

  // Releases the block (if any) and resets the object to the empty state.
  void Free()
  {
    ::MidFree(Buffer);
    Buffer = 0;
    BufferSize = 0;
  }

  // Makes sure a block of exactly bufferSize bytes is allocated.
  // An existing block of the same size is reused; otherwise the old block is
  // released first. Returns false (leaving the object empty) if the
  // allocation fails.
  bool Alloc(size_t bufferSize)
  {
    if (Buffer != 0 && BufferSize == bufferSize)
      return true;
    Free();
    Buffer = (Byte *)::MidAlloc(bufferSize);
    if (Buffer == 0)
      return false;
    BufferSize = bufferSize;
    return true;
  }
};
// Fills its CBenchBuffer storage with benchmark input: a pseudo-random mix of
// literal bytes and repeats of earlier buffer content, driven by the
// CBaseRandomGenerator supplied through Set().
class CBenchRandomGenerator: public CBenchBuffer
{
// Random source; not owned, and not set until Set() is called.
CBaseRandomGenerator *RG;
public:
void Set(CBaseRandomGenerator *rg) { RG = rg; }
// Returns the low numBits bits of res and shifts those bits out of res.
UInt32 GetVal(UInt32 &res, int numBits)
{
UInt32 val = res & (((UInt32)1 << numBits) - 1);
res >>= numBits;
return val;
}
// Consumes a 2-bit value n from res, then consumes and returns an (n + 1)-bit value.
UInt32 GetLen(UInt32 &res)
{
UInt32 len = GetVal(res, 2);
return GetVal(res, 1 + len);
}
// Fills Buffer[0 .. BufferSize). Requires a successful Alloc() and a prior Set().
void Generate()
{
UInt32 pos = 0;
// Distance back to the source of the next repeat; kept between repeats.
UInt32 rep0 = 1;
while (pos < BufferSize)
{
UInt32 res = RG->GetRnd();
// Discard the lowest bit of the fresh random word.
res >>= 1;
// One bit selects literal vs. repeat; the first 1024 bytes are always
// literals (the selector bit is consumed either way).
if (GetVal(res, 1) == 0 || pos < 1024)
Buffer[pos++] = (Byte)(res & 0xFF);
else
{
UInt32 len;
len = 1 + GetLen(res);
// A non-zero 3-bit value picks a new distance; zero reuses the previous rep0.
if (GetVal(res, 3) != 0)
{
len += GetLen(res);
do
{
// Number of bits for the new distance: 6 .. 37 before the check below.
UInt32 ppp = GetVal(res, 5) + 6;
res = RG->GetRnd();
// `continue` in a do/while jumps to the loop condition, so rep0 is
// tested there with the value it had before this iteration.
if (ppp > 30)
continue;
rep0 = /* (1 << ppp) +*/ GetVal(res, ppp);
res = RG->GetRnd();
}
while (rep0 >= pos);
// Turn the 0-based value into a distance of at least 1.
rep0++;
}
// Copy len bytes from rep0 positions back, stopping at the end of the buffer.
for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
Buffer[pos] = Buffer[pos - rep0];
}
}
}
};
// Sequential input stream that serves bytes from a caller-owned memory block.
// Init() must be called before the first Read().
class CBenchmarkInStream:
  public ISequentialInStream,
  public CMyUnknownImp
{
  const Byte *Data;  // start of the block (not owned)
  size_t Pos;        // number of bytes already handed out
  size_t Size;       // total size of the block
public:
  MY_UNKNOWN_IMP

  // Points the stream at [data, data + size) and rewinds it to the start.
  void Init(const Byte *data, size_t size)
  {
    Pos = 0;
    Size = size;
    Data = data;
  }

  STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
};
// Copies up to `size` bytes into `data`, limited to 1 << 20 bytes per call and
// to what is left in the block. Stores the number of bytes copied in
// *processedSize when that pointer is not NULL. Always returns S_OK.
STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
{
  const UInt32 kMaxBlockSize = (1 << 20);
  const size_t remain = Size - Pos;

  UInt32 count = (size > kMaxBlockSize) ? kMaxBlockSize : size;
  if (count > remain)
    count = (UInt32)remain;

  Byte *dest = (Byte *)data;
  for (UInt32 k = 0; k < count; k++)
    dest[k] = Data[Pos + k];
  Pos += count;

  if (processedSize != NULL)
    *processedSize = count;
  return S_OK;
}
// Sequential output stream that stores written bytes in its CBenchBuffer
// storage. Pos is the number of bytes stored since the last Init().
class CBenchmarkOutStream:
  public ISequentialOutStream,
  public CBenchBuffer,
  public CMyUnknownImp
{
  // bool _overflow;
public:
  UInt32 Pos;
  // CBenchmarkOutStream(): _overflow(false) {}

  // Rewinds the write position; the buffer contents are left untouched.
  void Init()
  {
    // _overflow = false;
    Pos = 0;
  }

  MY_UNKNOWN_IMP

  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
};
// Appends as much of `data` as still fits behind Pos and reports the number
// of bytes taken through *processedSize (when not NULL). Returns E_FAIL if
// fewer than `size` bytes fitted, S_OK otherwise.
STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
{
  const size_t room = BufferSize - Pos;
  const size_t accepted = (room > size) ? size : room;

  memcpy(Buffer + Pos, data, accepted);
  Pos += (UInt32)accepted;

  if (processedSize != NULL)
    *processedSize = (UInt32)accepted;

  if (accepted == size)
    return S_OK;
  // _overflow = true;
  return E_FAIL;
}
// Sequential output stream that keeps no data: it only folds everything
// written to it into a running CRC value.
class CCrcOutStream:
  public ISequentialOutStream,
  public CMyUnknownImp
{
public:
  UInt32 Crc;  // running (not yet finalized) CRC state

  MY_UNKNOWN_IMP

  // Restarts the CRC computation.
  void Init()
  {
    Crc = CRC_INIT_VAL;
  }

  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
};
// Updates the running CRC with `size` bytes from `data`. The whole request is
// always reported as processed; always returns S_OK.
STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
{
  const UInt32 updated = CrcUpdate(Crc, data, size);
  Crc = updated;
  if (processedSize != NULL)
    *processedSize = size;
  return S_OK;
}
// Returns the current wall-clock reading in ticks; GetFreq() gives the number
// of ticks per second for the same build configuration.
static UInt64 GetTimeCount()
{
  #ifdef USE_POSIX_TIME
    #ifdef USE_POSIX_TIME2
    timeval now;
    if (gettimeofday(&now, 0) != 0)
      return (UInt64)time(NULL) * 1000000;  // fall back to whole seconds
    return (UInt64)(now.tv_sec) * 1000000 + now.tv_usec;
    #else
    return time(NULL);
    #endif
  #else
  /*
  LARGE_INTEGER value;
  if (::QueryPerformanceCounter(&value))
    return value.QuadPart;
  */
  return GetTickCount();
  #endif
}
// Returns the number of GetTimeCount() ticks per second.
static UInt64 GetFreq()
{
  #if defined(USE_POSIX_TIME) && defined(USE_POSIX_TIME2)
  return 1000000;
  #elif defined(USE_POSIX_TIME)
  return 1;
  #else
  /*
  LARGE_INTEGER value;
  if (::QueryPerformanceFrequency(&value))
    return value.QuadPart;
  */
  return 1000;
  #endif
}
#ifndef USE_POSIX_TIME
// Joins the two 32-bit halves of a FILETIME into a single 64-bit value.
static inline UInt64 GetTime64(const FILETIME &t)
{
  const UInt64 high = t.dwHighDateTime;
  return (high << 32) | t.dwLowDateTime;
}
#endif
// Returns the CPU time consumed by the process, in ticks of GetUserFreq().
// On the non-POSIX path this is user + kernel time from GetProcessTimes, with
// a GetTickCount()-based value as the fallback when that call fails.
static UInt64 GetUserTime()
{
  #ifdef USE_POSIX_TIME
  return clock();
  #else
  FILETIME creationTime, exitTime, kernelTime, userTime;
  if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) == 0)
    return (UInt64)GetTickCount() * 10000;
  return GetTime64(userTime) + GetTime64(kernelTime);
  #endif
}
// Returns the number of GetUserTime() ticks per second.
static UInt64 GetUserFreq()
{
  #ifndef USE_POSIX_TIME
  return 10000000;
  #else
  return CLOCKS_PER_SEC;
  #endif
}
// Result code and mode flag shared by the progress callbacks of one benchmark
// run. With BENCH_MT, SetResult()/GetResult() take the critical section.
class CBenchProgressStatus
{
  #ifdef BENCH_MT
  NWindows::NSynchronization::CCriticalSection CS;
  #endif
public:
  HRESULT Res;      // most recently stored result; set by the owner before use
  bool EncodeMode;  // true while encoding, false while decoding

  // Stores a new result code.
  void SetResult(HRESULT res)
  {
    #ifdef BENCH_MT
    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    Res = res;
  }

  // Returns the currently stored result code.
  HRESULT GetResult()
  {
    #ifdef BENCH_MT
    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    const HRESULT current = Res;
    return current;
  }
};
// Progress sink handed to the coders. SetRatioInfo() forwards intermediate
// figures to `callback` (when one is set) and honours the shared Status.
class CBenchProgressInfo:
  public ICompressProgressInfo,
  public CMyUnknownImp
{
public:
  CBenchProgressStatus *Status;  // shared run status; assigned by the owner
  CBenchInfo BenchInfo;          // start times and accumulated sizes
  HRESULT Res;
  IBenchCallback *callback;      // may be 0: no intermediate reporting

  CBenchProgressInfo(): callback(0) {}

  MY_UNKNOWN_IMP

  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
};
// Records the tick frequencies and the current wall-clock / CPU time readings
// in `bi`, to serve as the start point for a later SetFinishTime().
void SetStartTime(CBenchInfo &bi)
{
  bi.UserFreq = GetUserFreq();
  bi.GlobalFreq = GetFreq();
  bi.GlobalTime = ::GetTimeCount();
  bi.UserTime = ::GetUserTime();
}
// Stores in `dest` the tick frequencies and the wall-clock / CPU time elapsed
// since the readings recorded in `biStart`.
void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
{
  dest.UserFreq = GetUserFreq();
  dest.GlobalFreq = GetFreq();
  dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
  dest.UserTime = ::GetUserTime() - biStart.UserTime;
}
// Progress notification from a coder.
//   inSize / outSize: bytes consumed / produced so far. Either pointer may be
//   NULL when the coder does not know the value; it is then counted as 0.
// Returns the shared status if it already holds an error, S_OK-equivalent
// status when no callback is attached, and otherwise the callback's result.
// A failing callback result is also stored in the shared status so that other
// workers see it.
STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
{
  HRESULT res = Status->GetResult();
  if (res != S_OK)
    return res;
  if (!callback)
    return res;

  // Guard against coders that report an unknown size as a NULL pointer.
  const UInt64 inProcessed = (inSize != NULL) ? *inSize : 0;
  const UInt64 outProcessed = (outSize != NULL) ? *outSize : 0;

  CBenchInfo info = BenchInfo;
  SetFinishTime(BenchInfo, info);
  if (Status->EncodeMode)
  {
    // Encoding: input is the unpacked data, output is the packed data.
    info.UnpackSize = inProcessed;
    info.PackSize = outProcessed;
    res = callback->SetEncodeResult(info, false);
  }
  else
  {
    // Decoding: add the current pass to the totals of the finished passes.
    info.PackSize = BenchInfo.PackSize + inProcessed;
    info.UnpackSize = BenchInfo.UnpackSize + outProcessed;
    res = callback->SetDecodeResult(info, false);
  }
  if (res != S_OK)
    Status->SetResult(res);
  return res;
}
static const int kSubBits = 8;

// Maps `size` to a fixed-point logarithm-like value: finds the first pair
// (bit, sub), with bit in [kSubBits, 32) and sub in [0, 1 << kSubBits), for
// which size <= 2^bit + sub * 2^(bit - kSubBits), and returns
// (bit << kSubBits) + sub. Returns 32 << kSubBits if no pair qualifies.
static UInt32 GetLogSize(UInt32 size)
{
  const UInt32 kNumSubSteps = (UInt32)1 << kSubBits;
  for (int bit = kSubBits; bit < 32; bit++)
  {
    const UInt32 base = ((UInt32)1) << bit;
    const int stepShift = bit - kSubBits;
    for (UInt32 sub = 0; sub < kNumSubSteps; sub++)
    {
      if (size <= base + (sub << stepShift))
        return (bit << kSubBits) + sub;
    }
  }
  return (32 << kSubBits);
}
// Halves v1 and v2 together until v1 is at most 1000000, which keeps their
// ratio roughly intact while bounding v1 for later multiplications.
static void NormalizeVals(UInt64 &v1, UInt64 &v2)
{
  for (; v1 > 1000000; v2 >>= 1)
    v1 >>= 1;
}
// Returns CPU time relative to wall-clock time, scaled by 1000000
// (i.e. 1000000 means CPU seconds == elapsed seconds).
UInt64 GetUsage(const CBenchInfo &info)
{
  UInt64 userTime = info.UserTime;
  UInt64 userFreq = info.UserFreq;
  UInt64 globalTime = info.GlobalTime;
  UInt64 globalFreq = info.GlobalFreq;

  // Bound the two factors of the product below.
  NormalizeVals(userTime, userFreq);
  NormalizeVals(globalFreq, globalTime);

  // Avoid dividing by zero.
  const UInt64 userDiv = (userFreq == 0) ? 1 : userFreq;
  const UInt64 globalDiv = (globalTime == 0) ? 1 : globalTime;

  UInt64 usage = userTime * globalFreq;
  usage *= 1000000;
  usage /= userDiv;
  usage /= globalDiv;
  return usage;
}
// Scales `rating` by wall-clock time over CPU time, i.e. divides the rating
// by the CPU usage ratio.
UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
{
  UInt64 userTime = info.UserTime;
  UInt64 userFreq = info.UserFreq;
  UInt64 globalTime = info.GlobalTime;
  UInt64 globalFreq = info.GlobalFreq;

  // Bound the two factors of the product below.
  NormalizeVals(userFreq, userTime);
  NormalizeVals(globalTime, globalFreq);

  // Avoid dividing by zero.
  const UInt64 globalDiv = (globalFreq == 0) ? 1 : globalFreq;
  const UInt64 userDiv = (userTime == 0) ? 1 : userTime;

  UInt64 result = userFreq * globalTime;
  result /= globalDiv;
  result *= rating;
  result /= userDiv;
  return result;
}
// Returns value * freq / elapsedTime, after scaling freq and elapsedTime down
// together so that freq is at most 1000000. A zero elapsed time counts as 1.
static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
{
  NormalizeVals(freq, elapsedTime);
  const UInt64 divisor = (elapsedTime == 0) ? 1 : elapsedTime;
  return value * freq / divisor;
}
// Converts a compression run (`size` bytes in `elapsedTime` ticks of `freq`
// ticks per second) into a rating: an assumed per-byte command count, which
// grows with the dictionary size, times bytes per second.
UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
{
  const UInt64 logDelta = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
  // AMD K8 variant: 1000 + ((logDelta * logDelta * 7) >> (2 * kSubBits))
  // Intel Core2 variant (in use):
  const UInt64 commandsPerByte = 870 + ((logDelta * logDelta * 5) >> (2 * kSubBits));
  const UInt64 totalCommands = (UInt64)(size) * commandsPerByte;
  return MyMultDiv64(totalCommands, elapsedTime, freq);
}
// Converts a decompression run into a rating: an assumed command count per
// packed and per unpacked byte, times the iteration count, per second.
UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
{
  // AMD K8 variant: (inSize * 216 + outSize * 14) * numIterations
  // Intel Core2 variant (in use):
  const UInt64 commandsPerPass = inSize * 220 + outSize * 8;
  const UInt64 totalCommands = commandsPerPass * numIterations;
  return MyMultDiv64(totalCommands, elapsedTime, freq);
}
#ifdef EXTERNAL_LZMA
// Pointer type for an object-creation entry point taking a class GUID, an
// interface GUID and an out-pointer for the created object.
// NOTE(review): not referenced elsewhere in the visible code - confirm it is still needed.
typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID,
const GUID *interfaceID, void **outObject);
#endif
struct CEncoderInfo;
// State for one encoder and its (up to two) decoders: the coders themselves,
// the generated input, the compressed output, the written coder properties
// and, with BENCH_MT, the threads that run them.
struct CEncoderInfo
{
#ifdef BENCH_MT
// thread[0] runs the encoder and later decoder 0; thread[1] runs decoder 1.
NWindows::CThread thread[2];
#endif
CMyComPtr<ICompressCoder> encoder;
// Raw pointers to the objects held by progressInfo[]; slot index matches the decoder index.
CBenchProgressInfo *progressInfoSpec[2];
CMyComPtr<ICompressProgressInfo> progressInfo[2];
// Number of decode passes each decoder performs in Decode().
UInt32 NumIterations;
#ifdef USE_ALLOCA
// Number of bytes the encoder thread takes from its stack with alloca before encoding.
size_t AllocaSize;
#endif
// Parameter block passed to a decoder thread.
struct CDecoderInfo
{
CEncoderInfo *Encoder;
UInt32 DecoderIndex;
#ifdef USE_ALLOCA
size_t AllocaSize;
#endif
bool CallbackMode;
};
CDecoderInfo decodersInfo[2];
CMyComPtr<ICompressCoder> decoders[2];
// Thread results: the encoder thread writes Results[0]; decoder thread j writes Results[j].
HRESULT Results[2];
// Raw pointer to the object held by outStream (receives the compressed data).
CBenchmarkOutStream *outStreamSpec;
CMyComPtr<ISequentialOutStream> outStream;
IBenchCallback *callback;
// CRC of the generated input; each decode pass is checked against it.
UInt32 crc;
// Size of the generated input in bytes (set in Init()).
UInt32 kBufferSize;
// Number of compressed bytes produced by Encode().
UInt32 compressedSize;
// Generator and owner of the uncompressed input data.
CBenchRandomGenerator rg;
// Raw pointer to the object held by propStream (receives the coder properties).
CBenchmarkOutStream *propStreamSpec;
CMyComPtr<ISequentialOutStream> propStream;
HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
HRESULT Encode();
HRESULT Decode(UInt32 decoderIndex);
CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
#ifdef BENCH_MT
// Thread entry point: runs Encode() on the CEncoderInfo passed as param,
// stores the result in Results[0] and publishes a failure through the shared status.
static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
{
CEncoderInfo *encoder = (CEncoderInfo *)param;
#ifdef USE_ALLOCA
// The returned pointer is unused; the call only moves this thread's stack pointer.
// NOTE(review): presumably done to give each thread a different stack offset - confirm.
alloca(encoder->AllocaSize);
#endif
HRESULT res = encoder->Encode();
encoder->Results[0] = res;
if (res != S_OK)
encoder->progressInfoSpec[0]->Status->SetResult(res);
return 0;
}
// Thread entry point: runs Decode() for the decoder described by the
// CDecoderInfo passed as param and stores the result in Results[DecoderIndex].
static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
{
CDecoderInfo *decoder = (CDecoderInfo *)param;
#ifdef USE_ALLOCA
// Same unused stack allocation as in EncodeThreadFunction.
alloca(decoder->AllocaSize);
#endif
CEncoderInfo *encoder = decoder->Encoder;
encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
return 0;
}
// Starts the encoder thread on thread[0].
HRESULT CreateEncoderThread()
{
return thread[0].Create(EncodeThreadFunction, this);
}
// Fills decodersInfo[index] and starts a decoder thread on thread[index].
HRESULT CreateDecoderThread(int index, bool callbackMode
#ifdef USE_ALLOCA
, size_t allocaSize
#endif
)
{
CDecoderInfo &decoder = decodersInfo[index];
decoder.DecoderIndex = index;
decoder.Encoder = this;
#ifdef USE_ALLOCA
decoder.AllocaSize = allocaSize;
#endif
decoder.CallbackMode = callbackMode;
return thread[index].Create(DecodeThreadFunction, &decoder);
}
#endif
};
// Prepares this encoder for a run:
//   - generates dictionarySize + kAdditionalSize bytes of input and its CRC,
//   - allocates the compressed-output and property buffers,
//   - passes the dictionary size and multi-thread flag to the encoder,
//   - lets the encoder write its properties into the property buffer.
// `encoder` must already hold a coder. Returns E_OUTOFMEMORY when a buffer
// cannot be allocated, E_FAIL when the encoder does not expose
// ICompressSetCoderProperties, or the error returned by the encoder.
HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
{
  rg.Set(rgLoc);
  kBufferSize = dictionarySize + kAdditionalSize;
  UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
  if (!rg.Alloc(kBufferSize))
    return E_OUTOFMEMORY;
  rg.Generate();
  crc = CrcCalc(rg.Buffer, rg.BufferSize);

  // Hand the new stream to the smart pointer before anything can fail, so it
  // is released on the early return below instead of leaking.
  outStreamSpec = new CBenchmarkOutStream;
  outStream = outStreamSpec;
  if (!outStreamSpec->Alloc(kCompressedBufferSize))
    return E_OUTOFMEMORY;

  // The property stream is created once; a repeated Init() reuses it.
  // propStreamSpec keeps pointing at the object owned by propStream.
  if (!propStream)
  {
    propStreamSpec = new CBenchmarkOutStream;
    propStream = propStreamSpec;
  }
  if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
    return E_OUTOFMEMORY;
  propStreamSpec->Init();

  PROPID propIDs[] =
  {
    NCoderPropID::kDictionarySize,
    NCoderPropID::kMultiThread
  };
  const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
  PROPVARIANT properties[kNumProps];
  properties[0].vt = VT_UI4;
  properties[0].ulVal = (UInt32)dictionarySize;
  properties[1].vt = VT_BOOL;
  properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE;

  {
    CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
    RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
    if (!setCoderProperties)
      return E_FAIL;
    RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps));

    // Writing properties is optional: without the interface the property
    // buffer simply stays empty (Pos == 0).
    CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
    encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
    if (writeCoderProperties)
    {
      RINOK(writeCoderProperties->WriteCoderProperties(propStream));
    }
  }
  return S_OK;
}
// Compresses the generated input into the output buffer, records the
// compressed size and releases the encoder. Returns the encoder's error code
// if compression fails (the encoder is then left unreleased).
HRESULT CEncoderInfo::Encode()
{
  CBenchmarkInStream *sourceSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> source = sourceSpec;
  sourceSpec->Init(rg.Buffer, rg.BufferSize);

  outStreamSpec->Init();

  RINOK(encoder->Code(source, outStream, 0, 0, progressInfo[0]));

  compressedSize = outStreamSpec->Pos;
  encoder.Release();
  return S_OK;
}
// Decompresses the encoder's output NumIterations times with decoder
// `decoderIndex`, checking the CRC of every pass against the input CRC.
// Returns E_FAIL if the decoder lacks ICompressSetDecoderProperties2, S_FALSE
// on a CRC mismatch, the decoder's error on failure, and S_OK otherwise (the
// decoder is released only in the S_OK case).
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *packedSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> packed = packedSpec;

  CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];

  CMyComPtr<ICompressSetDecoderProperties2> propsSetter;
  decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &propsSetter);
  if (!propsSetter)
    return E_FAIL;

  CCrcOutStream *crcSinkSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcSink = crcSinkSpec;

  // Totals of the finished passes; the progress callback adds the current pass.
  CBenchProgressInfo *progress = progressInfoSpec[decoderIndex];
  progress->BenchInfo.UnpackSize = 0;
  progress->BenchInfo.PackSize = 0;

  UInt32 pass = 0;
  while (pass < NumIterations)
  {
    packedSpec->Init(outStreamSpec->Buffer, compressedSize);
    crcSinkSpec->Init();

    RINOK(propsSetter->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));

    UInt64 outSize = kBufferSize;
    RINOK(decoder->Code(packed, crcSink, 0, &outSize, progressInfo[decoderIndex]));

    if (CRC_GET_DIGEST(crcSinkSpec->Crc) != crc)
      return S_FALSE;

    progress->BenchInfo.UnpackSize += kBufferSize;
    progress->BenchInfo.PackSize += compressedSize;
    pass++;
  }

  decoder.Release();
  return S_OK;
}
static const UInt32 kNumThreadsMax = (1 << 16);

// Owns a heap array of `num` CEncoderInfo objects and deletes it on destruction.
struct CBenchEncoders
{
  CEncoderInfo *encoders;

  CBenchEncoders(UInt32 num): encoders(0)
  {
    CEncoderInfo *created = new CEncoderInfo[num];
    encoders = created;
  }

  ~CBenchEncoders()
  {
    delete []encoders;
  }
};
// Runs one LZMA benchmark pass: encodes generated data with
// numEncoderThreads encoders, reports the encode result, then decodes the
// compressed data repeatedly and reports the decode result.
//   numThreads     - requested thread count; must be at least 1.
//   dictionarySize - LZMA dictionary size; must be at least 1 << kBenchMinDicLogSize.
//   callback       - receives intermediate and final results; it is
//                    dereferenced unconditionally for the final results.
// Returns E_INVALIDARG for rejected arguments, otherwise the first error
// reported by a coder, thread or callback, or S_OK.
HRESULT LzmaBench(
#ifdef EXTERNAL_LZMA
CCodecs *codecs,
#endif
UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
{
// With BENCH_MT: half of the requested threads become encoders, each paired
// with two decoders. Without it everything runs on the calling thread.
UInt32 numEncoderThreads =
#ifdef BENCH_MT
(numThreads > 1 ? numThreads / 2 : 1);
#else
1;
#endif
UInt32 numSubDecoderThreads =
#ifdef BENCH_MT
(numThreads > 1 ? 2 : 1);
#else
1;
#endif
if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
{
return E_INVALIDARG;
}
CBenchEncoders encodersSpec(numEncoderThreads);
CEncoderInfo *encoders = encodersSpec.encoders;
#ifdef EXTERNAL_LZMA
UString name = L"LZMA";
#endif
UInt32 i;
// Create the encoder and decoder objects for every encoder slot.
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
// Only the first encoder reports to the caller's callback.
encoder.callback = (i == 0) ? callback : 0;
#ifdef EXTERNAL_LZMA
RINOK(codecs->CreateCoder(name, true, encoder.encoder));
#else
encoder.encoder = new NCompress::NLZMA::CEncoder;
#endif
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
#ifdef EXTERNAL_LZMA
RINOK(codecs->CreateCoder(name, false, encoder.decoders[j]));
#else
encoder.decoders[j] = new NCompress::NLZMA::CDecoder;
#endif
}
}
// One generator feeds all encoders in turn, so each gets different input data.
CBaseRandomGenerator rg;
rg.Init();
for (i = 0; i < numEncoderThreads; i++)
{
RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
}
// Shared status for all progress callbacks of this run.
CBenchProgressStatus status;
status.Res = S_OK;
status.EncodeMode = true;
// Encoding phase.
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
for (int j = 0; j < 2; j++)
{
encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
encoder.progressInfoSpec[j]->Status = &status;
}
if (i == 0)
{
// The first encoder's progress object carries the start time and the callback.
encoder.progressInfoSpec[0]->callback = callback;
encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
}
#ifdef BENCH_MT
if (numEncoderThreads > 1)
{
#ifdef USE_ALLOCA
encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
#endif
// NOTE(review): if RINOK returns here, threads started in earlier
// iterations are not waited for before `encoders` and `status` go out of
// scope - confirm.
RINOK(encoder.CreateEncoderThread())
}
else
#endif
{
RINOK(encoder.Encode());
}
}
#ifdef BENCH_MT
if (numEncoderThreads > 1)
for (i = 0; i < numEncoderThreads; i++)
encoders[i].thread[0].Wait();
#endif
RINOK(status.Res);
// Final encode result: elapsed time plus the summed sizes of all encoders.
CBenchInfo info;
SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = 1; // progressInfoSpec->NumIterations;
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
}
RINOK(callback->SetEncodeResult(info, true));
// Decoding phase.
status.Res = S_OK;
status.EncodeMode = false;
UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
// Number of passes each decoder makes over this encoder's compressed data.
encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
if (i == 0)
{
encoder.progressInfoSpec[0]->callback = callback;
encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
}
#ifdef BENCH_MT
if (numDecoderThreads > 1)
{
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
// Only consumed when USE_ALLOCA is defined.
size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF;
HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
#ifdef USE_ALLOCA
, allocaSize
#endif
);
// NOTE(review): same early-return concern as for the encoder threads - confirm.
RINOK(res);
}
}
else
#endif
{
RINOK(encoder.Decode(0));
}
}
#ifdef BENCH_MT
// Wait for every decoder thread and keep the last non-S_OK result.
HRESULT res = S_OK;
if (numDecoderThreads > 1)
for (i = 0; i < numEncoderThreads; i++)
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
CEncoderInfo &encoder = encoders[i];
encoder.thread[j].Wait();
if (encoder.Results[j] != S_OK)
res = encoder.Results[j];
}
RINOK(res);
#endif
RINOK(status.Res);
// Final decode result: elapsed time, summed per-pass sizes and the iteration count.
SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
}
// The same figures are reported twice: first as non-final, then as final.
RINOK(callback->SetDecodeResult(info, false));
RINOK(callback->SetDecodeResult(info, true));
return S_OK;
}
// Returns a byte-count estimate derived from the dictionary size, with an
// extra 6 MiB added when multiThread is set.
// NOTE(review): presumably models the LZMA encoder's memory use (hash table,
// match-finder arrays, window) - confirm against the encoder.
inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
{
  // Smear the high bits of (dictionary - 1) downwards, as far as 8 positions.
  UInt32 hs = dictionary - 1;
  for (int shift = 1; shift <= 8; shift <<= 1)
    hs |= (hs >> shift);
  hs = (hs >> 1) | 0xFFFF;
  if (hs > (1 << 24))
    hs >>= 1;
  hs++;

  const UInt32 hashItems = hs + (1 << 16);
  UInt64 total = (hashItems + (UInt64)dictionary * 2) * 4;
  total += (UInt64)dictionary * 3 / 2;
  total += (1 << 20);
  if (multiThread)
    total += (6 << 20);
  return total;
}
// Estimates the memory needed by a benchmark run: per encoder slot, the input
// buffer, the compressed buffer (half the input), the GetLZMAUsage() estimate
// and 2 MiB extra; multiplied by the number of encoder slots
// (numThreads / 2 when numThreads > 1, otherwise numThreads).
UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
{
  // Kept as 64-bit values so that their sum cannot wrap around for
  // dictionaries close to the 32-bit limit.
  const UInt64 kBufferSize = dictionary;
  const UInt64 kCompressedBufferSize = (kBufferSize / 2);
  UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
  UInt32 numBigThreads = numThreads / numSubThreads;
  return (kBufferSize + kCompressedBufferSize +
      GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
}
static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
{
for (UInt32 i = 0; i < numCycles; i++)
if (CrcCalc(data, size) != crcBase)
return false;
return true;
}
#ifdef BENCH_MT
// Work item of one CRC benchmark thread: the block to check, the number of
// passes, the expected CRC and the outcome.
struct CCrcInfo
{
  NWindows::CThread Thread;
  const Byte *Data;   // block to checksum (not owned)
  UInt32 Size;        // size of the block in bytes
  UInt32 NumCycles;   // number of CRC passes to run
  UInt32 Crc;         // expected CRC of the block
  bool Res;           // written by the thread: true if every pass matched

  // Blocks until the thread has finished, then closes it.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
// Thread entry point: runs the CRC passes described by the CCrcInfo passed
// as `param` and stores the outcome in its Res field.
static THREAD_FUNC_DECL CrcThreadFunction(void *param)
{
  CCrcInfo *item = (CCrcInfo *)param;
  const bool allMatched = CrcBig(item->Data, item->Size, item->NumCycles, item->Crc);
  item->Res = allMatched;
  return 0;
}
struct CCrcThreads
{
UInt32 NumThreads;
CCrcInfo *Items;
CCrcThreads(): Items(0), NumThreads(0) {}
void WaitAll()
{
for (UInt32 i = 0; i < NumThreads; i++)
Items[i].Wait();
NumThreads = 0;
}
~CCrcThreads()
{
WaitAll();
delete []Items;
}
};
#endif
// Reference CRC implementation: processes the block one byte at a time with
// CRC_UPDATE_BYTE and returns the finalized digest.
static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
{
  UInt32 crc = CRC_INIT_VAL;
  const Byte *end = buf + size;
  for (const Byte *p = buf; p != end; p++)
    crc = CRC_UPDATE_BYTE(crc, *p);
  return CRC_GET_DIGEST(crc);
}
// Fills buf[0 .. size) with the low byte of successive values from RG.
static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
{
  Byte *end = buf + size;
  for (Byte *p = buf; p != end; p++)
    *p = (Byte)RG.GetRnd();
}
// Fills buf[0 .. size) with random bytes from RG and returns the CRC of the
// generated data, computed with the byte-wise reference implementation.
static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
{
  RandGen(buf, size, RG);
  const UInt32 digest = CrcCalc1(buf, size);
  return digest;
}
bool CrcInternalTest()
{
CBenchBuffer buffer;
const UInt32 kBufferSize0 = (1 << 8);
const UInt32 kBufferSize1 = (1 << 10);
const UInt32 kCheckSize = (1 << 5);
if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
return false;
Byte *buf = buffer.Buffer;
UInt32 i;
for (i = 0; i < kBufferSize0; i++)
buf[i] = (Byte)i;
UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
if (crc1 != 0x29058C73)
return false;
CBaseRandomGenerator RG;
RandGen(buf + kBufferSize0, kBufferSize1, RG);
for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
for (UInt32 j = 0; j < kCheckSize; j++)
if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
return false;
return true;
}
// Measures CRC throughput.
//   numThreads - number of worker threads (0 is treated as 1). Builds without
//                BENCH_MT always use a single thread.
//   bufferSize - size in bytes of the random block each thread checksums.
//   speed      - receives the measured throughput in bytes per second.
// Returns E_OUTOFMEMORY if the buffers cannot be sized or allocated, S_FALSE
// if a CRC pass yields an unexpected value, a thread-creation error, or S_OK.
HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
{
  if (numThreads == 0)
    numThreads = 1;
  #ifndef BENCH_MT
  // Only one buffer is processed in a single-threaded build; counting
  // numThreads buffers below would overstate the measured speed.
  numThreads = 1;
  #endif
  CBenchBuffer buffer;
  size_t totalSize = (size_t)bufferSize * numThreads;
  // Detect overflow of the multiplication above.
  if (totalSize / numThreads != bufferSize)
    return E_OUTOFMEMORY;
  if (!buffer.Alloc(totalSize))
    return E_OUTOFMEMORY;
  Byte *buf = buffer.Buffer;
  CBaseRandomGenerator RG;
  // Fewer passes for bigger buffers, so the total work stays in the same range.
  UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1;
  UInt64 timeVal;
  #ifdef BENCH_MT
  // Waits for all started threads on every exit path before Items is deleted.
  CCrcThreads threads;
  if (numThreads > 1)
  {
    threads.Items = new CCrcInfo[numThreads];
    UInt32 i;
    // Give every thread its own slice of the buffer, filled with random data.
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &info = threads.Items[i];
      Byte *data = buf + (size_t)bufferSize * i;
      info.Data = data;
      info.NumCycles = numCycles;
      info.Size = bufferSize;
      info.Crc = RandGenCrc(data, bufferSize, RG);
    }
    timeVal = GetTimeCount();
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &info = threads.Items[i];
      RINOK(info.Thread.Create(CrcThreadFunction, &info));
      threads.NumThreads++;
    }
    threads.WaitAll();
    for (i = 0; i < numThreads; i++)
      if (!threads.Items[i].Res)
        return S_FALSE;
  }
  else
  #endif
  {
    UInt32 crc = RandGenCrc(buf, bufferSize, RG);
    timeVal = GetTimeCount();
    if (!CrcBig(buf, bufferSize, numCycles, crc))
      return S_FALSE;
  }
  timeVal = GetTimeCount() - timeVal;
  if (timeVal == 0)
    timeVal = 1;
  // Total number of bytes checksummed by all threads.
  UInt64 size = (UInt64)numCycles * totalSize;
  speed = MyMultDiv64(size, timeVal, GetFreq());
  return S_OK;
}