|
1 // LzmaBench.cpp |
|
2 |
|
3 #include "StdAfx.h" |
|
4 |
|
5 #include "LzmaBench.h" |
|
6 |
|
7 #ifndef _WIN32 |
|
8 #define USE_POSIX_TIME |
|
9 #define USE_POSIX_TIME2 |
|
10 #endif |
|
11 |
|
12 #ifdef USE_POSIX_TIME |
|
13 #include <time.h> |
|
14 #ifdef USE_POSIX_TIME2 |
|
15 #include <sys/time.h> |
|
16 #endif |
|
17 #endif |
|
18 |
|
19 #ifdef _WIN32 |
|
20 #define USE_ALLOCA |
|
21 #endif |
|
22 |
|
23 #ifdef USE_ALLOCA |
|
24 #ifdef _WIN32 |
|
25 #include <malloc.h> |
|
26 #else |
|
27 #include <stdlib.h> |
|
28 #endif |
|
29 #endif |
|
30 |
|
31 extern "C" |
|
32 { |
|
33 #include "../../../../C/Alloc.h" |
|
34 #include "../../../../C/7zCrc.h" |
|
35 } |
|
36 #include "../../../Common/MyCom.h" |
|
37 #include "../../ICoder.h" |
|
38 |
|
39 #ifdef BENCH_MT |
|
40 #include "../../../Windows/Thread.h" |
|
41 #include "../../../Windows/Synchronization.h" |
|
42 #endif |
|
43 |
|
44 #ifdef EXTERNAL_LZMA |
|
45 #include "../../../Windows/PropVariant.h" |
|
46 #else |
|
47 #include "../LZMA/LZMADecoder.h" |
|
48 #include "../LZMA/LZMAEncoder.h" |
|
49 #endif |
|
50 |
|
51 static const UInt32 kUncompressMinBlockSize = 1 << 26; |
|
52 static const UInt32 kAdditionalSize = (1 << 16); |
|
53 static const UInt32 kCompressedAdditionalSize = (1 << 10); |
|
54 static const UInt32 kMaxLzmaPropSize = 5; |
|
55 |
|
56 class CBaseRandomGenerator |
|
57 { |
|
58 UInt32 A1; |
|
59 UInt32 A2; |
|
60 public: |
|
61 CBaseRandomGenerator() { Init(); } |
|
62 void Init() { A1 = 362436069; A2 = 521288629;} |
|
63 UInt32 GetRnd() |
|
64 { |
|
65 return |
|
66 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + |
|
67 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ); |
|
68 } |
|
69 }; |
|
70 |
|
71 class CBenchBuffer |
|
72 { |
|
73 public: |
|
74 size_t BufferSize; |
|
75 Byte *Buffer; |
|
76 CBenchBuffer(): Buffer(0) {} |
|
77 virtual ~CBenchBuffer() { Free(); } |
|
78 void Free() |
|
79 { |
|
80 ::MidFree(Buffer); |
|
81 Buffer = 0; |
|
82 } |
|
83 bool Alloc(size_t bufferSize) |
|
84 { |
|
85 if (Buffer != 0 && BufferSize == bufferSize) |
|
86 return true; |
|
87 Free(); |
|
88 Buffer = (Byte *)::MidAlloc(bufferSize); |
|
89 BufferSize = bufferSize; |
|
90 return (Buffer != 0); |
|
91 } |
|
92 }; |
|
93 |
|
94 class CBenchRandomGenerator: public CBenchBuffer |
|
95 { |
|
96 CBaseRandomGenerator *RG; |
|
97 public: |
|
98 void Set(CBaseRandomGenerator *rg) { RG = rg; } |
|
99 UInt32 GetVal(UInt32 &res, int numBits) |
|
100 { |
|
101 UInt32 val = res & (((UInt32)1 << numBits) - 1); |
|
102 res >>= numBits; |
|
103 return val; |
|
104 } |
|
105 UInt32 GetLen(UInt32 &res) |
|
106 { |
|
107 UInt32 len = GetVal(res, 2); |
|
108 return GetVal(res, 1 + len); |
|
109 } |
|
110 void Generate() |
|
111 { |
|
112 UInt32 pos = 0; |
|
113 UInt32 rep0 = 1; |
|
114 while (pos < BufferSize) |
|
115 { |
|
116 UInt32 res = RG->GetRnd(); |
|
117 res >>= 1; |
|
118 if (GetVal(res, 1) == 0 || pos < 1024) |
|
119 Buffer[pos++] = (Byte)(res & 0xFF); |
|
120 else |
|
121 { |
|
122 UInt32 len; |
|
123 len = 1 + GetLen(res); |
|
124 if (GetVal(res, 3) != 0) |
|
125 { |
|
126 len += GetLen(res); |
|
127 do |
|
128 { |
|
129 UInt32 ppp = GetVal(res, 5) + 6; |
|
130 res = RG->GetRnd(); |
|
131 if (ppp > 30) |
|
132 continue; |
|
133 rep0 = /* (1 << ppp) +*/ GetVal(res, ppp); |
|
134 res = RG->GetRnd(); |
|
135 } |
|
136 while (rep0 >= pos); |
|
137 rep0++; |
|
138 } |
|
139 |
|
140 for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++) |
|
141 Buffer[pos] = Buffer[pos - rep0]; |
|
142 } |
|
143 } |
|
144 } |
|
145 }; |
|
146 |
|
147 |
|
148 class CBenchmarkInStream: |
|
149 public ISequentialInStream, |
|
150 public CMyUnknownImp |
|
151 { |
|
152 const Byte *Data; |
|
153 size_t Pos; |
|
154 size_t Size; |
|
155 public: |
|
156 MY_UNKNOWN_IMP |
|
157 void Init(const Byte *data, size_t size) |
|
158 { |
|
159 Data = data; |
|
160 Size = size; |
|
161 Pos = 0; |
|
162 } |
|
163 STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize); |
|
164 }; |
|
165 |
|
166 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize) |
|
167 { |
|
168 size_t remain = Size - Pos; |
|
169 UInt32 kMaxBlockSize = (1 << 20); |
|
170 if (size > kMaxBlockSize) |
|
171 size = kMaxBlockSize; |
|
172 if (size > remain) |
|
173 size = (UInt32)remain; |
|
174 for (UInt32 i = 0; i < size; i++) |
|
175 ((Byte *)data)[i] = Data[Pos + i]; |
|
176 Pos += size; |
|
177 if(processedSize != NULL) |
|
178 *processedSize = size; |
|
179 return S_OK; |
|
180 } |
|
181 |
|
182 class CBenchmarkOutStream: |
|
183 public ISequentialOutStream, |
|
184 public CBenchBuffer, |
|
185 public CMyUnknownImp |
|
186 { |
|
187 // bool _overflow; |
|
188 public: |
|
189 UInt32 Pos; |
|
190 // CBenchmarkOutStream(): _overflow(false) {} |
|
191 void Init() |
|
192 { |
|
193 // _overflow = false; |
|
194 Pos = 0; |
|
195 } |
|
196 MY_UNKNOWN_IMP |
|
197 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); |
|
198 }; |
|
199 |
|
200 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) |
|
201 { |
|
202 size_t curSize = BufferSize - Pos; |
|
203 if (curSize > size) |
|
204 curSize = size; |
|
205 memcpy(Buffer + Pos, data, curSize); |
|
206 Pos += (UInt32)curSize; |
|
207 if(processedSize != NULL) |
|
208 *processedSize = (UInt32)curSize; |
|
209 if (curSize != size) |
|
210 { |
|
211 // _overflow = true; |
|
212 return E_FAIL; |
|
213 } |
|
214 return S_OK; |
|
215 } |
|
216 |
|
217 class CCrcOutStream: |
|
218 public ISequentialOutStream, |
|
219 public CMyUnknownImp |
|
220 { |
|
221 public: |
|
222 UInt32 Crc; |
|
223 MY_UNKNOWN_IMP |
|
224 void Init() { Crc = CRC_INIT_VAL; } |
|
225 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); |
|
226 }; |
|
227 |
|
228 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) |
|
229 { |
|
230 Crc = CrcUpdate(Crc, data, size); |
|
231 if (processedSize != NULL) |
|
232 *processedSize = size; |
|
233 return S_OK; |
|
234 } |
|
235 |
|
236 static UInt64 GetTimeCount() |
|
237 { |
|
238 #ifdef USE_POSIX_TIME |
|
239 #ifdef USE_POSIX_TIME2 |
|
240 timeval v; |
|
241 if (gettimeofday(&v, 0) == 0) |
|
242 return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec; |
|
243 return (UInt64)time(NULL) * 1000000; |
|
244 #else |
|
245 return time(NULL); |
|
246 #endif |
|
247 #else |
|
248 /* |
|
249 LARGE_INTEGER value; |
|
250 if (::QueryPerformanceCounter(&value)) |
|
251 return value.QuadPart; |
|
252 */ |
|
253 return GetTickCount(); |
|
254 #endif |
|
255 } |
|
256 |
|
257 static UInt64 GetFreq() |
|
258 { |
|
259 #ifdef USE_POSIX_TIME |
|
260 #ifdef USE_POSIX_TIME2 |
|
261 return 1000000; |
|
262 #else |
|
263 return 1; |
|
264 #endif |
|
265 #else |
|
266 /* |
|
267 LARGE_INTEGER value; |
|
268 if (::QueryPerformanceFrequency(&value)) |
|
269 return value.QuadPart; |
|
270 */ |
|
271 return 1000; |
|
272 #endif |
|
273 } |
|
274 |
|
275 #ifndef USE_POSIX_TIME |
|
276 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } |
|
277 #endif |
|
278 static UInt64 GetUserTime() |
|
279 { |
|
280 #ifdef USE_POSIX_TIME |
|
281 return clock(); |
|
282 #else |
|
283 FILETIME creationTime, exitTime, kernelTime, userTime; |
|
284 if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0) |
|
285 return GetTime64(userTime) + GetTime64(kernelTime); |
|
286 return (UInt64)GetTickCount() * 10000; |
|
287 #endif |
|
288 } |
|
289 |
|
290 static UInt64 GetUserFreq() |
|
291 { |
|
292 #ifdef USE_POSIX_TIME |
|
293 return CLOCKS_PER_SEC; |
|
294 #else |
|
295 return 10000000; |
|
296 #endif |
|
297 } |
|
298 |
|
299 class CBenchProgressStatus |
|
300 { |
|
301 #ifdef BENCH_MT |
|
302 NWindows::NSynchronization::CCriticalSection CS; |
|
303 #endif |
|
304 public: |
|
305 HRESULT Res; |
|
306 bool EncodeMode; |
|
307 void SetResult(HRESULT res) |
|
308 { |
|
309 #ifdef BENCH_MT |
|
310 NWindows::NSynchronization::CCriticalSectionLock lock(CS); |
|
311 #endif |
|
312 Res = res; |
|
313 } |
|
314 HRESULT GetResult() |
|
315 { |
|
316 #ifdef BENCH_MT |
|
317 NWindows::NSynchronization::CCriticalSectionLock lock(CS); |
|
318 #endif |
|
319 return Res; |
|
320 } |
|
321 }; |
|
322 |
|
323 class CBenchProgressInfo: |
|
324 public ICompressProgressInfo, |
|
325 public CMyUnknownImp |
|
326 { |
|
327 public: |
|
328 CBenchProgressStatus *Status; |
|
329 CBenchInfo BenchInfo; |
|
330 HRESULT Res; |
|
331 IBenchCallback *callback; |
|
332 CBenchProgressInfo(): callback(0) {} |
|
333 MY_UNKNOWN_IMP |
|
334 STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize); |
|
335 }; |
|
336 |
|
337 void SetStartTime(CBenchInfo &bi) |
|
338 { |
|
339 bi.GlobalFreq = GetFreq(); |
|
340 bi.UserFreq = GetUserFreq(); |
|
341 bi.GlobalTime = ::GetTimeCount(); |
|
342 bi.UserTime = ::GetUserTime(); |
|
343 } |
|
344 |
|
345 void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest) |
|
346 { |
|
347 dest.GlobalFreq = GetFreq(); |
|
348 dest.UserFreq = GetUserFreq(); |
|
349 dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime; |
|
350 dest.UserTime = ::GetUserTime() - biStart.UserTime; |
|
351 } |
|
352 |
|
353 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize) |
|
354 { |
|
355 HRESULT res = Status->GetResult(); |
|
356 if (res != S_OK) |
|
357 return res; |
|
358 if (!callback) |
|
359 return res; |
|
360 CBenchInfo info = BenchInfo; |
|
361 SetFinishTime(BenchInfo, info); |
|
362 if (Status->EncodeMode) |
|
363 { |
|
364 info.UnpackSize = *inSize; |
|
365 info.PackSize = *outSize; |
|
366 res = callback->SetEncodeResult(info, false); |
|
367 } |
|
368 else |
|
369 { |
|
370 info.PackSize = BenchInfo.PackSize + *inSize; |
|
371 info.UnpackSize = BenchInfo.UnpackSize + *outSize; |
|
372 res = callback->SetDecodeResult(info, false); |
|
373 } |
|
374 if (res != S_OK) |
|
375 Status->SetResult(res); |
|
376 return res; |
|
377 } |
|
378 |
|
379 static const int kSubBits = 8; |
|
380 |
|
381 static UInt32 GetLogSize(UInt32 size) |
|
382 { |
|
383 for (int i = kSubBits; i < 32; i++) |
|
384 for (UInt32 j = 0; j < (1 << kSubBits); j++) |
|
385 if (size <= (((UInt32)1) << i) + (j << (i - kSubBits))) |
|
386 return (i << kSubBits) + j; |
|
387 return (32 << kSubBits); |
|
388 } |
|
389 |
|
390 static void NormalizeVals(UInt64 &v1, UInt64 &v2) |
|
391 { |
|
392 while (v1 > 1000000) |
|
393 { |
|
394 v1 >>= 1; |
|
395 v2 >>= 1; |
|
396 } |
|
397 } |
|
398 |
|
399 UInt64 GetUsage(const CBenchInfo &info) |
|
400 { |
|
401 UInt64 userTime = info.UserTime; |
|
402 UInt64 userFreq = info.UserFreq; |
|
403 UInt64 globalTime = info.GlobalTime; |
|
404 UInt64 globalFreq = info.GlobalFreq; |
|
405 NormalizeVals(userTime, userFreq); |
|
406 NormalizeVals(globalFreq, globalTime); |
|
407 if (userFreq == 0) |
|
408 userFreq = 1; |
|
409 if (globalTime == 0) |
|
410 globalTime = 1; |
|
411 return userTime * globalFreq * 1000000 / userFreq / globalTime; |
|
412 } |
|
413 |
|
414 UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating) |
|
415 { |
|
416 UInt64 userTime = info.UserTime; |
|
417 UInt64 userFreq = info.UserFreq; |
|
418 UInt64 globalTime = info.GlobalTime; |
|
419 UInt64 globalFreq = info.GlobalFreq; |
|
420 NormalizeVals(userFreq, userTime); |
|
421 NormalizeVals(globalTime, globalFreq); |
|
422 if (globalFreq == 0) |
|
423 globalFreq = 1; |
|
424 if (userTime == 0) |
|
425 userTime = 1; |
|
426 return userFreq * globalTime / globalFreq * rating / userTime; |
|
427 } |
|
428 |
|
429 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq) |
|
430 { |
|
431 UInt64 elTime = elapsedTime; |
|
432 NormalizeVals(freq, elTime); |
|
433 if (elTime == 0) |
|
434 elTime = 1; |
|
435 return value * freq / elTime; |
|
436 } |
|
437 |
|
438 UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size) |
|
439 { |
|
440 UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits); |
|
441 // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8 |
|
442 UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2 |
|
443 |
|
444 UInt64 numCommands = (UInt64)(size) * numCommandsForOne; |
|
445 return MyMultDiv64(numCommands, elapsedTime, freq); |
|
446 } |
|
447 |
|
448 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations) |
|
449 { |
|
450 // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8 |
|
451 UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2 |
|
452 return MyMultDiv64(numCommands, elapsedTime, freq); |
|
453 } |
|
454 |
|
#ifdef EXTERNAL_LZMA
// Pointer to a WINAPI factory function that takes a class ID and an interface
// ID and returns the created object through outObject.
typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID, const GUID *interfaceID, void **outObject);
#endif
|
459 |
|
460 struct CEncoderInfo; |
|
461 |
|
462 struct CEncoderInfo |
|
463 { |
|
464 #ifdef BENCH_MT |
|
465 NWindows::CThread thread[2]; |
|
466 #endif |
|
467 CMyComPtr<ICompressCoder> encoder; |
|
468 CBenchProgressInfo *progressInfoSpec[2]; |
|
469 CMyComPtr<ICompressProgressInfo> progressInfo[2]; |
|
470 UInt32 NumIterations; |
|
471 #ifdef USE_ALLOCA |
|
472 size_t AllocaSize; |
|
473 #endif |
|
474 |
|
475 struct CDecoderInfo |
|
476 { |
|
477 CEncoderInfo *Encoder; |
|
478 UInt32 DecoderIndex; |
|
479 #ifdef USE_ALLOCA |
|
480 size_t AllocaSize; |
|
481 #endif |
|
482 bool CallbackMode; |
|
483 }; |
|
484 CDecoderInfo decodersInfo[2]; |
|
485 |
|
486 CMyComPtr<ICompressCoder> decoders[2]; |
|
487 HRESULT Results[2]; |
|
488 CBenchmarkOutStream *outStreamSpec; |
|
489 CMyComPtr<ISequentialOutStream> outStream; |
|
490 IBenchCallback *callback; |
|
491 UInt32 crc; |
|
492 UInt32 kBufferSize; |
|
493 UInt32 compressedSize; |
|
494 CBenchRandomGenerator rg; |
|
495 CBenchmarkOutStream *propStreamSpec; |
|
496 CMyComPtr<ISequentialOutStream> propStream; |
|
497 HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg); |
|
498 HRESULT Encode(); |
|
499 HRESULT Decode(UInt32 decoderIndex); |
|
500 |
|
501 CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {} |
|
502 |
|
503 #ifdef BENCH_MT |
|
504 static THREAD_FUNC_DECL EncodeThreadFunction(void *param) |
|
505 { |
|
506 CEncoderInfo *encoder = (CEncoderInfo *)param; |
|
507 #ifdef USE_ALLOCA |
|
508 alloca(encoder->AllocaSize); |
|
509 #endif |
|
510 HRESULT res = encoder->Encode(); |
|
511 encoder->Results[0] = res; |
|
512 if (res != S_OK) |
|
513 encoder->progressInfoSpec[0]->Status->SetResult(res); |
|
514 |
|
515 return 0; |
|
516 } |
|
517 static THREAD_FUNC_DECL DecodeThreadFunction(void *param) |
|
518 { |
|
519 CDecoderInfo *decoder = (CDecoderInfo *)param; |
|
520 #ifdef USE_ALLOCA |
|
521 alloca(decoder->AllocaSize); |
|
522 #endif |
|
523 CEncoderInfo *encoder = decoder->Encoder; |
|
524 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); |
|
525 return 0; |
|
526 } |
|
527 |
|
528 HRESULT CreateEncoderThread() |
|
529 { |
|
530 return thread[0].Create(EncodeThreadFunction, this); |
|
531 } |
|
532 |
|
533 HRESULT CreateDecoderThread(int index, bool callbackMode |
|
534 #ifdef USE_ALLOCA |
|
535 , size_t allocaSize |
|
536 #endif |
|
537 ) |
|
538 { |
|
539 CDecoderInfo &decoder = decodersInfo[index]; |
|
540 decoder.DecoderIndex = index; |
|
541 decoder.Encoder = this; |
|
542 #ifdef USE_ALLOCA |
|
543 decoder.AllocaSize = allocaSize; |
|
544 #endif |
|
545 decoder.CallbackMode = callbackMode; |
|
546 return thread[index].Create(DecodeThreadFunction, &decoder); |
|
547 } |
|
548 #endif |
|
549 }; |
|
550 |
|
551 HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc) |
|
552 { |
|
553 rg.Set(rgLoc); |
|
554 kBufferSize = dictionarySize + kAdditionalSize; |
|
555 UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize; |
|
556 if (!rg.Alloc(kBufferSize)) |
|
557 return E_OUTOFMEMORY; |
|
558 rg.Generate(); |
|
559 crc = CrcCalc(rg.Buffer, rg.BufferSize); |
|
560 |
|
561 outStreamSpec = new CBenchmarkOutStream; |
|
562 if (!outStreamSpec->Alloc(kCompressedBufferSize)) |
|
563 return E_OUTOFMEMORY; |
|
564 |
|
565 outStream = outStreamSpec; |
|
566 |
|
567 propStreamSpec = 0; |
|
568 if (!propStream) |
|
569 { |
|
570 propStreamSpec = new CBenchmarkOutStream; |
|
571 propStream = propStreamSpec; |
|
572 } |
|
573 if (!propStreamSpec->Alloc(kMaxLzmaPropSize)) |
|
574 return E_OUTOFMEMORY; |
|
575 propStreamSpec->Init(); |
|
576 |
|
577 PROPID propIDs[] = |
|
578 { |
|
579 NCoderPropID::kDictionarySize, |
|
580 NCoderPropID::kMultiThread |
|
581 }; |
|
582 const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]); |
|
583 PROPVARIANT properties[kNumProps]; |
|
584 properties[0].vt = VT_UI4; |
|
585 properties[0].ulVal = (UInt32)dictionarySize; |
|
586 |
|
587 properties[1].vt = VT_BOOL; |
|
588 properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE; |
|
589 |
|
590 { |
|
591 CMyComPtr<ICompressSetCoderProperties> setCoderProperties; |
|
592 RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties)); |
|
593 if (!setCoderProperties) |
|
594 return E_FAIL; |
|
595 RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps)); |
|
596 |
|
597 CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties; |
|
598 encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties); |
|
599 if (writeCoderProperties) |
|
600 { |
|
601 RINOK(writeCoderProperties->WriteCoderProperties(propStream)); |
|
602 } |
|
603 } |
|
604 return S_OK; |
|
605 } |
|
606 |
|
607 HRESULT CEncoderInfo::Encode() |
|
608 { |
|
609 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; |
|
610 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; |
|
611 inStreamSpec->Init(rg.Buffer, rg.BufferSize); |
|
612 outStreamSpec->Init(); |
|
613 |
|
614 RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0])); |
|
615 compressedSize = outStreamSpec->Pos; |
|
616 encoder.Release(); |
|
617 return S_OK; |
|
618 } |
|
619 |
|
620 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) |
|
621 { |
|
622 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; |
|
623 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; |
|
624 CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex]; |
|
625 |
|
626 CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties; |
|
627 decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties); |
|
628 if (!compressSetDecoderProperties) |
|
629 return E_FAIL; |
|
630 |
|
631 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream; |
|
632 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec; |
|
633 |
|
634 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex]; |
|
635 pi->BenchInfo.UnpackSize = 0; |
|
636 pi->BenchInfo.PackSize = 0; |
|
637 |
|
638 for (UInt32 j = 0; j < NumIterations; j++) |
|
639 { |
|
640 inStreamSpec->Init(outStreamSpec->Buffer, compressedSize); |
|
641 crcOutStreamSpec->Init(); |
|
642 |
|
643 RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos)); |
|
644 UInt64 outSize = kBufferSize; |
|
645 RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex])); |
|
646 if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc) |
|
647 return S_FALSE; |
|
648 pi->BenchInfo.UnpackSize += kBufferSize; |
|
649 pi->BenchInfo.PackSize += compressedSize; |
|
650 } |
|
651 decoder.Release(); |
|
652 return S_OK; |
|
653 } |
|
654 |
|
655 static const UInt32 kNumThreadsMax = (1 << 16); |
|
656 |
|
657 struct CBenchEncoders |
|
658 { |
|
659 CEncoderInfo *encoders; |
|
660 CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; } |
|
661 ~CBenchEncoders() { delete []encoders; } |
|
662 }; |
|
663 |
|
664 HRESULT LzmaBench( |
|
665 #ifdef EXTERNAL_LZMA |
|
666 CCodecs *codecs, |
|
667 #endif |
|
668 UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback) |
|
669 { |
|
670 UInt32 numEncoderThreads = |
|
671 #ifdef BENCH_MT |
|
672 (numThreads > 1 ? numThreads / 2 : 1); |
|
673 #else |
|
674 1; |
|
675 #endif |
|
676 UInt32 numSubDecoderThreads = |
|
677 #ifdef BENCH_MT |
|
678 (numThreads > 1 ? 2 : 1); |
|
679 #else |
|
680 1; |
|
681 #endif |
|
682 if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax) |
|
683 { |
|
684 return E_INVALIDARG; |
|
685 } |
|
686 |
|
687 CBenchEncoders encodersSpec(numEncoderThreads); |
|
688 CEncoderInfo *encoders = encodersSpec.encoders; |
|
689 |
|
690 #ifdef EXTERNAL_LZMA |
|
691 UString name = L"LZMA"; |
|
692 #endif |
|
693 |
|
694 UInt32 i; |
|
695 for (i = 0; i < numEncoderThreads; i++) |
|
696 { |
|
697 CEncoderInfo &encoder = encoders[i]; |
|
698 encoder.callback = (i == 0) ? callback : 0; |
|
699 |
|
700 #ifdef EXTERNAL_LZMA |
|
701 RINOK(codecs->CreateCoder(name, true, encoder.encoder)); |
|
702 #else |
|
703 encoder.encoder = new NCompress::NLZMA::CEncoder; |
|
704 #endif |
|
705 for (UInt32 j = 0; j < numSubDecoderThreads; j++) |
|
706 { |
|
707 #ifdef EXTERNAL_LZMA |
|
708 RINOK(codecs->CreateCoder(name, false, encoder.decoders[j])); |
|
709 #else |
|
710 encoder.decoders[j] = new NCompress::NLZMA::CDecoder; |
|
711 #endif |
|
712 } |
|
713 } |
|
714 |
|
715 CBaseRandomGenerator rg; |
|
716 rg.Init(); |
|
717 for (i = 0; i < numEncoderThreads; i++) |
|
718 { |
|
719 RINOK(encoders[i].Init(dictionarySize, numThreads, &rg)); |
|
720 } |
|
721 |
|
722 CBenchProgressStatus status; |
|
723 status.Res = S_OK; |
|
724 status.EncodeMode = true; |
|
725 |
|
726 for (i = 0; i < numEncoderThreads; i++) |
|
727 { |
|
728 CEncoderInfo &encoder = encoders[i]; |
|
729 for (int j = 0; j < 2; j++) |
|
730 { |
|
731 encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo; |
|
732 encoder.progressInfoSpec[j]->Status = &status; |
|
733 } |
|
734 if (i == 0) |
|
735 { |
|
736 encoder.progressInfoSpec[0]->callback = callback; |
|
737 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads; |
|
738 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); |
|
739 } |
|
740 |
|
741 #ifdef BENCH_MT |
|
742 if (numEncoderThreads > 1) |
|
743 { |
|
744 #ifdef USE_ALLOCA |
|
745 encoder.AllocaSize = (i * 16 * 21) & 0x7FF; |
|
746 #endif |
|
747 RINOK(encoder.CreateEncoderThread()) |
|
748 } |
|
749 else |
|
750 #endif |
|
751 { |
|
752 RINOK(encoder.Encode()); |
|
753 } |
|
754 } |
|
755 #ifdef BENCH_MT |
|
756 if (numEncoderThreads > 1) |
|
757 for (i = 0; i < numEncoderThreads; i++) |
|
758 encoders[i].thread[0].Wait(); |
|
759 #endif |
|
760 |
|
761 RINOK(status.Res); |
|
762 |
|
763 CBenchInfo info; |
|
764 |
|
765 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); |
|
766 info.UnpackSize = 0; |
|
767 info.PackSize = 0; |
|
768 info.NumIterations = 1; // progressInfoSpec->NumIterations; |
|
769 for (i = 0; i < numEncoderThreads; i++) |
|
770 { |
|
771 CEncoderInfo &encoder = encoders[i]; |
|
772 info.UnpackSize += encoder.kBufferSize; |
|
773 info.PackSize += encoder.compressedSize; |
|
774 } |
|
775 RINOK(callback->SetEncodeResult(info, true)); |
|
776 |
|
777 |
|
778 status.Res = S_OK; |
|
779 status.EncodeMode = false; |
|
780 |
|
781 UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads; |
|
782 for (i = 0; i < numEncoderThreads; i++) |
|
783 { |
|
784 CEncoderInfo &encoder = encoders[i]; |
|
785 encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize; |
|
786 |
|
787 if (i == 0) |
|
788 { |
|
789 encoder.progressInfoSpec[0]->callback = callback; |
|
790 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads; |
|
791 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); |
|
792 } |
|
793 |
|
794 #ifdef BENCH_MT |
|
795 if (numDecoderThreads > 1) |
|
796 { |
|
797 for (UInt32 j = 0; j < numSubDecoderThreads; j++) |
|
798 { |
|
799 size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF; |
|
800 HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0) |
|
801 #ifdef USE_ALLOCA |
|
802 , allocaSize |
|
803 #endif |
|
804 ); |
|
805 RINOK(res); |
|
806 } |
|
807 } |
|
808 else |
|
809 #endif |
|
810 { |
|
811 RINOK(encoder.Decode(0)); |
|
812 } |
|
813 } |
|
814 #ifdef BENCH_MT |
|
815 HRESULT res = S_OK; |
|
816 if (numDecoderThreads > 1) |
|
817 for (i = 0; i < numEncoderThreads; i++) |
|
818 for (UInt32 j = 0; j < numSubDecoderThreads; j++) |
|
819 { |
|
820 CEncoderInfo &encoder = encoders[i]; |
|
821 encoder.thread[j].Wait(); |
|
822 if (encoder.Results[j] != S_OK) |
|
823 res = encoder.Results[j]; |
|
824 } |
|
825 RINOK(res); |
|
826 #endif |
|
827 RINOK(status.Res); |
|
828 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); |
|
829 info.UnpackSize = 0; |
|
830 info.PackSize = 0; |
|
831 info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations; |
|
832 for (i = 0; i < numEncoderThreads; i++) |
|
833 { |
|
834 CEncoderInfo &encoder = encoders[i]; |
|
835 info.UnpackSize += encoder.kBufferSize; |
|
836 info.PackSize += encoder.compressedSize; |
|
837 } |
|
838 RINOK(callback->SetDecodeResult(info, false)); |
|
839 RINOK(callback->SetDecodeResult(info, true)); |
|
840 return S_OK; |
|
841 } |
|
842 |
|
843 |
|
844 inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary) |
|
845 { |
|
846 UInt32 hs = dictionary - 1; |
|
847 hs |= (hs >> 1); |
|
848 hs |= (hs >> 2); |
|
849 hs |= (hs >> 4); |
|
850 hs |= (hs >> 8); |
|
851 hs >>= 1; |
|
852 hs |= 0xFFFF; |
|
853 if (hs > (1 << 24)) |
|
854 hs >>= 1; |
|
855 hs++; |
|
856 return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + |
|
857 (1 << 20) + (multiThread ? (6 << 20) : 0); |
|
858 } |
|
859 |
|
860 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary) |
|
861 { |
|
862 const UInt32 kBufferSize = dictionary; |
|
863 const UInt32 kCompressedBufferSize = (kBufferSize / 2); |
|
864 UInt32 numSubThreads = (numThreads > 1) ? 2 : 1; |
|
865 UInt32 numBigThreads = numThreads / numSubThreads; |
|
866 return (kBufferSize + kCompressedBufferSize + |
|
867 GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads; |
|
868 } |
|
869 |
|
870 static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase) |
|
871 { |
|
872 for (UInt32 i = 0; i < numCycles; i++) |
|
873 if (CrcCalc(data, size) != crcBase) |
|
874 return false; |
|
875 return true; |
|
876 } |
|
877 |
|
878 #ifdef BENCH_MT |
|
879 struct CCrcInfo |
|
880 { |
|
881 NWindows::CThread Thread; |
|
882 const Byte *Data; |
|
883 UInt32 Size; |
|
884 UInt32 NumCycles; |
|
885 UInt32 Crc; |
|
886 bool Res; |
|
887 void Wait() |
|
888 { |
|
889 Thread.Wait(); |
|
890 Thread.Close(); |
|
891 } |
|
892 }; |
|
893 |
|
894 static THREAD_FUNC_DECL CrcThreadFunction(void *param) |
|
895 { |
|
896 CCrcInfo *p = (CCrcInfo *)param; |
|
897 p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc); |
|
898 return 0; |
|
899 } |
|
900 |
|
901 struct CCrcThreads |
|
902 { |
|
903 UInt32 NumThreads; |
|
904 CCrcInfo *Items; |
|
905 CCrcThreads(): Items(0), NumThreads(0) {} |
|
906 void WaitAll() |
|
907 { |
|
908 for (UInt32 i = 0; i < NumThreads; i++) |
|
909 Items[i].Wait(); |
|
910 NumThreads = 0; |
|
911 } |
|
912 ~CCrcThreads() |
|
913 { |
|
914 WaitAll(); |
|
915 delete []Items; |
|
916 } |
|
917 }; |
|
918 #endif |
|
919 |
|
920 static UInt32 CrcCalc1(const Byte *buf, UInt32 size) |
|
921 { |
|
922 UInt32 crc = CRC_INIT_VAL;; |
|
923 for (UInt32 i = 0; i < size; i++) |
|
924 crc = CRC_UPDATE_BYTE(crc, buf[i]); |
|
925 return CRC_GET_DIGEST(crc); |
|
926 } |
|
927 |
|
928 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) |
|
929 { |
|
930 for (UInt32 i = 0; i < size; i++) |
|
931 buf[i] = (Byte)RG.GetRnd(); |
|
932 } |
|
933 |
|
934 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) |
|
935 { |
|
936 RandGen(buf, size, RG); |
|
937 return CrcCalc1(buf, size); |
|
938 } |
|
939 |
|
940 bool CrcInternalTest() |
|
941 { |
|
942 CBenchBuffer buffer; |
|
943 const UInt32 kBufferSize0 = (1 << 8); |
|
944 const UInt32 kBufferSize1 = (1 << 10); |
|
945 const UInt32 kCheckSize = (1 << 5); |
|
946 if (!buffer.Alloc(kBufferSize0 + kBufferSize1)) |
|
947 return false; |
|
948 Byte *buf = buffer.Buffer; |
|
949 UInt32 i; |
|
950 for (i = 0; i < kBufferSize0; i++) |
|
951 buf[i] = (Byte)i; |
|
952 UInt32 crc1 = CrcCalc1(buf, kBufferSize0); |
|
953 if (crc1 != 0x29058C73) |
|
954 return false; |
|
955 CBaseRandomGenerator RG; |
|
956 RandGen(buf + kBufferSize0, kBufferSize1, RG); |
|
957 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) |
|
958 for (UInt32 j = 0; j < kCheckSize; j++) |
|
959 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) |
|
960 return false; |
|
961 return true; |
|
962 } |
|
963 |
|
964 HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed) |
|
965 { |
|
966 if (numThreads == 0) |
|
967 numThreads = 1; |
|
968 |
|
969 CBenchBuffer buffer; |
|
970 size_t totalSize = (size_t)bufferSize * numThreads; |
|
971 if (totalSize / numThreads != bufferSize) |
|
972 return E_OUTOFMEMORY; |
|
973 if (!buffer.Alloc(totalSize)) |
|
974 return E_OUTOFMEMORY; |
|
975 |
|
976 Byte *buf = buffer.Buffer; |
|
977 CBaseRandomGenerator RG; |
|
978 UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1; |
|
979 |
|
980 UInt64 timeVal; |
|
981 #ifdef BENCH_MT |
|
982 CCrcThreads threads; |
|
983 if (numThreads > 1) |
|
984 { |
|
985 threads.Items = new CCrcInfo[numThreads]; |
|
986 UInt32 i; |
|
987 for (i = 0; i < numThreads; i++) |
|
988 { |
|
989 CCrcInfo &info = threads.Items[i]; |
|
990 Byte *data = buf + (size_t)bufferSize * i; |
|
991 info.Data = data; |
|
992 info.NumCycles = numCycles; |
|
993 info.Size = bufferSize; |
|
994 info.Crc = RandGenCrc(data, bufferSize, RG); |
|
995 } |
|
996 timeVal = GetTimeCount(); |
|
997 for (i = 0; i < numThreads; i++) |
|
998 { |
|
999 CCrcInfo &info = threads.Items[i]; |
|
1000 RINOK(info.Thread.Create(CrcThreadFunction, &info)); |
|
1001 threads.NumThreads++; |
|
1002 } |
|
1003 threads.WaitAll(); |
|
1004 for (i = 0; i < numThreads; i++) |
|
1005 if (!threads.Items[i].Res) |
|
1006 return S_FALSE; |
|
1007 } |
|
1008 else |
|
1009 #endif |
|
1010 { |
|
1011 UInt32 crc = RandGenCrc(buf, bufferSize, RG); |
|
1012 timeVal = GetTimeCount(); |
|
1013 if (!CrcBig(buf, bufferSize, numCycles, crc)) |
|
1014 return S_FALSE; |
|
1015 } |
|
1016 timeVal = GetTimeCount() - timeVal; |
|
1017 if (timeVal == 0) |
|
1018 timeVal = 1; |
|
1019 |
|
1020 UInt64 size = (UInt64)numCycles * totalSize; |
|
1021 speed = MyMultDiv64(size, timeVal, GetFreq()); |
|
1022 return S_OK; |
|
1023 } |
|
1024 |