Listing 1: A program to measure multithreaded performance using a single process heap vs. thread-private heaps

// main.cpp
#include <windows.h>
#include <process.h>
#include <iostream>
#include <assert.h>

using namespace std; 

// Benchmark parameters shared by go() and the worker threads.
// go() fills them in from the command line before any worker is resumed.
int nNumThreads = 0;   // number of worker threads to create
int nNumAllocs = 0;    // HeapAlloc/HeapFree pairs performed by each worker
int nAllocSize = 0;    // size in bytes requested by each HeapAlloc call
// NOTE(review): not referenced anywhere in the visible listing; presumably a
// leftover from a start-event based design -- confirm before removing.
HANDLE hStartEvent = NULL; 

// Worker used for the "one heap per process" test.
// pParm carries the heap handle to allocate from (go() passes the process
// heap). Performs nNumAllocs allocate/free pairs of nAllocSize bytes each,
// using the default heap flags (0), then returns 0 as the thread exit code.
unsigned __stdcall threadfunc1( void* pParm )
{
   HANDLE hTargetHeap = static_cast<HANDLE>( pParm );

   int nDone = 0;
   while( nDone < nNumAllocs ) {
      // Allocate and immediately release; only the heap call cost matters.
      LPVOID pBlock = HeapAlloc( hTargetHeap, 0, nAllocSize );
      HeapFree( hTargetHeap, 0, pBlock );
      ++nDone;
   } // while

   return 0;
}


// Worker used for the "one heap per thread" test.
// pParm carries the handle of the heap this thread allocates from (go()
// creates one heap per thread). Performs nNumAllocs allocate/free pairs of
// nAllocSize bytes each, passing HEAP_NO_SERIALIZE on every call, then
// returns 0 as the thread exit code.
unsigned __stdcall threadfunc2( void* pParm )
{
   HANDLE hPrivateHeap = static_cast<HANDLE>( pParm );

   int nDone = 0;
   while( nDone < nNumAllocs ) {
      // Allocate and immediately release; only the heap call cost matters.
      LPVOID pBlock = HeapAlloc( hPrivateHeap, HEAP_NO_SERIALIZE, nAllocSize );
      HeapFree( hPrivateHeap, HEAP_NO_SERIALIZE, pBlock );
      ++nDone;
   } // while

   return 0;
}


void 
go( char* pTest, char* pNumThreads, char* pNumAllocs, 
   char* pAllocSize )
{
   nNumThreads = atoi( pNumThreads ); 
   nNumAllocs = atoi( pNumAllocs );
   nAllocSize = atoi( pAllocSize ); 

   DWORD dwStart, dwEnd; 
   BOOL bOneHeap = ( lstrcmp( pTest, "1") == 0 ); 
   if( bOneHeap ) {
      cout << "one heap per process" << endl; 
   } else {
      cout << "one heap per thread" << endl; 
   } // else
   cout << nNumThreads << " threads" << endl; 
   cout << nNumAllocs << " allocations per thread" << endl; 
   cout << nAllocSize << " bytes per allocation" << endl; 

   HANDLE* aThreads = new HANDLE[nNumThreads]; 
   HANDLE* aHeaps = NULL; 
   if( !bOneHeap ) {
      // Create a heap for each thread
      aHeaps = new HANDLE[nNumThreads]; 
      for( int i=0; i<nNumThreads; i++ ) {
         aHeaps[i] = HeapCreate(HEAP_NO_SERIALIZE, 0x100000, 0);
      } // for
   } // if

   unsigned int notused; 
   for( int i=0; i<nNumThreads; i++ ) {
      aThreads[i] = 
         (HANDLE) _beginthreadex(NULL, 0, 
                     (bOneHeap ? threadfunc1 : threadfunc2),
                     (bOneHeap ? GetProcessHeap() : aHeaps[i]), 
                     CREATE_SUSPENDED, &notused ); 
   } // for

   dwStart = GetTickCount();
   for( i=0; i<nNumThreads; i++ ) {
      DWORD dwResult = ResumeThread( aThreads[i] ); 
      assert( dwResult != 0xffffffff );
   } // for

   WaitForMultipleObjects(nNumThreads, aThreads, TRUE, INFINITE);
   dwEnd = GetTickCount(); 

   delete [] aThreads; 
   if( !bOneHeap ) {
      for( i=0; i<nNumThreads; i++ ) {
         HeapDestroy( aHeaps[i] ) ;
      } // for
      delete [] aHeaps;
   } // if

   cout << "time: " << (dwEnd-dwStart) << endl; 
}


// Program entry point.
// Expects exactly four arguments: <1|2> <numthreads> <numallocs> <allocsize>
// and forwards them to go(). With any other argument count it prints the
// usage text and returns 1; otherwise it returns 0.
//
// The arguments are declared as char* rather than TCHAR*: main() always
// receives narrow strings and go() takes char*, so TCHAR would only break
// a UNICODE build. envp is unused and kept for signature compatibility.
int main(int argc, char* argv[], char* envp[])
{
   (void) envp;

   if( argc != 5 ) {
      cout << "usage: HeapDemo <1|2> <numthreads>"
           << " <numallocs> <allocsize>" << endl;
      cout << "first param=1 uses one process wide heap "
           << "(heap access is serialized)" << endl; 
      cout << "first param=2 uses one heap per thread "
           << "(no heap serialization)" << endl; 
      return 1;   // signal the usage error to the calling shell/script
   } // if

   go( argv[1], argv[2], argv[3], argv[4] ); 
   return 0; 
}