//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

#ifndef _NULSTEIN_TASKS_H_
#define _NULSTEIN_TASKS_H_

/*
	NOTE: implementations of the methods declared here mainly live in TaskScheduler.cpp,
	      except for those meant to be inlined, which live in TaskScheduler.inl
	      (included at the end of this file).
*/

#include "Mutex.h"

/* Compile-time capacity limits used to size the scheduler's fixed arrays. */
#define MAX_THREADS			 32	/* maximum number of worker threads we expect to encounter; sizes CTaskPool::m_Thread */ 
#define MAX_TASKSPERTHREAD	256	/* maximum capacity of a worker thread's task queue (sizes CWorkerThread::m_Task). Past this amount, tasks are executed immediately instead of being queued */ 

class CTaskPool;
class CWorkerThread;

//________________________________________________________________________________
/*
	Completion tracker shared between the code that submits work and the
	tasks that carry it out. It is a counter: MarkBusy(true) adds one
	outstanding unit, MarkBusy(false) removes one, and IsBusy() reports
	whether the count is currently non-zero.

	CACHELINE_ALIGNED is defined outside this header (presumably Mutex.h --
	TODO confirm); the intent is for each instance to own a whole cache line
	so that counters polled by different threads do not false-share.
*/
class CACHELINE_ALIGNED CTaskCompletion
{
public:
	/* Starts out idle: nothing outstanding. */
	CTaskCompletion()
		: m_Busy(0)
	{
	}

	/* True while at least one MarkBusy(true) has not been matched by a MarkBusy(false). */
	bool IsBusy() const
	{
		return !(m_Busy == 0);
	}

	/*
		Adjusts the outstanding-work count by one.
		bBusy == true  : one more unit of work is pending (interlocked increment)
		bBusy == false : one unit of work has finished   (interlocked decrement)
	*/
	void MarkBusy(bool bBusy)
	{
		if (!bBusy)
		{
			InterlockedDecrement( &m_Busy );
			return;
		}

		InterlockedIncrement( &m_Busy );
	}

private:
	/* outstanding-work counter; only modified through the Interlocked* calls above */
	volatile LONG m_Busy;
	/* the rest of the cache line is deliberately left unused to avoid false sharing */
};

//________________________________________________________________________________
/*
	Abstract base class for a unit of work handled by the worker threads.

	Derived classes must implement Run(). They may also override Split(),
	PartialPop() and Spread() to let the scheduler redistribute their work;
	the default implementations do nothing and return false.

	Each task carries a pointer to the CTaskCompletion it is associated with.
	The pointer is stored as given and is not owned by the task.
*/
class CInternalTask
{
public:
	/* pCompletion: completion tracker associated with this task (stored, not owned) */
	CInternalTask(CTaskCompletion* pCompletion)
		: m_pCompletion(pCompletion)
	{
	}

	/*
		Virtual destructor: this is a polymorphic base class, so destroying a
		derived task through a CInternalTask* must reach the derived destructor.
		Without it, such a delete is undefined behaviour.
	*/
	virtual ~CInternalTask() {}

public:
	/* does its work and suicides (or recycles); the meaning of the returned bool is defined by the callers in TaskScheduler.cpp */
	virtual bool Run(CWorkerThread* pThread) =0;

	/*
		Keep half the work and put the other half in a new task (returned through ppTask).
		Default: the task cannot be split; *ppTask is left untouched and false is returned.
	*/
	virtual bool	Split(CWorkerThread* pThread, CInternalTask** ppTask)
	{
		return false;
	}

	/*
		Returns a sub part of the task through ppTask.
		Default: no partial pop is supported; *ppTask is left untouched and false is returned.
	*/
	virtual bool	PartialPop(CWorkerThread* pThread, CInternalTask** ppTask)
	{
		return false;
	}

	/*
		Share work across all threads (pool is idle).
		Default: the task does not spread itself; returns false.
	*/
	virtual bool	Spread(CTaskPool* pPool)
	{
		return false;
	}


public:
	CTaskCompletion*	m_pCompletion;	/* completion tracker passed to the constructor */
};

//________________________________________________________________________________
/*
	One worker of a CTaskPool. Each worker holds its own fixed-capacity task
	queue (m_Task / m_TaskCount) alongside a CSpinMutex (m_TaskMutex), and a
	back pointer to the pool it belongs to.

	Method bodies are not in this header (see TaskScheduler.cpp / TaskScheduler.inl).
	NOTE(review): no constructor is declared here, so the plain members below
	(m_TaskCount, m_pCurrentCompletion, m_pTaskPool, m_hThread, m_bFinished) are
	not initialized by this declaration -- presumably Start()/AttachToThisThread()
	set them up; confirm.
*/
class CWorkerThread 
{
public:
	/* presumably launches the worker's OS thread for the given pool -- TODO confirm in TaskScheduler.cpp */
	bool Start(CTaskPool* pTaskPool);

	/* returns the CWorkerThread associated with the calling thread (presumably via CTaskPool::ms_TLSindex -- confirm) */
	static CWorkerThread* GetCurrent();

	/* presumably this worker's position in CTaskPool::m_Thread -- confirm */
	int		GetWorkerIndex();
	
public:
	/* static entry point with the Win32 thread-procedure signature; p is presumably the CWorkerThread -- confirm */
	static DWORD WINAPI _ThreadProc(void* p);
	DWORD	ThreadProc();
	void	Idle();
	
	/* presumably binds this worker object to the already-running calling thread instead of creating a new one -- confirm */
	bool AttachToThisThread(CTaskPool* pTaskPool);
	
	bool PushTask(CInternalTask* pTask);	/* queue task if there is space, and run it otherwise */ 
	bool _PushTask(CInternalTask* pTask);	/* queue task if there is space (or do nothing) */ 
	
	bool PopTask(CInternalTask** ppTask);				/* pop task from queue */ 
	bool StealTasks();									/* fill queue with work from another thread */ 
	bool GiveUpSomeWork(CWorkerThread* pIdleThread);	/* request from an idle thread to give up some work */ 
	
	/* both take the completion tracker to work against; exact semantics live in the implementation files */
	void WorkUntilDone(CTaskCompletion* pCard);
	void DoWork(CTaskCompletion* pCard);
	
public:
	/* task list */ 
	CSpinMutex			m_TaskMutex;				/* presumably guards m_Task / m_TaskCount -- confirm */
	CInternalTask*		m_Task[MAX_TASKSPERTHREAD];	/* tasks queue for this thread (pop from top of pile, steal from bottom)	*/ 
	unsigned			m_TaskCount;				/* number of tasks currently in queue										*/ 
	CTaskCompletion*	m_pCurrentCompletion;		/* completion flag for currently running task								*/ 

	/* misc */ 
	CTaskPool*		m_pTaskPool;	/* pool this worker belongs to */
	HANDLE			m_hThread;		/* handle of the worker's OS thread */
	volatile bool	m_bFinished;	/* NOTE(review): volatile flag, presumably set when the thread has ended -- confirm who writes it */
};

//________________________________________________________________________________
/*
	Owns the set of worker threads (up to MAX_THREADS, stored by value in
	m_Thread) together with the handles and flags used to put them to sleep
	and wake them up.

	Method bodies are not in this header (see TaskScheduler.cpp / TaskScheduler.inl).
*/
class CTaskPool
{
public:
	CTaskPool();
	
	/* start / stop the pool; the meaning of the returned bool is defined in the implementation */
	bool Start();
	bool Stop();
	
	void	WaitForWorkersToBeReady();
	void	WakeWorkers();
	
public:
	/* number of hardware threads on this machine, as reported by the implementation */
	static unsigned GetHardwareThreadsCount();
	
public:
	CWorkerThread				m_Thread[MAX_THREADS];	/* worker slots; presumably only the first m_ThreadCount are in use -- confirm */
	HANDLE						m_SleepNotification;	/* presumably signalled by workers going to sleep (see WaitForWorkersToBeReady) -- confirm */
	HANDLE						m_WakeUpCall;			/* presumably signalled by WakeWorkers() -- confirm */
	unsigned					m_ThreadCount;
	bool						m_bShuttingDown;		/* NOTE(review): plain bool, not volatile like CWorkerThread::m_bFinished -- confirm cross-thread access is synchronized */
	
	bool						m_bWorkersIdle;			/* NOTE(review): plain bool as well -- same question as m_bShuttingDown */
	/* NOTE(review): as written, volatile qualifies the pointed-to CTaskCompletion, not the pointer itself
	   (that would be "CTaskCompletion* volatile") -- confirm which one is intended */
	volatile CTaskCompletion*	m_pMainCompletion;

public: /* global */ 
	static DWORD	ms_TLSindex;	/* presumably the TLS slot used by CWorkerThread::GetCurrent() -- confirm */
};

//________________________________________________________________________________
#include "TaskScheduler.inl"

#endif // _NULSTEIN_TASKS_H_
