/*
** [BEGIN NOTICE]
**
** Copyright (C) 1999-2003 Larry Hastings
**
** This software is provided 'as-is', without any express or implied warranty.
** In no event will the authors be held liable for any damages arising from
** the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute
** it freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
**    claim that you wrote the original software. If you use this software
**    in a product, an acknowledgment in the product documentation would be
**    appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
**    misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
** The ltimer homepage is here:
**		http://www.midwinter.com/~lch/programming/ltimer/
**
** [END NOTICE]
*/


/*
** ltimer
**
** A machine-speed-independent timer class.
**
** ltimer uses a two-tiered approach to determine the current time.
** For the first few seconds, it determines the time using
** QueryPerformanceCounter().  But after this calibration period is
** over, it can calculate what the local machine's clock frequency
** must be, and can use the Pentium-and-above RDTSC instruction.
** It then periodically recalculates the clock frequency--ltimer
** gets even more accurate over time.
**
** For information on using RDTSC, peep this my homies:
**   http://cedar.intel.com/software/idap/media/pdf/rdtscpm1.pdf
** or search the Intel developer site for "rdtsc cpuid timer"
** and wade through the results.
**
** ltimer also prevents possible "retrograde" time observation.
** Every now and then I observe QueryPerformanceCounter() actually
** return a *smaller* value than the time before.  *shudder*
** This causes all sorts of conniptions in my timing loops.
** Anyway, ltimer prevents you from seeing this.  If QPC() ever
** reports a value smaller than the one it returned last time,
** ltimer ignores QPC() and uses the previous returned value.
** Your observed change in time will be 0, which I feel is an
** improvement over a negative number.  It doesn't bother with
** this for values returned by RDTSC, as I've never seen retrograde
** values with RDTSC.  (It seems far less likely, as RDTSC is
** returning the CPU's raw cycle count.)
**
** (Warning: if your software actually travels back in time,
** either disable this feature or do not use ltimer.)
*/

#define DEBUG_SLEEP_UNTIL 0


#include <assert.h>
#if DEBUG_SLEEP_UNTIL
#include <stdio.h>
#endif // DEBUG_SLEEP_UNTIL

#include "ltimer.h"


/*
** Error-handling helpers for functions that funnel through a single
** exit point.  All of these macros assume the following about the
** function they are used in:
**  *  there is a variable "returnValue" of some sort of integer/enum type
**  *  there is a goto label in the current function called "EXIT"
**  *  zero is success, nonzero is failure
**
** Each macro expands to exactly one do { ... } while (0) statement, so
** it must be followed by a semicolon and behaves correctly as the body
** of an unbraced if/else.
*/

/* set returnValue to "expr" and leave through EXIT */
#define RETURN(expr) \
	do \
		{ \
		returnValue = (expr); \
		goto EXIT; \
		} \
	while (0)

/* if the expression "expr" is nonzero, return whatever "expr" was */
#define ASSERT_SUCCESS(expr) \
	do \
		{ \
		returnValue = (expr); \
		if (returnValue) \
			goto EXIT; \
		} \
	while (0)

/* if the expression "expr" is false, return rv */
#define ASSERT_RETURN(expr, rv) \
	do \
		{ \
		if (!(expr)) \
			{ \
			returnValue = (rv); \
			goto EXIT; \
			} \
		} \
	while (0)




// Raw RDTSC and QueryPerformanceCounter() readings captured once, back to
// back, in ltimerStartup().  CALIBRATE_RDTSC() measures elapsed counts
// from these two values to work out the RDTSC frequency.
static ltimerUint64 globalRdtscStartTime;
static ltimerUint64 globalQpcStartTime;

// Counter frequencies, shared by every timer.
// rdtscFrequency stays 0 until CALIBRATE_RDTSC() computes it.
// NOTE(review): rdtscFrequency is not static, so it has external
// linkage--presumably something outside this file reads it; confirm.
// qpcFrequency is filled in by ltimerStartup(); ltimerCreate() treats
// a value of 0 as "ltimerStartup() has not been called".
ltimerUint64 rdtscFrequency = 0;
static ltimerUint64 qpcFrequency = 0;

// useRdtscAfter: QPC reading at which the calibration period ends
// (computed in ltimerSetDefaultOption()).
// recalibrateRdtscAfter: RDTSC reading past which getCurrentTime()
// recomputes rdtscFrequency (computed in CALIBRATE_RDTSC()).
static ltimerUint64 useRdtscAfter;
static ltimerUint64 recalibrateRdtscAfter;


/*
** ltimerOption_s
**
** The set of tunable option values.  The ltimerSet/Get(Default)Option()
** functions cast a pointer to this struct to (ltimerUint32 *) and index
** it with (option - 1), so every field must stay a ltimerUint32 and the
** field order must match the numbering of the LTIMER_OPTION_* values.
*/
struct ltimerOption_s
	{
	// milliseconds of QPC-only timing before RDTSC may be used
	ltimerUint32 calibrationPeriod;
	// milliseconds between recalculations of rdtscFrequency
	ltimerUint32 recalibrationPeriod;
	// reported ticks per second (1000 means times are in milliseconds)
	ltimerUint32 resolution;
	// one of the LTIMER_TIMING_MODE_* values
	ltimerUint32 mode;
	// sleepUntil(): with at least this many ms left, Sleep() for half of it
	ltimerUint32 longSleepThreshold;
	// sleepUntil(): with at least this many ms left, Sleep(0); below it, spin
	ltimerUint32 shortSleepThreshold;
	};


// Option values copied into every timer by ltimerCreate().  The
// initializers are positional, so they must stay in the same order as
// the fields of ltimerOption_s.  ltimerStartup() overwrites the mode
// entry based on the number of processors it detects.
static ltimerOption_s defaultOptions = 
	{
	2000, // calibration period (milliseconds)
	2000, // recalibration period (milliseconds)
	1000, // resolution (ticks per second)
	LTIMER_TIMING_MODE_PERFORMANCE, // mode
	4, // long sleep threshold (milliseconds)
	1, // short sleep threshold (milliseconds)
	};

/*
** ltimer_s
**
** The state of one timer, plus the member functions that do the real
** work.  The C-style ltimerXxx() entry points later in this file are
** thin wrappers that forward to these members.
**
** Times handed back to the caller are measured from the timer's start
** values (written by ltimerReset()) and are expressed in units of
** 1/options.resolution seconds.
*/
struct ltimer_s
	{
	// most recent raw RDTSC sample taken by getCurrentTime()
	ltimerUint64 rdtscCurrentTime;
	// largest raw QueryPerformanceCounter() value seen so far; POLL_QPC()
	// only ever moves this forward
	ltimerUint64 qpcCurrentTime;
	// raw counter readings that count as "time zero" for this timer;
	// written by ltimerReset()
	ltimerUint64 rdtscStartTime;
	ltimerUint64 qpcStartTime;

	// the value most recently returned by getCurrentTime()
	ltimerUint64 lastReturnedTime;

	// nonzero once getCurrentTime() has switched this timer over to RDTSC
	ltimerUint32 useRdtsc;

	// this timer's own option values (ltimerCreate() copies defaultOptions here)
	ltimerOption_s options;

// Converts a raw counter reading into reported time.  "name" is pasted
// onto StartTime and Frequency, so it must be either rdtsc or qpc.
// Note that the expansion already ends in a semicolon.
// NOTE(review): the multiply happens before the divide, all in
// ltimerUint64; a large resolution combined with a long-running RDTSC
// count could overflow--confirm the intended limits.
#define CALCULATE_TIME(currentTime, name) \
	(((currentTime - name ## StartTime) * options.resolution) / name ## Frequency); \
		
// Records the converted time in lastReturnedTime and returns it from
// the enclosing member function.
#define RETURN_TIME(currentTime, name) \
	{ \
	lastReturnedTime = CALCULATE_TIME(currentTime, name); \
	return lastReturnedTime; \
	} \
		
// Reads QueryPerformanceCounter() and stores it in qpcCurrentTime only
// when it is larger than the value already there.  This is the guard
// against "retrograde" QPC readings.
#define POLL_QPC() \
	{ \
	LARGE_INTEGER largeInteger; \
	largeInteger.QuadPart = 0; \
	QueryPerformanceCounter(&largeInteger); \
	if ((ltimerUint64)largeInteger.QuadPart > qpcCurrentTime) \
		qpcCurrentTime = largeInteger.QuadPart; \
	} \

// Recomputes the global rdtscFrequency from the RDTSC and QPC counts
// elapsed since ltimerStartup(), then schedules the next recalibration
// defaultOptions.recalibrationPeriod milliseconds of RDTSC counts ahead.
// Expects rdtscCurrentTime and qpcCurrentTime to have just been refreshed.
// NOTE(review): divides by (qpcCurrentTime - globalQpcStartTime); this is
// zero only if no QPC counts have elapsed since startup--confirm that a
// calibration period of 0 cannot reach here in that state.
#define CALIBRATE_RDTSC() \
	{ \
	rdtscFrequency = (ltimerUint64)(((rdtscCurrentTime - globalRdtscStartTime) * (double)qpcFrequency) / (qpcCurrentTime - globalQpcStartTime)); \
	recalibrateRdtscAfter = (rdtscCurrentTime + (ltimerUint64)((defaultOptions.recalibrationPeriod * rdtscFrequency) / 1000)); \
	} \


	// Returns the time elapsed since this timer's start values, in units
	// of 1/options.resolution seconds, and remembers it in lastReturnedTime.
	//
	// why is this a member function?
	// 'cause it's actually faster this way.
	// I guess MSVC is better at optimizing this pointers
	// than pointers to structs that get used a lot.
	// anyway, it means I'm "in context" as my friend Ryan
	// used to say.  --lch
	ltimerUint64 getCurrentTime(void)
		{
		// fast path: already on RDTSC and no recalibration is due yet
		if (useRdtsc)
			{
			rdtscCurrentTime = ltimerGetRDTSC();
			if (rdtscCurrentTime <= recalibrateRdtscAfter)
				RETURN_TIME(rdtscCurrentTime, rdtsc);
			}

		// still calibrating, or a recalibration is due: consult QPC
		POLL_QPC();

		// in performance mode, once the calibration period is over,
		// (re)compute the RDTSC frequency and answer from RDTSC
		if ((options.mode == LTIMER_TIMING_MODE_PERFORMANCE) && (qpcCurrentTime >= useRdtscAfter))
			{
			useRdtsc = 1;
			rdtscCurrentTime = ltimerGetRDTSC();
			CALIBRATE_RDTSC();
			RETURN_TIME(rdtscCurrentTime, rdtsc);
			}

		// otherwise answer from QPC
		RETURN_TIME(qpcCurrentTime, qpc);
		}


	// Returns whatever getCurrentTime() returned last, without sampling
	// any clock.
	ltimerUint64 getLastReportedTime(void)
		{
		return lastReturnedTime;
		}


#if DEBUG_SLEEP_UNTIL
	// how many times the current sleepUntil() call took each kind of sleep
	ltimerUint32 longSleepCount;
	ltimerUint32 shortSleepCount;
#endif // DEBUG_SLEEP_UNTIL

	// Blocks until this timer reaches wakeupTime (same units as
	// getCurrentTime() returns).  Waits in three stages as the deadline
	// approaches: real sleeps, then yielding the timeslice, then spinning.
	// Always returns S_OK.
	HRESULT sleepUntil(ltimerUint64 wakeupTime)
		{
		// recalculate this on the fly--it can move around a little while we're running,
		// due to recalibrating rdtscFrequency.
		ltimerUint64 rdtscWakeupTime;
		// the QPC deadline in raw counter units is computed just once
		ltimerUint64 qpcWakeupTime = ((wakeupTime * qpcFrequency) / options.resolution) + qpcStartTime;

#if DEBUG_SLEEP_UNTIL
		longSleepCount = 0;
		shortSleepCount = 0;

		printf("sleepUntil(%6I64d): useRdtsc %d\n    ", wakeupTime, useRdtsc);
#endif // DEBUG_SLEEP_UNTIL

		for (;;)
			{
			// called for its side effects: it refreshes rdtscCurrentTime /
			// qpcCurrentTime and may switch useRdtsc on
			getCurrentTime();

			ltimerUint64 msTimeRemaining;
			if (useRdtsc)
				{
				rdtscWakeupTime = ((wakeupTime * rdtscFrequency) / options.resolution) + rdtscStartTime;
				if (rdtscCurrentTime >= rdtscWakeupTime)
					break;
				msTimeRemaining = ((rdtscWakeupTime - rdtscCurrentTime) * 1000) / rdtscFrequency;
				}
			else
				{
				if (qpcCurrentTime >= qpcWakeupTime)
					break;
				msTimeRemaining = ((qpcWakeupTime - qpcCurrentTime) * 1000) / qpcFrequency;
				}

			// stage one: if we have at least longSleepThreshold ms to wait,
			// go ahead and sleep for half of longSleepThreshold.
			if (msTimeRemaining >= options.longSleepThreshold)
				{
#if DEBUG_SLEEP_UNTIL
				longSleepCount++;
#endif // DEBUG_SLEEP_UNTIL
				Sleep(options.longSleepThreshold >> 1);
				continue;
				}

			// stage two: if we have at least shortSleepThreshold ms remaining,
			// Sleep(0) (yield our timeslice).
			if (msTimeRemaining >= options.shortSleepThreshold)
				{
#if DEBUG_SLEEP_UNTIL
				shortSleepCount++;
#endif // DEBUG_SLEEP_UNTIL
				Sleep(0);
				continue;
				}

			// stage three: less than shortSleepThreshold ms left, so fall
			// through and loop again immediately (busy-wait).
			}

#if DEBUG_SLEEP_UNTIL
		printf("overshot %I64dms  Sleep(%d) x%4d  Sleep(0) x%4d  useRdtsc %d\n", getCurrentTime() - wakeupTime, options.longSleepThreshold, longSleepCount, shortSleepCount, useRdtsc);
#endif // DEBUG_SLEEP_UNTIL

		return S_OK;
		}
	};




/*
** ltimerStartup
**
** One-time library initialization; must be called before ltimerCreate().
** Records the global RDTSC/QPC starting values, reads the QPC frequency,
** requests 1ms multimedia timer resolution, and picks the default
** timing mode based on the processor count.
**
** Returns S_OK on success.  Returns ERROR_NOT_READY (leaving
** qpcFrequency at 0, so ltimerCreate() will refuse to run) if the
** performance counter frequency cannot be obtained.
*/
HRESULT LTIMER_FUNCTION ltimerStartup(void)
	{
	LARGE_INTEGER counter;
	LARGE_INTEGER frequency;

	// sample the two clocks back to back so the calibration baselines
	// refer to (nearly) the same instant.
	counter.QuadPart = 0;
	QueryPerformanceCounter(&counter);
	globalRdtscStartTime = ltimerGetRDTSC();
	globalQpcStartTime = counter.QuadPart;

	// use a separate variable and check the result: if the call fails we
	// must not mistake a stale counter reading for a frequency, and a
	// zero frequency would later be used as a divisor.
	frequency.QuadPart = 0;
	if (!QueryPerformanceFrequency(&frequency) || (frequency.QuadPart <= 0))
		{
		qpcFrequency = 0;
		return ERROR_NOT_READY;
		}
	qpcFrequency = frequency.QuadPart;

	// only reached on success, so ltimerShutdown()'s timeEndPeriod(1)
	// stays balanced with this call.
	timeBeginPeriod(1);

	// this ensures that some code in ltimerSetDefaultOption() is called,
	// without actually changing the calibration period.
	ltimerSetDefaultOption(LTIMER_OPTION_CALIBRATION_PERIOD, defaultOptions.calibrationPeriod);

	// only allow performance mode for systems with one processor.
	// note that it'll fail this test if dwNumberOfProcessors == 0;
	// this is intentional, as it means the GetSystemInfo() call failed.
	// A frightening eventuality to contemplate, but there you are.  --lch
	SYSTEM_INFO systemInfo;
	memset(&systemInfo, 0, sizeof(systemInfo));
	GetSystemInfo(&systemInfo);
	ltimerSetDefaultOption(LTIMER_OPTION_TIMING_MODE,
		((systemInfo.dwNumberOfProcessors) == 1)
		? LTIMER_TIMING_MODE_PERFORMANCE
		: LTIMER_TIMING_MODE_SAFETY
		);

	return S_OK;
	}


/*
** ltimerShutdown
**
** Releases the 1ms timer resolution requested with timeBeginPeriod(1)
** in ltimerStartup().  Always reports S_OK.
*/
HRESULT LTIMER_FUNCTION ltimerShutdown(void)
	{
	const HRESULT result = S_OK;

	timeEndPeriod(1);

	return result;
	}



/*
** ltimerSetDefaultOption
**
** Stores "value" as the default for "option"; timers created afterwards
** start out with it.  Returns ERROR_INVALID_ACCESS when "option" is
** LTIMER_OPTION_INVALID or lies past LTIMER_OPTION_LAST, S_OK otherwise.
**
** Setting the calibration period also recomputes useRdtscAfter, the
** QPC reading at which the calibration period ends.
*/
HRESULT LTIMER_FUNCTION ltimerSetDefaultOption(ltimerUint32 option, ltimerUint32 value)
	{
	if ((option != LTIMER_OPTION_INVALID) && (option <= LTIMER_OPTION_LAST))
		{
		// the options struct is addressed as a flat array of ltimerUint32,
		// with option N living in slot N - 1.
		ltimerUint32 *slot = (ltimerUint32 *)&defaultOptions + (option - 1);
		*slot = value;

		if (option == LTIMER_OPTION_CALIBRATION_PERIOD)
			{
			// calibration period is in milliseconds; convert it to QPC counts
			const ltimerUint64 calibrationCounts = (ltimerUint64)((defaultOptions.calibrationPeriod * qpcFrequency) / 1000);
			useRdtscAfter = globalQpcStartTime + calibrationCounts;
			}

		return S_OK;
		}

	return ERROR_INVALID_ACCESS;
	}

/*
** ltimerGetDefaultOption
**
** Writes the current default for "option" to *value.  Returns
** ERROR_INVALID_ACCESS (leaving *value untouched) when "option" is
** LTIMER_OPTION_INVALID or lies past LTIMER_OPTION_LAST, S_OK otherwise.
*/
HRESULT LTIMER_FUNCTION ltimerGetDefaultOption(ltimerUint32 option, ltimerUint32 *value)
	{
	if ((option != LTIMER_OPTION_INVALID) && (option <= LTIMER_OPTION_LAST))
		{
		// option N lives in slot N - 1 of the struct viewed as an array
		ltimerUint32 *slot = (ltimerUint32 *)&defaultOptions + (option - 1);
		*value = *slot;
		return S_OK;
		}

	return ERROR_INVALID_ACCESS;
	}




/*
** ltimerSetOption
**
** Stores "value" for "option" on one particular timer.  Options numbered
** up to and including LTIMER_OPTION_LAST_GLOBAL_ONLY cannot be set per
** timer; those, and anything past LTIMER_OPTION_LAST, are rejected with
** ERROR_INVALID_ACCESS.  Returns S_OK otherwise.
*/
HRESULT LTIMER_FUNCTION ltimerSetOption(ltimer_t ltimer, ltimerUint32 option, ltimerUint32 value)
	{
	if ((option > LTIMER_OPTION_LAST_GLOBAL_ONLY) && (option <= LTIMER_OPTION_LAST))
		{
		// option N lives in slot N - 1 of the timer's options struct
		ltimerUint32 *slot = (ltimerUint32 *)&(ltimer->options) + (option - 1);
		*slot = value;
		return S_OK;
		}

	return ERROR_INVALID_ACCESS;
	}

/*
** ltimerGetOption
**
** Writes one timer's current value for "option" to *value.  Options
** numbered up to and including LTIMER_OPTION_LAST_GLOBAL_ONLY, and
** anything past LTIMER_OPTION_LAST, are rejected with
** ERROR_INVALID_ACCESS (leaving *value untouched).  Returns S_OK otherwise.
*/
HRESULT LTIMER_FUNCTION ltimerGetOption(ltimer_t ltimer, ltimerUint32 option, ltimerUint32 *value)
	{
	if ((option > LTIMER_OPTION_LAST_GLOBAL_ONLY) && (option <= LTIMER_OPTION_LAST))
		{
		// option N lives in slot N - 1 of the timer's options struct
		ltimerUint32 *slot = (ltimerUint32 *)&(ltimer->options) + (option - 1);
		*value = *slot;
		return S_OK;
		}

	return ERROR_INVALID_ACCESS;
	}





/*
** ltimerCreate
**
** Allocates a new timer, seeds it with the default options, and resets
** it so that "now" is time zero.
**
** On success returns S_OK and stores the new timer in *ltimer_out; the
** caller releases it with ltimerDestroy().
** On failure returns a nonzero code and stores NULL in *ltimer_out:
**   ERROR_NOT_READY     ltimerStartup() has not been called
**   ERROR_OUTOFMEMORY   the allocation failed
**   (or whatever ltimerSetOption() / ltimerReset() reported)
*/
HRESULT LTIMER_FUNCTION ltimerCreate(ltimer_t *ltimer_out)
	{
	HRESULT returnValue = S_OK;
	// must start out NULL: the early exits below jump to EXIT before
	// any allocation has happened, and EXIT publishes this pointer.
	ltimer_t ltimer = NULL;

	/* you *must* call ltimerStartup().  if you haven't, no soup for you! */
	ASSERT_RETURN(qpcFrequency != 0, ERROR_NOT_READY);

	ltimer = (ltimer_t)calloc(1, sizeof(ltimer_s));
	ASSERT_RETURN(ltimer != NULL, ERROR_OUTOFMEMORY);

	ltimer->useRdtsc = 0;
	memcpy(&(ltimer->options), &defaultOptions, sizeof(defaultOptions));

	ASSERT_SUCCESS(ltimerSetOption(ltimer, LTIMER_OPTION_TIMING_MODE, defaultOptions.mode));
	ASSERT_SUCCESS(ltimerReset(ltimer));

EXIT:
	// never hand back a half-built timer alongside an error code;
	// free(NULL) is harmless for the paths that failed before allocating.
	if (returnValue != S_OK)
		{
		free(ltimer);
		ltimer = NULL;
		}

	*ltimer_out = ltimer;
	return returnValue;
	}



/*
** ltimerDestroy
**
** Frees the timer that *ltimer_out refers to and sets *ltimer_out to
** NULL so the caller is not left holding a dangling handle.
** Always reports S_OK.
*/
HRESULT LTIMER_FUNCTION ltimerDestroy(ltimer_t *ltimer_out)
	{
	free(*ltimer_out);
	*ltimer_out = NULL;

	return S_OK;
	}



/*
** ltimerReset
**
** Makes "now" the timer's time zero by recording fresh QPC and RDTSC
** readings as its start values.  Always reports S_OK.
*/
HRESULT LTIMER_FUNCTION ltimerReset(ltimer_t ltimer)
	{
	LARGE_INTEGER qpcNow;
	ltimerUint64 rdtscNow;

	// read QPC first and RDTSC immediately after, then store both
	QueryPerformanceCounter(&qpcNow);
	rdtscNow = ltimerGetRDTSC();

	ltimer->qpcStartTime = qpcNow.QuadPart;
	ltimer->rdtscStartTime = rdtscNow;

	return S_OK;
	}


/*
** ltimerGetCurrentTime
**
** Samples the clock and returns the time elapsed on this timer;
** forwards to ltimer_s::getCurrentTime().
*/
ltimerUint64 LTIMER_FUNCTION ltimerGetCurrentTime(ltimer_t ltimer)
	{
	const ltimerUint64 now = ltimer->getCurrentTime();
	return now;
	}


/*
** ltimerGetLastReportedTime
**
** Returns the time this timer reported most recently, without
** sampling any clock; forwards to ltimer_s::getLastReportedTime().
*/
ltimerUint64 LTIMER_FUNCTION ltimerGetLastReportedTime(ltimer_t ltimer)
	{
	const ltimerUint64 lastReported = ltimer->getLastReportedTime();
	return lastReported;
	}

/*
** ltimerGetRdtscFrequency
**
** Returns the current global RDTSC frequency estimate (0 until the
** first calibration has run).  The estimate is shared by all timers,
** so the "ltimer" argument is not consulted.
*/
ltimerUint64 LTIMER_FUNCTION ltimerGetRdtscFrequency(ltimer_t ltimer)
	{
	(void)ltimer;

	return rdtscFrequency;
	}


/*
** ltimerSleepUntil
**
** Blocks until the timer reaches "wakeupTime" (in the same units that
** ltimerGetCurrentTime() returns); forwards to ltimer_s::sleepUntil().
*/
HRESULT LTIMER_FUNCTION ltimerSleepUntil(ltimer_t ltimer, ltimerUint64 wakeupTime)
	{
	const HRESULT result = ltimer->sleepUntil(wakeupTime);
	return result;
	}

/*
** ltimerSleep
**
** Blocks for "ticks" units of timer time, measured from the moment of
** the call: samples the current time, adds "ticks", and waits for that
** deadline with ltimerSleepUntil().
*/
HRESULT LTIMER_FUNCTION ltimerSleep(ltimer_t ltimer, ltimerUint64 ticks)
	{
	const ltimerUint64 wakeupTime = ltimer->getCurrentTime() + ticks;

	return ltimerSleepUntil(ltimer, wakeupTime);
	}

