 /* SCE CONFIDENTIAL
$PSLibId$
* Copyright (C) 2011 Sony Computer Entertainment Inc.
* All Rights Reserved.
*/

#include "render.h"
#include "common.h"
#include <string.h>
#include <libdbg.h>
#include <sceconst.h>
#include <vectormath.h>
#include <gxm.h>
#include <stdio.h>

// Offscreen texture parameters: dimensions plus the colour-surface and
// texture formats that createOffscreenBuffer() passes to the GXM init calls
#define OFFSCREEN_WIDTH				512
#define OFFSCREEN_HEIGHT			512
#define OFFSCREEN_COLOR_FORMAT		SCE_GXM_COLOR_FORMAT_A8R8G8B8
#define OFFSCREEN_TEXTURE_FORMAT	SCE_GXM_TEXTURE_FORMAT_A8R8G8B8

// When defined, the render functions reserve the context's default vertex
// uniform buffer and g_cubeUniformBuffer is never allocated; when undefined,
// they point at g_cubeUniformBuffer instead.
#define USE_DEFAULT_UB

using namespace sce::Vectormath::Simd::Aos;

// update data
//float							g_rotationAngle = 0.0f;
//Matrix4							g_offscreenWvpMatrix;
//Matrix4							g_mainWvpMatrix;
//Matrix4 g_magMvpMatrix;
//Matrix4 g_viewMatrix;
//Matrix4 g_worldMatrix;
//Matrix4 g_localMatrix;
//Matrix4 g_projectionMatrix;

// When true, renderMain() draws the magnifier quad on top of the main quad.
// Cleared by dismiss_mag(); nothing in this file currently sets it to true
// (the assignment in popup_mag() is commented out).
static bool popup_ = false;

// Embedded GXM shader programs. The symbols are defined outside this file
// (presumably generated from the compiled .gxp binaries at link time --
// TODO confirm against the build scripts).
extern const SceGxmProgram _binary_clear_v_gxp_start;
extern const SceGxmProgram _binary_clear_f_gxp_start;
extern const SceGxmProgram _binary_cube_v_gxp_start;
extern const SceGxmProgram _binary_cube_f_gxp_start;

// Vertex layout used by the clear pass: a 2D position made of two floats.
// The stream stride handed to the shader patcher is sizeof(ClearVertex).
struct ClearVertex {
	float x;	// first position component
	float y;	// second position component
};

// Vertex layout shared by the main quad and the magnifier quad.
// createCubeData() binds the fields by byte offset (position at 0, colour at
// 12, texture coordinate at 16), so the member order must not change.
struct BasicVertex {
	// position, three floats starting at offset 0
	float x;
	float y;
	float z;
	// packed 4 x 8-bit colour at offset 12
	uint32_t color;
	// texture coordinate, two floats starting at offset 16
	// (an earlier uint16_t u/v variant was commented out in favour of floats)
	float u;
	float v;
};

// clear geometry data
// All of these are initialised by createClearData() and released by
// destroyClearData(); renderOffscreen() and renderMain() use them to draw
// the clear triangle.
SceGxmShaderPatcherId			g_clearVertexProgramId;
SceGxmShaderPatcherId			g_clearFragmentProgramId;
SceGxmVertexProgram				*g_clearVertexProgram = NULL;
SceGxmFragmentProgram			*g_clearFragmentProgram = NULL;
ClearVertex						*g_clearVertices = NULL;
uint16_t						*g_clearIndices = NULL;
// cached lookup of the "color" uniform in the clear fragment program
const SceGxmProgramParameter	*g_clearColorParam = NULL;

// cube geometry data
// Programs, vertices and indices are set up by createCubeData() and released
// by destroyCubeData(). Despite the "cube" name, only a single quad
// (4 vertices, 6 indices) is written and drawn in this file.
SceGxmShaderPatcherId			g_cubeVertexProgramId;
SceGxmShaderPatcherId			g_cubeFragmentProgramId;
SceGxmVertexProgram				*g_cubeVertexProgram = NULL;
SceGxmFragmentProgram			*g_cubeFragmentProgram = NULL;
BasicVertex						*g_cubeVertices = NULL;
uint16_t						*g_cubeIndices = NULL;
// magnifier quad geometry, also owned by createCubeData()/destroyCubeData()
BasicVertex* g_magVertices = NULL;
uint16_t* g_magIndices = NULL;
// uniform buffers allocated in createClearData(); g_cubeUniformBuffer is only
// allocated when USE_DEFAULT_UB is not defined
void* g_cubeUniformBuffer = NULL;
void* g_magUniformBuffer = NULL;


//const SceGxmProgramParameter	*g_cubeWvpParam = NULL;

// offscreen surface data and render target
// Created by createOffscreenBuffer(), released by destroyOffscreenBuffer().
// The colour buffer memory backs both g_offscreenColorSurface and
// g_offscreenTexture.
void							*g_offscreenColorBufferData;
SceGxmColorSurface				g_offscreenColorSurface;
SceGxmTexture					g_offscreenTexture;
void							*g_offscreenDepthBufferData;
SceGxmDepthStencilSurface		g_offscreenDepthSurface;
SceGxmRenderTarget				*g_offscreenRenderTarget;

// test texture
// Allocated and filled by createTestTextureData(), freed by
// destroyTestTextureData(). g_target describes the same pixel memory
// (buffer pointer, width, height, stride) for the tilebackend.
uint8_t							*g_testTextureData;
SceGxmTexture					g_testTexture;
tilebackend::Target				g_target;

void createClearData(void)
{
	int err = SCE_OK;
	UNUSED(err);

	// register programs with the shader patcher
	err = sceGxmShaderPatcherRegisterProgram(g_shaderPatcher, &_binary_clear_v_gxp_start, &g_clearVertexProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherRegisterProgram(g_shaderPatcher, &_binary_clear_f_gxp_start, &g_clearFragmentProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);

	// find attributes by name to create vertex format bindings
	const SceGxmProgram *clearVertexProgram = sceGxmShaderPatcherGetProgramFromId(g_clearVertexProgramId);
	const SceGxmProgramParameter *paramPositionAttribute = sceGxmProgramFindParameterByName(clearVertexProgram, "aPosition");
	SCE_DBG_ASSERT(paramPositionAttribute && (sceGxmProgramParameterGetCategory(paramPositionAttribute) == SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE));

	// find fragment uniforms by name and cache parameter info
	// note: name lookup is a slow load-time operation
	const SceGxmProgram *clearFragmentProgram = sceGxmShaderPatcherGetProgramFromId(g_clearFragmentProgramId);
	SCE_DBG_ASSERT(clearFragmentProgram);
	g_clearColorParam = sceGxmProgramFindParameterByName(clearFragmentProgram, "color");
	SCE_DBG_ASSERT(g_clearColorParam && (sceGxmProgramParameterGetCategory(g_clearColorParam) == SCE_GXM_PARAMETER_CATEGORY_UNIFORM));

	// create clear vertex format
	SceGxmVertexAttribute clearVertexAttributes[1];
	SceGxmVertexStream clearVertexStreams[1];
	clearVertexAttributes[0].streamIndex = 0;
	clearVertexAttributes[0].offset = 0;
	clearVertexAttributes[0].format = SCE_GXM_ATTRIBUTE_FORMAT_F32;
	clearVertexAttributes[0].componentCount = 2;
	clearVertexAttributes[0].regIndex = sceGxmProgramParameterGetResourceIndex(paramPositionAttribute);
	clearVertexStreams[0].stride = sizeof(ClearVertex);
	clearVertexStreams[0].indexSource = SCE_GXM_INDEX_SOURCE_INDEX_16BIT;

	// create clear programs
	err = sceGxmShaderPatcherCreateVertexProgram(
		g_shaderPatcher,
		g_clearVertexProgramId,
		clearVertexAttributes,
		1,
		clearVertexStreams,
		1,
		&g_clearVertexProgram);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherCreateFragmentProgram(
		g_shaderPatcher,
		g_clearFragmentProgramId,
		SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4,
		MSAA_MODE,
		NULL,
		sceGxmShaderPatcherGetProgramFromId(g_clearVertexProgramId),
		&g_clearFragmentProgram);
	SCE_DBG_ASSERT(err == SCE_OK);

#if !defined(USE_DEFAULT_UB)
	g_cubeUniformBuffer = heapAlloc(HEAP_TYPE_LPDDR_R, 128*sizeof(float), 16);
#endif
	g_magUniformBuffer = heapAlloc(HEAP_TYPE_LPDDR_R, 128*sizeof(float), 16);
	
	// allocate vertices and indices
	g_clearVertices = (ClearVertex *)heapAlloc(HEAP_TYPE_LPDDR_R, 3*sizeof(ClearVertex), 4);
	g_clearIndices = (uint16_t *)heapAlloc(HEAP_TYPE_LPDDR_R, 3*sizeof(uint16_t), 2);

	// write vertex data
	g_clearVertices[0].x = -1.0f;
	g_clearVertices[0].y = -1.0f;
	g_clearVertices[1].x =  3.0f;
	g_clearVertices[1].y = -1.0f;
	g_clearVertices[2].x = -1.0f;
	g_clearVertices[2].y =  3.0f;

	// write index data
	g_clearIndices[0] = 0;
	g_clearIndices[1] = 1;
	g_clearIndices[2] = 2;
}

void destroyClearData(void)
{
	int err = SCE_OK;
	UNUSED(err);

	// release the shaders
	err = sceGxmShaderPatcherReleaseFragmentProgram(g_shaderPatcher, g_clearFragmentProgram);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherReleaseVertexProgram(g_shaderPatcher, g_clearVertexProgram);
	SCE_DBG_ASSERT(err == SCE_OK);

	// free the memory used for vertices and indices
	heapFree(g_clearIndices);
	heapFree(g_clearVertices);
	heapFree(g_magUniformBuffer);

	// unregister programs since we don't need them any more
	err = sceGxmShaderPatcherUnregisterProgram(g_shaderPatcher, g_clearFragmentProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherUnregisterProgram(g_shaderPatcher, g_clearVertexProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
}

void createCubeData(void)
{
	int err = SCE_OK;
	UNUSED(err);

	// register programs with the patcher
	err = sceGxmShaderPatcherRegisterProgram(g_shaderPatcher, &_binary_cube_v_gxp_start, &g_cubeVertexProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherRegisterProgram(g_shaderPatcher, &_binary_cube_f_gxp_start, &g_cubeFragmentProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);

	// find vertex uniforms by name and cache parameter info
	// note: name lookup is a slow load-time operation
	const SceGxmProgram *cubeVertexProgram = sceGxmShaderPatcherGetProgramFromId(g_cubeVertexProgramId);
	SCE_DBG_ASSERT(cubeVertexProgram);
	//g_cubeWvpParam  = sceGxmProgramFindParameterByName(cubeVertexProgram, "wvp");
	//SCE_DBG_ASSERT(g_cubeWvpParam && (sceGxmProgramParameterGetCategory(g_cubeWvpParam) == SCE_GXM_PARAMETER_CATEGORY_UNIFORM));

	// find attributes by name to create vertex format bindings
	const SceGxmProgramParameter *paramPositionAttribute = sceGxmProgramFindParameterByName(cubeVertexProgram, "aPosition");
	SCE_DBG_ASSERT(paramPositionAttribute && (sceGxmProgramParameterGetCategory(paramPositionAttribute) == SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE));
	const SceGxmProgramParameter *paramColorAttribute = sceGxmProgramFindParameterByName(cubeVertexProgram, "aColor");
	SCE_DBG_ASSERT(paramColorAttribute && (sceGxmProgramParameterGetCategory(paramColorAttribute) == SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE));
	const SceGxmProgramParameter *paramTexCoordAttribute = sceGxmProgramFindParameterByName(cubeVertexProgram, "aTexCoord");
	SCE_DBG_ASSERT(paramTexCoordAttribute && (sceGxmProgramParameterGetCategory(paramTexCoordAttribute) == SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE));

	// create shaded triangle vertex format
	SceGxmVertexAttribute basicVertexAttributes[3];
	SceGxmVertexStream basicVertexStreams[1];
	basicVertexAttributes[0].streamIndex = 0;
	basicVertexAttributes[0].offset = 0;
	basicVertexAttributes[0].format = SCE_GXM_ATTRIBUTE_FORMAT_F32;
	basicVertexAttributes[0].componentCount = 3;
	basicVertexAttributes[0].regIndex = sceGxmProgramParameterGetResourceIndex(paramPositionAttribute);
	basicVertexAttributes[1].streamIndex = 0;
	basicVertexAttributes[1].offset = 12;
	basicVertexAttributes[1].format = SCE_GXM_ATTRIBUTE_FORMAT_U8N;
	basicVertexAttributes[1].componentCount = 4;
	basicVertexAttributes[1].regIndex = sceGxmProgramParameterGetResourceIndex(paramColorAttribute);
	basicVertexAttributes[2].streamIndex = 0;
	basicVertexAttributes[2].offset = 16;
	basicVertexAttributes[2].format = SCE_GXM_ATTRIBUTE_FORMAT_F32;
	basicVertexAttributes[2].componentCount = 2;
	basicVertexAttributes[2].regIndex = sceGxmProgramParameterGetResourceIndex(paramTexCoordAttribute);
	basicVertexStreams[0].stride = sizeof(BasicVertex);
	basicVertexStreams[0].indexSource = SCE_GXM_INDEX_SOURCE_INDEX_16BIT;

	// create cube vertex program
	err = sceGxmShaderPatcherCreateVertexProgram(
		g_shaderPatcher,
		g_cubeVertexProgramId,
		basicVertexAttributes,
		3,
		basicVertexStreams,
		1,
		&g_cubeVertexProgram);
	SCE_DBG_ASSERT(err == SCE_OK);

	// create cube fragment program
	err = sceGxmShaderPatcherCreateFragmentProgram(
		g_shaderPatcher,
		g_cubeFragmentProgramId,
		SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4,
		MSAA_MODE,
		NULL,
		sceGxmShaderPatcherGetProgramFromId(g_cubeVertexProgramId),
		&g_cubeFragmentProgram);
	SCE_DBG_ASSERT(err == SCE_OK);

	// allocate memory for vertex and index data
	g_cubeVertices = (BasicVertex *)heapAlloc(HEAP_TYPE_LPDDR_R, 24*sizeof(BasicVertex), 4);
	g_cubeIndices = (uint16_t *)heapAlloc(HEAP_TYPE_LPDDR_R, 36*sizeof(uint16_t), 2);

	g_magVertices = (BasicVertex*)heapAlloc(HEAP_TYPE_LPDDR_R, 4*sizeof(BasicVertex), 4);
	g_magIndices = (uint16_t *)heapAlloc(HEAP_TYPE_LPDDR_R, 6*sizeof(uint16_t), 2);

	// write vertices
	BasicVertex *vertexData = g_cubeVertices;
	// const uint16_t half0 = 0x0000;
	// const uint16_t half1 = 0x3c00;
	const float half0 = 0.f;
	const float half1 = 1.f;
	const float ratio = (float)DISPLAY_HEIGHT*(float)WEBVIEW_WIDTH/(float)DISPLAY_WIDTH/(float)WEBVIEW_HEIGHT;
	vertexData[0].x = -1.f * ratio;
	vertexData[0].y = 1.f;
	vertexData[0].z = 0.f;
	vertexData[0].color = 0xffffffff;
	vertexData[0].u = half0;
	vertexData[0].v = half0;

	vertexData[1].x = 1.f * ratio;
	vertexData[1].y = 1.f;
	vertexData[1].z = 0.f;
	vertexData[1].color = 0xffffffff;
	vertexData[1].u = half1;
	vertexData[1].v = half0;

	vertexData[2].x = 1.f * ratio;
	vertexData[2].y = -1.f;
	vertexData[2].z = 0.f;
	vertexData[2].color = 0xffffffff;
	vertexData[2].u = half1;
	vertexData[2].v = half1;

	vertexData[3].x = -1.f * ratio;
	vertexData[3].y = -1.f;
	vertexData[3].z = 0.f;
	vertexData[3].color = 0xffffffff;
	vertexData[3].u = half0;
	vertexData[3].v = half1;

	// write indices
	uint16_t *indexData = g_cubeIndices;
	indexData[0] = 0;
	indexData[1] = 1;
	indexData[2] = 3;
	indexData[3] = 3;
	indexData[4] = 1;
	indexData[5] = 2;

	vertexData = g_magVertices;
	vertexData[0].x = -0.5f;
	vertexData[0].y = 0.5f;
	vertexData[0].z = 0.f;
	vertexData[0].color = 0xffffffff;
	vertexData[0].u = half0;
	vertexData[0].v = half0;

	vertexData[1].x = 0.5f;
	vertexData[1].y = 0.5f;
	vertexData[1].z = 0.f;
	vertexData[1].color = 0xffffffff;
	vertexData[1].u = half1;
	vertexData[1].v = half0;

	vertexData[2].x = 0.5f;
	vertexData[2].y = -0.5f;
	vertexData[2].z = 0.f;
	vertexData[2].color = 0xffffffff;
	vertexData[2].u = half1;
	vertexData[2].v = half1;

	vertexData[3].x = -0.5f;
	vertexData[3].y = -0.5f;
	vertexData[3].z = 0.f;
	vertexData[3].color = 0xffffffff;
	vertexData[3].u = half0;
	vertexData[3].v = half1;

	indexData = g_magIndices;
	indexData[0] = 0;
	indexData[1] = 1;
	indexData[2] = 3;
	indexData[3] = 3;
	indexData[4] = 1;
	indexData[5] = 2;
}

// Releases everything createCubeData() set up: the cube shader programs,
// the vertex/index memory of both the main quad and the magnifier quad,
// and the shader patcher registrations.
void destroyCubeData(void)
{
	int err = SCE_OK;
	UNUSED(err);

	// release the shaders
	err = sceGxmShaderPatcherReleaseFragmentProgram(g_shaderPatcher, g_cubeFragmentProgram);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherReleaseVertexProgram(g_shaderPatcher, g_cubeVertexProgram);
	SCE_DBG_ASSERT(err == SCE_OK);

	// free the memory used for vertices and indices
	heapFree(g_magIndices);
	heapFree(g_magVertices);
	heapFree(g_cubeIndices);
	heapFree(g_cubeVertices);

	// unregister programs since we don't need them any more
	err = sceGxmShaderPatcherUnregisterProgram(g_shaderPatcher, g_cubeFragmentProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmShaderPatcherUnregisterProgram(g_shaderPatcher, g_cubeVertexProgramId);
	SCE_DBG_ASSERT(err == SCE_OK);
}

// Allocates the test texture, fills it with a two-value pattern, publishes
// the pixel memory through g_target, and initialises g_testTexture as a
// linear (non-swizzled) texture with point sampling.
//
// width, height: texture dimensions in texels
// format:        passed straight to sceGxmTextureInitLinear(); the buffer is
//                always sized for 4 bytes per texel regardless of this value
void createTestTextureData(uint32_t width, uint32_t height, SceGxmTextureFormat format)
{
	int err = SCE_OK;
	UNUSED(err);

	// 4 bytes per texel
	const uint32_t byteCount = width * height * 4;

	// allocate memory
	g_testTextureData = (uint8_t *)heapAlloc(HEAP_TYPE_LPDDR_R, byteCount, SCE_GXM_TEXTURE_ALIGNMENT);

	// start from an all-0xff buffer
	memset(g_testTextureData, 0xff, byteCount);

	// describe the same memory for the tilebackend
	g_target.buffer = g_testTextureData;
	g_target.width  = width;
	g_target.height = height;
	g_target.stride = width * 4;
	g_target.format = 0;

	// pattern: a texel is 0xff when bits 6 and 7 of its linear index are
	// either both clear or both set, and 0x00 otherwise
	uint32_t *texels = (uint32_t *)g_testTextureData;
	const uint32_t texelCount = width * height;
	const uint32_t bandMask = 0xc0;
	for (uint32_t texel = 0; texel < texelCount; ++texel) {
		const uint32_t band = texel & bandMask;
		if (band == 0 || band == bandMask) {
			texels[texel] = 0xff;
		} else {
			texels[texel] = 0x00;
		}
	}

	// set up the texture control words (single mip level)
	err = sceGxmTextureInitLinear(&g_testTexture, g_testTextureData, format, width, height, 1);
	SCE_DBG_ASSERT(err == SCE_OK);

	// use point filtering for both magnification and minification
	err = sceGxmTextureSetMagFilter(&g_testTexture, SCE_GXM_TEXTURE_FILTER_POINT);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmTextureSetMinFilter(&g_testTexture, SCE_GXM_TEXTURE_FILTER_POINT);
	SCE_DBG_ASSERT(err == SCE_OK);
}

// Frees the pixel memory allocated by createTestTextureData().
// Note: g_testTextureData and g_target.buffer are not reset here, so both
// still hold the freed address after this call.
void destroyTestTextureData(void)
{
	heapFree(g_testTextureData);
}

// Creates the offscreen rendering resources: a colour buffer that is both a
// colour surface and a texture, a depth/stencil surface, and the render
// target itself. Counterpart: destroyOffscreenBuffer().
void createOffscreenBuffer(void)
{
	int err = SCE_OK;
	UNUSED(err);

	// allocate memory; the alignment must satisfy both uses of the buffer
	// (texture and colour surface)
	g_offscreenColorBufferData = heapAlloc(
		HEAP_TYPE_CDRAM_RW,
		OFFSCREEN_WIDTH*OFFSCREEN_HEIGHT*4,
		MAX(SCE_GXM_TEXTURE_ALIGNMENT, SCE_GXM_COLOR_SURFACE_ALIGNMENT));

	// set up the surface
	err = sceGxmColorSurfaceInit(
		&g_offscreenColorSurface,
		OFFSCREEN_COLOR_FORMAT,
		SCE_GXM_COLOR_SURFACE_LINEAR,
		(MSAA_MODE != SCE_GXM_MULTISAMPLE_NONE) ? SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE : SCE_GXM_COLOR_SURFACE_SCALE_NONE,
		SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT,
		OFFSCREEN_WIDTH,
		OFFSCREEN_HEIGHT,
		OFFSCREEN_WIDTH,
		g_offscreenColorBufferData);
	SCE_DBG_ASSERT(err == SCE_OK);

	// set up the texture over the same memory
	err = sceGxmTextureInitLinear(
		&g_offscreenTexture,
		g_offscreenColorBufferData,
		OFFSCREEN_TEXTURE_FORMAT,
		OFFSCREEN_WIDTH,
		OFFSCREEN_HEIGHT,
		1);
	SCE_DBG_ASSERT(err == SCE_OK);

	// set linear filtering
	err = sceGxmTextureSetMagFilter(&g_offscreenTexture, SCE_GXM_TEXTURE_FILTER_LINEAR);
	SCE_DBG_ASSERT(err == SCE_OK);
	err = sceGxmTextureSetMinFilter(&g_offscreenTexture, SCE_GXM_TEXTURE_FILTER_LINEAR);
	SCE_DBG_ASSERT(err == SCE_OK);

	// create the depth/stencil surface
	// NOTE(review): the depth buffer is sized from DISPLAY_WIDTH/HEIGHT while
	// the colour surface and render target use OFFSCREEN_WIDTH/HEIGHT --
	// confirm whether this is intentional.
	const uint32_t alignedWidth = ALIGN(DISPLAY_WIDTH, SCE_GXM_TILE_SIZEX);
	const uint32_t alignedHeight = ALIGN(DISPLAY_HEIGHT, SCE_GXM_TILE_SIZEY);
	uint32_t sampleCount = alignedWidth*alignedHeight;
	uint32_t depthStrideInSamples = alignedWidth;
	if (MSAA_MODE == SCE_GXM_MULTISAMPLE_4X) {
		// samples increase in X and Y
		sampleCount *= 4;
		depthStrideInSamples *= 2;
	} else if (MSAA_MODE == SCE_GXM_MULTISAMPLE_2X) {
		// samples increase in Y only
		sampleCount *= 2;
	}

	// 4 bytes per sample
	g_offscreenDepthBufferData = heapAlloc(
		HEAP_TYPE_LPDDR_RW,
		4*sampleCount,
		SCE_GXM_DEPTHSTENCIL_SURFACE_ALIGNMENT);

	err = sceGxmDepthStencilSurfaceInit(
		&g_offscreenDepthSurface,
		SCE_GXM_DEPTH_STENCIL_FORMAT_S8D24,
		SCE_GXM_DEPTH_STENCIL_SURFACE_TILED,
		depthStrideInSamples,
		g_offscreenDepthBufferData,
		NULL);
	// the result was previously left unchecked, unlike every other init call here
	SCE_DBG_ASSERT(err == SCE_OK);

	// create a render target
	g_offscreenRenderTarget = createRenderTarget(OFFSCREEN_WIDTH, OFFSCREEN_HEIGHT, MSAA_MODE);
}

// Releases the resources created by createOffscreenBuffer(): the render
// target first, then the depth and colour buffer memory.
void destroyOffscreenBuffer(void)
{
	// destroy render target
	destroyRenderTarget(g_offscreenRenderTarget);

	// free the memory
	heapFree(g_offscreenDepthBufferData);
	heapFree(g_offscreenColorBufferData);
}

// Per-frame update hook. Currently a no-op: the matrix computation below is
// entirely commented out (the globals it wrote to are commented out as well).
void updateRender(void)
{
	// compute our matrices
	//Matrix4	offscreenProjectionMatrix = Matrix4::perspective(
	//	SCE_MATH_PI/4.0f,
	//	(float)OFFSCREEN_WIDTH/(float)OFFSCREEN_HEIGHT,
	//	0.1f,
	//	10.0f);
	//Matrix4	mainProjectionMatrix = Matrix4::perspective(
	//	SCE_MATH_PI/4.0f,
	//	(float)DISPLAY_WIDTH/(float)DISPLAY_HEIGHT,
	//	0.1f,
	//	10.0f);
	//Matrix4 viewMatrix		= Matrix4::translation(Vector3(0.0f, 0.0f, -5.0f));
	//Matrix4 worldMatrix		= Matrix4::rotation(g_rotationAngle, Vector3(0.707f, 0.707f, 0.0f));

	//g_viewMatrix = viewMatrix;
	//g_worldMatrix = worldMatrix;
	//g_projectionMatrix = mainProjectionMatrix;

	//g_offscreenWvpMatrix	= offscreenProjectionMatrix * viewMatrix * worldMatrix;
	//g_mainWvpMatrix			= mainProjectionMatrix * viewMatrix * worldMatrix;
}

// Renders one frame into the offscreen render target: clears it with the
// clear triangle, then draws the test-textured quad.
void renderOffscreen(void)
{
	// begin a scene on the offscreen target; no sync objects are passed
	sceGxmBeginScene(
		g_context, 0, g_offscreenRenderTarget,
		NULL, NULL, NULL,
		&g_offscreenColorSurface, &g_offscreenDepthSurface);

	// --- clear pass ---
	sceGxmSetVertexProgram(g_context, g_clearVertexProgram);
	sceGxmSetFragmentProgram(g_context, g_clearFragmentProgram);

	// upload the clear colour into the fragment default uniform buffer
	void *fragmentUniforms;
	sceGxmReserveFragmentDefaultUniformBuffer(g_context, &fragmentUniforms);
	float offscreenClearColor[4] = { 0.25f, 0.25f, 0.25f, 0.0f };
	sceGxmSetUniformDataF(fragmentUniforms, g_clearColorParam, 0, 4, offscreenClearColor);

	// draw the clear triangle
	sceGxmSetVertexStream(g_context, 0, g_clearVertices);
	sceGxmDraw(g_context, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, g_clearIndices, 3);

	// --- quad pass ---
	sceGxmSetVertexProgram(g_context, g_cubeVertexProgram);
	sceGxmSetFragmentProgram(g_context, g_cubeFragmentProgram);
	sceGxmSetVertexStream(g_context, 0, g_cubeVertices);
	sceGxmSetFragmentTexture(g_context, 0, &g_testTexture);

	// pick the vertex uniform buffer; nothing is written to it at present
	// because the WVP upload is disabled
	void *vertexUniforms;
#if defined(USE_DEFAULT_UB)
	sceGxmReserveVertexDefaultUniformBuffer(g_context, &vertexUniforms);
#else
	vertexUniforms = g_cubeUniformBuffer;
#endif

	// draw the quad (two triangles)
	sceGxmDraw(g_context, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, g_cubeIndices, 6);

	// finish the offscreen scene
	sceGxmEndScene(g_context, NULL, NULL);
}


// Renders one frame into the current display back buffer: clears it, draws
// the test-textured quad, optionally draws the magnifier quad on top (when
// popup_ is set), then ends the scene and issues the PA heartbeat.
void renderMain(void)
{
	// set up a scene, main render target, synchronised with the back buffer sync
	sceGxmBeginScene(
		g_context,
		0,
		g_mainRenderTarget,
		NULL,
		NULL,
		g_displayBufferSync[g_displayBackBufferIndex],
		&g_displaySurface[g_displayBackBufferIndex],
		&g_mainDepthSurface);

	// set clear shaders
	sceGxmSetVertexProgram(g_context, g_clearVertexProgram);
	sceGxmSetFragmentProgram(g_context, g_clearFragmentProgram);

	// set the fragment program constants
	void *fragmentDefaultBuffer;
	sceGxmReserveFragmentDefaultUniformBuffer(g_context, &fragmentDefaultBuffer);
	float clearColor[4] = { 0.2f, 0.2f, 0.2f, 0.0f };
	sceGxmSetUniformDataF(fragmentDefaultBuffer, g_clearColorParam, 0, 4, clearColor);

	// draw geometry
	sceGxmSetVertexStream(g_context, 0, g_clearVertices);
	sceGxmDraw(g_context, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, g_clearIndices, 3);

	// render the cube
	sceGxmSetVertexProgram(g_context, g_cubeVertexProgram);
	sceGxmSetFragmentProgram(g_context, g_cubeFragmentProgram);
	sceGxmSetVertexStream(g_context, 0, g_cubeVertices);
	sceGxmSetFragmentTexture(g_context, 0, &g_testTexture);

	// set the vertex program constants
	// The pointer is declared outside the #if (as in renderOffscreen) so that
	// the #else branch and the popup path below also compile when
	// USE_DEFAULT_UB is not defined.
	void *vertexDefaultBuffer;
#if defined(USE_DEFAULT_UB)
	void* real_default;
	sceGxmReserveVertexDefaultUniformBuffer(g_context, &real_default);
	vertexDefaultBuffer = real_default;
#else
	vertexDefaultBuffer = g_cubeUniformBuffer;
#endif

	// draw the cube
	sceGxmDraw(g_context, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, g_cubeIndices, 6);

	if (popup_) {
		// magnifier quad: same programs and texture, different vertices and
		// an explicitly set vertex uniform buffer
#if defined(USE_DEFAULT_UB)
		vertexDefaultBuffer = g_magUniformBuffer;
#endif
		sceGxmSetVertexStream(g_context, 0, g_magVertices);
		sceGxmSetVertexDefaultUniformBuffer(g_context, vertexDefaultBuffer);
		sceGxmDraw(g_context, SCE_GXM_PRIMITIVE_TRIANGLES, SCE_GXM_INDEX_FORMAT_U16, g_magIndices, 6);
	}

	// stop rendering to the main render target
	sceGxmEndScene(g_context, NULL, NULL);

	// PA heartbeat to notify end of frame
	sceGxmPadHeartbeat(
		&g_displaySurface[g_displayBackBufferIndex],
		g_displayBufferSync[g_displayBackBufferIndex]);
}

// Reports a fixed offset through the two out-parameters.
// x receives 460 and y receives 50; both pointers must be valid.
void getOffset(uint32_t *x, uint32_t *y)
{
	const uint32_t offsetX = 460;
	const uint32_t offsetY = 50;

	*x = offsetX;
	*y = offsetY;
}


// Builds a viewport matrix that scales by (w/2, -h/2, (zmax - zmin)/2) and
// then translates by (x + w/2, y + h/2, (zmax + zmin)/2). The Y scale is
// negated, so the Y axis is flipped.
//
// x, y:       offset of the viewport origin. These were previously accepted
//             but ignored; they are now added to the translation. A call
//             with x = y = 0 produces the same matrix as before.
// w, h:       viewport width and height
// zmin, zmax: depth range
inline Matrix4 get_viewmatrix(float x, float y, float w, float h, float zmin, float zmax)
{
	float wh = w * 0.5f;
	float hh = h * 0.5f;

	Matrix4 scale = Matrix4::scale(Vector3(wh, -hh, (zmax - zmin) * 0.5f));
	Matrix4 offset = Matrix4::translation(Vector3(x + wh, y + hh, (zmax + zmin) * 0.5f));
	Matrix4 viewport = offset * scale;
	return viewport;
}

// Tests whether `point` lies inside the quad described by the first four
// vertices of `pl`, after the quad has been transformed by proj * mv,
// divided by w, and mapped through `viewport`.
//
// The quad is treated as two triangles, (v0,v1,v2) and (v1,v2,v3), where
// v0..v3 are built from pl[0], pl[3], pl[1], pl[2] respectively. For each
// triangle the three edge-cross-point values are projected onto the triangle
// normal; the point counts as inside when all three are strictly positive or
// all three strictly negative.
//
// point:    point to test; it is compared directly against the
//           viewport-transformed vertices
// mv, proj: model-view and projection matrices applied to the quad
// pl:       at least four vertices; only x, y, z are read
// viewport: matrix applied after the perspective divide
// hpos:     output, written on every call (hit or miss); derived from the
//           point back-projected through inverse(viewport * proj * mv) and
//           rescaled by the quad extent to WEBVIEW_WIDTH/WEBVIEW_HEIGHT
// dump:     only referenced inside disabled (#if 0) debug output
//
// Returns true on a hit in either triangle, false otherwise.
// NOTE(review): the only call site visible in this file is commented out.
static bool pointIntersect(const Vector4& __restrict__ point, 
			   const Matrix4& __restrict__ mv,
			   const Matrix4& __restrict__ proj, 
			   const BasicVertex* pl, 
			   const Matrix4& __restrict__ viewport, 
			   Vector4& __restrict__ hpos, 
			   bool dump)
{
	// using namespace sce::Vectormath::Simd::Aos;
	// note the vertex reordering: v1 comes from pl[3] and v2 from pl[1]
	Vector4 v0(pl[0].x, pl[0].y, pl[0].z, 1.0f);
	Vector4 v1(pl[3].x, pl[3].y, pl[3].z, 1.0f);
	Vector4 v2(pl[1].x, pl[1].y, pl[1].z, 1.0f);
	Vector4 v3(pl[2].x, pl[2].y, pl[2].z, 1.0f);

	// extent of the untransformed quad, taken from pl[0] and pl[2]
	float vw = (pl[2].x - pl[0].x);
	float vh = (pl[0].y - pl[2].y);
#if 0
	printf("prj = [%vf;\n", proj.getRow(0).get128());
	printf("       %vf;\n", proj.getRow(1).get128());
	printf("       %vf;\n", proj.getRow(2).get128());
	printf("       %vf]\n", proj.getRow(3).get128());
#endif
#if 0
	printf("vp = [%wf;\n", viewport.getRow(0).get128());
	printf("      %wf;\n", viewport.getRow(1).get128());
	printf("      %wf;\n", viewport.getRow(2).get128());
	printf("      %wf]\n", viewport.getRow(3).get128());
#endif

	// transform the four corners by proj * mv
	Matrix4 mvp = proj * mv;
	v0 = mvp * v0;
	v1 = mvp * v1;
	v2 = mvp * v2;
	v3 = mvp * v3;

	// broadcast each vertex's w (lane 3) across a vector
	vec_float4 v0w = vdupq_n_f32(vgetq_lane_f32(v0.get128(), 3));
	vec_float4 v1w = vdupq_n_f32(vgetq_lane_f32(v1.get128(), 3));
	vec_float4 v2w = vdupq_n_f32(vgetq_lane_f32(v2.get128(), 3));
	vec_float4 v3w = vdupq_n_f32(vgetq_lane_f32(v3.get128(), 3));

	// perspective divide
	v0 = divPerElem(v0, Vector4(v0w));
	v1 = divPerElem(v1, Vector4(v1w));
	v2 = divPerElem(v2, Vector4(v2w));
	v3 = divPerElem(v3, Vector4(v3w));

	// apply the viewport matrix
	v0 = viewport * v0;
	v1 = viewport * v1;
	v2 = viewport * v2;
	v3 = viewport * v3;
#if 0
	printf("v0:%wf\n", v0.get128());
	printf("v1:%wf\n", v1.get128());
	printf("v2:%wf\n", v2.get128());
	printf("v3:%wf\n", v3.get128());
#endif
//	Vector4 p = offset * point;
	Vector4 p = point;
	Vector3 p3 = point.getXYZ();

	// direction along which the point is projected onto the quad's plane
	Vector3 pv = Vector3(0.f, 0.f, -1.f);

	// edges and unit normal of the first triangle (v0, v1, v2)
	Vector3 u0 = (v1 - v0).getXYZ();
	Vector3 u1 = (v2 - v1).getXYZ();
	Vector3 u2 = (v0 - v2).getXYZ();
	Vector3 unorm = normalize(cross((v1 - v0).getXYZ(), (v2 - v0).getXYZ()));

	float n = dot(v0.getXYZ() - p3, unorm);
	float pvn = dot(pv, unorm);

	// NOTE(review): pvn is zero when the triangle normal is perpendicular to
	// pv, which makes n / pvn a division by zero -- confirm this cannot occur.
	Vector4 upd = Vector4(p3 + pv * (n / pvn), 1.f); // p->p+v perspects on plane(v0 dot unorm)

	// edges and unit normal of the second triangle (v1, v2, v3)
	Vector3 l0 = (v2 - v1).getXYZ();
	Vector3 l1 = (v3 - v2).getXYZ();
	Vector3 l2 = (v1 - v3).getXYZ();
	Vector3 lnorm = normalize(cross((v2 - v1).getXYZ(), (v3 - v1).getXYZ()));

//	vec_uchar16 v2scale = (vec_uchar16){};
//	Matrix4 m = Matrix4::orthographic(v0.getX(), v2.getX(), v1.getY(), v0.getY(), v2.getZ(), v0.getZ());
//	upd = m * upd;

//	Vector4 updw(vec_splat(upd.get128(), 3));
//	upd = divPerElem(upd, updw);
	// edge x (point - edge start) for each edge of the first triangle
	Vector3 ux0 = cross(u0, (p - v0).getXYZ());
	Vector3 ux1 = cross(u1, (p - v1).getXYZ());
	Vector3 ux2 = cross(u2, (p - v2).getXYZ());

	// same for the second triangle
	Vector3 lx0 = cross(l0, (p - v1).getXYZ());
	Vector3 lx1 = cross(l1, (p - v2).getXYZ());
	Vector3 lx2 = cross(l2, (p - v3).getXYZ());

	// signed values along the first triangle's normal
	float fux0 = dot(ux0, unorm);
	float fux1 = dot(ux1, unorm);
	float fux2 = dot(ux2, unorm);


	// back-project the on-plane point into the quad's local space and
	// divide by w
	Vector4 k = inverse(viewport * proj * mv) * upd;
	vec_float4 kw = vdupq_n_f32(vgetq_lane_f32(k.get128(), 3)); // vec_splat(v, 3)
	k = divPerElem(k, Vector4(kw));
	//printf("k:[%wf]\n", k.get128());
	// shift by the quad's corner (-vw/2, vh/2), normalise by its extent
	// (with Y flipped), then scale to webview dimensions
	hpos = mulPerElem(Vector4((float)WEBVIEW_WIDTH, (float)WEBVIEW_HEIGHT, 0.f, 1.f), mulPerElem(k - Vector4(-vw/2.f, vh/2.f, 0.f, 0.f), Vector4(1.f/vw, -1.f/vh, 0.f, 1.f)));
//	pivot = mulPerElem(mulPerElem(upd - v0, Vector4(v0w)), k);

	// inside the first triangle when all three values share a strict sign
	if ((fux0 > 0.f && fux1 > 0.f && fux2 > 0.f) || (fux0 < 0.f && fux1 < 0.f && fux2 < 0.f)) {
#if 0
		if (dump) {
			printf("v: point0 = [%vf]\n", v0.get128());
			printf("v: point1 = [%vf]\n", v1.get128());
			printf("v: point2 = [%vf]\n", v2.get128());
			printf("v: point3 = [%vf]\n", v3.get128());
			printf("upd = [%vf]\n", upd.get128());
			printf("k   = [%vf]\n", k.get128());
		}
#endif
//		printf("org = [%vf]\n", org.get128());
		return true; // hit
	}

	// signed values along the second triangle's normal
	float flx0 = dot(lx0, lnorm);
	float flx1 = dot(lx1, lnorm);
	float flx2 = dot(lx2, lnorm);

	// inside the second triangle when all three values share a strict sign
	if ((flx0 > 0.f && flx1 > 0.f && flx2 > 0.f) || (flx0 < 0.f && flx1 < 0.f && flx2 < 0.f)) {
#if 0
		if (dump) {
			printf("v: point0 = [%vf]\n", v0.get128());
			printf("v: point1 = [%vf]\n", v1.get128());
			printf("v: point2 = [%vf]\n", v2.get128());
			printf("v: point3 = [%vf]\n", v3.get128());
			printf("upd = [%vf]\n", upd.get128());
			printf("k   = [%vf]\n", k.get128());
		}
#endif
		return true; // hit
	}
	return false; // miss
}

// Converts a point from display coordinates to webview coordinates, in place.
//
// The point is re-expressed relative to the display centre, scaled by
// WEBVIEW_WIDTH / DISPLAY_WIDTH horizontally and WEBVIEW_HEIGHT /
// DISPLAY_HEIGHT vertically, and then re-expressed relative to the webview
// origin by adding half the webview size.
//
// x, y: in/out coordinates; the float results are truncated on conversion
//       back to short
// Returns true unconditionally.
//
// A perspective hit-test path used to follow the return statement; it was
// unreachable (and its pointIntersect call already commented out), so it has
// been removed.
bool convertScreenToLocalCSS(short& x, short& y)
{
	float X = x;
	float Y = y;

	// make the display centre the origin
	X -= DISPLAY_WIDTH/2;
	Y -= DISPLAY_HEIGHT/2;

	// rescale from display size to webview size
	X *= (float)WEBVIEW_WIDTH / DISPLAY_WIDTH;
	Y *= (float)WEBVIEW_HEIGHT / DISPLAY_HEIGHT;

	// move the origin back to the webview corner
	X += WEBVIEW_WIDTH/2;
	Y += WEBVIEW_HEIGHT/2;

	x = (short)X;
	y = (short)Y;
	return true;
}


// Entry point for showing the magnifier quad. Currently a no-op: the whole
// body is commented out, so all parameters are ignored and popup_ is never
// set to true here.
void popup_mag(float x, float y, float z, float w, float h, float tx, float ty, float tw, float th)
{
	//popup_ = true;
	//Matrix4 m = Matrix4::orthographic(-3.f, 3.f, -3.f, 3.f, 0.0001f, 100.f);
	//g_magMvpMatrix = m * Matrix4::translation(Vector3(x, y, z - 0.1f)) * Matrix4::scale(Vector3(w, h, 1.f));
	//update_mag(tx, ty, tw, th);
}

void update_mag(float x, float y, float rx, float ry)
{
	float nx = x  * (1.f /(float)WEBVIEW_WIDTH);
	float nw = rx * (0.5f/(float)WEBVIEW_WIDTH);
	float ny = y  * (1.f /(float)WEBVIEW_HEIGHT);
	float nh = ry * (0.5f/(float)WEBVIEW_HEIGHT);

	float u0 = nx - nw;
	float u1 = nx + nw;
	float v0 = ny - nh;
	float v1 = ny + nh;

	//printf("uv0:(%f, %f) - uv1:(%f, %f) : w:%d h:%d\n", u0, v0, u1, v1, int((u1-u0)*(float)WEBVIEW_WIDTH), int((v1-v0)*(float)WEBVIEW_HEIGHT));
	
	g_magVertices[0].u = u0;
	g_magVertices[0].v = v0;
	g_magVertices[1].u = u1;
	g_magVertices[1].v = v0;
	g_magVertices[2].u = u1;
	g_magVertices[2].v = v1;
	g_magVertices[3].u = u0;
	g_magVertices[3].v = v1;
}

// Hides the magnifier: clears popup_, so renderMain() skips the magnifier
// quad from the next frame on.
void dismiss_mag()
{
	popup_ = false;
}
