mirror of
https://github.com/pspdev/pspsdk.git
synced 2025-10-03 16:51:27 +00:00
Merge pull request #277 from fjtrujy/sceGu_parallel
Adding a `sceGU` sample dealing with CPU & GPU parallelization
This commit is contained in:
@@ -499,10 +499,10 @@ void* sceGuGetMemory(int size);
|
||||
*
|
||||
* The previous context-type is stored so that it can be restored at sceGuFinish().
|
||||
*
|
||||
* @param cid - Context Type
|
||||
* @param ctype - Context Type
|
||||
* @param list - Pointer to display-list (16 byte aligned)
|
||||
**/
|
||||
void sceGuStart(int cid, void* list);
|
||||
void sceGuStart(int ctype, void* list);
|
||||
|
||||
/**
|
||||
* Finish current display list and go back to the parent context
|
||||
|
@@ -11,9 +11,9 @@
|
||||
#include <pspkernel.h>
|
||||
#include <pspge.h>
|
||||
|
||||
void sceGuStart(int cid, void *list)
|
||||
void sceGuStart(int ctype, void *list)
|
||||
{
|
||||
GuContext *context = &gu_contexts[cid];
|
||||
GuContext *context = &gu_contexts[ctype];
|
||||
unsigned int *local_list = (unsigned int *)(((unsigned int)list) | 0x40000000);
|
||||
|
||||
// setup display list
|
||||
@@ -25,9 +25,9 @@ void sceGuStart(int cid, void *list)
|
||||
|
||||
// store current context
|
||||
|
||||
gu_curr_context = cid;
|
||||
gu_curr_context = ctype;
|
||||
|
||||
if (!cid)
|
||||
if (ctype == GU_DIRECT)
|
||||
{
|
||||
ge_list_executed[0] = sceGeListEnQueue(local_list, local_list, gu_settings.ge_callback_id, 0);
|
||||
gu_settings.signal_offset = 0;
|
||||
@@ -53,12 +53,9 @@ void sceGuStart(int cid, void *list)
|
||||
gu_init = 1;
|
||||
}
|
||||
|
||||
if (!gu_curr_context)
|
||||
if (ctype == GU_DIRECT && gu_draw_buffer.frame_width != 0)
|
||||
{
|
||||
if (gu_draw_buffer.frame_width)
|
||||
{
|
||||
sendCommandi(FRAME_BUF_PTR, ((unsigned int)gu_draw_buffer.frame_buffer) & 0xffffff);
|
||||
sendCommandi(FRAME_BUF_WIDTH, ((((unsigned int)gu_draw_buffer.frame_buffer) & 0xff000000) >> 8) | gu_draw_buffer.frame_width);
|
||||
}
|
||||
sendCommandi(FRAME_BUF_PTR, ((unsigned int)gu_draw_buffer.frame_buffer) & 0xffffff);
|
||||
sendCommandi(FRAME_BUF_WIDTH, ((((unsigned int)gu_draw_buffer.frame_buffer) & 0xff000000) >> 8) | gu_draw_buffer.frame_width);
|
||||
}
|
||||
}
|
||||
|
17
src/samples/gu/doublelist/Makefile.sample
Normal file
17
src/samples/gu/doublelist/Makefile.sample
Normal file
@@ -0,0 +1,17 @@
|
||||
# Build description for the "doublelist" GU sample.

# Output name and the objects that go into it (shared sample callbacks included).
TARGET = doublelist
OBJS = doublelist.o ../common/callbacks.o

# Compiler / assembler settings; C++ and assembler flags derive from CFLAGS.
INCDIR =
CFLAGS = -Wall -O2
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)

# Linker settings.
LIBDIR =
LDFLAGS =
LIBS = -lpspgum -lpspgu

# Also produce an EBOOT.PBP carrying this title.
EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = Blit with Double Display List Sample

# Locate the installed PSPSDK and pull in its build.mak.
PSPSDK = $(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak
|
288
src/samples/gu/doublelist/doublelist.c
Normal file
288
src/samples/gu/doublelist/doublelist.c
Normal file
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* PSP Software Development Kit - https://github.com/pspdev
|
||||
* -----------------------------------------------------------------------
|
||||
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
|
||||
*
|
||||
* Copyright (c) 2005 Jesper Svennevid
|
||||
*/
|
||||
|
||||
#include <pspkernel.h>
|
||||
#include <pspdisplay.h>
|
||||
#include <pspdebug.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <pspctrl.h>
|
||||
#include <pspgu.h>
|
||||
#include <psprtc.h>
|
||||
|
||||
#include "../common/callbacks.h"
|
||||
|
||||
PSP_MODULE_INFO("Blit DoubleDisplayList", 0, 1, 1);
|
||||
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER);
|
||||
|
||||
/* Buffer stride and visible screen size, in pixels. */
#define BUF_WIDTH (512)
#define SCR_WIDTH (480)
#define SCR_HEIGHT (272)

/* Two display lists; main() alternates between them every frame. */
static unsigned int __attribute__((aligned(16))) list[2][512];

/* 16-bit source image in linear layout, plus the copy written by swizzle_fast(). */
static unsigned short __attribute__((aligned(16))) pixels[BUF_WIDTH*SCR_HEIGHT];
static unsigned short __attribute__((aligned(16))) swizzled_pixels[BUF_WIDTH*SCR_HEIGHT];
|
||||
|
||||
/*
 * Vertex record handed to sceGuDrawArray() by the blit helpers, which draw it
 * with GU_TEXTURE_16BIT | GU_COLOR_4444 | GU_VERTEX_16BIT. Member order and
 * sizes must stay exactly as declared.
 */
struct Vertex
{
	/* texture coordinates */
	unsigned short u;
	unsigned short v;
	/* vertex colour (the blit helpers always write 0) */
	unsigned short color;
	/* destination position */
	short x;
	short y;
	short z;
};
|
||||
|
||||
void simpleBlit(int sx, int sy, int sw, int sh, int dx, int dy)
|
||||
{
|
||||
// simple blit, this just copies A->B, with all the cache-misses that apply
|
||||
|
||||
struct Vertex* vertices = (struct Vertex*)sceGuGetMemory(2 * sizeof(struct Vertex));
|
||||
|
||||
vertices[0].u = sx; vertices[0].v = sy;
|
||||
vertices[0].color = 0;
|
||||
vertices[0].x = dx; vertices[0].y = dy; vertices[0].z = 0;
|
||||
|
||||
vertices[1].u = sx+sw; vertices[1].v = sy+sh;
|
||||
vertices[1].color = 0;
|
||||
vertices[1].x = dx+sw; vertices[1].y = dy+sh; vertices[1].z = 0;
|
||||
|
||||
sceGuDrawArray(GU_SPRITES,GU_TEXTURE_16BIT|GU_COLOR_4444|GU_VERTEX_16BIT|GU_TRANSFORM_2D,2,0,vertices);
|
||||
}
|
||||
|
||||
void advancedBlit(int sx, int sy, int sw, int sh, int dx, int dy, int slice)
|
||||
{
|
||||
int start, end;
|
||||
|
||||
// blit maximizing the use of the texture-cache
|
||||
|
||||
for (start = sx, end = sx+sw; start < end; start += slice, dx += slice)
|
||||
{
|
||||
struct Vertex* vertices = (struct Vertex*)sceGuGetMemory(2 * sizeof(struct Vertex));
|
||||
int width = (start + slice) < end ? slice : end-start;
|
||||
|
||||
vertices[0].u = start; vertices[0].v = sy;
|
||||
vertices[0].color = 0;
|
||||
vertices[0].x = dx; vertices[0].y = dy; vertices[0].z = 0;
|
||||
|
||||
vertices[1].u = start + width; vertices[1].v = sy + sh;
|
||||
vertices[1].color = 0;
|
||||
vertices[1].x = dx + width; vertices[1].y = dy + sh; vertices[1].z = 0;
|
||||
|
||||
sceGuDrawArray(GU_SPRITES,GU_TEXTURE_16BIT|GU_COLOR_4444|GU_VERTEX_16BIT|GU_TRANSFORM_2D,2,0,vertices);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rearrange a linear image into blocks of 16 bytes x 8 rows.
 *
 * The output is written sequentially: for every block (left to right, top to
 * bottom) its 8 rows of 16 bytes are stored back to back.
 *
 * @param out - Destination buffer
 * @param in - Source image in linear layout
 * @param width - Row length in BYTES (not pixels)
 * @param height - Number of rows
 */
void swizzle_fast(u8* out, const u8* in, unsigned int width, unsigned int height)
{
	const unsigned int blocks_across = width / 16;
	const unsigned int blocks_down = height / 8;

	// 32-bit words to skip from the end of one block row to the start of the next
	const unsigned int skip_words = (width-16)/4;
	// bytes covered by one band of 8 source rows
	const unsigned int band_bytes = width * 8;

	u32* dst = (u32*)out;
	const u8* band = in;
	unsigned int by, bx, line;

	for (by = 0; by < blocks_down; ++by, band += band_bytes)
	{
		const u8* block = band;

		for (bx = 0; bx < blocks_across; ++bx, block += 16)
		{
			const u32* src = (u32*)block;

			for (line = 0; line < 8; ++line, src += skip_words)
			{
				// copy one 16-byte block row as four words
				dst[0] = src[0];
				dst[1] = src[1];
				dst[2] = src[2];
				dst[3] = src[3];
				dst += 4;
				src += 4;
			}
		}
	}
}
|
||||
|
||||
/* On-screen mode labels; main() indexes this with blit_method + swizzle * 2. */
const char* modes[] =
{
	[0] = "normal, linear",
	[1] = "optimized, linear",
	[2] = "normal, swizzled",
	[3] = "optimized, swizzled"
};
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
unsigned int x,y;
|
||||
|
||||
pspDebugScreenInit();
|
||||
setupCallbacks();
|
||||
|
||||
// Setup GU
|
||||
|
||||
void* fbp0 = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_8888);
|
||||
void* fbp1 = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_8888);
|
||||
void* zbp = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_4444);
|
||||
|
||||
sceGuInit();
|
||||
|
||||
sceGuStart(GU_DIRECT,list[0]);
|
||||
sceGuDrawBuffer(GU_PSM_8888,fbp0,BUF_WIDTH);
|
||||
sceGuDispBuffer(SCR_WIDTH,SCR_HEIGHT,fbp1,BUF_WIDTH);
|
||||
sceGuDepthBuffer(zbp,BUF_WIDTH);
|
||||
sceGuOffset(2048 - (SCR_WIDTH/2),2048 - (SCR_HEIGHT/2));
|
||||
sceGuViewport(2048,2048,SCR_WIDTH,SCR_HEIGHT);
|
||||
sceGuDepthRange(65535,0);
|
||||
sceGuScissor(0,0,SCR_WIDTH,SCR_HEIGHT);
|
||||
sceGuEnable(GU_SCISSOR_TEST);
|
||||
sceGuFrontFace(GU_CW);
|
||||
sceGuEnable(GU_TEXTURE_2D);
|
||||
sceGuClear(GU_COLOR_BUFFER_BIT|GU_DEPTH_BUFFER_BIT);
|
||||
sceGuFinish();
|
||||
sceGuSync(GU_SYNC_FINISH, GU_SYNC_WHAT_DONE);
|
||||
|
||||
// Ensure both display lists are initialised
|
||||
sceGuStart(GU_CALL,list[0]);
|
||||
sceGuFinish();
|
||||
sceGuStart(GU_CALL,list[1]);
|
||||
sceGuFinish();
|
||||
|
||||
sceDisplayWaitVblankStart();
|
||||
sceGuDisplay(1);
|
||||
|
||||
// generate dummy image to blit
|
||||
|
||||
for (y = 0; y < SCR_HEIGHT; ++y)
|
||||
{
|
||||
unsigned short* row = &pixels[y * BUF_WIDTH];
|
||||
for (x = 0; x < SCR_WIDTH; ++x)
|
||||
{
|
||||
row[x] = x * y;
|
||||
}
|
||||
}
|
||||
|
||||
swizzle_fast((u8*)swizzled_pixels,(const u8*)pixels,BUF_WIDTH*2,SCR_HEIGHT); // 512*2 because swizzle operates in bytes, and each pixel in a 16-bit texture is 2 bytes
|
||||
|
||||
sceKernelDcacheWritebackAll();
|
||||
|
||||
float curr_ms = 1.0f;
|
||||
int blit_method = 0;
|
||||
int usleep_value = 0;
|
||||
int swizzle = 0;
|
||||
SceCtrlData oldPad;
|
||||
oldPad.Buttons = 0;
|
||||
|
||||
sceCtrlSetSamplingCycle(0);
|
||||
sceCtrlSetSamplingMode(0);
|
||||
|
||||
u64 last_tick;
|
||||
sceRtcGetCurrentTick(&last_tick);
|
||||
u32 tick_frequency = sceRtcGetTickResolution();
|
||||
int frame_count = 0;
|
||||
int list_idx = 0;
|
||||
|
||||
while(running())
|
||||
{
|
||||
SceCtrlData pad;
|
||||
|
||||
void *pkt = list[list_idx & 1];
|
||||
void *buf = ( list_idx & 1 ) ? fbp0 : fbp1;
|
||||
sceGuStart(GU_SEND, pkt);
|
||||
sceGuDrawBufferList(GU_PSM_8888, buf, BUF_WIDTH);
|
||||
|
||||
// switch methods if requested
|
||||
|
||||
if(sceCtrlPeekBufferPositive(&pad, 1))
|
||||
{
|
||||
if (pad.Buttons != oldPad.Buttons)
|
||||
{
|
||||
if(pad.Buttons & PSP_CTRL_CROSS)
|
||||
blit_method ^= 1;
|
||||
if(pad.Buttons & PSP_CTRL_CIRCLE)
|
||||
swizzle ^= 1;
|
||||
if(pad.Buttons & PSP_CTRL_RTRIGGER)
|
||||
usleep_value += 100;
|
||||
if(pad.Buttons & PSP_CTRL_LTRIGGER)
|
||||
usleep_value = MAX(usleep_value - 100, 0);
|
||||
}
|
||||
oldPad = pad;
|
||||
}
|
||||
|
||||
|
||||
// Setup Texture
|
||||
sceGuTexMode(GU_PSM_4444,0,0,swizzle); // 16-bit RGBA
|
||||
sceGuTexImage(0,512,512,512,swizzle ? swizzled_pixels : pixels); // setup texture as a 512x512 texture, even though the buffer is only 512x272 (480 visible)
|
||||
sceGuTexFunc(GU_TFX_REPLACE,GU_TCC_RGBA); // don't get influenced by any vertex colors
|
||||
sceGuTexFilter(GU_NEAREST,GU_NEAREST); // point-filtered sampling
|
||||
|
||||
// Blit
|
||||
|
||||
if (blit_method)
|
||||
advancedBlit(0,0,SCR_WIDTH,SCR_HEIGHT,0,0,32);
|
||||
else
|
||||
simpleBlit(0,0,SCR_WIDTH,SCR_HEIGHT,0,0);
|
||||
|
||||
// Finish the packet
|
||||
int g_packet_size = sceGuFinish() ;
|
||||
sceKernelDcacheWritebackRange(pkt, g_packet_size) ;
|
||||
|
||||
// Apply some sleep to the CPU
|
||||
if (usleep_value > 0)
|
||||
usleep(usleep_value);
|
||||
|
||||
// Wait for the packet to be finished
|
||||
clock_t start = clock();
|
||||
sceGuSync(GU_SYNC_SEND, GU_SYNC_WHAT_DONE);
|
||||
clock_t end = clock();
|
||||
|
||||
float curr_fps = 1.0f / curr_ms;
|
||||
void *prev_buf = ( list_idx & 1 ) ? fbp1 : fbp0;
|
||||
pspDebugScreenSetOffset((int)prev_buf);
|
||||
pspDebugScreenSetXY(0,0);
|
||||
pspDebugScreenPrintf(
|
||||
"(X = mode, O = swizzle, L/R = +/- usleep) %s\nfps: %d.%02d (%dMB/s)\nCPU delay: %d us, Waited for GPU: %ld us\n",
|
||||
modes[blit_method + swizzle * 2],
|
||||
(int)curr_fps,
|
||||
(int)((curr_fps-(int)curr_fps) * 100.0f),
|
||||
(((int)curr_fps * SCR_WIDTH * SCR_HEIGHT * 2)/(1024*1024)),
|
||||
usleep_value,
|
||||
end - start);
|
||||
|
||||
// sceDisplayWaitVblankStart();
|
||||
sceGuSwapBuffers();
|
||||
|
||||
// Send the packet to the GPU
|
||||
sceGuSendList(GU_TAIL, pkt, NULL) ;
|
||||
|
||||
// simple frame rate counter
|
||||
++frame_count;
|
||||
u64 curr_tick;
|
||||
list_idx = (list_idx + 1) & 1;
|
||||
sceRtcGetCurrentTick(&curr_tick);
|
||||
if ((curr_tick-last_tick) >= tick_frequency)
|
||||
{
|
||||
float time_span = ((int)(curr_tick-last_tick)) / (float)tick_frequency;
|
||||
curr_ms = time_span / frame_count;
|
||||
|
||||
frame_count = 0;
|
||||
sceRtcGetCurrentTick(&last_tick);
|
||||
}
|
||||
}
|
||||
|
||||
sceGuTerm();
|
||||
|
||||
sceKernelExitGame();
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user