Merge pull request #277 from fjtrujy/sceGu_parallel

Adding a `sceGU` sample dealing with CPU & GPU parallelization
This commit is contained in:
Francisco Javier Trujillo Mata
2025-05-04 11:19:19 +02:00
committed by GitHub
4 changed files with 314 additions and 12 deletions

View File

@@ -499,10 +499,10 @@ void* sceGuGetMemory(int size);
*
* The previous context-type is stored so that it can be restored at sceGuFinish().
*
* @param cid - Context Type
* @param ctype - Context Type
* @param list - Pointer to display-list (16 byte aligned)
**/
void sceGuStart(int cid, void* list);
void sceGuStart(int ctype, void* list);
/**
* Finish current display list and go back to the parent context

View File

@@ -11,9 +11,9 @@
#include <pspkernel.h>
#include <pspge.h>
void sceGuStart(int cid, void *list)
void sceGuStart(int ctype, void *list)
{
GuContext *context = &gu_contexts[cid];
GuContext *context = &gu_contexts[ctype];
unsigned int *local_list = (unsigned int *)(((unsigned int)list) | 0x40000000);
// setup display list
@@ -25,9 +25,9 @@ void sceGuStart(int cid, void *list)
// store current context
gu_curr_context = cid;
gu_curr_context = ctype;
if (!cid)
if (ctype == GU_DIRECT)
{
ge_list_executed[0] = sceGeListEnQueue(local_list, local_list, gu_settings.ge_callback_id, 0);
gu_settings.signal_offset = 0;
@@ -53,12 +53,9 @@ void sceGuStart(int cid, void *list)
gu_init = 1;
}
if (!gu_curr_context)
if (ctype == GU_DIRECT && gu_draw_buffer.frame_width != 0)
{
if (gu_draw_buffer.frame_width)
{
sendCommandi(FRAME_BUF_PTR, ((unsigned int)gu_draw_buffer.frame_buffer) & 0xffffff);
sendCommandi(FRAME_BUF_WIDTH, ((((unsigned int)gu_draw_buffer.frame_buffer) & 0xff000000) >> 8) | gu_draw_buffer.frame_width);
}
sendCommandi(FRAME_BUF_PTR, ((unsigned int)gu_draw_buffer.frame_buffer) & 0xffffff);
sendCommandi(FRAME_BUF_WIDTH, ((((unsigned int)gu_draw_buffer.frame_buffer) & 0xff000000) >> 8) | gu_draw_buffer.frame_width);
}
}

View File

@@ -0,0 +1,17 @@
# Build description for the "doublelist" GU sample.
# Output name and the objects linked into it; callbacks.o is shared with the
# sibling samples under ../common.
TARGET = doublelist
OBJS = doublelist.o ../common/callbacks.o
# No extra include directories are needed.
INCDIR =
CFLAGS = -Wall -O2
# C++ and assembler flags are derived from CFLAGS.
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)
# No extra library directories or linker flags.
LIBDIR =
LDFLAGS =
# GU libraries used by the sample.
LIBS= -lpspgum -lpspgu
# Also produce an EBOOT.PBP with the title below.
EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = Blit with Double Display List Sample
# Locate the installed PSPSDK and pull in its shared build rules.
PSPSDK=$(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak

View File

@@ -0,0 +1,288 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* Copyright (c) 2005 Jesper Svennevid
*/
#include <pspkernel.h>
#include <pspdisplay.h>
#include <pspdebug.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <pspctrl.h>
#include <pspgu.h>
#include <psprtc.h>
#include "../common/callbacks.h"
// Module declaration; the main thread is requested with THREAD_ATTR_USER.
PSP_MODULE_INFO("Blit DoubleDisplayList", 0, 1, 1);
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER);
// Two 16-byte aligned display lists of 512 words each. main() alternates
// between them every frame (list_idx & 1): one is being filled by the CPU
// while the other has already been handed over with sceGuSendList().
static unsigned int __attribute__((aligned(16))) list[2][512];
// BUF_WIDTH is the row stride (in pixels) given to the GU buffer setup calls;
// SCR_WIDTH x SCR_HEIGHT is the visible area used for viewport and scissor.
#define BUF_WIDTH (512)
#define SCR_WIDTH (480)
#define SCR_HEIGHT (272)
// 16-bit source image generated in main(), BUF_WIDTH pixels per row.
static unsigned short __attribute__((aligned(16))) pixels[BUF_WIDTH*SCR_HEIGHT];
// The same image after being rearranged by swizzle_fast().
static unsigned short __attribute__((aligned(16))) swizzled_pixels[BUF_WIDTH*SCR_HEIGHT];
/*
 * Vertex record used by simpleBlit() and advancedBlit().
 *
 * The members are all 16 bits wide and appear in the order texture
 * coordinate, color, position; both blit routines pass
 * GU_TEXTURE_16BIT|GU_COLOR_4444|GU_VERTEX_16BIT to sceGuDrawArray(), so keep
 * this layout in sync with those flags.
 */
struct Vertex
{
// texture coordinates, written in texels by the blit routines
unsigned short u, v;
// vertex color; always written as 0 in this file
unsigned short color;
// destination position; z is always written as 0 in this file
short x, y, z;
};
/*
 * Copy a rectangle of the current texture to the draw buffer as one sprite.
 *
 * No attempt is made to be friendly to the texture cache; see advancedBlit()
 * for the strip-based variant.
 *
 * @param sx, sy - top-left corner of the source rectangle (texels)
 * @param sw, sh - size of the rectangle
 * @param dx, dy - top-left corner of the destination rectangle (pixels)
 */
void simpleBlit(int sx, int sy, int sw, int sh, int dx, int dy)
{
	// The two sprite corners live in memory obtained from sceGuGetMemory().
	struct Vertex* corners = (struct Vertex*)sceGuGetMemory(2 * sizeof(struct Vertex));
	struct Vertex* top_left = &corners[0];
	struct Vertex* bottom_right = &corners[1];

	top_left->u = sx;
	top_left->v = sy;
	top_left->color = 0;
	top_left->x = dx;
	top_left->y = dy;
	top_left->z = 0;

	bottom_right->u = sx+sw;
	bottom_right->v = sy+sh;
	bottom_right->color = 0;
	bottom_right->x = dx+sw;
	bottom_right->y = dy+sh;
	bottom_right->z = 0;

	sceGuDrawArray(GU_SPRITES,
	               GU_TEXTURE_16BIT|GU_COLOR_4444|GU_VERTEX_16BIT|GU_TRANSFORM_2D,
	               2, 0, corners);
}
/*
 * Copy a rectangle of the current texture to the draw buffer as a series of
 * vertical strips, so that each sprite only touches a narrow band of the
 * texture and makes better use of the texture cache.
 *
 * @param sx, sy - top-left corner of the source rectangle (texels)
 * @param sw, sh - size of the rectangle
 * @param dx, dy - top-left corner of the destination rectangle (pixels)
 * @param slice  - strip width; the last strip is narrowed to fit. A value
 *                 <= 0 is treated as "one strip covering the whole width".
 */
void advancedBlit(int sx, int sy, int sw, int sh, int dx, int dy, int slice)
{
	int start, end;

	// A non-positive slice would never move `start` towards `end`: the loop
	// would spin forever while requesting more and more memory through
	// sceGuGetMemory(). Fall back to a single full-width strip instead.
	if (slice <= 0)
		slice = sw;

	// blit maximizing the use of the texture-cache
	for (start = sx, end = sx+sw; start < end; start += slice, dx += slice)
	{
		struct Vertex* vertices = (struct Vertex*)sceGuGetMemory(2 * sizeof(struct Vertex));
		// clamp the final strip so it does not run past the rectangle
		int width = (start + slice) < end ? slice : end-start;

		vertices[0].u = start; vertices[0].v = sy;
		vertices[0].color = 0;
		vertices[0].x = dx; vertices[0].y = dy; vertices[0].z = 0;

		vertices[1].u = start + width; vertices[1].v = sy + sh;
		vertices[1].color = 0;
		vertices[1].x = dx + width; vertices[1].y = dy + sh; vertices[1].z = 0;

		sceGuDrawArray(GU_SPRITES,GU_TEXTURE_16BIT|GU_COLOR_4444|GU_VERTEX_16BIT|GU_TRANSFORM_2D,2,0,vertices);
	}
}
/*
 * Rearrange a linear image into blocks of 16 bytes x 8 rows.
 *
 * Blocks are emitted left-to-right, then top-to-bottom; inside a block the
 * eight 16-byte rows are stored back to back. Only whole blocks are
 * processed: width / 16 blocks per row and height / 8 block rows.
 *
 * Both buffers are accessed through 32-bit words.
 *
 * @param out    - destination buffer
 * @param in     - source image
 * @param width  - length of one source row in BYTES
 * @param height - number of source rows
 */
void swizzle_fast(u8* out, const u8* in, unsigned int width, unsigned int height)
{
	const unsigned int blocks_across = width / 16;
	const unsigned int blocks_down = height / 8;
	const unsigned int words_per_row = width / 4; // distance between two source rows, in words
	unsigned int block_row, block_col, line;
	u32* write_ptr = (u32*)out;

	for (block_row = 0; block_row < blocks_down; ++block_row)
	{
		// first byte of the 8-row band this block row is taken from
		const u8* band = in + block_row * (width * 8);

		for (block_col = 0; block_col < blocks_across; ++block_col)
		{
			const u32* block = (const u32*)(band + block_col * 16);

			for (line = 0; line < 8; ++line)
			{
				const u32* read_ptr = block + line * words_per_row;

				// one 16-byte row of the block = four words
				write_ptr[0] = read_ptr[0];
				write_ptr[1] = read_ptr[1];
				write_ptr[2] = read_ptr[2];
				write_ptr[3] = read_ptr[3];
				write_ptr += 4;
			}
		}
	}
}
// Human-readable names of the four blit configurations shown on screen.
// main() indexes this table with blit_method + swizzle * 2, so the order is
// significant: bit 0 selects simple/advanced blit, bit 1 linear/swizzled.
const char* modes[] =
{
"normal, linear",
"optimized, linear",
"normal, swizzled",
"optimized, swizzled"
};
/*
 * Sample entry point.
 *
 * Sets up the GU, generates a test image (plus a swizzled copy) and then
 * loops: every frame the CPU records a blit into one of the two display
 * lists with GU_SEND while the list recorded on the previous iteration has
 * already been queued with sceGuSendList(). Before queuing the new list the
 * loop waits for the previous one (sceGuSync with GU_SYNC_SEND) and reports
 * how long that wait took, next to an optional artificial CPU delay.
 *
 * Controls: X toggles simple/advanced blit, O toggles swizzling, R/L
 * increase/decrease the CPU delay by 100 us.
 *
 * @param argc, argv - unused
 * @return 0 (not normally reached; sceKernelExitGame() is called first)
 */
int main(int argc, char* argv[])
{
	unsigned int x,y;

	pspDebugScreenInit();
	setupCallbacks();

	// Setup GU
	void* fbp0 = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_8888);
	void* fbp1 = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_8888);
	void* zbp = guGetStaticVramBuffer(BUF_WIDTH,SCR_HEIGHT,GU_PSM_4444);

	sceGuInit();

	sceGuStart(GU_DIRECT,list[0]);
	sceGuDrawBuffer(GU_PSM_8888,fbp0,BUF_WIDTH);
	sceGuDispBuffer(SCR_WIDTH,SCR_HEIGHT,fbp1,BUF_WIDTH);
	sceGuDepthBuffer(zbp,BUF_WIDTH);
	sceGuOffset(2048 - (SCR_WIDTH/2),2048 - (SCR_HEIGHT/2));
	sceGuViewport(2048,2048,SCR_WIDTH,SCR_HEIGHT);
	sceGuDepthRange(65535,0);
	sceGuScissor(0,0,SCR_WIDTH,SCR_HEIGHT);
	sceGuEnable(GU_SCISSOR_TEST);
	sceGuFrontFace(GU_CW);
	sceGuEnable(GU_TEXTURE_2D);
	sceGuClear(GU_COLOR_BUFFER_BIT|GU_DEPTH_BUFFER_BIT);
	sceGuFinish();
	sceGuSync(GU_SYNC_FINISH, GU_SYNC_WHAT_DONE);

	// Ensure both display lists are initialised
	sceGuStart(GU_CALL,list[0]);
	sceGuFinish();
	sceGuStart(GU_CALL,list[1]);
	sceGuFinish();

	sceDisplayWaitVblankStart();
	sceGuDisplay(1);

	// generate dummy image to blit
	for (y = 0; y < SCR_HEIGHT; ++y)
	{
		unsigned short* row = &pixels[y * BUF_WIDTH];
		for (x = 0; x < SCR_WIDTH; ++x)
		{
			row[x] = x * y;
		}
	}

	swizzle_fast((u8*)swizzled_pixels,(const u8*)pixels,BUF_WIDTH*2,SCR_HEIGHT); // 512*2 because swizzle operates in bytes, and each pixel in a 16-bit texture is 2 bytes

	sceKernelDcacheWritebackAll();

	// Average duration of one frame in SECONDS (time span / frame count),
	// refreshed roughly once per tick_frequency ticks.
	float curr_frame_secs = 1.0f;
	int blit_method = 0;
	int usleep_value = 0;
	int swizzle = 0;
	SceCtrlData oldPad;
	oldPad.Buttons = 0;

	sceCtrlSetSamplingCycle(0);
	sceCtrlSetSamplingMode(0);

	u64 last_tick;
	sceRtcGetCurrentTick(&last_tick);
	u32 tick_frequency = sceRtcGetTickResolution();
	int frame_count = 0;
	int list_idx = 0;

	while(running())
	{
		SceCtrlData pad;
		// Display list and render target for this iteration; both alternate
		// with list_idx.
		void *pkt = list[list_idx & 1];
		void *buf = ( list_idx & 1 ) ? fbp0 : fbp1;

		sceGuStart(GU_SEND, pkt);
		sceGuDrawBufferList(GU_PSM_8888, buf, BUF_WIDTH);

		// switch methods if requested
		// Only trust `pad` when at least one sample was actually read; a
		// negative return value is an error code and leaves it unset.
		if (sceCtrlPeekBufferPositive(&pad, 1) > 0)
		{
			// React to buttons that went down since the previous sample only,
			// so a button that is merely held does not toggle again when some
			// other button is pressed or released.
			unsigned int pressed = pad.Buttons & ~oldPad.Buttons;

			if (pressed & PSP_CTRL_CROSS)
				blit_method ^= 1;
			if (pressed & PSP_CTRL_CIRCLE)
				swizzle ^= 1;
			if (pressed & PSP_CTRL_RTRIGGER)
				usleep_value += 100;
			if (pressed & PSP_CTRL_LTRIGGER)
				usleep_value = MAX(usleep_value - 100, 0);

			oldPad = pad;
		}

		// Setup Texture
		sceGuTexMode(GU_PSM_4444,0,0,swizzle); // 16-bit RGBA
		sceGuTexImage(0,512,512,512,swizzle ? swizzled_pixels : pixels); // setup texture as a 512x512 texture, even though the buffer is only 512x272 (480 visible)
		sceGuTexFunc(GU_TFX_REPLACE,GU_TCC_RGBA); // don't get influenced by any vertex colors
		sceGuTexFilter(GU_NEAREST,GU_NEAREST); // point-filtered sampling

		// Blit
		if (blit_method)
			advancedBlit(0,0,SCR_WIDTH,SCR_HEIGHT,0,0,32);
		else
			simpleBlit(0,0,SCR_WIDTH,SCR_HEIGHT,0,0);

		// Finish the packet and write it back from the data cache before it
		// is handed over.
		int packet_size = sceGuFinish();
		sceKernelDcacheWritebackRange(pkt, packet_size);

		// Apply some sleep to the CPU
		if (usleep_value > 0)
			usleep(usleep_value);

		// Wait for the previously sent packet to be finished and measure how
		// long the CPU was blocked; this is the "Waited for GPU" figure.
		clock_t start = clock();
		sceGuSync(GU_SYNC_SEND, GU_SYNC_WHAT_DONE);
		clock_t end = clock();

		float curr_fps = 1.0f / curr_frame_secs;

		// Print the statistics into the other frame buffer (not the one this
		// iteration's list renders to).
		void *prev_buf = ( list_idx & 1 ) ? fbp1 : fbp0;
		pspDebugScreenSetOffset((int)prev_buf);
		pspDebugScreenSetXY(0,0);
		pspDebugScreenPrintf(
			"(X = mode, O = swizzle, L/R = +/- usleep) %s\nfps: %d.%02d (%dMB/s)\nCPU delay: %d us, Waited for GPU: %ld us\n",
			modes[blit_method + swizzle * 2],
			(int)curr_fps,
			(int)((curr_fps-(int)curr_fps) * 100.0f),
			(((int)curr_fps * SCR_WIDTH * SCR_HEIGHT * 2)/(1024*1024)),
			usleep_value,
			(long)(end - start)); // clock_t is not guaranteed to match %ld

		// sceDisplayWaitVblankStart();
		sceGuSwapBuffers();

		// Send the packet to the GPU
		sceGuSendList(GU_TAIL, pkt, NULL);

		// simple frame rate counter
		++frame_count;
		u64 curr_tick;
		list_idx = (list_idx + 1) & 1;
		sceRtcGetCurrentTick(&curr_tick);
		if ((curr_tick-last_tick) >= tick_frequency)
		{
			float time_span = ((int)(curr_tick-last_tick)) / (float)tick_frequency;
			curr_frame_secs = time_span / frame_count;

			frame_count = 0;
			sceRtcGetCurrentTick(&last_tick);
		}
	}

	sceGuTerm();

	sceKernelExitGame();
	return 0;
}