Merge pull request #231 from fjtrujy/gprof_improvements

Some `gprof` improvements
This commit is contained in:
Francisco Javier Trujillo Mata
2024-08-24 23:34:47 +02:00
committed by GitHub
13 changed files with 422 additions and 43 deletions

View File

@@ -33,6 +33,7 @@ AC_CHECK_LIB([z], [compress])
AC_C_CONST
AC_C_BIGENDIAN
AC_ARG_WITH(gprofflags, [ --with-gprofflags build with gprof flag included], [], [--with_gprofflags=no])
AC_ARG_WITH(pthread, [ --with-pthread build with pthread support], [], [with_pthread=yes])
# Checks for library functions.
@@ -73,6 +74,9 @@ AC_SUBST(PSPSDK_LIBDIR)
# CFLAGS and CXXFLAGS used to build pspsdk libraries.
PSPSDK_CFLAGS="$CFLAGS -mno-gpopt -Wall -Werror -D_PSP_FW_VERSION=600"
if test "$with_gprofflags" = yes ; then
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -pg -g"
fi
PSPSDK_CXXFLAGS="$PSPSDK_CFLAGS -fno-exceptions -fno-rtti"
if test "$with_pthread" = no ; then
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -DPSP_WITHOUT_PTHREAD"

View File

@@ -11,7 +11,7 @@ CFLAGS = @PSPSDK_CFLAGS@ -std=gnu99 -Wall -Wmissing-prototypes
CCASFLAGS = $(CFLAGS) -I$(top_srcdir)/src/base -I$(top_srcdir)/src/kernel
libpspprofincludedir = @PSPSDK_INCLUDEDIR@
libpspprofinclude_HEADERS =
libpspprofinclude_HEADERS = pspprof.h
lib_LIBRARIES = libpspprof.a
libpspprof_a_SOURCES = prof.c mcount.s

View File

@@ -17,30 +17,12 @@ _mcount:
sd $6, 32($29)
sd $7, 40($29)
# Make sure we're not recursively called when compiling __mcount()
# With -pg
la $4, _busy
lw $5, 0($4)
bnez $5, done
nop
# Mark busy
li $5, 1
sw $5, 0($4)
# Call internal C handler
move $4, $1
move $5, $31
jal __mcount
nop
# Unmark busy
la $4, _busy
li $5, 0
sw $5, 0($4)
done:
# Restore registers
ld $31, 0($29)
ld $1, 8($29)
@@ -52,9 +34,6 @@ _mcount:
j $31
move $31, $1 # restore caller's ra
_busy:
.space 4
.end _mcount
.set reorder

View File

@@ -13,6 +13,8 @@
#include <stdio.h>
#include <string.h>
#include <pspprof.h>
#define GMON_PROF_ON 0
#define GMON_PROF_BUSY 1
#define GMON_PROF_ERROR 2
@@ -78,8 +80,11 @@ extern int _ftext;
extern int _etext;
/* forward declarations */
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup(void);
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __mcount(unsigned int, unsigned int);
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysClock *c2, void *common);
/** Initializes pg library
@@ -89,6 +94,7 @@ static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysCloc
for sampling statistics. Note that this also installs a timer that
runs at 1000 hert.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static void initialize()
{
initialized = 1;
@@ -122,6 +128,15 @@ static void initialize()
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int )));
gp.timer = sceKernelCreateVTimer("gprof timer", NULL);
if (gp.timer < 0)
{
free(gp.arcs);
free(gp.samples);
gp.arcs = 0;
gp.samples = 0;
gp.state = GMON_PROF_ERROR;
return;
}
SceKernelSysClock sc;
sc.hi = 0;
@@ -151,12 +166,17 @@ static void initialize()
}
}
/** Writes gmon.out dump file and stops profiling
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_start(void) {
// There is already a profiling session running, let's stop it and ignore the result
if (gp.state == GMON_PROF_ON) {
gprof_stop(NULL, 0);
}
initialize();
}
Called from atexit() handler; will dump out a host:gmon.out file
with all collected information.
*/
void __gprof_cleanup()
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_stop(const char* filename, int should_dump)
{
FILE *fp;
int i;
@@ -171,29 +191,47 @@ void __gprof_cleanup()
/* disable profiling before we make plenty of libc calls */
gp.state = GMON_PROF_OFF;
// Delete timer
sceKernelStopVTimer(gp.timer);
sceKernelDeleteVTimer(gp.timer);
fp = fopen("gmon.out", "wb");
hdr.lpc = gp.lowpc;
hdr.hpc = gp.highpc;
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
hdr.version = GMONVERSION;
hdr.profrate = SAMPLE_FREQ;
hdr.resv[0] = 0;
hdr.resv[1] = 0;
hdr.resv[2] = 0;
fwrite(&hdr, 1, sizeof(hdr), fp);
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
if (should_dump) {
fp = fopen(filename, "wb");
hdr.lpc = gp.lowpc;
hdr.hpc = gp.highpc;
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
hdr.version = GMONVERSION;
hdr.profrate = SAMPLE_FREQ;
hdr.resv[0] = 0;
hdr.resv[1] = 0;
hdr.resv[2] = 0;
fwrite(&hdr, 1, sizeof(hdr), fp);
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
for (i=0; i<gp.narcs; i++)
{
if (gp.arcs[i].count > 0)
for (i=0; i<gp.narcs; i++)
{
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
if (gp.arcs[i].count > 0)
{
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
}
}
fclose(fp);
}
fclose(fp);
// Free memory
free(gp.arcs);
free(gp.samples);
}
/** Writes gmon.out dump file and stops profiling
Called from atexit() handler; will dump out a gmon.out file
at cwd with all collected information.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup()
{
gprof_stop("gmon.out", 1);
}
/** Internal C handler for _mcount()
@@ -205,6 +243,7 @@ void __gprof_cleanup()
beginning of each compiled routine, which eventually brings the
control to here.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __mcount(unsigned int frompc, unsigned int selfpc)
{
int e;
@@ -238,6 +277,7 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
/** Internal timer handler
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
{
unsigned int frompc = gp.pc;

41
src/prof/pspprof.h Normal file
View File

@@ -0,0 +1,41 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* pspprof.h - Prototypes for the profiler library
*
* Copyright (c) 2006 Urchin
*
*/
#ifndef __PSPPROF_H__
#define __PSPPROF_H__
#ifdef __cplusplus
extern "C" {
#endif
/**
* Start the profiler.
* If the profiler is already running, this function stop previous one,
* and ignore the result.
* Finally, it initializes a new profiler session.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_start(void);
/**
* Stop the profiler.
* If the profiler is not running, this function does nothing.
* @param filename The name of the file to write the profiling data to.
* @param should_dump If 1, the profiling data will be written to the file.
* If 0, the profiling data will be discarded.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_stop(const char* filename, int should_dump);
#ifdef __cplusplus
}
#endif
#endif /* __PSPPROF_H__ */

View File

@@ -12,6 +12,8 @@ SAMPLES = \
debug/debugkb \
debug/sio \
debug/gdb \
gprof/basic \
gprof/custom \
gu/beginobject \
gu/blend \
gu/blit \

View File

@@ -9,6 +9,8 @@ SAMPLES = \
debug/debugkb \
debug/sio \
debug/gdb \
gprof/basic \
gprof/custom \
gu/beginobject \
gu/blend \
gu/blit \

View File

@@ -0,0 +1,16 @@
TARGET = gprofbasic
OBJS = main.o
INCDIR =
CFLAGS = -O2 -Wall -pg -g
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)
LIBDIR =
LDFLAGS = -pg -g
EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = GProf Basic Example
PSPSDK=$(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak

View File

@@ -0,0 +1,66 @@
Sample program to show how to use the `gprof` feature.
The requiremnts are quite easy, just adding `-g -pg` flags to the `CFLAGS` and `LDFLAGS` is enough to make things to work out of the box.
Firstly execute your program, then once program ends it will automatically generates a `gmon.out` file at CWD level.
In order to inspect the content of the generated file you need to use the `psp-gprof` binary.
For instance, following the next syntax:
```
psp-gprof -b {binary.elf} gmon.out
```
like:
```
psp-gprof -b gprofbasic.elf gmon.out
```
It will show something like:
```
Flat profile:
Each sample counts as 0.001 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
95.98 0.17 0.17 104728 0.00 0.00 is_prime
4.02 0.17 0.01 1 7.00 7.00 dummy_function
0.00 0.17 0.00 1 0.00 174.00 main
0.00 0.17 0.00 1 0.00 167.00 sum_of_square_roots
Call graph
granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds
index % time self children called name
0.00 0.17 1/1 _main [2]
[1] 100.0 0.00 0.17 1 main [1]
0.00 0.17 1/1 sum_of_square_roots [4]
0.01 0.00 1/1 dummy_function [5]
-----------------------------------------------
<spontaneous>
[2] 100.0 0.00 0.17 _main [2]
0.00 0.17 1/1 main [1]
-----------------------------------------------
0.17 0.00 104728/104728 sum_of_square_roots [4]
[3] 96.0 0.17 0.00 104728 is_prime [3]
-----------------------------------------------
0.00 0.17 1/1 main [1]
[4] 96.0 0.00 0.17 1 sum_of_square_roots [4]
0.17 0.00 104728/104728 is_prime [3]
-----------------------------------------------
0.01 0.00 1/1 main [1]
[5] 4.0 0.01 0.00 1 dummy_function [5]
-----------------------------------------------
Index by function name
[5] dummy_function [1] main
[3] is_prime [4] sum_of_square_roots
```
Cheers

View File

@@ -0,0 +1,78 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* main.c - A basic example for checking the GProf profiler.
*
* Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com
*
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <pspmoduleinfo.h>
#include <pspthreadman.h>
PSP_MODULE_INFO("GProf Basic Example", 0, 1, 1);
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU);
// Function to check if a number is prime
int is_prime(int num)
{
if (num <= 1)
return 0;
if (num <= 3)
return 1;
if (num % 2 == 0 || num % 3 == 0)
return 0;
for (int i = 5; i * i <= num; i += 6)
{
if (num % i == 0 || num % (i + 2) == 0)
return 0;
}
return 1;
}
// Function to compute the sum of square roots of the first N prime numbers
double sum_of_square_roots(int N)
{
int count = 0;
int num = 2;
double sum = 0.0;
while (count < N)
{
if (is_prime(num))
{
sum += sqrt(num);
count++;
}
num++;
}
return sum;
}
int dummy_function()
{
int i;
for (i = 0; i < 10000; i++)
{
printf(".");
}
printf("\n");
return 0;
}
int main(int argc, char *argv[])
{
printf("Hello, world!\n");
dummy_function();
int N = 10000; // Large number of primes to compute
printf("Sum of square roots of the first %d prime numbers is %lf\n", N, sum_of_square_roots(N));
printf("Goodbye, world!\n");
return 0;
}

View File

@@ -0,0 +1,16 @@
TARGET = gprofcustom
OBJS = main.o
INCDIR =
CFLAGS = -O2 -Wall -pg -g
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)
LIBDIR =
LDFLAGS = -pg -g
EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = GProf Custom Example
PSPSDK=$(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak

View File

@@ -0,0 +1,57 @@
More advance example about how to use the `gprof` feature.
The requiremnts are quite easy, just adding `-g -pg` flags to the `CFLAGS` and `LDFLAGS` is enough to make things to work out of the box.
This example shows how to enable profiling just around some specific piece of code.
How `gprof` by default start profiling from the very beginning of the app we must discard that result, this is why we do `gprof_stop(NULL, false);`.
Then we just need to call `gprof_start();` whenever we want to start meassuring our piece of code and `gprof_stop("gmon_custom.out", true);` whenver we want to stop the profiling.
Firstly execute your program, then once program ends it will automatically generates the output with the given names.
In order to inspect the content of the generated file you need to use the `psp-gprof` binary.
For instance, following the next syntax:
```
psp-gprof -b {binary.elf} {gmon_custom.out}
```
like:
```
psp-gprof -b gprofcustom.elf gmon_custom.out
```
Output in this example:
```
Flat profile:
Each sample counts as 0.001 seconds.
% cumulative self self total
time seconds seconds calls s/call s/call name
89.78 5.60 5.60 1 5.60 5.60 heavy_operation_3
10.22 6.24 0.64 1 0.64 0.64 heavy_operation_2
Call graph
granularity: each sample hit covers 2 byte(s) for 0.02% of 6.24 seconds
index % time self children called name
<spontaneous>
[1] 100.0 0.00 6.24 main [1]
5.60 0.00 1/1 heavy_operation_3 [2]
0.64 0.00 1/1 heavy_operation_2 [3]
-----------------------------------------------
5.60 0.00 1/1 main [1]
[2] 89.8 5.60 0.00 1 heavy_operation_3 [2]
-----------------------------------------------
0.64 0.00 1/1 main [1]
[3] 10.2 0.64 0.00 1 heavy_operation_2 [3]
-----------------------------------------------
Index by function name
[3] heavy_operation_2 [2] heavy_operation_3
```
Cheers.

View File

@@ -0,0 +1,78 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* main.c - A more advadced example for checking the GProf profiler.
* This example demonstrates how to start and stop the profiler multiple times
*
* Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <unistd.h>
#include <pspmoduleinfo.h>
#include <pspthreadman.h>
#include <pspprof.h>
PSP_MODULE_INFO("GProf Custom Example", 0, 1, 1);
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU);
// Simulates a CPU-bound heavy operation by calculating large numbers of square roots
void heavy_operation_1() {
double result = 0.0;
for (long i = 1; i < 100000; i++) {
result += sqrt((double)i);
}
printf("Result of heavy_operation_1: %f\n", result);
}
// Simulates a different CPU-bound heavy operation by calculating factorials
void heavy_operation_2() {
unsigned long long result = 1;
for (int i = 1; i < 20; i++) {
result = 1;
for (int j = 1; j <= i * 10000; j++) {
result *= j;
result %= 100000; // Prevent overflow by keeping the result manageable
}
}
printf("Result of heavy_operation_2: %llu\n", result);
}
// Simulates a mixed heavy operation (CPU + IO-bound) by performing some calculations and sleeping
void heavy_operation_3() {
double result = 1.0;
for (long i = 1; i < 50000; i++) {
result *= log((double)i);
if (i % 10000 == 0) {
printf("Heavy operation 3, part %ld: sleeping for 1 second...\n", i / 1000000);
sleep(1); // Simulate some IO-bound activity
}
}
printf("Result of heavy_operation_3: %f\n", result);
}
int main() {
// Initial heavy operation (this part will be ignored in profiling)
heavy_operation_1();
// Stop profiling (ignore the above operation in gprof output)
gprof_stop(NULL, false);
// Start profiling again
gprof_start();
// Operations to be profiled
heavy_operation_2();
heavy_operation_3();
// Stop profiling and save the output to a custom file
gprof_stop("gmon_custom.out", true);
return 0;
}