I can't find any information about the "ins" instruction. :(
Code: Select all
ins $v0, $zr, 0, 4
Code: Select all
ins $v0, $zr, 0, 4
Insert into $v0, starting at bit 0, the lowest 4 bits of $zr (i.e. clear bits 0-3 of $v0).
cooleyes wrote: What does this code mean?
I can't find any information about the "ins" instruction. :(
Code: Select all
ins $v0, $zr, 0, 4
Code: Select all
typedef union { int word; struct { int bf4 : 4; }; } bit4_t;
...
bit4_t f(bit4_t x)
{
bit4_t res;
...
printf("%d", x.bf4); // EXT $a1, $a0, 0, 4
...
res.bf4 = 0; // INS $v0, $zr, 0, 4
...
return res;
}
Code: Select all
/*
 * Pack 8-bit colour channels into a 16-bit pixel using the MIPS `ins`
 * (insert bit field) instruction instead of shift/or sequences.
 *
 * COLOR565:  r -> bits 0-4, g -> bits 5-10, b -> bits 11-15.
 * COLOR5551: r -> bits 0-4, g -> bits 5-9,  b -> bits 10-14, a -> bit 15.
 *
 * Asm operands are numbered outputs first, then inputs: %0 is `c`
 * (read/write), %1 is the first input, %2 the second, and so on.
 * Arguments are parenthesized so that expressions such as `x + 1` shift
 * as a whole. Each argument is evaluated exactly once.
 */
#define COLOR565( r, g, b ) \
({ unsigned short c = (r) >> 3; asm volatile ( "ins %0, %1, 5, 6; ins %0, %2, 11, 5" : "+r"(c) : "r"((g) >> 2), "r"((b) >> 3) ); c; })
/* 5-5-5-1 variant: every colour field is 5 bits wide, so blue starts at bit 10
 * and the single alpha bit at position 15 does not overlap it. */
#define COLOR5551( r, g, b, a ) \
({ unsigned short c = (r) >> 3; asm volatile ( "ins %0, %1, 5, 5; ins %0, %2, 10, 5; ins %0, %3, 15, 1" : "+r"(c) : "r"((g) >> 3), "r"((b) >> 3), "r"((a) >> 7) ); c; })
Code: Select all
union rgb565_t { unsigned short c; struct { unsigned short r : 5, g : 6, b : 5; } }
#define COLOR565( r, g, b ) ({ union rgb565_t res; res.c = r >> 3; res.g = g >> 2; res.b = b >> 3; res.c; })
Code: Select all
1)
00000000 <COLOR565_c>:
0: 000410c3 sra v0,a0,0x3
4: 00052883 sra a1,a1,0x2
8: 00052940 sll a1,a1,0x5
c: 000212c0 sll v0,v0,0xb
10: 00451025 or v0,v0,a1
14: 000630c3 sra a2,a2,0x3
18: 00461025 or v0,v0,a2
1c: 03e00008 jr ra
20: 3042ffff andi v0,v0,0xffff
2)
00000024 <COLOR565_asm>:
24: 7c8278c0 ext v0,a0,0x3,0x10
28: 00052883 sra a1,a1,0x2
2c: 000630c3 sra a2,a2,0x3
30: 7ca25144 ins v0,a1,0x5,0x6
34: 7cc27ac4 ins v0,a2,0xb,0x5
38: 03e00008 jr ra
3c: 3042ffff andi v0,v0,0xffff
3)
00000040 <COLOR565_union>:
40: 000410c2 srl v0,a0,0x3
44: 00052883 sra a1,a1,0x2
48: 7ca25144 ins v0,a1,0x5,0x6
4c: 000630c3 sra a2,a2,0x3
50: 7cc27ac4 ins v0,a2,0xb,0x5
54: 03e00008 jr ra
58: 3042ffff andi v0,v0,0xffff
hlide wrote: On PSP, "shift" and "or" operations carry a little overhead compared with "ins" instructions (see the macros below); as you can see, there is no need for "or" instructions.
Code: Select all
#define COLOR565( r, g, b ) \ ({ unsigned short c = r>>3; asm volatile ( "ins %0, %2, 5, 6; ins %0, %3, 11, 5" : "+r"(c) : "r"(g>>2), "r"(b>>3) ); c; }) #define COLOR5551( r, g, b, a ) \ ({ unsigned short c = r>>3; asm volatile ( "ins %0, %2, 5, 6; ins %0, %3, 11, 5; ins %0, %4, 15, 1" : "+r"(c) : "r"(g>>3), "r"(b>>3), "r"(a>>7) ); c; })
Normally, using a union with bit fields (as below) should give similar code on PSP, since the "ins"/"ext" instructions were specially designed for access to/from bit fields in a struct.
Code: Select all
union rgb565_t { unsigned short c; struct { unsigned short r : 5, g : 6, b : 5; } } #define COLOR565( r, g, b ) ({ union rgb565_t res; res.c = r >> 3; res.g = g >> 2; res.b = b >> 3; res.c; })
If you are not convinced, compare the following three implementations:
- 1) unsigned short COLOR565_c(int r,int g,int b ) // 9 insns
{ return (((r >> 3) << 11) | ((g >> 2) << 5) | ((b >> 3) << 0)); }
2) unsigned short COLOR565_asm(int r,int g,int b ) // 7 insns
{ unsigned short c = r>>3; asm volatile ( "ins %0, %1, 5, 6; ins %0, %2, 11, 5" : "+r"(c) : "r"(g>>2), "r"(b>>3) ); return c; }
3) unsigned short COLOR565_union(int r,int g,int b ) // 7 insns
{ union rgb565_t { unsigned short c; struct { unsigned short r : 5, g : 6, b : 5; }; } res; res.c = r >> 3; res.g = g >> 2; res.b = b >> 3; return res.c; }Note that COLOR565_asm and COLOR565_union are quite similar.Code: Select all
1) 00000000 <COLOR565_c>: 0: 000410c3 sra v0,a0,0x3 4: 00052883 sra a1,a1,0x2 8: 00052940 sll a1,a1,0x5 c: 000212c0 sll v0,v0,0xb 10: 00451025 or v0,v0,a1 14: 000630c3 sra a2,a2,0x3 18: 00461025 or v0,v0,a2 1c: 03e00008 jr ra 20: 3042ffff andi v0,v0,0xffff 2) 00000024 <COLOR565_asm>: 24: 7c8278c0 ext v0,a0,0x3,0x10 28: 00052883 sra a1,a1,0x2 2c: 000630c3 sra a2,a2,0x3 30: 7ca25144 ins v0,a1,0x5,0x6 34: 7cc27ac4 ins v0,a2,0xb,0x5 38: 03e00008 jr ra 3c: 3042ffff andi v0,v0,0xffff 3) 00000040 <COLOR565_union>: 40: 000410c2 srl v0,a0,0x3 44: 00052883 sra a1,a1,0x2 48: 7ca25144 ins v0,a1,0x5,0x6 4c: 000630c3 sra a2,a2,0x3 50: 7cc27ac4 ins v0,a2,0xb,0x5 54: 03e00008 jr ra 58: 3042ffff andi v0,v0,0xffff
Code: Select all
#include <pspkernel.h>
#include <pspctrl.h>
#include <pspdisplay.h>
#include <pspdebug.h>
#include <psppower.h>
#include <stdio.h>
#include <stdlib.h>
#include <pspkernel.h>
#include <pspctrl.h>
#include <psppower.h>
#include <pspdebug.h>
#include <psprtc.h>
#include <pspsdk.h>
#include <pspaudiocodec.h>
#include <pspaudio.h>
#include <string.h>
#include <malloc.h>
#include <pspmpeg.h>
#include "pspvideocodec.h"
/* Forward declaration; the definition is at the end of this file. */
int SetupCallbacks();
/* Module descriptor for this program, named "videocodec test".
 * NOTE(review): 0x1000 is presumably the kernel-mode module attribute and
 * the trailing 1, 1 the major/minor version -- confirm against PSPSDK. */
PSP_MODULE_INFO("videocodec test", 0x1000, 1, 1);
/* Main thread attribute word set to 0. */
PSP_MAIN_THREAD_ATTR(0);
/*
 * Runs before main() (GCC constructor attribute) and calls the PSPSDK
 * kernel helpers below, in this order.
 * NOTE(review): presumably these patches are what let main() load the
 * flash0:/kd/*.prx codec modules through pspSdkLoadStartModule -- confirm
 * against the PSPSDK documentation before reordering or removing any call.
 */
__attribute__ ((constructor))
void loaderInit(){
pspKernelSetKernelPC();
pspSdkInstallNoDeviceCheckPatch();
pspSdkInstallNoPlainModuleCheckPatch();
pspSdkInstallKernelLoadModulePatch();
}
/* Last controller sample; filled by sceCtrlReadBufferPositive() in main(). */
SceCtrlData input;
/* 96-word control block handed to every sceVideocodec* call in main(). */
unsigned long Video_Codec_BufferMP4V[96] __attribute__((aligned(64)));
/* Same shape as the MP4V block; not referenced by the code in this file. */
unsigned long Video_Codec_BufferAVC1[96] __attribute__((aligned(64)));
/* 12-word parameter block passed to sceMpegBaseYCrCbCopyVme(). */
unsigned long Video_YUVCodec_BufferMP4V[12] __attribute__((aligned(64)));
/* Destination of sceMpegBaseYCrCbCopyVme(); dumped to ms0:/YUVBuffer(mp4v).dat. */
unsigned char YUVBuffer[512*512*3] __attribute__((aligned(64)));
/* Element counts of the two control blocks that get dumped to disk. */
enum { MP4V_CTRL_WORDS = 96, YUV_CTRL_WORDS = 12 };

/*
 * Dump `count` entries of `words` to the file at `path`.
 *
 * Exactly sizeof(unsigned int) bytes are written per entry, so the dump
 * format is the same as before this helper existed.
 * Returns 0 on success, or -1 if the file could not be opened or written
 * (a message is printed when the open fails).
 */
static int dump_words(const char *path, const unsigned long *words, int count)
{
    FILE *out = fopen(path, "wb");
    int rc = 0;

    if (out == NULL) {
        pspDebugScreenPrintf("cannot open %s for writing\n", path);
        return -1;
    }
    for (int i = 0; i < count; i++) {
        if (fwrite(&words[i], sizeof(unsigned int), 1, out) != 1) {
            rc = -1;
            break;
        }
    }
    if (fclose(out) != 0) {
        rc = -1;
    }
    return rc;
}

/* Poll the pad every 10 ms until the TRIANGLE button reads as pressed. */
static void wait_for_triangle(void)
{
    sceCtrlReadBufferPositive(&input, 1);
    while (!(input.Buttons & PSP_CTRL_TRIANGLE)) {
        sceKernelDelayThread(10000); /* wait 10 milliseconds */
        sceCtrlReadBufferPositive(&input, 1);
    }
}

/*
 * Test driver for the sceVideocodec MP4V path.
 *
 * Loads the codec modules, reads ms0:/Test.dat into a 64-byte aligned
 * buffer, then runs Open -> GetEDRAM -> Init -> Stop -> Decode on the
 * control block, dumping the block to ms0:/ before each call and printing
 * each return code. After a successful decode it calls
 * sceMpegBaseYCrCbCopyVme() and dumps the resulting YUV buffer.
 * Any failure skips straight to the final wait for TRIANGLE.
 *
 * NOTE(review): the control-block word indices used below (4, 7, 9-14,
 * 34, 36, 44, 45, 53-57) are taken as-is; this file does not document
 * their meaning.
 */
int main(void)
{
    unsigned char *file_buffer = NULL;
    int res;
    int result = 0;
    const struct {
        const char *path;
        int partition;
    } modules[] = {
        { "flash0:/kd/me_for_vsh.prx", PSP_MEMORY_PARTITION_KERNEL },
        { "flash0:/kd/videocodec.prx", PSP_MEMORY_PARTITION_KERNEL },
        { "flash0:/kd/audiocodec.prx", PSP_MEMORY_PARTITION_KERNEL },
        { "flash0:/kd/mpegbase.prx",   PSP_MEMORY_PARTITION_KERNEL },
        { "flash0:/kd/mpeg_vsh.prx",   PSP_MEMORY_PARTITION_USER },
    };

    SetupCallbacks();
    pspDebugScreenInit();
    pspDebugScreenSetXY(0, 2);

    u32 cpu = scePowerGetCpuClockFrequency();
    u32 bus = scePowerGetBusClockFrequency();
    pspDebugScreenPrintf("cpu=%d, bus=%d\n", cpu, bus);
    /* The exit loops below only react to TRIANGLE, so say so. */
    pspDebugScreenPrintf("Press TRIANGLE to exit.\n");

    for (size_t m = 0; m < sizeof modules / sizeof modules[0]; m++) {
        result = pspSdkLoadStartModule(modules[m].path, modules[m].partition);
        if (result < 0) {
            pspDebugScreenPrintf("pspSdkLoadStartModule(%s)=0x%08X\n", modules[m].path, result);
            goto wait;
        }
    }
    /* `result` is the id returned for the last module (mpeg_vsh.prx). */
    pspSdkFixupImports(result);
    sceMpegInit();

    /* Read the whole input file into a buffer padded to a multiple of 64 bytes. */
    FILE *fp = fopen("ms0:/Test.dat", "rb");
    if (fp == NULL) {
        pspDebugScreenPrintf("cannot open ms0:/Test.dat\n");
        goto wait;
    }
    fseek(fp, 0, PSP_SEEK_END);
    long fsize = ftell(fp);
    if (fsize <= 0) {
        pspDebugScreenPrintf("ms0:/Test.dat is empty or unreadable\n");
        fclose(fp);
        goto wait;
    }
    long buffer_size = (fsize + 63) & ~63L;
    file_buffer = memalign(64, buffer_size);
    if (file_buffer == NULL) {
        pspDebugScreenPrintf("memalign(%ld) failed\n", buffer_size);
        fclose(fp);
        goto wait;
    }
    fseek(fp, 0, PSP_SEEK_SET);
    size_t items_read = fread(file_buffer, fsize, 1, fp);
    fclose(fp);
    if (items_read != 1) {
        pspDebugScreenPrintf("short read on ms0:/Test.dat\n");
        goto wait;
    }

    memset(Video_Codec_BufferMP4V, 0, sizeof(Video_Codec_BufferMP4V));

    //---------------------------------------------------------------------------------//
    /* Word 4 holds the address 128 bytes into the control block itself. */
    Video_Codec_BufferMP4V[4] = (unsigned long)((unsigned char *)Video_Codec_BufferMP4V + 128);
    Video_Codec_BufferMP4V[11] = 512;
    Video_Codec_BufferMP4V[12] = 512;
    Video_Codec_BufferMP4V[13] = 512*512;
    (void)dump_words("ms0:/sceVideocodecOpen(mp4v).dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);
    res = sceVideocodecOpen(Video_Codec_BufferMP4V, 0x1);
    pspDebugScreenPrintf("sceVideocodecOpen=0x%08X\n", res);
    if (res < 0) {
        goto wait;
    }

    //---------------------------------------------------------------------------------//
    Video_Codec_BufferMP4V[7] = 16384;
    (void)dump_words("ms0:/sceVideocodecGetEDRAM(mp4v).dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);
    res = sceVideocodecGetEDRAM(Video_Codec_BufferMP4V, 0x1);
    pspDebugScreenPrintf("sceVideocodecGetEDRAM=0x%08X\n", res);
    if (res < 0) {
        goto wait;
    }

    //---------------------------------------------------------------------------------//
    (void)dump_words("ms0:/sceVideocodecInit(mp4v).dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);
    res = sceVideocodecInit(Video_Codec_BufferMP4V, 0x1);
    pspDebugScreenPrintf("sceVideocodecInit=0x%08X\n", res);
    if (res < 0) {
        goto wait;
    }

    //---------------------------------------------------------------------------------//
    Video_Codec_BufferMP4V[34] = 7;
    Video_Codec_BufferMP4V[36] = 0;
    (void)dump_words("ms0:/sceVideocodecStop(mp4v).dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);
    res = sceVideocodecStop(Video_Codec_BufferMP4V, 0x1);
    pspDebugScreenPrintf("sceVideocodecStop=0x%08X\n", res);
    if (res < 0) {
        goto wait;
    }

    //---------------------------------------------------------------------------------//
    /* Explicit cast: the control block stores the input address as an integer word. */
    Video_Codec_BufferMP4V[9] = (unsigned long)file_buffer;
    Video_Codec_BufferMP4V[10] = fsize;
    Video_Codec_BufferMP4V[14] = 7;
    (void)dump_words("ms0:/sceVideocodecDecode(mp4v).dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);
    res = sceVideocodecDecode(Video_Codec_BufferMP4V, 0x1);
    pspDebugScreenPrintf("sceVideocodecDecode=0x%08X\n", res);
    if (res < 0) {
        goto wait;
    }

    /* Build the parameter block for the YCrCb copy from the decoder's output words. */
    Video_YUVCodec_BufferMP4V[0] = (Video_Codec_BufferMP4V[45]+15) & 0xFFFFFFF0;
    Video_YUVCodec_BufferMP4V[1] = (Video_Codec_BufferMP4V[44]+15) & 0xFFFFFFF0;
    Video_YUVCodec_BufferMP4V[2] = 0;
    Video_YUVCodec_BufferMP4V[3] = 1;
    Video_YUVCodec_BufferMP4V[4] = Video_Codec_BufferMP4V[53];
    Video_YUVCodec_BufferMP4V[5] = Video_YUVCodec_BufferMP4V[4] + (Video_Codec_BufferMP4V[56] * (Video_YUVCodec_BufferMP4V[0]/2));
    Video_YUVCodec_BufferMP4V[6] = Video_Codec_BufferMP4V[54];
    Video_YUVCodec_BufferMP4V[7] = Video_Codec_BufferMP4V[55];
    Video_YUVCodec_BufferMP4V[8] = Video_YUVCodec_BufferMP4V[6] + (Video_Codec_BufferMP4V[57] * (Video_YUVCodec_BufferMP4V[0]/4));
    Video_YUVCodec_BufferMP4V[9] = Video_YUVCodec_BufferMP4V[7] + (Video_Codec_BufferMP4V[57] * (Video_YUVCodec_BufferMP4V[0]/4));
    Video_YUVCodec_BufferMP4V[10] = 0;
    Video_YUVCodec_BufferMP4V[11] = 0;
    (void)dump_words("ms0:/sceMpegBaseYCrCbCopyVme(mp4v).dat", Video_YUVCodec_BufferMP4V, YUV_CTRL_WORDS);

    res = sceMpegBaseYCrCbCopyVme(YUVBuffer, Video_YUVCodec_BufferMP4V, 3);
    pspDebugScreenPrintf("sceMpegBaseYCrCbCopyVme=0x%08X\n", res);

    fp = fopen("ms0:/YUVBuffer(mp4v).dat", "wb");
    if (fp != NULL) {
        fwrite(YUVBuffer, sizeof(unsigned char), sizeof(YUVBuffer), fp);
        fclose(fp);
    } else {
        pspDebugScreenPrintf("cannot open ms0:/YUVBuffer(mp4v).dat for writing\n");
    }

    //---------------------------------------------------------------------------------//
    /* Hold the results on screen until TRIANGLE, then show and save the final block. */
    wait_for_triangle();
    for (int i = 0; i < MP4V_CTRL_WORDS; i++) {
        pspDebugScreenPrintf("0x%08X ", (unsigned int)Video_Codec_BufferMP4V[i]);
    }
    pspDebugScreenPrintf("\n");
    (void)dump_words("ms0:/mp4vend.dat", Video_Codec_BufferMP4V, MP4V_CTRL_WORDS);

wait:
    wait_for_triangle();
    free(file_buffer); /* free(NULL) is a no-op on the early-exit paths */
    sceKernelExitGame();
    return 0;
}
/*
 * Exit callback: asks the kernel to terminate the program.
 * None of the three arguments is used; always returns 0.
 */
int exit_callback(int arg1, int arg2, void *common)
{
    (void)arg1;
    (void)arg2;
    (void)common;

    sceKernelExitGame();
    return 0;
}
/*
 * Thread entry point: creates the "Exit Callback" callback bound to
 * exit_callback, registers it as the exit callback, then sleeps with
 * callback processing enabled. Both arguments are unused; returns 0.
 */
int CallbackThread(SceSize args, void *argp)
{
    const int exit_cb = sceKernelCreateCallback("Exit Callback", exit_callback, NULL);

    (void)args;
    (void)argp;

    sceKernelRegisterExitCallback(exit_cb);
    sceKernelSleepThreadCB();
    return 0;
}
/*
 * Create the "update_thread" thread running CallbackThread and start it
 * if creation succeeded.
 * Returns the value from sceKernelCreateThread (negative on failure).
 */
int SetupCallbacks(void)
{
    const int thread_id = sceKernelCreateThread("update_thread", CallbackThread, 0x11, 0xFA0, 0, 0);

    if (thread_id < 0) {
        return thread_id;
    }

    sceKernelStartThread(thread_id, 0, 0);
    return thread_id;
}
Code: Select all
/*
* PSP Software Development Kit - http://www.pspdev.org
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* pspvideocodec.h - Prototypes for the sceVideocodec library.
*
* Copyright (c) 2007 cooleyes
*
*/
#ifndef PSPVIDEOCODEC_H
#define PSPVIDEOCODEC_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Every function below takes `Buffer`, the caller-owned codec control
 * block, and returns an int status. The accompanying test program passes
 * a 96-word, 64-byte aligned array, passes 0x1 as `Type` for its MP4V
 * test, and treats negative return values as errors.
 */
int sceVideocodecOpen(unsigned long *Buffer, int Type);
int sceVideocodecGetEDRAM(unsigned long *Buffer, int Type);
int sceVideocodecInit(unsigned long *Buffer, int Type);
/* Called by the test program between Init and Decode; previously undeclared. */
int sceVideocodecStop(unsigned long *Buffer, int Type);
int sceVideocodecDecode(unsigned long *Buffer, int Type);
int sceVideocodecReleaseEDRAM(unsigned long *Buffer);
/*
 * Copies decoded picture data into `YUVBuffer` using the parameter block
 * in `Buffer` (a 12-word array in the test program, which passes 3 as `Type`).
 */
int sceMpegBaseYCrCbCopyVme(void* YUVBuffer, unsigned long *Buffer, int Type);

#ifdef __cplusplus
}
#endif

#endif /* PSPVIDEOCODEC_H */