Team Gryffindor - Sobel Source Code: Difference between revisions
Jump to navigation
Jump to search
(13 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
= User Application = | |||
== UserApp.c == | == UserApp.c == | ||
<nowiki>#include <convey/usr/cny_comp.h> | <nowiki>#include <convey/usr/cny_comp.h> | ||
Line 15: | Line 17: | ||
void usage (char *); | void usage (char *); | ||
int main(int argc, char *argv[]) | void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols); | ||
{ | |||
/* | |||
* ./run ./lena.bmp | |||
* Reads in a grayscale bitmap, calls the sobel() function, writes the bitmap to edgeSob.bmp | |||
*/ | |||
int main(int argc, char *argv[]) { | |||
// Check arguments | |||
if (argc != 2) { | |||
printf("Usage: %s bmpInput.bmp\n", argv[0]); | |||
exit(1); | |||
} | |||
// Get personality signature | // Get personality signature | ||
Line 40: | Line 48: | ||
// Check for crossbar support | // Check for crossbar support | ||
int crossbar_enabled = i_copcall_fmt(sig, cpXbar, ""); | |||
if (!crossbar_enabled) { | |||
printf("ERROR - Crossbar not enabled!\n"); | |||
/* | |||
cny_cp_posix_memalign((void**)&original.data, 512, size*8); | |||
cny_cp_posix_memalign((void**)&edge.data, 512, size*8); | |||
*/ | |||
} | } | ||
// Read in bitmap data | |||
sImage original, edge; | |||
readImage(argv[1], &original, &edge); | |||
// Coprocessor Call | // Coprocessor Call | ||
copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data, original.rows, original.cols); | copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data, original.rows, original.cols); | ||
// Mock coprocessor call | |||
// sobel(original.data, edge.data, original.rows, original.cols); | |||
// Write bitmap data | |||
writeImage(argv[1], &edge); | |||
return 0; | return 0; | ||
} | |||
#define NUM_AES 4
#define LINE_WIDTH 512

/*
 * Host-side mock of the coprocessor Sobel kernel (mirrors the software
 * simulator's CAEP00 handler so results can be compared without hardware).
 *
 * original - address of the source image, one byte per pixel, row-major
 * edge     - address of the destination image, same layout as original
 * rows     - image height in pixels
 * cols     - image width in pixels
 *
 * The image is split into NUM_AES horizontal bands, one per application
 * engine. Each band is walked in vertical strips of LINE_WIDTH pixels that
 * overlap by 2, keeping a sliding window of three rows in cache[][].
 * Each output pixel is |sumX| + |sumY|, clamped to 255, and is stored at the
 * centre of its 3x3 window.
 *
 * Images smaller than 3x3 contain no complete window and are left untouched.
 *
 * NOTE(review): with the "- 3" loop bounds below, image column cols-2, image
 * row rows-2 and one column at every 510-pixel strip seam are never written.
 * The bounds are kept as they are to stay in step with the simulator -
 * confirm against the hardware before changing them.
 */
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    /* Both kernels are indexed [column offset][row offset], matching the
     * cache[row][column] access in the accumulation loop. */
    static const int GY[3][3] = { {-1, 0, 1},
                                  {-2, 0, 2},
                                  {-1, 0, 1} };
    static const int GX[3][3] = { { 1, 2, 1},
                                  { 0, 0, 0},
                                  {-1,-2,-1} };
    const unsigned char *src = (const unsigned char *) original;
    unsigned char *dst = (unsigned char *) edge;
    unsigned char cache[3][LINE_WIDTH];
    int aeId;

    /* Guard: "cols - 3" below is unsigned and would wrap for tiny images. */
    if (rows < 3 || cols < 3) {
        return;
    }

    for (aeId = 0; aeId < NUM_AES; aeId++) {
        /* Each band starts 3 rows early so that consecutive bands meet. */
        long startrow = (long) ((rows * aeId) / NUM_AES) - 3;
        long endrow = (long) ((rows * (aeId + 1)) / NUM_AES);
        uint64 x_offset;

        if (startrow < 0) {
            startrow = 0;
        }

        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            long x, y;
            int i, j;

            // Load first three rows of pixel data into cache
            for (x = 0; (x < LINE_WIDTH) && (x_offset + x < cols); x++) {
                for (y = 0; y < 3; y++) {
                    cache[y][x] = src[(x_offset + x) + ((startrow + y) * cols)];
                }
            }

            for (y = startrow; y < endrow - 3; y++) {
                // Make edge calculations
                for (x = 0; x < LINE_WIDTH - 3 && (x_offset + x < cols - 3); x++) {
                    long sumX = 0;
                    long sumY = 0;
                    long SUM;

                    /* j must stay below 3: cache and both kernels only
                     * have three rows/columns. */
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            sumX += cache[j][x + i] * GX[i][j];
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = labs(sumX) + labs(sumY);
                    if (SUM > 255) {
                        SUM = 255;
                    }

                    dst[(x_offset + x + 1) + ((y + 1) * cols)] = (unsigned char) SUM;
                }

                // Shift cache up, bring in next line
                for (x = 0; x < LINE_WIDTH && (x_offset + x < cols); x++) {
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = src[(x_offset + x) + ((y + 3) * cols)];
                }
            } // end of row
        } // end of vertical strip
    } // end of AE
}
</nowiki> | </nowiki> | ||
Line 127: | Line 251: | ||
printf("vectorSize: %lu\n", vectorSize); | printf("vectorSize: %lu\n", vectorSize); | ||
// edgeImage.data = farmalloc(vectorSize*sizeof(unsigned char)); | // edgeImage.data = farmalloc(vectorSize*sizeof(unsigned char)); | ||
edgeImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char)); | edgeImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char)); | ||
// originalImage.data = farmalloc(vectorSize*sizeof(unsigned char)); | // originalImage.data = farmalloc(vectorSize*sizeof(unsigned char)); | ||
originalImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char)); | originalImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char)); | ||
Line 149: | Line 273: | ||
} | } | ||
fclose(bmpInput); | |||
} | } | ||
void writeImage(char *filename, sImage *edgeImage) { | void writeImage(char *filename, sImage *edgeImage) { | ||
FILE *bmpInput, *bmpOutput; | |||
int nColors; | int nColors; | ||
int X, Y; | int X, Y; | ||
Line 168: | Line 292: | ||
nColors = (int)getImageInfo(bmpInput, 46, 4); | nColors = (int)getImageInfo(bmpInput, 46, 4); | ||
/*------COPY HEADER AND COLOR TABLE---------*/ | |||
copyImageInfo(bmpInput, bmpOutput); | copyImageInfo(bmpInput, bmpOutput); | ||
copyColorTable(bmpInput, bmpOutput, nColors); | copyColorTable(bmpInput, bmpOutput, nColors); | ||
Line 176: | Line 299: | ||
for(Y=0; Y<=(edgeImage->rows-1); Y++) { | for(Y=0; Y<=(edgeImage->rows-1); Y++) { | ||
for(X=0; X<=(edgeImage->cols-1); X++) { | |||
fwrite((edgeImage->data + X + Y*edgeImage->cols), sizeof(char), 1, bmpOutput); | |||
} | } | ||
} | } | ||
Line 323: | Line 446: | ||
image.o: image.h image.c | image.o: image.h image.c | ||
cnycc -g -lm image.c -c image.o | cnycc -g -lm image.c -c image.o | ||
</nowiki> | |||
= Software Simulation = | |||
The software simulation takes ~7 seconds to run. | |||
== CaeIsaSobel.cpp == | |||
<nowiki> | |||
/* | |||
AUTH: Chad Nelson (based on Sobel algo by Bill Green) | |||
DESC: 2 3x3 Sobel masks for edge detection | |||
DATE: 02/20/11 | |||
*/ | |||
#include "CaeSim.h" | |||
#include "CaeIsa.h" | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#define MAX_AEG_INDEX 128 | |||
#define PERS_SIGN_CAE 0x4001000101000LL | |||
#define NUM_AES 4 | |||
#undef DEBUG | |||
#ifndef MC_XBAR | |||
#define MC_XBAR 0 | |||
#endif | |||
#define LINE_WIDTH 512 | |||
void CCaeIsa::InitPers() { | |||
SetAegCnt(MAX_AEG_INDEX); | |||
WriteAeg(0, 0, 0); | |||
SetPersSign(PERS_SIGN_CAE); | |||
} | |||
void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) { | |||
if (opcode == 0x20) { | |||
// CAEP00 - sobel | |||
uint64 original, edge, rows, cols; | |||
original = ReadAeg(aeId, 0); | |||
edge = ReadAeg(aeId, 1); | |||
rows = ReadAeg(aeId, 2); | |||
cols = ReadAeg(aeId, 3); | |||
unsigned int startrow = (rows * aeId) / NUM_AES - 3; | |||
unsigned int endrow = (rows * (aeId+1)) / NUM_AES; | |||
if (aeId == 0) | |||
startrow = 0; | |||
int GX[3][3] = { {-1, 0, 1}, | |||
{-2, 0, 2}, | |||
{-1, 0, 1} }; | |||
int GY[3][3] = { { 1, 2, 1}, | |||
{ 0, 0, 0}, | |||
{-1,-2,-1} }; | |||
long sumX, sumY, SUM; | |||
unsigned int x, y, i, j; | |||
unsigned char cache[3][LINE_WIDTH]; | |||
unsigned int x_offset = 0; | |||
uint64 data, address; | |||
// Divide image into vertical strips, each LINE_WIDTH pixels wide | |||
// with a 2 pixel overlap | |||
for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) { | |||
// Load first three rows of pixel data into cache | |||
for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++) { | |||
for(y=0; y < 3; y++) { | |||
address = original + (x_offset + x) + ((startrow + y)*cols); | |||
AeMemLoad(aeId, McNum(address), address, 1, false, data); | |||
cache[y][x] = (char) data; | |||
} | |||
} | |||
for (y = startrow; y < endrow - 3; y++) { | |||
// Make edge calculations | |||
for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++) { | |||
// -------X GRADIENT APPROXIMATION------ | |||
sumX = 0; | |||
for(i=0; i<3; i++) { | |||
for(j=0; j<3; j++) { | |||
sumX += cache[j][x + i] * GX[i][j]; | |||
} | |||
} | |||
// -------Y GRADIENT APPROXIMATION------- | |||
sumY = 0; | |||
for(i=0; i<3; i++) { | |||
for(j=0; j<3; j++) { | |||
sumY += cache[j][x + i] * GY[i][j]; | |||
} | |||
} | |||
// ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)---- | |||
SUM = abs(sumX) + abs(sumY); | |||
if(SUM > 255) | |||
SUM = 255; | |||
if(SUM < 0) | |||
SUM = 0; | |||
address = edge + (x_offset + x + 1) + ((y + 1)*cols); | |||
AeMemStore(aeId, McNum(address), address, 1, false, SUM); | |||
} | |||
// Shift cache up, bring in next line | |||
for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++) { | |||
address = original + (x_offset + x) + ((y + 3)*cols); | |||
AeMemLoad(aeId, McNum(address), address, 1, false, data); | |||
cache[0][x] = cache[1][x]; | |||
cache[1][x] = cache[2][x]; | |||
cache[2][x] = (char) data; | |||
} | |||
} // end of row | |||
} // end of vertical strip | |||
// End of CAEP00 - sobel | |||
} else { | |||
// other CAEPXX instructions | |||
printf("CAEP00 was not called.\n"); | |||
for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) { | |||
// SetException(int aeID, int bitnum); AEUIE = 0 | |||
SetException(aeId, AEUIE); | |||
} | |||
} | |||
} | |||
</nowiki> | </nowiki> |
Latest revision as of 01:28, 23 February 2012
User Application
UserApp.c
#include <convey/usr/cny_comp.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "image.h"

#undef DEBUG

typedef unsigned long long uint64;

extern void cpSobel();
extern int cpXbar();
void usage (char *);
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols);

/*
 * ./run ./lena.bmp
 * Reads in a grayscale bitmap, calls the sobel() function, writes the bitmap to edgeSob.bmp
 */
int main(int argc, char *argv[]) {
    // Check arguments
    if (argc != 2) {
        printf("Usage: %s bmpInput.bmp\n", argv[0]);
        exit(1);
    }

    // Get personality signature
    cny_image_t sig, sig2;
    int stat;
    cny_get_signature("pdk", &sig, &sig2, &stat);
    if (stat) {
        printf("***ERROR: cny_get_signature() Failure: %d\n", stat);
        exit(1);
    }

    // Check memory interleave
    if (cny_cp_interleave() == CNY_MI_3131) {
        printf("ERROR - interleave set to 3131, this personality requires binary interleave\n");
        exit(1);
    }

    // Check for crossbar support
    // NOTE(review): unlike the checks above, this one reports the error but
    // keeps going - confirm whether it should exit(1) as well.
    int crossbar_enabled = i_copcall_fmt(sig, cpXbar, "");
    if (!crossbar_enabled) {
        printf("ERROR - Crossbar not enabled!\n");
        /*
        cny_cp_posix_memalign((void**)&original.data, 512, size*8);
        cny_cp_posix_memalign((void**)&edge.data, 512, size*8);
        */
    }

    // Read in bitmap data
    sImage original, edge;
    readImage(argv[1], &original, &edge);

    // Coprocessor Call
    // rows/cols are int in sImage; widen them explicitly so each of the four
    // "A" arguments is passed through the varargs list as a 64-bit value.
    copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data,
                (uint64) original.rows, (uint64) original.cols);

    // Mock coprocessor call
    // sobel((uint64) original.data, (uint64) edge.data, original.rows, original.cols);

    // Write bitmap data
    writeImage(argv[1], &edge);

    return 0;
}

#define NUM_AES 4
#define LINE_WIDTH 512

/*
 * Mocks the software simulator: runs the same banded, strip-wise Sobel
 * filter on the host.
 *
 * original - address of the source image, one byte per pixel, row-major
 * edge     - address of the destination image, same layout as original
 * rows     - image height in pixels
 * cols     - image width in pixels
 *
 * Each output pixel is |sumX| + |sumY| clamped to 255, written at the centre
 * of its 3x3 window. Images smaller than 3x3 are left untouched.
 *
 * NOTE(review): with the "- 3" loop bounds below, image column cols-2, image
 * row rows-2 and one column at every 510-pixel strip seam are never written.
 * Kept as-is to stay in step with the simulator - confirm against hardware.
 */
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    /* Both kernels are indexed [column offset][row offset], matching the
     * cache[row][column] access in the accumulation loop. */
    static const int GY[3][3] = { {-1, 0, 1},
                                  {-2, 0, 2},
                                  {-1, 0, 1} };
    static const int GX[3][3] = { { 1, 2, 1},
                                  { 0, 0, 0},
                                  {-1,-2,-1} };
    const unsigned char *src = (const unsigned char *) original;
    unsigned char *dst = (unsigned char *) edge;
    unsigned char cache[3][LINE_WIDTH];
    int aeId;

    /* Guard: "cols - 3" below is unsigned and would wrap for tiny images. */
    if (rows < 3 || cols < 3) {
        return;
    }

    for (aeId = 0; aeId < NUM_AES; aeId++) {
        /* Each band starts 3 rows early so that consecutive bands meet. */
        long startrow = (long) ((rows * aeId) / NUM_AES) - 3;
        long endrow = (long) ((rows * (aeId + 1)) / NUM_AES);
        uint64 x_offset;

        if (startrow < 0) {
            startrow = 0;
        }

        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            long x, y;
            int i, j;

            // Load first three rows of pixel data into cache
            for (x = 0; (x < LINE_WIDTH) && (x_offset + x < cols); x++) {
                for (y = 0; y < 3; y++) {
                    // AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[y][x] = src[(x_offset + x) + ((startrow + y) * cols)];
                }
            }

            for (y = startrow; y < endrow - 3; y++) {
                // Make edge calculations
                for (x = 0; x < LINE_WIDTH - 3 && (x_offset + x < cols - 3); x++) {
                    long sumX = 0;
                    long sumY = 0;
                    long SUM;

                    /* j must stay below 3: cache and both kernels only
                     * have three rows/columns. */
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            sumX += cache[j][x + i] * GX[i][j];
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = labs(sumX) + labs(sumY);
                    if (SUM > 255) {
                        SUM = 255;
                    }

                    // AeMemStore(aeId, McNum(address), address, 1, false, (char) SUM);
                    dst[(x_offset + x + 1) + ((y + 1) * cols)] = (unsigned char) SUM;
                }

                // Shift cache up, bring in next line
                for (x = 0; x < LINE_WIDTH && (x_offset + x < cols); x++) {
                    // AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = src[(x_offset + x) + ((y + 3) * cols)];
                }
            } // end of row
        } // end of vertical strip
    } // end of AE
}
image.h
#ifndef IMAGE_H
#define IMAGE_H

/*
 * 8-bit image held as one byte per pixel; the reader and writer address
 * pixel (row, col) as data[row * cols + col].
 */
typedef struct {
    int rows;             /* image height in pixels */
    int cols;             /* image width in pixels */
    unsigned char* data;  /* pixel buffer, rows * cols bytes are used */
} sImage;

/* Writes edgeImage's pixels to edgeSob.bmp, copying the BMP header and
 * colour table from the file named by filename. */
void writeImage(char *filename, sImage *edgeImage);

/* Reads the BMP named by filename into originalImage and allocates an
 * equally sized buffer for edgeImage. */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage);

#endif /* IMAGE_H */
image.c
/*
   FILE: edgeSob.c - WORKS!!
   AUTH: Chad Nelson (originally by Bill Green)
   DESC: 2 3x3 Sobel masks for edge detection
   DATE: 02/20/2012
   REFS: edgeLap.c
*/
#include <convey/usr/cny_comp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "image.h"

/* Size of the BMP file header (14 bytes) plus the info header (40 bytes). */
#define BMP_HEADER_BYTES (14 + 40)

/*-------PROTOTYPES---------*/
long getImageInfo(FILE*, long, int);
void copyImageInfo(FILE* inputFile, FILE* outputFile);
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors);

/* Copies count bytes from inputFile to outputFile, both positioned at
 * offset. Stops early if the input runs out or a write fails. */
static void copyBytes(FILE* inputFile, FILE* outputFile, long offset, long count)
{
    long i;

    fseek(inputFile, offset, SEEK_SET);
    fseek(outputFile, offset, SEEK_SET);

    for (i = 0; i < count; i++) {
        int byte = fgetc(inputFile);
        if (byte == EOF || fputc(byte, outputFile) == EOF) {
            break;
        }
    }
}

/*
 * Reads the 8-bit BMP named by filename.
 *
 * On return originalImage holds the dimensions and pixel bytes from the
 * file, and edgeImage holds the same dimensions with a zero-filled buffer
 * of the same size. Both buffers come from cny_cp_malloc.
 *
 * Any failure (file cannot be opened, inconsistent header, allocation
 * failure, short read) prints a message and terminates with exit status 1.
 *
 * NOTE(review): pixel data is read as rows * cols consecutive bytes, i.e.
 * rows are assumed not to be padded (width a multiple of 4) - confirm for
 * other image sizes.
 */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage)
{
    FILE *bmpInput;
    unsigned long vectorSize;
    unsigned long fileSize;
    unsigned long headerSize;
    unsigned long pixelCount;
    int nColors;

    printf("Reading filename: %s\n", filename);

    /*-------OPEN INPUT FILE-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    fileSize = (unsigned long)getImageInfo(bmpInput, 2, 4);
    originalImage->cols = (int)getImageInfo(bmpInput, 18, 4);
    originalImage->rows = (int)getImageInfo(bmpInput, 22, 4);
    edgeImage->rows = originalImage->rows;
    edgeImage->cols = originalImage->cols;

    /*--------PRINT DATA TO SCREEN----------*/
    printf("Width: %d\n", originalImage->cols);
    printf("Height: %d\n", originalImage->rows);
    printf("File size: %lu\n", fileSize);

    nColors = (int)getImageInfo(bmpInput, 46, 4);
    printf("nColors: %d\n", nColors);

    /*------VALIDATE HEADER--------*/
    if (originalImage->rows <= 0 || originalImage->cols <= 0 || nColors < 0) {
        printf("Unsupported or corrupt bitmap header in %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }
    headerSize = BMP_HEADER_BYTES + 4UL * (unsigned long)nColors;
    if (fileSize <= headerSize) {
        printf("Unsupported or corrupt bitmap header in %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    /*------ALLOCATE MEMORY FOR FILES--------*/
    vectorSize = fileSize - headerSize;
    printf("vectorSize: %lu\n", vectorSize);

    /* The read below stores rows * cols bytes; make sure they fit. */
    pixelCount = (unsigned long)originalImage->rows * (unsigned long)originalImage->cols;
    if (pixelCount > vectorSize) {
        printf("Bitmap %s holds fewer pixels than its header claims\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    // edgeImage.data = farmalloc(vectorSize*sizeof(unsigned char));
    edgeImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));
    // originalImage.data = farmalloc(vectorSize*sizeof(unsigned char));
    originalImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));

    if (edgeImage->data == NULL || originalImage->data == NULL) {
        printf("Failed to cny_cp_malloc image space\n");
        fclose(bmpInput);
        exit(1);
    }
    printf("%lu bytes malloc'ed for image data\n", vectorSize);

    /* The filter never writes the image border, so start from black
     * instead of whatever the allocator returned. */
    memset(edgeImage->data, 0, vectorSize);

    /* Read input.bmp and store its raster data into originalImage.data */
    fseek(bmpInput, (long)headerSize, SEEK_SET);
    if (fread(originalImage->data, sizeof(unsigned char), pixelCount, bmpInput) != pixelCount) {
        printf("Failed to read image data from %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    fclose(bmpInput);
}

/*
 * Writes edgeImage to edgeSob.bmp. The header and colour table are copied
 * from the BMP named by filename (the original input); the pixel data comes
 * from edgeImage->data (rows * cols bytes).
 *
 * Prints a message and terminates with exit status 1 if either file cannot
 * be opened or the pixel data cannot be written.
 */
void writeImage(char *filename, sImage *edgeImage)
{
    FILE *bmpInput, *bmpOutput;
    int nColors;
    long rasterOffset;
    size_t pixelCount = 0;

    /*-------OPEN INPUT & OUTPUT FILES-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }
    bmpOutput = fopen("edgeSob.bmp", "wb");
    if (bmpOutput == NULL) {
        printf("Failed to open edgeSob.bmp for writing\n");
        fclose(bmpInput);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    nColors = (int)getImageInfo(bmpInput, 46, 4);
    if (nColors < 0) {
        nColors = 0;
    }

    /*------COPY HEADER AND COLOR TABLE---------*/
    copyImageInfo(bmpInput, bmpOutput);
    copyColorTable(bmpInput, bmpOutput, nColors);

    rasterOffset = BMP_HEADER_BYTES + 4L * nColors;
    fseek(bmpOutput, rasterOffset, SEEK_SET);

    if (edgeImage->rows > 0 && edgeImage->cols > 0) {
        pixelCount = (size_t)edgeImage->rows * (size_t)edgeImage->cols;
    }
    if (fwrite(edgeImage->data, sizeof(unsigned char), pixelCount, bmpOutput) != pixelCount) {
        printf("Failed to write image data to edgeSob.bmp\n");
        fclose(bmpInput);
        fclose(bmpOutput);
        exit(1);
    }

    fclose(bmpInput);
    fclose(bmpOutput);

    printf("See edgeSob.bmp for results\n");
}

/*----------GET IMAGE INFO SUBPROGRAM--------------*/
/*
 * Reads numberOfChars bytes starting at offset and combines them into a
 * little-endian integer (first byte is least significant). Bytes that
 * cannot be read contribute 0.
 */
long getImageInfo(FILE* inputFile, long offset, int numberOfChars)
{
    unsigned long value = 0UL;
    int i;

    if (fseek(inputFile, offset, SEEK_SET) != 0) {
        return 0L;
    }

    /* Integer shifts replace the former pow(256, i) floating-point math. */
    for (i = 0; i < numberOfChars && i < (int)sizeof(value); i++) {
        int byte = fgetc(inputFile);
        if (byte == EOF) {
            break;
        }
        value |= (unsigned long)byte << (8 * i);
    }
    return (long)value;
} /* end of getImageInfo */

/*-------------COPIES HEADER AND INFO HEADER----------------*/
/* Copies the full 54-byte file header + info header (the previous loop
 * stopped after 51 bytes). */
void copyImageInfo(FILE* inputFile, FILE* outputFile)
{
    copyBytes(inputFile, outputFile, 0L, BMP_HEADER_BYTES);
}

/*----------------COPIES COLOR TABLE-----------------------------*/
/* Copies the colour table that follows the headers:
 * there are (4*nColors) bytes in the color table. */
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors)
{
    copyBytes(inputFile, outputFile, (long)BMP_HEADER_BYTES, 4L * nColors);
}
cpSobel.s
	.file "cpSobel.s"
	.ctext

#
# cpXbar function:
#  - reads AEG 4, which indicates whether the memory crossbar is
#    enabled in the AE
#  - the value is returned to the caller in a8
#
	.globl cpXbar
	.type cpXbar. @function
	.signature pdk=4
cpXbar:
	mov %aeg, $4, %a8		# integer values returned in a8
	rtn

#
# cpSobel function:
#  - writes array pointers (image data) and column/row sizes to AEG registers 0-3
#  - calls caep00 to execute the sobel operation
#
	.globl cpSobel
	.type cpSobel. @function
	.signature pdk=4
cpSobel:
	mov %a8, $0, %aeg		# a8 contains address of original image data
	mov %a9, $1, %aeg		# a9 contains address of edge image data
	mov %a10, $2, %aeg		# a10 contains number of rows
	mov %a11, $3, %aeg		# a11 contains number of columns
	caep00 $0			# make the coprocessor call
	rtn

	.cend
Makefile
# Builds the host application (UserApp.exe) and the software simulation
# model of the personality (built by the Makefile in ../sim).

EXEC = UserApp.exe
SWMODEL = ../sim/CaeSimPers

.PHONY: all clean

all: $(EXEC) $(SWMODEL)

clean:
	rm -f $(EXEC) image.o
	$(MAKE) --directory=../sim clean

# Libraries go after the objects that use them.
$(EXEC): UserApp.c cpSobel.s image.o image.h
	cnycc -g UserApp.c cpSobel.s image.o -lm -o UserApp.exe

# $(MAKE) passes command-line flags and the jobserver on to the sub-make.
$(SWMODEL): ../Makefile.include ../sim/CaeIsaSobel.cpp
	$(MAKE) -C ../sim

# Compile only: -c takes the source file, -o names the object file.
image.o: image.h image.c
	cnycc -g -c image.c -o image.o
Software Simulation
The software simulation takes ~7 seconds to run.
CaeIsaSobel.cpp
/*
   AUTH: Chad Nelson (based on Sobel algo by Bill Green)
   DESC: 2 3x3 Sobel masks for edge detection
   DATE: 02/20/11
*/
#include "CaeSim.h"
#include "CaeIsa.h"
#include <stdio.h>
#include <stdlib.h>

#define MAX_AEG_INDEX 128
#define PERS_SIGN_CAE 0x4001000101000LL
#define NUM_AES 4

#undef DEBUG

#ifndef MC_XBAR
#define MC_XBAR 0
#endif

#define LINE_WIDTH 512

/*
 * Personality initialisation: sets the AEG register count to MAX_AEG_INDEX,
 * writes 0 to AEG 0 of AE 0, and registers the personality signature.
 */
void CCaeIsa::InitPers() {
    SetAegCnt(MAX_AEG_INDEX);
    WriteAeg(0, 0, 0);
    SetPersSign(PERS_SIGN_CAE);
}

/*
 * Simulates one custom AE instruction for application engine aeId.
 *
 * Opcode 0x20 (CAEP00) runs the Sobel edge detector on this AE's band of
 * the image. Its arguments are read from AEG registers:
 *   AEG 0 - address of the source image (one byte per pixel, row-major)
 *   AEG 1 - address of the destination image
 *   AEG 2 - number of rows
 *   AEG 3 - number of columns
 * Any other opcode raises AEUIE on every AE.
 *
 * Row and column arithmetic is done in signed types: the previous unsigned
 * versions of "startrow", "endrow - 3" and "cols - 3" wrapped around for
 * small images and produced out-of-range addresses.
 *
 * NOTE(review): with the "- 3" loop bounds, image column cols-2, image row
 * rows-2 and one column at every 510-pixel strip seam are never written.
 * Left unchanged here - confirm against the hardware implementation.
 */
void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
    if (opcode == 0x20) {
        // CAEP00 - sobel
        uint64 original, edge, rows, cols;
        original = ReadAeg(aeId, 0);
        edge = ReadAeg(aeId, 1);
        rows = ReadAeg(aeId, 2);
        cols = ReadAeg(aeId, 3);

        // No complete 3x3 window exists; also keeps "cols - 3" from wrapping.
        if (rows < 3 || cols < 3) {
            return;
        }

        // Each band starts 3 rows early so that consecutive bands meet.
        long startrow = (long) ((rows * aeId) / NUM_AES) - 3;
        long endrow = (long) ((rows * (aeId + 1)) / NUM_AES);
        if (startrow < 0) {
            startrow = 0;
        }

        // Kernels are indexed [column offset][row offset]; the output is
        // |sumX| + |sumY|, so which kernel is labelled X or Y does not
        // affect the result.
        static const int GX[3][3] = { {-1, 0, 1},
                                      {-2, 0, 2},
                                      {-1, 0, 1} };
        static const int GY[3][3] = { { 1, 2, 1},
                                      { 0, 0, 0},
                                      {-1,-2,-1} };
        long sumX, sumY, SUM;
        long x, y;
        int i, j;
        unsigned char cache[3][LINE_WIDTH];
        uint64 x_offset = 0;
        uint64 data, address;

        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            // Load first three rows of pixel data into cache
            for (x = 0; (x < LINE_WIDTH) && (x_offset + x < cols); x++) {
                for (y = 0; y < 3; y++) {
                    address = original + (x_offset + x) + ((startrow + y) * cols);
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[y][x] = (unsigned char) data;
                }
            }

            for (y = startrow; y < endrow - 3; y++) {
                // Make edge calculations
                for (x = 0; x < LINE_WIDTH - 3 && (x_offset + x < cols - 3); x++) {
                    sumX = 0;
                    sumY = 0;
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            sumX += cache[j][x + i] * GX[i][j];
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = labs(sumX) + labs(sumY);
                    if (SUM > 255) {
                        SUM = 255;
                    }

                    // Result belongs to the centre pixel of the window
                    address = edge + (x_offset + x + 1) + ((y + 1) * cols);
                    AeMemStore(aeId, McNum(address), address, 1, false, SUM);
                }

                // Shift cache up, bring in next line
                for (x = 0; x < LINE_WIDTH && (x_offset + x < cols); x++) {
                    address = original + (x_offset + x) + ((y + 3) * cols);
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = (unsigned char) data;
                }
            } // end of row
        } // end of vertical strip
        // End of CAEP00 - sobel
    } else {
        // other CAEPXX instructions
        printf("CAEP00 was not called.\n");
        for (int ae = 0; ae < CAE_AE_CNT; ae += 1) {
            // SetException(int aeID, int bitnum); AEUIE = 0
            SetException(ae, AEUIE);
        }
    }
}