User Application
UserApp.c
#include <convey/usr/cny_comp.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "image.h"
#undef DEBUG
// 64-bit unsigned integer; used for the addresses and sizes handed to sobel().
typedef unsigned long long uint64;
// Coprocessor routines that main() passes to copcall_fmt()/i_copcall_fmt().
// Presumably these are the cpSobel/cpXbar entry points exported by
// cpSobel.s - TODO confirm against the build.
extern void cpSobel();
extern int cpXbar();
// NOTE(review): usage() is declared here but no definition or call is visible
// in this file.
void usage (char *);
// Host-side mock of the Sobel routine; defined after main().
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols);
/*
* ./run ./lena.bmp
* Reads in a grayscale bitmap, calls the sobel() function, writes the bitmap to edgeSob.bmp
*/
int main(int argc, char *argv[]) {
    /*
     * Exits with status 1 on a usage error or when any coprocessor
     * precondition checked below is not met; returns 0 after the edge
     * image has been written.
     */

    // Check arguments
    if (argc != 2) {
        printf("Usage: %s bmpInput.bmp\n", argv[0]);
        exit(1);
    }

    // Get personality signature
    cny_image_t sig, sig2;
    int stat;
    cny_get_signature("pdk", &sig, &sig2, &stat);
    if (stat) {
        printf("***ERROR: cny_get_signature() Failure: %d\n", stat);
        exit(1);
    }

    // Check memory interleave
    if (cny_cp_interleave() == CNY_MI_3131) {
        printf("ERROR - interleave set to 3131, this personality requires binary interleave\n");
        exit(1);
    }

    // Check for crossbar support
    int crossbar_enabled = i_copcall_fmt(sig, cpXbar, "");
    if (!crossbar_enabled) {
        printf("ERROR - Crossbar not enabled!\n");
        // The only allocation path in this program is the plain
        // cny_cp_malloc() in readImage(); the 512-byte aligned
        // cny_cp_posix_memalign() fallback that used to be sketched here
        // was never implemented, so stop like the other failed checks do
        // instead of dispatching the coprocessor call anyway.
        exit(1);
    }

    // Read in bitmap data
    sImage original, edge;
    readImage(argv[1], &original, &edge);

    // Coprocessor Call
    // cpSobel moves each of the four arguments into an AEG register and
    // documents them as addresses/counts, so the int dimensions are widened
    // explicitly rather than being passed through varargs as plain ints.
    copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data,
                (unsigned long long) original.rows,
                (unsigned long long) original.cols);

    // Mock coprocessor call
    // sobel(original.data, edge.data, original.rows, original.cols);

    // Write bitmap data
    writeImage(argv[1], &edge);
    return 0;
}
#define NUM_AES 4
#define LINE_WIDTH 512
/*
 * Mocks the software simulator: runs the Sobel edge detector on the host,
 * once per application engine (AE), using the same strip/row partitioning
 * as the simulated CAEP00 instruction.
 *
 * original - address of the source pixels, one byte per pixel, stored
 *            row-major with `cols` bytes per row
 * edge     - address of the destination buffer, same layout
 * rows     - image height in pixels
 * cols     - image width in pixels
 *
 * Each AE handles a horizontal band of the image (overlapping the previous
 * band by three rows); inside a band the image is walked in vertical strips
 * of LINE_WIDTH pixels while a three-row window of the strip is kept in
 * `cache`. Only interior pixels are written; the caller owns whatever is in
 * the remaining bytes of `edge`.
 *
 * NOTE(review): with the loop bounds below, row rows-2, column cols-2 and
 * the column between two adjacent strips are never written. The bounds are
 * kept as they were - confirm against the hardware before changing them.
 */
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    /* Both kernels are indexed [column offset][row offset]. */
    static const int GY[3][3] = { {-1, 0, 1},
                                  {-2, 0, 2},
                                  {-1, 0, 1} };
    static const int GX[3][3] = { { 1, 2, 1},
                                  { 0, 0, 0},
                                  {-1,-2,-1} };
    unsigned char cache[3][LINE_WIDTH];
    int aeId;

    /* Nothing fits a 3x3 window; also keeps `cols - 3` from wrapping. */
    if (rows < 3 || cols < 3)
        return;

    for (aeId = 0; aeId < NUM_AES; aeId++) {
        int startrow = (int)((rows * aeId) / NUM_AES) - 3;
        int endrow = (int)((rows * (aeId + 1)) / NUM_AES);
        int x_offset, x, y, i, j;
        uint64 address;

        if (startrow < 0)
            startrow = 0;

        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; (uint64) x_offset < cols; x_offset += LINE_WIDTH - 2) {
            // Load first three rows of pixel data into cache
            for (x = 0; (x < LINE_WIDTH) && ((uint64)(x + x_offset) < cols); x++) {
                for (y = 0; y < 3; y++) {
                    address = original + (x_offset + x) + ((startrow + y) * cols);
                    cache[y][x] = *((unsigned char *) address);
                }
            }

            for (y = startrow; y < endrow - 3; y++) {
                // Make edge calculations
                for (x = 0; (x < LINE_WIDTH - 3) && ((uint64)(x + x_offset) < cols - 3); x++) {
                    long sumX = 0;
                    long sumY = 0;
                    long SUM;

                    // X and Y gradient approximations over the 3x3 window.
                    // j must stop at 2: cache and the kernels only have
                    // three rows/columns.
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            int pixel = cache[j][x + i];
                            sumX += pixel * GX[i][j];
                            sumY += pixel * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = labs(sumX) + labs(sumY);
                    if (SUM > 255)
                        SUM = 255;

                    address = edge + (x_offset + x + 1) + ((y + 1) * cols);
                    *((unsigned char *) address) = (unsigned char) SUM;
                }

                // Shift cache up, bring in next line
                for (x = 0; (x < LINE_WIDTH) && ((uint64)(x + x_offset) < cols); x++) {
                    address = original + (x_offset + x) + ((y + 3) * cols);
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = *((unsigned char *) address);
                }
            } // end of row
        } // end of vertical strip
    } // end of AE
}
image.h
#ifndef IMAGE_H
#define IMAGE_H

/*
 * One-byte-per-pixel image held in memory.
 * The pixel at (row, col) lives at data[row * cols + col].
 */
typedef struct {
    int rows;             /* image height in pixels */
    int cols;             /* image width in pixels */
    unsigned char *data;  /* pixel bytes, row-major; allocated by readImage() */
} sImage;

/*
 * Writes edgeImage to "edgeSob.bmp", reusing the BMP header and colour
 * table of the file named by `filename`.
 */
void writeImage(char *filename, sImage *edgeImage);

/*
 * Reads the BMP file `filename` into originalImage and allocates an
 * equally sized buffer for edgeImage (rows/cols are set on both).
 */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage);

#endif /* IMAGE_H */
image.c
/*
FILE: edgeSob.c - WORKS!!
AUTH: Chad Nelson (originally by Bill Green)
DESC: 2 3x3 Sobel masks for edge detection
DATE: 02/20/2012
REFS: edgeLap.c
*/
#include <convey/usr/cny_comp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "image.h"
/*-------PROTOTYPES---------*/
long getImageInfo(FILE*, long, int);
void copyImageInfo(FILE* inputFile, FILE* outputFile);
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors);
/*
 * Loads an 8-bit BMP file into memory.
 *
 * filename      - path of the BMP file to read
 * originalImage - receives rows, cols and a freshly allocated buffer holding
 *                 the pixel bytes (rows*cols bytes, row padding removed)
 * edgeImage     - receives the same rows/cols and a zero-filled buffer of
 *                 the same size for the result
 *
 * Both buffers come from cny_cp_malloc(). Prints the header fields it reads
 * and terminates the process with status 1 if the file cannot be opened,
 * has non-positive dimensions, cannot be allocated for, or is truncated.
 *
 * NOTE(review): the pixel data is assumed to start at 14+40+4*nColors, with
 * nColors taken from header offset 46, exactly as before. Files that store 0
 * there would need the data offset from header offset 10 - confirm against
 * the inputs this tool is expected to handle.
 */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage)
{
    FILE *bmpInput;
    unsigned long fileSize, headerSize, vectorSize, pixelCount, allocSize, k;
    unsigned int nColors;
    int row, rowPad;

    printf("Reading filename: %s\n", filename);

    /*-------OPEN INPUT FILE-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    fileSize = getImageInfo(bmpInput, 2, 4);
    originalImage->cols = (int)getImageInfo(bmpInput, 18, 4);
    originalImage->rows = (int)getImageInfo(bmpInput, 22, 4);
    edgeImage->rows = originalImage->rows;
    edgeImage->cols = originalImage->cols;

    /*--------PRINT DATA TO SCREEN----------*/
    printf("Width: %d\n", originalImage->cols);
    printf("Height: %d\n", originalImage->rows);
    printf("File size: %lu\n", fileSize);
    nColors = (int)getImageInfo(bmpInput, 46, 4);
    printf("nColors: %d\n", nColors);

    /* Non-positive sizes would make the copy loop below run away. */
    if (originalImage->cols <= 0 || originalImage->rows <= 0) {
        printf("Unsupported image dimensions: %d x %d\n",
               originalImage->cols, originalImage->rows);
        fclose(bmpInput);
        exit(1);
    }

    /*------ALLOCATE MEMORY FOR FILES--------*/
    headerSize = 14UL + 40UL + 4UL * nColors;
    vectorSize = (fileSize > headerSize) ? fileSize - headerSize : 0UL;
    printf("vectorSize: %lu\n", vectorSize);

    /* Never allocate less than the rows*cols bytes that are indexed later,
       even if the file-size field in the header is wrong. */
    pixelCount = (unsigned long)originalImage->rows * (unsigned long)originalImage->cols;
    allocSize = (vectorSize > pixelCount) ? vectorSize : pixelCount;

    edgeImage->data = (unsigned char *) (cny_cp_malloc)(allocSize*sizeof(unsigned char));
    originalImage->data = (unsigned char *) (cny_cp_malloc)(allocSize*sizeof(unsigned char));
    if(edgeImage->data == NULL || originalImage->data == NULL) {
        printf("Failed to cny_cp_malloc image space\n");
        fclose(bmpInput);
        exit(1);
    }
    printf("%lu bytes malloc'ed for image data\n", allocSize);

    /* The edge detector only writes interior pixels; start from black so
       the untouched border is well defined. */
    for (k = 0; k < allocSize; k++) {
        edgeImage->data[k] = 0;
    }

    if (fseek(bmpInput, (long)headerSize, SEEK_SET) != 0) {
        printf("Failed to seek to pixel data in %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    /* Read input.bmp and store its raster data into originalImage->data.
       BMP rows are padded to a multiple of four bytes; skip the padding so
       the in-memory image is exactly cols bytes per row. */
    rowPad = (4 - originalImage->cols % 4) % 4;
    for (row = 0; row < originalImage->rows; row++) {
        unsigned char *dst = originalImage->data
                           + (unsigned long)row * (unsigned long)originalImage->cols;

        if (fread(dst, sizeof(char), (size_t)originalImage->cols, bmpInput)
                != (size_t)originalImage->cols) {
            printf("Unexpected end of pixel data at row %d\n", row);
            fclose(bmpInput);
            exit(1);
        }
        if (rowPad != 0 && fseek(bmpInput, (long)rowPad, SEEK_CUR) != 0) {
            printf("Failed to skip row padding at row %d\n", row);
            fclose(bmpInput);
            exit(1);
        }
    }

    fclose(bmpInput);
}
/*
 * Writes edgeImage to "edgeSob.bmp".
 *
 * filename  - path of the original BMP; its header and colour table are
 *             copied to the output so the result has the same format
 * edgeImage - image to write; rows*cols bytes are taken from data
 *
 * Each output row is padded with zero bytes to a multiple of four bytes, as
 * the BMP format requires. Terminates the process with status 1 if either
 * file cannot be opened or the pixel data cannot be written.
 */
void writeImage(char *filename, sImage *edgeImage) {
    static const unsigned char padBytes[3] = { 0, 0, 0 };
    FILE *bmpInput, *bmpOutput;
    int nColors;
    int Y;

    /*-------OPEN INPUT & OUTPUT FILES-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }
    bmpOutput = fopen("edgeSob.bmp", "wb");
    if (bmpOutput == NULL) {
        printf("Failed to open edgeSob.bmp for writing\n");
        fclose(bmpInput);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    nColors = (int)getImageInfo(bmpInput, 46, 4);

    /*------COPY HEADER AND COLOR TABLE---------*/
    copyImageInfo(bmpInput, bmpOutput);
    copyColorTable(bmpInput, bmpOutput, nColors);

    /* Pixel data starts right after the headers and colour table. */
    fseek(bmpOutput, (14+40+4*nColors), SEEK_SET);

    if (edgeImage->cols > 0) {
        size_t rowBytes = (size_t)edgeImage->cols;
        size_t rowPad = (size_t)((4 - edgeImage->cols % 4) % 4);

        for (Y = 0; Y < edgeImage->rows; Y++) {
            const unsigned char *src = edgeImage->data + (size_t)Y * rowBytes;

            if (fwrite(src, sizeof(char), rowBytes, bmpOutput) != rowBytes
                || (rowPad != 0
                    && fwrite(padBytes, sizeof(char), rowPad, bmpOutput) != rowPad)) {
                printf("Failed to write row %d to edgeSob.bmp\n", Y);
                fclose(bmpInput);
                fclose(bmpOutput);
                exit(1);
            }
        }
    }

    fclose(bmpInput);
    /* fclose flushes buffered output, so a failure here means data loss. */
    if (fclose(bmpOutput) != 0) {
        printf("Failed to finish writing edgeSob.bmp\n");
        exit(1);
    }
    printf("See edgeSob.bmp for results\n");
}
/*----------GET IMAGE INFO SUBPROGRAM--------------*/
/*
 * Reads a little-endian unsigned integer from a file.
 *
 * inputFile     - open stream to read from; its position is moved
 * offset        - byte offset from the start of the file
 * numberOfChars - number of bytes that make up the value
 *
 * Returns the assembled value (first byte is least significant), 0 when
 * numberOfChars is not positive, or -1 if the stream is NULL, the seek
 * fails, or the file ends before numberOfChars bytes were read.
 */
long getImageInfo(FILE* inputFile, long offset, int numberOfChars)
{
    unsigned long value = 0UL;
    int i;

    if (inputFile == NULL || fseek(inputFile, offset, SEEK_SET) != 0) {
        return -1L;
    }

    for (i = 0; i < numberOfChars; i++) {
        int byte = fgetc(inputFile);

        if (byte == EOF) {
            return -1L;
        }
        /* Bytes beyond the width of the result cannot be represented;
           shifting by that much would be undefined, so they are skipped. */
        if (i < (int)sizeof(value)) {
            value |= (unsigned long)byte << (8 * i);
        }
    }
    return (long)value;
} /* end of getImageInfo */
/*-------------COPIES HEADER AND INFO HEADER----------------*/
/*
 * Copies the BMP file header and info header (the first 14+40 = 54 bytes)
 * from the start of inputFile to the start of outputFile.
 *
 * Both streams are repositioned. If the input is shorter than 54 bytes,
 * only the bytes that exist are copied and a warning is printed.
 */
void copyImageInfo(FILE* inputFile, FILE* outputFile)
{
    unsigned char header[14 + 40];
    size_t got;

    fseek(inputFile, 0L, SEEK_SET);
    fseek(outputFile, 0L, SEEK_SET);

    got = fread(header, sizeof(char), sizeof header, inputFile);
    if (got != sizeof header) {
        printf("Warning: BMP header is only %lu bytes long\n", (unsigned long)got);
    }
    if (fwrite(header, sizeof(char), got, outputFile) != got) {
        printf("Warning: failed to write BMP header\n");
    }
}
/*----------------COPIES COLOR TABLE-----------------------------*/
/*
 * Copies the BMP colour table from inputFile to outputFile.
 *
 * The table is taken to start at byte 54 in both files and to hold nColors
 * entries of four bytes each, so exactly 4*nColors bytes are copied.
 * Both streams are repositioned. Copying stops with a warning if the input
 * ends early or the output cannot be written.
 */
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors)
{
    unsigned char entry[4];
    int i;

    fseek(inputFile, 54L, SEEK_SET);
    fseek(outputFile, 54L, SEEK_SET);

    for (i = 0; i < nColors; i++) {
        size_t got = fread(entry, sizeof(char), sizeof entry, inputFile);

        if (got != 0 && fwrite(entry, sizeof(char), got, outputFile) != got) {
            printf("Warning: failed to write colour table entry %d\n", i);
            break;
        }
        if (got != sizeof entry) {
            printf("Warning: colour table ends at entry %d of %d\n", i, nColors);
            break;
        }
    }
}
cpSobel.s
.file "cpSobel.s"
.ctext
#
# cpXbar function:
# -reads AEG register 4 into a8 and returns; a8 carries the integer
#  return value back to the caller
# -per the original author's note, AEG 4 indicates whether the memory
#  crossbar is enabled in the AE; UserApp.c treats a zero result as
#  "crossbar not enabled"
#
.globl cpXbar
.type cpXbar. @function
.signature pdk=4
cpXbar:
mov %aeg, $4, %a8 # integer values returned in a8
rtn
#
# cpSobel function:
# -copies the four call arguments (a8..a11) into AEG registers 0..3:
#  image data pointers first, then the row and column counts
# -issues caep00 to execute the sobel operation; the simulator model reads
#  AEG 0..3 back in the same order when it handles CAEP00
#
.globl cpSobel
.type cpSobel. @function
.signature pdk=4
cpSobel:
mov %a8, $0, %aeg # AEG 0 <- a8: address of original image data
mov %a9, $1, %aeg # AEG 1 <- a9: address of edge image data
mov %a10, $2, %aeg # AEG 2 <- a10: number of rows
mov %a11, $3, %aeg # AEG 3 <- a11: number of columns
caep00 $0 # make the coprocessor call
rtn
.cend
Makefile
# Builds the host application (UserApp.exe) and the software model in ../sim.
EXEC = UserApp.exe
SWMODEL = ../sim/CaeSimPers

# all/clean are commands, not files.
.PHONY: all clean

all: $(EXEC) $(SWMODEL)

clean:
	rm -f $(EXEC) image.o
	$(MAKE) --directory=../sim clean

# UserApp.c includes image.h, so a header change must trigger a rebuild.
# Libraries go after the objects that use them.
$(EXEC): UserApp.c cpSobel.s image.o image.h
	cnycc -g UserApp.c cpSobel.s image.o -o $(EXEC) -lm

# $(MAKE) passes the parent's flags and jobserver to the sub-make.
$(SWMODEL): ../Makefile.include ../sim/CaeIsaSobel.cpp
	$(MAKE) -C ../sim

# Compile only: the source is the input, image.o is the output.
image.o: image.h image.c
	cnycc -g -c image.c -o image.o
Software Simulation
CaeIsaSobel.cpp
/*
AUTH: Chad Nelson (based on Sobel algo by Bill Green)
DESC: 2 3x3 Sobel masks for edge detection
DATE: 02/20/11
*/
#include "CaeSim.h"
#include "CaeIsa.h"
#include <stdio.h>
#include <stdlib.h>
#define MAX_AEG_INDEX 128
#define PERS_SIGN_CAE 0x4001000101000LL
#define NUM_AES 4
#undef DEBUG
#ifndef MC_XBAR
#define MC_XBAR 0
#endif
#define LINE_WIDTH 512
// Personality initialisation for the Sobel simulator model: sets the AEG
// register count, writes one AEG register, and sets the personality
// signature.
// NOTE(review): the required order of these calls depends on the CaeSim
// framework, which is not visible here - keep the order unless confirmed.
void CCaeIsa::InitPers() {
// MAX_AEG_INDEX is defined as 128 earlier in this file.
SetAegCnt(MAX_AEG_INDEX);
// Presumably (aeId, AEG index, value), by analogy with the
// ReadAeg(aeId, index) calls in CaepInst - TODO confirm.
WriteAeg(0, 0, 0);
// PERS_SIGN_CAE is defined as 0x4001000101000LL earlier in this file.
SetPersSign(PERS_SIGN_CAE);
}
void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
if (opcode == 0x20) {
// CAEP00 - sobel
uint64 original, edge, rows, cols;
original = ReadAeg(aeId, 0);
edge = ReadAeg(aeId, 1);
rows = ReadAeg(aeId, 2);
cols = ReadAeg(aeId, 3);
unsigned int startrow = (rows * aeId) / NUM_AES - 3;
unsigned int endrow = (rows * (aeId+1)) / NUM_AES;
if (aeId == 0)
startrow = 0;
int GY[3][3] = { {-1, 0, 1},
{-2, 0, 2},
{-1, 0, 1} };
int GX[3][3] = { { 1, 2, 1},
{ 0, 0, 0},
{-1,-2,-1} };
long sumX, sumY, SUM;
unsigned int x, y, i, j;
unsigned char cache[3][LINE_WIDTH];
unsigned int x_offset = 0;
uint64 data, address;
// Divide image into vertical strips, each LINE_WIDTH pixels wide
// with a 2 pixel overlap
for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
// Load first three rows of pixel data into cache
for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++) {
for(y=0; y < 3; y++) {
address = original + (x_offset + x) + ((startrow + y)*cols);
AeMemLoad(aeId, McNum(address), address, 1, false, data);
cache[y][x] = (char) data;
}
}
for (y = startrow; y < endrow - 3; y++) {
// Make edge calculations
for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++) {
// -------X GRADIENT APPROXIMATION------
sumX = 0;
for(i=0; i<3; i++) {
for(j=0; j<=3; j++) {
sumX += cache[j][x + i] * GX[i][j];
}
}
// -------Y GRADIENT APPROXIMATION-------
sumY = 0;
for(i=0; i<3; i++) {
for(j=0; j<=3; j++) {
sumY += cache[j][x + i] * GY[i][j];
}
}
// ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
SUM = abs(sumX) + abs(sumY);
if(SUM > 255)
SUM = 255;
if(SUM < 0)
SUM = 0;
address = edge + (x_offset + x + 1) + ((y + 1)*cols);
AeMemStore(aeId, McNum(address), address, 1, false, (char) SUM);
}
// Shift cache up, bring in next line
for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++) {
address = original + (x_offset + x) + ((y + 3)*cols);
AeMemLoad(aeId, McNum(address), address, 1, false, data);
cache[0][x] = cache[1][x];
cache[1][x] = cache[2][x];
cache[2][x] = (char) data;
}
} // end of row
} // end of vertical strip
// End of CAEP00 - sobel
} else {
// other CAEPXX instructions
printf("CAEP00 was not called.\n");
for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) {
// SetException(int aeID, int bitnum); AEUIE = 0
SetException(aeId, AEUIE);
}
}
}