Team Gryffindor - Sobel Source Code: Difference between revisions

From Cpre584
Jump to navigation Jump to search
m →‎Makefile: Added software simulation cpp code
 
(11 intermediate revisions by the same user not shown)
Line 1: Line 1:
= User Application =
== UserApp.c ==
== UserApp.c ==
  <nowiki>#include <convey/usr/cny_comp.h>
  <nowiki>#include <convey/usr/cny_comp.h>
Line 15: Line 17:
void usage (char *);
void usage (char *);


int main(int argc, char *argv[])
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols);
{
 
 
/*
* ./run ./lena.bmp
* Reads in a grayscale bitmap, calls the sobel() function, writes the bitmap to edgeSob.bmp
*/
int main(int argc, char *argv[]) {
     // Check arguments
     // Check arguments
     if (argc != 2) {
     if (argc != 2) {
         printf("Usage: %s bmpInput.bmp\n", argv[0]);
         printf("Usage: %s bmpInput.bmp\n", argv[0]);
    exit(1);
        exit(1);  
     }
     }


Line 40: Line 48:


     // Check for crossbar support
     // Check for crossbar support
     if (!i_copcall_fmt(sig, cpXbar, ""))    {
     int crossbar_enabled = i_copcall_fmt(sig, cpXbar, "");
printf("ERROR - Crossbar not enabled!");
    if (!crossbar_enabled)    {
exit(1);
        printf("ERROR - Crossbar not enabled!\n");
        /*
        cny_cp_posix_memalign((void**)&original.data, 512, size*8);
        cny_cp_posix_memalign((void**)&edge.data, 512, size*8);
        */
     }
     }


     // Read in bitmap  
     // Read in bitmap data
     sImage original, edge;
     sImage original, edge;
     readImage(argv[1], &original, &edge);
     readImage(argv[1], &original, &edge);
Line 51: Line 63:
     // Coprocessor Call
     // Coprocessor Call
     copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data, original.rows, original.cols);
     copcall_fmt(sig, cpSobel, "AAAA", original.data, edge.data, original.rows, original.cols);
    // Mock coprocessor call
    // sobel(original.data, edge.data, original.rows, original.cols);


     // Write bitmap  
     // Write bitmap data
     writeImage(argv[1], &edge);
     writeImage(argv[1], &edge);


     return 0;
     return 0;
}
#define NUM_AES 4
#define LINE_WIDTH 512
/*
* Mocks the software simulator
*/
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    int aeId;
    for (aeId = 0; aeId<4; aeId++) {
        /*
        uint64 original, edge, rows, cols;
       
        original = ReadAeg(aeId, 0);
        edge    = ReadAeg(aeId, 1);
        rows    = ReadAeg(aeId, 2);
        cols    = ReadAeg(aeId, 3);
        */
        int startrow = (rows * aeId) / NUM_AES - 3;
        int endrow = (rows * (aeId+1)) / NUM_AES;
        if (startrow < 0)
            startrow = 0;
        int GY[3][3] = { {-1, 0, 1},
                        {-2, 0, 2},
                        {-1, 0, 1} };
        int GX[3][3] = { { 1, 2, 1},
                        { 0, 0, 0},
                        {-1,-2,-1} };
        long sumX, sumY, SUM;
        int x, y, i, j, bytes;
        unsigned char cache[3][LINE_WIDTH];
        int x_offset = 0;
        uint64 data, address;
       
        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            // Load first three rows of pixel data into cache
            for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++)  {
                for(y=0; y < 3; y++)  {
                    address = original + (x_offset + x) + ((startrow + y)*cols);
                   
                    // AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    data = *((char *) address);
                    cache[y][x] = (char) data;
                }
            }
            for (y = startrow; y < endrow - 3; y++) {
                // Make edge calculations
                for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++)  {
                    // -------X GRADIENT APPROXIMATION------
                    sumX = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<=3; j++)  {
                            sumX += cache[j][x + i] * GX[i][j];
                        }
                    }
                    // -------Y GRADIENT APPROXIMATION-------
                    sumY = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<=3; j++)  {
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }
                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = abs(sumX) + abs(sumY);
                    if(SUM > 255)
                        SUM = 255;
                    if(SUM < 0)
                        SUM = 0;
                    address = edge + (x_offset + x + 1) + ((y + 1)*cols);
                    // AeMemStore(aeId, McNum(address), address, 1, false, (char) SUM);
                    *((char *) address) = (unsigned char) SUM;
                }
                // Shift cache up, bring in next line
                for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++)  {
                    address = original + (x_offset + x) + ((y + 3)*cols);
                    // AeMemLoad(aeId, McNum(address), address, 1, false, data); 
                    data = *((char *) address);
 
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = (char) data;
                }
            } // end of row
        } // end of vertical strip
    } // end of AE
}
}
</nowiki>
</nowiki>
Line 324: Line 448:
</nowiki>
</nowiki>


= Software Simulation =
The software simulation takes ~7 seconds to run.


== CaeIsaSobel.cpp ==
== CaeIsaSobel.cpp ==
Line 338: Line 465:
#include <stdlib.h>
#include <stdlib.h>


#define MAX_AEG_INDEX 3
#define MAX_AEG_INDEX 128
#define PERS_SIGN_CAE 0x4001000101000LL
#define PERS_SIGN_CAE 0x4001000101000LL


#define AEG_CONFIG 4
#define NUM_AES    4
#define AEG_SAE_BASE 30
#define AEG_STL 40


#define NUM_MCS 8
#define NUM_PIPES 16
#define MEM_REQ_SIZE 8
#define AEUIE 0
#undef DEBUG
#undef DEBUG


#ifndef MC_XBAR
#ifndef MC_XBAR
#define MC_XBAR 0
    #define MC_XBAR 0
#endif
#endif


Line 368: Line 488:
void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
     if (opcode == 0x20) {
     if (opcode == 0x20) {
// CAEP00 - sobel
        // CAEP00 - sobel
 
        uint64 original, edge, rows, cols;
       
        original = ReadAeg(aeId, 0);
        edge    = ReadAeg(aeId, 1);
        rows    = ReadAeg(aeId, 2);
        cols    = ReadAeg(aeId, 3);
 
        unsigned int startrow = (rows * aeId) / NUM_AES - 3;
        unsigned int endrow = (rows * (aeId+1)) / NUM_AES;


long rows, cols;
        if (aeId == 0)
uint64 address, original, edge;
            startrow = 0;


    original = ReadAeg(aeId, 0);
        int GX[3][3] = { {-1, 0, 1},
edge    = ReadAeg(aeId, 1);
                        {-2, 0, 2},
rows    = ReadAeg(aeId, 2);
                        {-1, 0, 1} };
cols    = ReadAeg(aeId, 3);


    int GX[3][3];
        int GY[3][3] = { { 1, 2, 1},
    int GY[3][3];
                        { 0, 0, 0},
    long sumX, sumY, SUM;
                        {-1,-2,-1} };
    int x, y, i, j;


/* 3x3 GX Sobel mask.  Ref: www.cee.hw.ac.uk/hipr/html/sobel.html */
        long sumX, sumY, SUM;
GX[0][0] = -1; GX[0][1] = 0; GX[0][2] = 1;
        unsigned int x, y, i, j;
GX[1][0] = -2; GX[1][1] = 0; GX[1][2] = 2;
GX[2][0] = -1; GX[2][1] = 0; GX[2][2] = 1;


GY[0][0] =  1; GY[0][1] = 2; GY[0][2] =  1;
        unsigned char cache[3][LINE_WIDTH];
GY[1][0] =  0; GY[1][1] =  0; GY[1][2] = 0;
        unsigned int x_offset = 0;
GY[2][0] = -1; GY[2][1] = -2; GY[2][2] = -1;
        uint64 data, address;
       
        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {


    unsigned char cache[3][LINE_WIDTH];
            // Load first three rows of pixel data into cache
    int x_offset = 0;
            for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++)  {
    int y_offset = 0;
                for(y=0; y < 3; y++)  {
uint64 data;
                    address = original + (x_offset + x) + ((startrow + y)*cols);
                   
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[y][x] = (char) data;
                }
            }


for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {  
            for (y = startrow; y < endrow - 3; y++) {


                // Make edge calculations
                for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++)  {


    // load first three rows
                    // -------X GRADIENT APPROXIMATION------
    for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++) {
                    sumX = 0;
        for(y=0; y < 3; y++)  {
                    for(i=0; i<3; i++) {
address = original + (x_offset + x) + ((y_offset + y)*cols);
                        for(j=0; j<3; j++)  {
AeMemLoad(aeId, McNum(address), address, 0, false, data);
                            sumX += cache[j][x + i] * GX[i][j];
            cache[y][x] = data;
                        }
        }
                    }
    }


    for (y_offset = 0; y_offset < rows - 3; y_offset++) {
                    // -------Y GRADIENT APPROXIMATION-------
                    sumY = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<3; j++) {
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }


        // Make edge calculations
                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
        for(x=0; x < LINE_WIDTH - 3; x++)  {
                    SUM = abs(sumX) + abs(sumY);
            /*-------X GRADIENT APPROXIMATION------*/
            sumX = 0;
            for(i=0; i<3; i++) {
                for(j=0; j<=3; j++) {
                    sumX += cache[y + j][x + i] * GX[i][j];
                }
            }


            /*-------Y GRADIENT APPROXIMATION-------*/
                    if(SUM > 255)
            sumY = 0;
                        SUM = 255;
            for(i=0; i<3; i++) {
                    if(SUM < 0)  
                for(j=0; j<=3; j++)  {
                        SUM = 0;
                    sumX += cache[y + j][x + i] * GY[i][j];
                }
            }


            /*---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----*/
                    address = edge + (x_offset + x + 1) + ((y + 1)*cols);
            SUM = abs(sumX) + abs(sumY);


            if(SUM>255)  
                    AeMemStore(aeId, McNum(address), address, 1, false, SUM);
                SUM=255;
                }
            if(SUM<0)
                SUM=0;


            data = SUM;
                // Shift cache up, bring in next line
address = original + (x_offset + x + 1) + ((y_offset + 1)*cols);
                for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++)  {
AeMemStore(aeId, McNum(address), address, 0, false, data);
                    address = original + (x_offset + x) + ((y + 3)*cols);
        }
                    AeMemLoad(aeId, McNum(address), address, 1, false, data)
 
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = (char) data;
                }


        // Shift cache up, bring in next line
            } // end of row
        for(x=0; x < LINE_WIDTH; x++)  {
        } // end of vertical strip
            cache[0][x] = cache[1][x];
 
            cache[1][x] = cache[2][x];
        // End of CAEP00 - sobel
address = original + (x_offset + x) + ((y_offset + 3)*cols);
    } else {
AeMemLoad(aeId, McNum(address), address, 0, false, data);
        // other CAEPXX instructions
cache[2][x] = data;
        printf("CAEP00 was not called.\n");
        }
        for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) {
    }
            // SetException(int aeID, int bitnum);  AEUIE = 0
}
            SetException(aeId, AEUIE);  
} else {
        }
// other CAEPXX instructions
    }
for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) {
// SetException(int aeID, int bitnum);  AEUIE = 0
SetException(aeId, AEUIE);  
}
}
}
}
</nowiki>
</nowiki>

Latest revision as of 01:28, 23 February 2012

User Application

UserApp.c

#include <convey/usr/cny_comp.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "image.h"

#undef DEBUG

typedef unsigned long long uint64;
extern void cpSobel();
extern int cpXbar();
void usage (char *);

void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols);


/*
 * Usage: ./run ./lena.bmp
 *
 * Loads the bitmap named on the command line, hands the pixel buffers to the
 * coprocessor routine cpSobel, and saves the result through writeImage()
 * (which writes edgeSob.bmp).
 */
int main(int argc, char *argv[]) {
    cny_image_t signature, signature_aux;
    int status;
    int xbar_ok;
    sImage original, edge;

    // Exactly one argument is expected: the input bitmap path
    if (argc != 2) {
        printf("Usage: %s bmpInput.bmp\n", argv[0]);
        exit(1);
    }

    // Look up the signature of the "pdk" personality
    cny_get_signature("pdk", &signature, &signature_aux, &status);
    if (status != 0) {
        printf("***ERROR: cny_get_signature() Failure: %d\n", status);
        exit(1);
    }

    // The personality only works with binary memory interleave
    if (cny_cp_interleave() == CNY_MI_3131) {
        printf("ERROR - interleave set to 3131, this personality requires binary interleave\n");
        exit(1);
    }

    // Ask the coprocessor whether the memory crossbar is available.
    // A missing crossbar is reported but does not stop the run.
    xbar_ok = i_copcall_fmt(signature, cpXbar, "");
    if (xbar_ok == 0) {
        printf("ERROR - Crossbar not enabled!\n");
    }

    // Load the input bitmap; readImage() also sizes and allocates the output
    readImage(argv[1], &original, &edge);

    // Run the Sobel operation on the coprocessor.
    // (The software mock sobel() below takes the same four arguments.)
    copcall_fmt(signature, cpSobel, "AAAA", original.data, edge.data, original.rows, original.cols);

    // Save the edge image
    writeImage(argv[1], &edge);

    return 0;
}


#define NUM_AES 4
#define LINE_WIDTH 512

/*
 * Software mock of the simulator's CAEP00 Sobel instruction.
 *
 * original - address of the source pixels: rows*cols bytes, one byte per
 *            pixel, stored row after row
 * edge     - address of the destination buffer, same layout
 * rows     - image height in pixels
 * cols     - image width in pixels
 *
 * The work is split the same way the simulator splits it: the image rows are
 * divided into NUM_AES slices (each slice starts three rows early so that
 * neighbouring slices overlap), and every slice is processed in vertical
 * strips of LINE_WIDTH columns using a three-row line cache.
 *
 * Images with fewer than three rows or columns have no 3x3 neighbourhood and
 * are left untouched.
 *
 * NOTE(review): the loop bounds are kept identical to the simulator's, so the
 * same pixels are written. As written, output column x_offset + LINE_WIDTH - 2
 * of each strip, column cols - 2 and row rows - 2 are never stored. Confirm
 * whether that is intended before relying on those pixels.
 */
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    /* Kernel weights, indexed [column offset][row offset] like the cache reads */
    static const int GY[3][3] = { {-1, 0, 1},
                                  {-2, 0, 2},
                                  {-1, 0, 1} };

    static const int GX[3][3] = { { 1, 2, 1},
                                  { 0, 0, 0},
                                  {-1,-2,-1} };

    const unsigned char *src = (const unsigned char *) original;
    unsigned char *dst = (unsigned char *) edge;
    unsigned char cache[3][LINE_WIDTH];
    int aeId;

    /* Nothing to compute (and cols - 3 / row 2 would be out of range) */
    if (rows < 3 || cols < 3)
        return;

    for (aeId = 0; aeId < NUM_AES; aeId++) {
        uint64 slice_start = (rows * aeId) / NUM_AES;
        uint64 endrow = (rows * (aeId + 1)) / NUM_AES;
        /* Start three rows early, clamped at the top of the image */
        uint64 startrow = (slice_start > 3) ? slice_start - 3 : 0;
        uint64 x_offset;

        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            uint64 width = cols - x_offset;
            uint64 x, y;

            if (width > LINE_WIDTH)
                width = LINE_WIDTH;

            // Load first three rows of pixel data into cache
            for (y = 0; y < 3; y++)
                memcpy(cache[y], src + (startrow + y) * cols + x_offset, (size_t) width);

            // cache[] holds image rows y, y+1, y+2; the result goes to row y+1
            for (y = startrow; y + 3 < endrow; y++) {

                // Make edge calculations
                for (x = 0; x < LINE_WIDTH - 3 && x_offset + x + 3 < cols; x++) {
                    int sumX = 0;
                    int sumY = 0;
                    int magnitude;
                    int i, j;

                    // i walks the three columns, j the three cached rows
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            sumX += cache[j][x + i] * GX[i][j];
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    magnitude = abs(sumX) + abs(sumY);
                    if (magnitude > 255)
                        magnitude = 255;

                    dst[(y + 1) * cols + x_offset + x + 1] = (unsigned char) magnitude;
                }

                // Shift cache up, bring in next line
                memcpy(cache[0], cache[1], (size_t) width);
                memcpy(cache[1], cache[2], (size_t) width);
                memcpy(cache[2], src + (y + 3) * cols + x_offset, (size_t) width);

            } // end of row
        } // end of vertical strip
    } // end of AE

}

image.h

/*
 * An 8-bit image held in memory.
 * readImage() fills rows/cols from the BMP header and stores the pixel at
 * (row, col) in data[row*cols + col].
 */
typedef struct {
    int rows;              /* image height */
    int cols;              /* image width */
    unsigned char* data;   /* pixel bytes */
} sImage;

/* Writes edgeImage to edgeSob.bmp, taking header and color table from filename. */
void writeImage(char *filename, sImage *edgeImage);

/* Reads the bitmap filename into originalImage and sizes/allocates edgeImage to match. */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage);

image.c

/*
  FILE: image.c (adapted from edgeSob.c)
  AUTH: Chad Nelson (originally by Bill Green)
  DESC: BMP reading/writing helpers for the Sobel edge-detection application
  DATE: 02/20/2012
  REFS: edgeLap.c
*/

#include <convey/usr/cny_comp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "image.h"

/*-------PROTOTYPES---------*/
long getImageInfo(FILE*, long, int);
void copyImageInfo(FILE* inputFile, FILE* outputFile);
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors);



/*
 * Reads the bitmap `filename` into originalImage and prepares edgeImage.
 *
 * The width, height, file size and color count are taken from the BMP header
 * (byte offsets 18, 22, 2 and 46). Both images receive the same dimensions
 * and a buffer of (file size - header - color table) bytes obtained from
 * cny_cp_malloc(). rows*cols bytes following the color table are then read
 * into originalImage->data, so that pixel (row, col) is data[row*cols + col].
 * edgeImage->data is allocated but not initialized.
 *
 * Any failure (file cannot be opened, header values are unusable, allocation
 * fails, file is shorter than the header claims) prints a message and
 * terminates the program with exit status 1.
 */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage)
{
    FILE          *bmpInput;
    unsigned long vectorSize;
    unsigned long fileSize;
    unsigned long headerSize;
    unsigned long pixelCount;
    int           nColors;

    printf("Reading filename: %s\n", filename);

    /*-------OPEN INPUT FILE-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s for reading\n", filename);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    /* getImageInfo() seeks to the requested offset itself */
    fileSize = getImageInfo(bmpInput, 2, 4);
    originalImage->cols = (int)getImageInfo(bmpInput, 18, 4);
    originalImage->rows = (int)getImageInfo(bmpInput, 22, 4);
    edgeImage->rows = originalImage->rows;
    edgeImage->cols = originalImage->cols;

    /*--------PRINT DATA TO SCREEN----------*/
    printf("Width: %d\n", originalImage->cols);
    printf("Height: %d\n", originalImage->rows);
    printf("File size: %lu\n", fileSize);

    nColors = (int)getImageInfo(bmpInput, 46, 4);
    printf("nColors: %d\n", nColors);

    /* Reject headers the loops below cannot handle safely */
    if (originalImage->rows <= 0 || originalImage->cols <= 0 || nColors < 0) {
        printf("Unsupported or corrupt bitmap header\n");
        fclose(bmpInput);
        exit(1);
    }

    /*------ALLOCATE MEMORY FOR FILES--------*/
    headerSize = 14 + 40 + 4 * (unsigned long)nColors;
    pixelCount = (unsigned long)originalImage->rows * (unsigned long)originalImage->cols;

    /* The buffers hold fileSize - headerSize bytes; the pixels must fit */
    if (fileSize < headerSize || fileSize - headerSize < pixelCount) {
        printf("Bitmap header is inconsistent with the file size\n");
        fclose(bmpInput);
        exit(1);
    }

    vectorSize = fileSize - headerSize;
    printf("vectorSize: %lu\n", vectorSize);

    edgeImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));
    originalImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));

    if(edgeImage->data == NULL || originalImage->data == NULL) {
        printf("Failed to cny_cp_malloc image space\n");
        fclose(bmpInput);
        exit(1);
    }
    printf("%lu bytes malloc'ed for image data\n", vectorSize);

    fseek(bmpInput, (long)headerSize, SEEK_SET);

    /* Read input.bmp and store its raster data into originalImage->data */
    if (fread(originalImage->data, sizeof(unsigned char), pixelCount, bmpInput) != pixelCount) {
        printf("Unexpected end of file while reading %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    fclose(bmpInput);
}


/*
 * Writes edgeImage to "edgeSob.bmp".
 *
 * The BMP header and color table are copied from the original bitmap
 * `filename`; the rows*cols bytes of edgeImage->data are then written directly
 * after the color table, one image row at a time.
 *
 * If either file cannot be opened, or the output cannot be written completely,
 * a message is printed and the program terminates with exit status 1.
 */
void writeImage(char *filename, sImage *edgeImage) {
    FILE *bmpInput, *bmpOutput;
    int   nColors;
    int   Y;

    /*-------OPEN INPUT & OUTPUT FILES-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s for reading\n", filename);
        exit(1);
    }

    bmpOutput = fopen("edgeSob.bmp", "wb");
    if (bmpOutput == NULL) {
        printf("Failed to open edgeSob.bmp for writing\n");
        fclose(bmpInput);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    nColors = (int)getImageInfo(bmpInput, 46, 4);

    /*------COPY HEADER AND COLOR TABLE---------*/
    copyImageInfo(bmpInput, bmpOutput);
    copyColorTable(bmpInput, bmpOutput, nColors);

    /* Pixel data starts right after the headers and the color table */
    fseek(bmpOutput, (14+40+4*nColors), SEEK_SET);

    if (edgeImage->cols > 0) {
        size_t rowBytes = (size_t)edgeImage->cols;

        for(Y=0; Y<edgeImage->rows; Y++)  {
            if (fwrite(edgeImage->data + (size_t)Y*rowBytes, sizeof(char), rowBytes, bmpOutput) != rowBytes) {
                printf("Failed to write image data to edgeSob.bmp\n");
                fclose(bmpInput);
                fclose(bmpOutput);
                exit(1);
            }
        }
    }

    fclose(bmpInput);

    /* fclose() flushes buffered output, so a failure here means lost data */
    if (fclose(bmpOutput) != 0) {
        printf("Failed to write image data to edgeSob.bmp\n");
        exit(1);
    }

    printf("See edgeSob.bmp for results\n");
}


/*----------GET IMAGE INFO SUBPROGRAM--------------*/
/*
 * Reads an unsigned little-endian integer from inputFile.
 *
 * inputFile     - open stream to read from
 * offset        - byte position (from the start of the file) of the value
 * numberOfChars - number of bytes that make up the value
 *
 * Returns the value, least significant byte first. Bytes that lie beyond the
 * end of the file count as zero; 0 is returned if the seek fails. The file
 * position is left just after the last byte read.
 */
long getImageInfo(FILE* inputFile, long offset, int numberOfChars)
{
  unsigned long		value = 0UL;
  int			i;

  if (fseek(inputFile, offset, SEEK_SET) != 0)
    return 0L;

  for(i=0; i<numberOfChars; i++)
  {
     int byte = fgetc(inputFile);

     if (byte == EOF)
       break;

     /* byte i carries weight 256^i; ignore bytes that do not fit the result */
     if ((size_t)i < sizeof value)
       value |= (unsigned long)byte << (8 * i);
  }
  return (long)value;

} /* end of getImageInfo */

/*-------------COPIES HEADER AND INFO HEADER----------------*/
/*
 * Copies the BMP file header (14 bytes) and info header (40 bytes) from the
 * start of inputFile to the start of outputFile. If inputFile is shorter than
 * that, only the bytes actually present are copied.
 */
void copyImageInfo(FILE* inputFile, FILE* outputFile)
{
  unsigned char		header[14 + 40];
  size_t		got;

  fseek(inputFile, 0L, SEEK_SET);
  fseek(outputFile, 0L, SEEK_SET);

  got = fread(header, sizeof(char), sizeof header, inputFile);

  if (fwrite(header, sizeof(char), got, outputFile) != got)
    printf("Failed to write bitmap header\n");

}

/*----------------COPIES COLOR TABLE-----------------------------*/
/*
 * Copies the color table, 4*nColors bytes starting at byte 54, from inputFile
 * to the same position in outputFile. Nothing is copied when nColors <= 0;
 * copying stops early if inputFile ends or a write fails.
 */
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors)
{
  unsigned char		chunk[256];
  long			remaining = 4L * nColors;  /* there are (4*nColors) bytes in color table */

  fseek(inputFile, 54L, SEEK_SET);
  fseek(outputFile, 54L, SEEK_SET);

  while (remaining > 0)
  {
     size_t want = (remaining < (long)sizeof chunk) ? (size_t)remaining : sizeof chunk;
     size_t got = fread(chunk, sizeof(char), want, inputFile);

     if (got == 0)
       break;

     if (fwrite(chunk, sizeof(char), got, outputFile) != got)
     {
       printf("Failed to write bitmap color table\n");
       break;
     }
     remaining -= (long)got;
  }

}

cpSobel.s


	.file	"cpSobel.s"
	.ctext



#
# cpXbar function:
#   -reads AEG register 4, which indicates whether
#    the memory crossbar is enabled in the AE
#   -takes no arguments; the value read is returned to the caller in a8
#    (the C caller in UserApp.c treats a zero result as "crossbar not enabled")
#

	.globl	cpXbar
	.type	cpXbar. @function
	.signature	pdk=4

cpXbar:
	mov %aeg, $4, %a8		# copy AEG 4 into a8; integer values are returned in a8
	rtn 




#
# cpSobel function:
#   -writes array pointers (image data) and column/row sizes to AEG registers
#   -calls caep00 to execute the sobel operation
#
# Arguments arrive in a8..a11, in the order the C caller passes them
# (original data, edge data, rows, cols), and are stored in AEG 0..3.
# These are the registers the simulator's CAEP00 handler reads back.
#

	.globl	cpSobel
	.type	cpSobel. @function
	.signature	pdk=4

cpSobel:
	mov %a8, $0, %aeg		# a8 contains address of original image data -> AEG 0
	mov %a9, $1, %aeg		# a9 contains address of edge image data     -> AEG 1
	mov %a10, $2, %aeg		# a10 contains number of rows                -> AEG 2
	mov %a11, $3, %aeg		# a11 contains number of columns             -> AEG 3

	caep00 $0               # make the coprocessor call (the sobel operation)

	rtn 



	.cend

Makefile

# Builds the host application (UserApp.exe) and the software simulation model.
EXEC = UserApp.exe
SWMODEL = ../sim/CaeSimPers

# all and clean name actions, not files
.PHONY: all clean

all:	$(EXEC) $(SWMODEL)

clean:
	rm -f $(EXEC) image.o
	$(MAKE) --directory=../sim clean

# Libraries go after the objects that use them
$(EXEC):	UserApp.c cpSobel.s image.o
	cnycc -g UserApp.c cpSobel.s image.o -lm -o $(EXEC)

$(SWMODEL): ../Makefile.include ../sim/CaeIsaSobel.cpp
	$(MAKE) -C ../sim

# Compile only: image.c is the input, image.o the output
image.o:	image.h image.c
	cnycc -g -c image.c -o image.o

Software Simulation

The software simulation takes ~7 seconds to run.

CaeIsaSobel.cpp

/*
  AUTH: Chad Nelson (based on Sobel algo by Bill Green)
  DESC: 2 3x3 Sobel masks for edge detection
  DATE: 02/20/11
*/

#include "CaeSim.h"
#include "CaeIsa.h"
#include <stdio.h>
#include <stdlib.h>

#define MAX_AEG_INDEX 128
#define PERS_SIGN_CAE 0x4001000101000LL

#define NUM_AES    4

#undef DEBUG

#ifndef MC_XBAR
    #define MC_XBAR 0
#endif

#define LINE_WIDTH 512

// Personality set-up for the simulator model: declares how many AEG registers
// exist, writes an initial value, and registers the personality signature.
void CCaeIsa::InitPers() {
    SetAegCnt(MAX_AEG_INDEX);      // MAX_AEG_INDEX is 128 in this file
    WriteAeg(0, 0, 0);             // presumably (aeId, register, value): clears AEG 0 of AE 0 - TODO confirm
    SetPersSign(PERS_SIGN_CAE);    // signature constant defined at the top of this file
}



void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
    if (opcode == 0x20) {
        // CAEP00 - sobel

        uint64 original, edge, rows, cols;
        
        original = ReadAeg(aeId, 0);
        edge     = ReadAeg(aeId, 1);
        rows     = ReadAeg(aeId, 2);
        cols     = ReadAeg(aeId, 3);

        unsigned int startrow = (rows * aeId) / NUM_AES - 3;
        unsigned int endrow = (rows * (aeId+1)) / NUM_AES;

        if (aeId == 0) 
            startrow = 0;

        int GX[3][3] = { {-1, 0, 1}, 
                         {-2, 0, 2},
                         {-1, 0, 1} };

        int GY[3][3] = { { 1, 2, 1}, 
                         { 0, 0, 0},
                         {-1,-2,-1} };

        long sumX, sumY, SUM;
        unsigned int x, y, i, j;

        unsigned char cache[3][LINE_WIDTH];
        unsigned int x_offset = 0;
        uint64 data, address;
        
        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) { 

            // Load first three rows of pixel data into cache
            for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++)  {
                for(y=0; y < 3; y++)  {
                    address = original + (x_offset + x) + ((startrow + y)*cols);
                    
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[y][x] = (char) data;
                }
            }

            for (y = startrow; y < endrow - 3; y++) {

                // Make edge calculations
                for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++)  {

                    // -------X GRADIENT APPROXIMATION------
                    sumX = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<3; j++)  {
                            sumX += cache[j][x + i] * GX[i][j];
                        }
                    }

                    // -------Y GRADIENT APPROXIMATION-------
                    sumY = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<3; j++)  {
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = abs(sumX) + abs(sumY);

                    if(SUM > 255) 
                        SUM = 255;
                    if(SUM < 0) 
                        SUM = 0;

                    address = edge + (x_offset + x + 1) + ((y + 1)*cols);

                    AeMemStore(aeId, McNum(address), address, 1, false, SUM);
                }

                // Shift cache up, bring in next line
                for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++)  {
                    address = original + (x_offset + x) + ((y + 3)*cols);
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);  
  
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = (char) data;
                }

            } // end of row
        } // end of vertical strip

        // End of CAEP00 - sobel
    } else {
        // other CAEPXX instructions
        printf("CAEP00 was not called.\n");
        for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) {
            // SetException(int aeID, int bitnum);  AEUIE = 0
            SetException(aeId, AEUIE); 
        }
    }
}