Team Gryffindor - Sobel Source Code: Difference between revisions

From Cpre584
Jump to navigation Jump to search
 
(2 intermediate revisions by the same user not shown)
Line 450: Line 450:
= Software Simulation =
= Software Simulation =


[[Image:Gry-edge1.PNG]]
The software simulation takes ~7 seconds to run.


== CaeIsaSobel.cpp ==
== CaeIsaSobel.cpp ==
Line 540: Line 540:
                     sumX = 0;
                     sumX = 0;
                     for(i=0; i<3; i++) {
                     for(i=0; i<3; i++) {
                         for(j=0; j<=3; j++)  {
                         for(j=0; j<3; j++)  {
                             sumX += cache[j][x + i] * GX[i][j];
                             sumX += cache[j][x + i] * GX[i][j];
                         }
                         }
Line 548: Line 548:
                     sumY = 0;
                     sumY = 0;
                     for(i=0; i<3; i++) {
                     for(i=0; i<3; i++) {
                         for(j=0; j<=3; j++)  {
                         for(j=0; j<3; j++)  {
                             sumY += cache[j][x + i] * GY[i][j];
                             sumY += cache[j][x + i] * GY[i][j];
                         }
                         }

Latest revision as of 01:28, 23 February 2012

User Application

UserApp.c

#include <convey/usr/cny_comp.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "image.h"

#undef DEBUG

typedef unsigned long long uint64;
extern void cpSobel();
extern int cpXbar();
void usage (char *);

void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols);


/*
 * ./run ./lena.bmp
 * Reads in a grayscale bitmap, calls the sobel() function, writes the bitmap to edgeSob.bmp
 */
int main(int argc, char *argv[]) {
    cny_image_t signature, signature2;
    int status;
    int xbar_ok;
    sImage source_img, edge_img;

    /* Exactly one argument is expected: the path of the input bitmap. */
    if (argc != 2) {
        printf("Usage: %s bmpInput.bmp\n", argv[0]);
        exit(1);
    }

    /* Look up the "pdk" personality signature used for the coprocessor calls. */
    cny_get_signature("pdk", &signature, &signature2, &status);
    if (status != 0) {
        printf("***ERROR: cny_get_signature() Failure: %d\n", status);
        exit(1);
    }

    /* Refuse to run when the coprocessor memory uses 31-31 interleave. */
    if (cny_cp_interleave() == CNY_MI_3131) {
        printf("ERROR - interleave set to 3131, this personality requires binary interleave\n");
        exit(1);
    }

    /*
     * Ask the personality (via cpXbar) whether the memory crossbar is enabled.
     * A zero answer is only reported; execution continues regardless.
     */
    xbar_ok = i_copcall_fmt(signature, cpXbar, "");
    if (xbar_ok == 0) {
        printf("ERROR - Crossbar not enabled!\n");
    }

    /* Load the input bitmap; readImage also allocates the output buffer. */
    readImage(argv[1], &source_img, &edge_img);

    /*
     * Run the edge detection on the coprocessor.  For a host-only run the
     * mock sobel() below takes the same four arguments.
     */
    copcall_fmt(signature, cpSobel, "AAAA",
                source_img.data, edge_img.data, source_img.rows, source_img.cols);

    /* Write the result (always to edgeSob.bmp, reusing the input's header). */
    writeImage(argv[1], &edge_img);

    return 0;
}


#define NUM_AES 4
#define LINE_WIDTH 512

/*
 * Host-side mock of the software simulator's Sobel instruction.
 *
 * Runs the same strip/row-cache algorithm as the simulator, once for each of
 * the NUM_AES application engines, directly on host memory.
 *
 *   original  address of the source pixels (one byte per pixel, row-major)
 *   edge      address of the destination buffer (same layout and size)
 *   rows      image height in pixels
 *   cols      image width in pixels
 *
 * Each engine handles a horizontal band of rows; within a band the image is
 * processed in vertical strips of LINE_WIDTH pixels using a three-row cache.
 * Images smaller than 3x3 have no complete 3x3 window and are left untouched.
 *
 * NOTE(review): because of the "- 3" loop bounds, output row rows-2, output
 * column cols-2 and the last usable column of every strip are never written.
 * This is kept as-is so the mock stays in step with the simulator.
 */
void sobel(uint64 original, uint64 edge, uint64 rows, uint64 cols) {
    /* Coefficient tables; indexed [i][j] with i = column offset, j = cache row. */
    static const int GY[3][3] = { {-1, 0, 1},
                                  {-2, 0, 2},
                                  {-1, 0, 1} };

    static const int GX[3][3] = { { 1, 2, 1},
                                  { 0, 0, 0},
                                  {-1,-2,-1} };

    const unsigned char *src = (const unsigned char *) original;
    unsigned char *dst = (unsigned char *) edge;
    unsigned char cache[3][LINE_WIDTH];
    int aeId;

    /* Without this guard "cols - 3" would wrap and the preload would read past the image. */
    if (rows < 3 || cols < 3)
        return;

    for (aeId = 0; aeId < NUM_AES; aeId++) {
        /* Band of rows for this engine; it starts 3 rows early so bands join up. */
        int startrow = (int) ((rows * aeId) / NUM_AES) - 3;
        int endrow = (int) ((rows * (aeId + 1)) / NUM_AES);
        uint64 x_offset;

        if (startrow < 0)
            startrow = 0;

        /* Vertical strips, each LINE_WIDTH pixels wide with a 2 pixel overlap. */
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) {
            int x, y, i, j;

            /* Preload the first three rows of the strip into the cache. */
            for (x = 0; x < LINE_WIDTH && x + x_offset < cols; x++) {
                for (y = 0; y < 3; y++) {
                    cache[y][x] = src[(x_offset + x) + (uint64) (startrow + y) * cols];
                }
            }

            for (y = startrow; y < endrow - 3; y++) {

                /* Edge calculation for every complete 3x3 window in the cache. */
                for (x = 0; x < LINE_WIDTH - 3 && x + x_offset < cols - 3; x++) {
                    long sumX = 0;
                    long sumY = 0;
                    long SUM;

                    /* j must stay below 3: the cache and both tables have 3 rows. */
                    for (i = 0; i < 3; i++) {
                        for (j = 0; j < 3; j++) {
                            sumX += cache[j][x + i] * GX[i][j];
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    /* Gradient magnitude approximation (Myler p.218), clamped to 255. */
                    SUM = labs(sumX) + labs(sumY);
                    if (SUM > 255)
                        SUM = 255;

                    /* The result belongs to the centre pixel of the window. */
                    dst[(x_offset + x + 1) + (uint64) (y + 1) * cols] = (unsigned char) SUM;
                }

                /* Shift the cache up one row and bring in the next line. */
                for (x = 0; x < LINE_WIDTH && x + x_offset < cols; x++) {
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = src[(x_offset + x) + (uint64) (y + 3) * cols];
                }

            } /* end of row */
        } /* end of vertical strip */
    } /* end of AE */
}

image.h

/* An 8-bit image: its dimensions plus a pointer to the pixel bytes. */
typedef struct {
    int rows;             /* image height in pixels */
    int cols;             /* image width in pixels */
    unsigned char *data;  /* pixel bytes, addressed as data[row*cols + col] */
} sImage;

/* Loads the bitmap at filename into originalImage and allocates edgeImage to match. */
void readImage(char *filename, sImage *originalImage, sImage *edgeImage);

/* Writes edgeImage to edgeSob.bmp, taking header and colour table from filename. */
void writeImage(char *filename, sImage *edgeImage);

image.c

/*
  FILE: image.c (derived from edgeSob.c) - WORKS!!
  AUTH: Chad Nelson (originally by Bill Green)
  DESC: 2 3x3 Sobel masks for edge detection
  DATE: 02/20/2012
  REFS: edgeLap.c
*/

#include <convey/usr/cny_comp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "image.h"

/*-------PROTOTYPES---------*/
long getImageInfo(FILE*, long, int);
void copyImageInfo(FILE* inputFile, FILE* outputFile);
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors);



/*
 * Reads an 8-bit bitmap into coprocessor-allocated memory.
 *
 *   filename       path of the .bmp file to read
 *   originalImage  receives rows, cols and a cny_cp_malloc'ed buffer holding
 *                  the pixel bytes, stored as data[row*cols + col]
 *   edgeImage      receives the same rows/cols and a zero-filled buffer of
 *                  the same size for the result
 *
 * Progress information is printed to stdout.  Any failure (file cannot be
 * opened, inconsistent header, allocation failure, truncated pixel data)
 * prints a message and terminates the program with exit status 1.
 *
 * NOTE(review): rows are read back to back, so row padding is not handled;
 * this presumably limits input to widths that are a multiple of 4 - confirm.
 */
void readImage(char* filename, sImage *originalImage, sImage *edgeImage)
{
    FILE          *bmpInput;
    unsigned long vectorSize;
    unsigned long fileSize;
    unsigned long headerSize;
    unsigned long pixelCount;
    unsigned long i;
    unsigned int  nColors;

    printf("Reading filename: %s\n", filename);

    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    fileSize = getImageInfo(bmpInput, 2, 4);
    originalImage->cols = (int)getImageInfo(bmpInput, 18, 4);
    originalImage->rows = (int)getImageInfo(bmpInput, 22, 4);
    edgeImage->rows = originalImage->rows;
    edgeImage->cols = originalImage->cols;

    /*--------PRINT DATA TO SCREEN----------*/
    printf("Width: %d\n", originalImage->cols);
    printf("Height: %d\n", originalImage->rows);
    printf("File size: %lu\n", fileSize);

    nColors = (unsigned int)getImageInfo(bmpInput, 46, 4);
    printf("nColors: %u\n", nColors);

    /* Pixel data follows the 14+40 byte headers and the 4-byte-per-entry colour table. */
    headerSize = 14 + 40 + 4 * (unsigned long)nColors;
    if (originalImage->rows <= 0 || originalImage->cols <= 0 || fileSize < headerSize) {
        printf("Unsupported or corrupt bitmap header in %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    /*------ALLOCATE MEMORY FOR FILES--------*/
    vectorSize = fileSize - headerSize;
    printf("vectorSize: %lu\n", vectorSize);

    /* The buffers hold vectorSize bytes, so the raster must not be larger than that. */
    pixelCount = (unsigned long)originalImage->rows * (unsigned long)originalImage->cols;
    if (pixelCount > vectorSize) {
        printf("Bitmap header of %s describes more pixels than the file holds\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    edgeImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));
    originalImage->data = (unsigned char *) (cny_cp_malloc)(vectorSize*sizeof(unsigned char));

    if(edgeImage->data == NULL || originalImage->data == NULL) {
        printf("Failed to cny_cp_malloc image space\n");
        fclose(bmpInput);
        exit(1);
    }
    printf("%lu bytes malloc'ed for image data\n", vectorSize);

    /* The edge detector never writes the image border; start from black. */
    for (i = 0; i < vectorSize; i++) {
        edgeImage->data[i] = 0;
    }

    /* Read the raster data of the input file into originalImage->data. */
    fseek(bmpInput, (long)headerSize, SEEK_SET);
    if (fread(originalImage->data, sizeof(unsigned char), pixelCount, bmpInput) != pixelCount) {
        printf("Failed to read pixel data from %s\n", filename);
        fclose(bmpInput);
        exit(1);
    }

    fclose(bmpInput);
}


/*
 * Writes an edge image to "edgeSob.bmp".
 *
 *   filename   path of the bitmap the image was derived from; its header and
 *              colour table are copied into the output file
 *   edgeImage  rows, cols and pixel bytes (data[row*cols + col]) to write
 *
 * The pixel bytes are written directly after the copied header and colour
 * table.  If either file cannot be opened, or the pixel data cannot be
 * written completely, a message is printed and the program exits with
 * status 1.
 */
void writeImage(char *filename, sImage *edgeImage) {
    FILE   *bmpInput, *bmpOutput;
    int    nColors;
    long   pixelOffset;
    size_t pixelCount = 0;
    size_t written;
    int    closeStatus;

    /*-------OPEN INPUT & OUTPUT FILES-------*/
    bmpInput = fopen(filename, "rb");
    if (bmpInput == NULL) {
        printf("Failed to open %s\n", filename);
        exit(1);
    }

    bmpOutput = fopen("edgeSob.bmp", "wb");
    if (bmpOutput == NULL) {
        printf("Failed to open edgeSob.bmp for writing\n");
        fclose(bmpInput);
        exit(1);
    }

    /*-------GET INPUT BMP DATA--------*/
    nColors = (int)getImageInfo(bmpInput, 46, 4);

    /*------COPY HEADER AND COLOR TABLE---------*/
    copyImageInfo(bmpInput, bmpOutput);
    copyColorTable(bmpInput, bmpOutput, nColors);

    /* Pixel data starts after the 14+40 byte headers and the colour table. */
    pixelOffset = 14 + 40 + 4L*nColors;
    fseek(bmpOutput, pixelOffset, SEEK_SET);

    if (edgeImage->rows > 0 && edgeImage->cols > 0) {
        pixelCount = (size_t)edgeImage->rows * (size_t)edgeImage->cols;
    }
    written = fwrite(edgeImage->data, sizeof(unsigned char), pixelCount, bmpOutput);

    fclose(bmpInput);
    closeStatus = fclose(bmpOutput);   /* flushes; a failure here means data was lost */

    if (written != pixelCount || closeStatus != 0) {
        printf("Failed to write edgeSob.bmp\n");
        exit(1);
    }

    printf("See edgeSob.bmp for results\n");
}


/*----------GET IMAGE INFO SUBPROGRAM--------------*/
/*
 * Reads a little-endian unsigned integer from a file.
 *
 *   inputFile      open stream to read from; its position is changed
 *   offset         byte offset from the start of the file
 *   numberOfChars  number of bytes that make up the value
 *
 * Returns the value assembled from the bytes, least significant byte first.
 * Bytes that cannot be read (seek failure, end of file) count as zero.
 * Bytes beyond the width of a long are read but ignored.
 */
long getImageInfo(FILE* inputFile, long offset, int numberOfChars)
{
  unsigned long		value = 0UL;
  int				i;

  if (fseek(inputFile, offset, SEEK_SET) != 0)
  {
     return 0L;
  }

  for(i=0; i<numberOfChars; i++)
  {
     int byte = fgetc(inputFile);

     if (byte == EOF)
     {
        break;
     }
     /* integer shift instead of pow(): exact, and no floating point needed */
     if ((size_t)i < sizeof value)
     {
        value |= (unsigned long)byte << (8 * i);
     }
  }
  return (long)value;

} /* end of getImageInfo */

/*-------------COPIES HEADER AND INFO HEADER----------------*/
/*
 * Copies the bitmap headers from the start of inputFile to the start of
 * outputFile: the 14-byte file header followed by the 40-byte info header.
 * Both stream positions are changed.  If the input holds fewer than 54
 * bytes, only the bytes that exist are copied.
 */
void copyImageInfo(FILE* inputFile, FILE* outputFile)
{
  enum { BMP_HEADER_BYTES = 14 + 40 };
  unsigned char		header[BMP_HEADER_BYTES];
  size_t			count;

  fseek(inputFile, 0L, SEEK_SET);
  fseek(outputFile, 0L, SEEK_SET);

  count = fread(header, sizeof(unsigned char), sizeof header, inputFile);
  fwrite(header, sizeof(unsigned char), count, outputFile);

}

/*----------------COPIES COLOR TABLE-----------------------------*/
/*
 * Copies the colour table from inputFile to outputFile.
 *
 *   inputFile   bitmap to read from
 *   outputFile  bitmap being written
 *   nColors     number of colour table entries, 4 bytes each
 *
 * In both files the table starts at byte offset 54, directly after the
 * headers.  Exactly 4*nColors bytes are copied, or fewer if the input ends
 * early.  Nothing is copied when nColors is zero or negative.
 */
void copyColorTable(FILE* inputFile, FILE* outputFile, int nColors)
{
  unsigned char		chunk[256];
  long				remaining = 4L * nColors;

  fseek(inputFile, 54L, SEEK_SET);
  fseek(outputFile, 54L, SEEK_SET);

  while (remaining > 0)
  {
     size_t want = sizeof chunk;
     size_t got;

     if ((unsigned long)remaining < want)
     {
        want = (size_t)remaining;
     }

     got = fread(chunk, sizeof(unsigned char), want, inputFile);
     if (got == 0 || fwrite(chunk, sizeof(unsigned char), got, outputFile) != got)
     {
        break;   /* input exhausted or output failed: stop copying */
     }
     remaining -= (long)got;
  }

}

cpSobel.s


	.file	"cpSobel.s"
	.ctext



#
# cpXbar function:
#   -Reads AEG 4, which indicates whether the memory crossbar is
#    enabled in the AE, and returns that value to the caller.
#   -Called from main() in UserApp.c through i_copcall_fmt() with an
#    empty argument format; main() treats a zero result as
#    "crossbar not enabled".
#

	.globl	cpXbar
	.type	cpXbar. @function
	.signature	pdk=4

cpXbar:
	mov %aeg, $4, %a8		# copy AEG 4 into a8; integer values are returned in a8
	rtn 




#
# cpSobel function:
#   -writes array pointers (image data) and row/column sizes to AEG registers
#    0-3, in the order main() passes them to copcall_fmt(): original image
#    address, edge image address, rows, columns
#   -calls caep00 to execute the sobel operation; the simulator's handler
#    reads the same four AEG registers back
#

	.globl	cpSobel
	.type	cpSobel. @function
	.signature	pdk=4

cpSobel:
	mov %a8, $0, %aeg		# a8 contains address of original image data
	mov %a9, $1, %aeg		# a9 contains address of edge image data
	mov %a10, $2, %aeg		# a10 contains number of rows
	mov %a11, $3, %aeg		# a11 contains number of columns

	caep00 $0               # make the coprocessor call

	rtn 



	.cend

Makefile

# Builds the host application (UserApp.exe) and the software simulation model.
EXEC = UserApp.exe
SWMODEL = ../sim/CaeSimPers

# Neither target names a file, so never treat them as up to date.
.PHONY: all clean

all:	$(EXEC) $(SWMODEL)

clean:
	rm -f $(EXEC) image.o
	$(MAKE) --directory=../sim clean

# Libraries go after the objects that use them.
$(EXEC):	UserApp.c cpSobel.s image.o image.h
	cnycc -g UserApp.c cpSobel.s image.o -lm -o $(EXEC)

# Use $(MAKE) so flags such as -j and -n reach the sub-make.
$(SWMODEL): ../Makefile.include ../sim/CaeIsaSobel.cpp
	$(MAKE) -C ../sim

# Compile only: -c takes no argument, the output name is given with -o.
image.o:	image.h image.c
	cnycc -g -c image.c -o image.o

Software Simulation

The software simulation takes ~7 seconds to run.

CaeIsaSobel.cpp

/*
  AUTH: Chad Nelson (based on Sobel algo by Bill Green)
  DESC: 2 3x3 Sobel masks for edge detection
  DATE: 02/20/11
*/

#include "CaeSim.h"
#include "CaeIsa.h"
#include <stdio.h>
#include <stdlib.h>

#define MAX_AEG_INDEX 128
#define PERS_SIGN_CAE 0x4001000101000LL

#define NUM_AES    4

#undef DEBUG

#ifndef MC_XBAR
    #define MC_XBAR 0
#endif

#define LINE_WIDTH 512

// Personality initialisation hook for the simulator model.
// Declares how many AEG registers the personality has, writes an initial
// zero, and sets the personality signature.
void CCaeIsa::InitPers() {
    SetAegCnt(MAX_AEG_INDEX);       // AEG register count: MAX_AEG_INDEX (128)
    WriteAeg(0, 0, 0);              // presumably (aeId, AEG index, value): zero AEG 0 of AE 0 - TODO confirm argument order
    SetPersSign(PERS_SIGN_CAE);     // personality signature constant defined at the top of this file
}



void CCaeIsa::CaepInst(int aeId, int opcode, int immed, uint32 inst, uint64 scalar) {
    if (opcode == 0x20) {
        // CAEP00 - sobel

        uint64 original, edge, rows, cols;
        
        original = ReadAeg(aeId, 0);
        edge     = ReadAeg(aeId, 1);
        rows     = ReadAeg(aeId, 2);
        cols     = ReadAeg(aeId, 3);

        unsigned int startrow = (rows * aeId) / NUM_AES - 3;
        unsigned int endrow = (rows * (aeId+1)) / NUM_AES;

        if (aeId == 0) 
            startrow = 0;

        int GX[3][3] = { {-1, 0, 1}, 
                         {-2, 0, 2},
                         {-1, 0, 1} };

        int GY[3][3] = { { 1, 2, 1}, 
                         { 0, 0, 0},
                         {-1,-2,-1} };

        long sumX, sumY, SUM;
        unsigned int x, y, i, j;

        unsigned char cache[3][LINE_WIDTH];
        unsigned int x_offset = 0;
        uint64 data, address;
        
        // Divide image into vertical strips, each LINE_WIDTH pixels wide
        // with a 2 pixel overlap
        for (x_offset = 0; x_offset < cols; x_offset += LINE_WIDTH - 2) { 

            // Load first three rows of pixel data into cache
            for(x=0; (x < LINE_WIDTH) && (x + x_offset < cols); x++)  {
                for(y=0; y < 3; y++)  {
                    address = original + (x_offset + x) + ((startrow + y)*cols);
                    
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);
                    cache[y][x] = (char) data;
                }
            }

            for (y = startrow; y < endrow - 3; y++) {

                // Make edge calculations
                for(x=0; x < LINE_WIDTH - 3 && (x + x_offset < cols - 3); x++)  {

                    // -------X GRADIENT APPROXIMATION------
                    sumX = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<3; j++)  {
                            sumX += cache[j][x + i] * GX[i][j];
                        }
                    }

                    // -------Y GRADIENT APPROXIMATION-------
                    sumY = 0;
                    for(i=0; i<3; i++) {
                        for(j=0; j<3; j++)  {
                            sumY += cache[j][x + i] * GY[i][j];
                        }
                    }

                    // ---GRADIENT MAGNITUDE APPROXIMATION (Myler p.218)----
                    SUM = abs(sumX) + abs(sumY);

                    if(SUM > 255) 
                        SUM = 255;
                    if(SUM < 0) 
                        SUM = 0;

                    address = edge + (x_offset + x + 1) + ((y + 1)*cols);

                    AeMemStore(aeId, McNum(address), address, 1, false, SUM);
                }

                // Shift cache up, bring in next line
                for(x=0; x < LINE_WIDTH && (x + x_offset < cols); x++)  {
                    address = original + (x_offset + x) + ((y + 3)*cols);
                    AeMemLoad(aeId, McNum(address), address, 1, false, data);  
  
                    cache[0][x] = cache[1][x];
                    cache[1][x] = cache[2][x];
                    cache[2][x] = (char) data;
                }

            } // end of row
        } // end of vertical strip

        // End of CAEP00 - sobel
    } else {
        // other CAEPXX instructions
        printf("CAEP00 was not called.\n");
        for (int aeId = 0; aeId < CAE_AE_CNT; aeId += 1) {
            // SetException(int aeID, int bitnum);  AEUIE = 0
            SetException(aeId, AEUIE); 
        }
    }
}