How to test the performance of data alignment
This article shows how to write a program that tests which data alignment gives the best memory I/O performance on a target architecture.
Data alignment
Data alignment is an important issue for all programmers who access memory directly. Data alignment affects how well your software performs, and even whether it runs at all. Read more on data alignment from IBM ...
The performance of various alignments depends on three main factors :
- Whether the processor has hardware support for unaligned access
- The memory access granularity
- Buffer size used to access memory
Program for testing data alignment performance
/*
 * align-test: measures how long it takes to read-modify-write a large
 * buffer in 32-bit (default) or 64-bit (-DMUNGE64) blocks when the buffer
 * starts at a chosen alignment.
 *
 * The buffer is placed at an address that is a multiple of the requested
 * alignment but NOT of the next larger power of two, so each run really
 * exercises the alignment it reports.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#define DATASIZE 1000000000

/* The raw allocation is rounded up to this boundary before the requested
 * offset is applied. */
#define BASE_ALIGN 64

uint32_t stampstart(void);
uint32_t stampstop(uint32_t start);
void Munge32(void *data, uint32_t size);
void Munge64(void *data, uint32_t size);

#ifndef MUNGE64
#define MUNGE Munge32
#else
#define MUNGE Munge64
#endif

/*
 * Returns a pointer inside the allocation starting at `raw` whose address is
 * `offset` bytes past a BASE_ALIGN boundary.  The allocation must have at
 * least BASE_ALIGN + offset spare bytes beyond the region that will be used.
 */
static uint8_t *offset_from_boundary(uint8_t *raw, size_t offset)
{
    uintptr_t addr = (uintptr_t) raw;
    size_t pad = (BASE_ALIGN - addr % BASE_ALIGN) % BASE_ALIGN;

    return raw + pad + offset;
}

/*
 * Reads the wall clock.  Stores the broken-down local time in *local and the
 * microsecond part in *usec (both for display only) and returns the time in
 * milliseconds since the epoch, truncated to 32 bits.  Two such values can be
 * subtracted with unsigned arithmetic to get an elapsed time that is not
 * affected by midnight or by the local time zone.
 */
static uint32_t read_clock(struct tm *local, long *usec)
{
    struct timeval tv;
    const struct tm *tm;
    time_t secs;

    /* The timezone argument of gettimeofday() is obsolete; pass NULL. */
    gettimeofday(&tv, NULL);
    secs = tv.tv_sec;

    tm = localtime(&secs);
    if (tm != NULL) {
        *local = *tm;
    } else {
        memset(local, 0, sizeof *local);
    }
    *usec = (long) tv.tv_usec;

    return (uint32_t) ((uint64_t) secs * 1000u + (uint64_t) (tv.tv_usec / 1000));
}

/* Milliseconds elapsed since local midnight, as shown in the "(~N ms)" field. */
static uint32_t ms_of_day(const struct tm *local, long usec)
{
    return (uint32_t) (local->tm_hour * 3600 * 1000
                       + local->tm_min * 60 * 1000
                       + local->tm_sec * 1000
                       + (int) (usec / 1000));
}

/*
 * Usage: align-test [alignment]
 *
 * alignment is 8, 16 or 32 (bits).  Any other number, or no argument at all,
 * runs the "unaligned" case, which uses an odd start address.  Note that an
 * 8-bit (byte) alignment places the buffer at an odd address as well.
 */
int main(int argc, char **argv)
{
    long align_bits = 0;
    size_t offset;
    size_t alloc_size = (size_t) DATASIZE + 2 * BASE_ALIGN;
    uint8_t *raw;
    uint8_t *data;
    uint32_t start;

    if (argc > 2) {
        printf("usage : %s alignment\n", argv[0]);
        exit(-1);
    }

    if (argc == 2) {
        char *end;

        align_bits = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0') {
            printf("usage : %s alignment\n", argv[0]);
            exit(-1);
        }
    }

    switch (align_bits) {
    case 8:
    case 16:
    case 32:
        printf("\nData aligned at %ld bits\n", align_bits);
        offset = (size_t) align_bits / 8;
        break;
    default:
        /* Unalign data */
        printf("\nData unaligned \n");
        offset = 1;
        break;
    }

    raw = malloc(alloc_size);
    if (raw == NULL) {
        fprintf(stderr, "%s: cannot allocate %zu bytes\n", argv[0], alloc_size);
        return EXIT_FAILURE;
    }

    /*
     * Give the buffer defined contents and touch every page before the
     * clock starts, so that only the munge loop itself is timed.
     */
    memset(raw, 0, alloc_size);
    data = offset_from_boundary(raw, offset);

    start = stampstart();
    MUNGE(data, DATASIZE);
    stampstop(start);

    free(raw);
    return 0;
}

/*
 * Negates `size` bytes at `data` in place, 64 bits (one double) at a time,
 * then negates the remaining size % 8 bytes one by one.
 *
 * Each block is loaded and stored with memcpy so the access is well defined
 * even when `data` is not aligned for double.
 */
void Munge64(void *data, uint32_t size)
{
    uint8_t *p = data;
    uint8_t *blocks_end = p + (size & ~(uint32_t) 7);
    uint8_t *end = p + size;

    printf("Using 64bit blocks\n");

    while (p != blocks_end) {
        double block;

        memcpy(&block, p, sizeof block);
        block = -block;
        memcpy(p, &block, sizeof block);
        p += sizeof block;
    }

    while (p != end) {
        *p = (uint8_t) -*p;
        ++p;
    }
}

/*
 * Negates `size` bytes at `data` in place, 32 bits at a time, then negates
 * the remaining size % 4 bytes one by one.
 *
 * Each block is loaded and stored with memcpy so the access is well defined
 * even when `data` is not aligned for uint32_t.
 */
void Munge32(void *data, uint32_t size)
{
    uint8_t *p = data;
    uint8_t *blocks_end = p + (size & ~(uint32_t) 3);
    uint8_t *end = p + size;

    printf("Using 32bit blocks\n");

    while (p != blocks_end) {
        uint32_t block;

        memcpy(&block, p, sizeof block);
        block = -block;
        memcpy(p, &block, sizeof block);
        p += sizeof block;
    }

    while (p != end) {
        *p = (uint8_t) -*p;
        ++p;
    }
}

/*
 * Prints the start timestamp and returns a millisecond time value to be
 * handed to stampstop().
 */
uint32_t stampstart(void)
{
    struct tm local;
    long usec;
    uint32_t start = read_clock(&local, &usec);

    printf("TIMESTAMP-START\t %d:%02d:%02d:%ld (~%" PRIu32 " ms)\n",
           local.tm_hour, local.tm_min, local.tm_sec, usec,
           ms_of_day(&local, usec));

    return start;
}

/*
 * Prints the end timestamp and the time elapsed since `start` (a value
 * returned by stampstart()), and returns the end time value.
 */
uint32_t stampstop(uint32_t start)
{
    struct tm local;
    long usec;
    uint32_t stop = read_clock(&local, &usec);

    printf("TIMESTAMP-END\t %d:%02d:%02d:%ld (~%" PRIu32 " ms) \n",
           local.tm_hour, local.tm_min, local.tm_sec, usec,
           ms_of_day(&local, usec));
    /* Unsigned subtraction stays correct if the 32-bit value wrapped. */
    printf("ELAPSED\t %" PRIu32 " ms\n", (uint32_t) (stop - start));

    return stop;
}
Compilation
- To use 32bit I/O buffers
gcc -o align-test align-test.c
- To use 64bit I/O buffers
gcc -o align-test align-test.c -DMUNGE64
Usage
- Test unaligned memory access (any value other than 8, 16 or 32)
align-test 0
- Test 8bits alignment
align-test 8
- Test 16bits alignment
align-test 16
- Test 32bits alignment
align-test 32
| Labels: performance, coding, embedded, howto |
|

Comment