Skip to content

Commit

Permalink
Volk: A bunch of new Orc routines plus a couple of build changes.
Browse files Browse the repository at this point in the history
32fc_magnitude_16s fails test_all right now.
  • Loading branch information
Nick Foster committed Dec 17, 2010
1 parent 51b45e2 commit e94b1b8
Show file tree
Hide file tree
Showing 28 changed files with 202 additions and 61 deletions.
2 changes: 1 addition & 1 deletion config/orc.m4
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dnl ORC_CHECK([REQUIRED_VERSION])

AC_DEFUN([ORC_CHECK],
[
ORC_REQ=ifelse([$1], , "0.4.6", [$1])
ORC_REQ=ifelse([$1], , "0.4.10", [$1])
enable_orc = auto
if test "x$enable_orc" != "xno" ; then
Expand Down
14 changes: 13 additions & 1 deletion include/volk/volk_16sc_deinterleave_16s_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer
}
#endif /* LV_HAVE_GENERIC */


#if LV_HAVE_ORC
/* Orc variant. The _orc_impl routine is only declared in this header; its definition lives elsewhere. */
/* NOTE(review): presumably built from orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc -- confirm. */
/*!
\brief Deinterleaves the complex 16 bit vector into I & Q vector data
\param iBuffer The I buffer output data
\param qBuffer The Q buffer output data
\param complexVector The complex input vector
\param num_points The number of complex data values to be deinterleaved
*/
extern void volk_16sc_deinterleave_16s_aligned16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
static inline void volk_16sc_deinterleave_16s_aligned16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
/* Thin wrapper: every argument is forwarded unchanged and in the same order. */
volk_16sc_deinterleave_16s_aligned16_orc_impl(iBuffer, qBuffer, complexVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */
15 changes: 14 additions & 1 deletion include/volk/volk_16sc_deinterleave_32f_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer,
}
#endif /* LV_HAVE_GENERIC */


#if LV_HAVE_ORC
/* Orc variant. The _orc_impl routine is only declared in this header; its definition lives elsewhere. */
/* NOTE(review): presumably built from orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc -- confirm. */
/*!
\brief Converts the complex 16 bit vector into floats, scales each data point, and deinterleaves into I & Q vector data
\param iBuffer The I buffer output data
\param qBuffer The Q buffer output data
\param complexVector The complex input vector
\param scalar The value that each data value of the input complex vector is divided by
\param num_points The number of complex data values to be deinterleaved
*/
extern void volk_16sc_deinterleave_32f_aligned16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
static inline void volk_16sc_deinterleave_32f_aligned16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
/* Thin wrapper: every argument, including scalar, is forwarded unchanged and in the same order. */
volk_16sc_deinterleave_32f_aligned16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */
13 changes: 12 additions & 1 deletion include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,18 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuf
}
#endif /* LV_HAVE_GENERIC */


#if LV_HAVE_ORC
/* Orc variant. The _orc_impl routine is only declared in this header; its definition lives elsewhere. */
/* NOTE(review): presumably built from orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc -- confirm. */
/*!
\brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
\param iBuffer The I buffer output data
\param complexVector The complex input vector
\param num_points The number of complex data values to be deinterleaved
*/
extern void volk_16sc_deinterleave_real_8s_aligned16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
static inline void volk_16sc_deinterleave_real_8s_aligned16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
/* Thin wrapper: every argument is forwarded unchanged and in the same order. */
volk_16sc_deinterleave_real_8s_aligned16_orc_impl(iBuffer, complexVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */
6 changes: 3 additions & 3 deletions include/volk/volk_16sc_magnitude_16s_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,16 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
}
#endif /* LV_HAVE_GENERIC */

#if LV_HAVE_ORC
#if LV_HAVE_ORC_DISABLED
/*!
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
*/
extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points);
extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
static inline void volk_16sc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, num_points);
volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
}
#endif /* LV_HAVE_ORC */

Expand Down
2 changes: 1 addition & 1 deletion include/volk/volk_16sc_magnitude_32f_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec
}
#endif /* LV_HAVE_GENERIC */

#if LV_HAVE_ORC
#if LV_HAVE_ORC_DISABLED
/*!
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
Expand Down
14 changes: 14 additions & 0 deletions include/volk/volk_32f_max_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */

#if LV_HAVE_ORC
/* Orc variant. The _orc_impl routine is only declared in this header; its definition lives elsewhere. */
/* NOTE(review): presumably built from orc/volk_32f_max_aligned16_orc_impl.orc -- confirm. */
/*!
\brief Selects the maximum value from each pair of entries in aVector and bVector and stores the results in cVector
\param cVector The vector where the results will be stored
\param aVector The first vector to be checked
\param bVector The second vector to be checked
\param num_points The number of values in aVector and bVector to be checked and stored into cVector
*/
extern void volk_32f_max_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
static inline void volk_32f_max_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
/* Thin wrapper: every argument is forwarded unchanged and in the same order. */
volk_32f_max_aligned16_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */
14 changes: 14 additions & 0 deletions include/volk/volk_32f_min_aligned16.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */

#if LV_HAVE_ORC
/* Orc variant. The _orc_impl routine is only declared in this header; its definition lives elsewhere. */
/* NOTE(review): presumably built from orc/volk_32f_min_aligned16_orc_impl.orc -- confirm. */
/*!
\brief Selects the minimum value from each pair of entries in aVector and bVector and stores the results in cVector
\param cVector The vector where the results will be stored
\param aVector The first vector to be checked
\param bVector The second vector to be checked
\param num_points The number of values in aVector and bVector to be checked and stored into cVector
*/
extern void volk_32f_min_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
static inline void volk_32f_min_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
/* Thin wrapper: every argument is forwarded unchanged and in the same order. */
volk_32f_min_aligned16_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */
12 changes: 12 additions & 0 deletions lib/qa_16sc_deinterleave_16s_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
int16_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));

Expand All @@ -43,6 +45,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
}
Expand Down Expand Up @@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {

CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]);

CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]);
}
}

Expand Down
11 changes: 11 additions & 0 deletions lib/qa_16sc_deinterleave_32f_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
float output_generic1[vlen] __attribute__ ((aligned (16)));
float output_sse2[vlen] __attribute__ ((aligned (16)));
float output_sse21[vlen] __attribute__ ((aligned (16)));
float output_orc[vlen] __attribute__ ((aligned (16)));
float output_orc1[vlen] __attribute__ ((aligned (16)));

int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
Expand All @@ -41,6 +43,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
}
Expand All @@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4);
}
}

Expand Down
9 changes: 9 additions & 0 deletions lib/qa_16sc_deinterleave_real_8s_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {

int8_t output_generic[vlen] __attribute__ ((aligned (16)));
int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
int8_t output_orc[vlen] __attribute__ ((aligned (16)));

int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
Expand All @@ -39,6 +40,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
}
Expand All @@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
}
}

Expand Down
5 changes: 3 additions & 2 deletions lib/qa_16sc_magnitude_16s_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
Expand All @@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
//CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}

Expand Down
6 changes: 3 additions & 3 deletions lib/qa_16sc_magnitude_32f_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);

*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
Expand All @@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}

Expand Down
9 changes: 9 additions & 0 deletions lib/qa_32f_max_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() {

float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
float output02[vlen] __attribute__ ((aligned (16)));

for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
Expand All @@ -40,6 +41,13 @@ void qa_32f_max_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
}
Expand All @@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}

Expand Down
9 changes: 9 additions & 0 deletions lib/qa_32f_min_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() {

float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
float output02[vlen] __attribute__ ((aligned (16)));

for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
Expand All @@ -40,6 +41,13 @@ void qa_32f_min_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
}
Expand All @@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}

Expand Down
8 changes: 4 additions & 4 deletions lib/qa_32fc_magnitude_16s_aligned16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);

for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
//for(int i = 0; i < 10; ++i) {
// printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
// printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
//}

for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
Expand Down
1 change: 0 additions & 1 deletion lib/qa_volk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ CppUnit::TestSuite *
qa_volk::suite()
{
CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");

s->addTest(qa_16s_quad_max_star_aligned16::suite());
s->addTest(qa_32fc_dot_prod_aligned16::suite());
s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
Expand Down
10 changes: 7 additions & 3 deletions orc/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,17 @@ volk_32f_subtract_aligned16_orc_impl.orc \
volk_32f_divide_aligned16_orc_impl.orc \
volk_32f_multiply_aligned16_orc_impl.orc \
volk_32f_sqrt_aligned16_orc_impl.orc \
volk_16sc_magnitude_32f_aligned16_orc_impl.orc \
volk_32f_max_aligned16_orc_impl.orc \
volk_32f_min_aligned16_orc_impl.orc \
volk_32fc_magnitude_32f_aligned16_orc_impl.orc \
volk_32fc_magnitude_16s_aligned16_orc_impl.orc
volk_32fc_magnitude_16s_aligned16_orc_impl.orc \
volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \
volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \
volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc



my_ORCC_FLAGS = --implementation --lazy-init $(ORCC_FLAGS)
my_ORCC_FLAGS = --implementation $(ORCC_FLAGS)

.orc.c:
$(ORCC) $(my_ORCC_FLAGS) -o $@ $<
5 changes: 5 additions & 0 deletions orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.function volk_16sc_deinterleave_16s_aligned16_orc_impl
# Two 2-byte destination arrays (idst, qdst) and one 4-byte source array (src).
.dest 2 idst
.dest 2 qdst
.source 4 src
# Split each 4-byte src element into two 2-byte values written to qdst and idst.
# NOTE(review): presumably the first operand (qdst) receives the upper half and
# the second (idst) the lower half -- confirm against the Orc opcode reference.
splitlw qdst, idst, src
Loading

0 comments on commit e94b1b8

Please sign in to comment.