F030C8T6_Kbus_MIX.git

QuakeGod

2024-02-24 8b51c78f1b88d94a89bb8c37ae38a54f523cb597

提交 \| 用户 \| age
8b51c7	1	/* ----------------------------------------------------------------------
Q	2	* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
	3	*
	4	* $Date: 19. March 2015
	5	* $Revision: V.1.4.5
	6	*
	7	* Project: CMSIS DSP Library
	8	* Title: arm_cmplx_mult_cmplx_f32.c
	9	*
	10	* Description: Floating-point complex-by-complex multiplication
	11	*
	12	* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
	13	*
	14	* Redistribution and use in source and binary forms, with or without
	15	* modification, are permitted provided that the following conditions
	16	* are met:
	17	* - Redistributions of source code must retain the above copyright
	18	* notice, this list of conditions and the following disclaimer.
	19	* - Redistributions in binary form must reproduce the above copyright
	20	* notice, this list of conditions and the following disclaimer in
	21	* the documentation and/or other materials provided with the
	22	* distribution.
	23	* - Neither the name of ARM LIMITED nor the names of its contributors
	24	* may be used to endorse or promote products derived from this
	25	* software without specific prior written permission.
	26	*
	27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	38	* POSSIBILITY OF SUCH DAMAGE.
	39	* -------------------------------------------------------------------- */
	40	#include "arm_math.h"
	41
	42	/**
	43	* @ingroup groupCmplxMath
	44	*/
	45
	46	/**
	47	* @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
	48	*
	49	* Multiplies a complex vector by another complex vector and generates a complex result.
	50	* The data in the complex arrays is stored in an interleaved fashion
	51	* (real, imag, real, imag, ...).
	52	* The parameter <code>numSamples</code> represents the number of complex
	53	* samples processed. The complex arrays have a total of <code>2*numSamples</code>
	54	* real values.
	55	*
	56	* The following algorithm is used:
	57	*
	58	* <pre>
	59	* for(n=0; n<numSamples; n++) {
	60	* pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
	61	* pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
	62	* }
	63	* </pre>
	64	*
	65	* There are separate functions for floating-point, Q15, and Q31 data types.
	66	*/
	67
	68	/**
	69	* @addtogroup CmplxByCmplxMult
	70	* @{
	71	*/
	72
	73
	74	/**
	75	* @brief Floating-point complex-by-complex multiplication
	76	* @param[in] *pSrcA points to the first input vector
	77	* @param[in] *pSrcB points to the second input vector
	78	* @param[out] *pDst points to the output vector
	79	* @param[in] numSamples number of complex samples in each vector
	80	* @return none.
	81	*/
	82
	83	void arm_cmplx_mult_cmplx_f32(
	84	float32_t * pSrcA,
	85	float32_t * pSrcB,
	86	float32_t * pDst,
	87	uint32_t numSamples)
	88	{
	89	float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */
	90	uint32_t blkCnt; /* loop counters */
	91
	92	#ifndef ARM_MATH_CM0_FAMILY
	93
	94	/* Run the below code for Cortex-M4 and Cortex-M3 */
	95	float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */
	96	float32_t acc1, acc2, acc3, acc4;
	97
	98
	99	/* loop Unrolling */
	100	blkCnt = numSamples >> 2u;
	101
	102	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
	103	** a second loop below computes the remaining 1 to 3 samples. */
	104	while(blkCnt > 0u)
	105	{
	106	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	107	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	108	a1 = pSrcA; / A[2 * i] */
	109	c1 = pSrcB; / B[2 * i] */
	110
	111	b1 = (pSrcA + 1); / A[2 * i + 1] */
	112	acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */
	113
	114	a2 = (pSrcA + 2); / A[2 * i + 2] */
	115	acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */
	116
	117	d1 = (pSrcB + 1); / B[2 * i + 1] */
	118	c2 = (pSrcB + 2); / B[2 * i + 2] */
	119	acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
	120
	121	d2 = (pSrcB + 3); / B[2 * i + 3] */
	122	acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
	123
	124	b2 = (pSrcA + 3); / A[2 * i + 3] */
	125	acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
	126
	127	a1 = (pSrcA + 4); / A[2 * i + 4] */
	128	acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
	129
	130	c1 = (pSrcB + 4); / B[2 * i + 4] */
	131	acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
	132	pDst = acc1; / C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
	133
	134	b1 = (pSrcA + 5); / A[2 * i + 5] */
	135	acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
	136
	137	(pDst + 1) = acc2; / C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
	138	acc1 = (a1 * c1);
	139
	140	d1 = *(pSrcB + 5);
	141	acc2 = (b1 * c1);
	142
	143	*(pDst + 2) = acc3;
	144	*(pDst + 3) = acc4;
	145
	146	a2 = *(pSrcA + 6);
	147	acc1 -= (b1 * d1);
	148
	149	c2 = *(pSrcB + 6);
	150	acc2 += (a1 * d1);
	151
	152	b2 = *(pSrcA + 7);
	153	acc3 = (a2 * c2);
	154
	155	d2 = *(pSrcB + 7);
	156	acc4 = (b2 * c2);
	157
	158	*(pDst + 4) = acc1;
	159	pSrcA += 8u;
	160
	161	acc3 -= (b2 * d2);
	162	acc4 += (a2 * d2);
	163
	164	*(pDst + 5) = acc2;
	165	pSrcB += 8u;
	166
	167	*(pDst + 6) = acc3;
	168	*(pDst + 7) = acc4;
	169
	170	pDst += 8u;
	171
	172	/* Decrement the numSamples loop counter */
	173	blkCnt--;
	174	}
	175
	176	/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
	177	** No loop unrolling is used. */
	178	blkCnt = numSamples % 0x4u;
	179
	180	#else
	181
	182	/* Run the below code for Cortex-M0 */
	183	blkCnt = numSamples;
	184
	185	#endif /* #ifndef ARM_MATH_CM0_FAMILY */
	186
	187	while(blkCnt > 0u)
	188	{
	189	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	190	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	191	a1 = *pSrcA++;
	192	b1 = *pSrcA++;
	193	c1 = *pSrcB++;
	194	d1 = *pSrcB++;
	195
	196	/* store the result in the destination buffer. */
	197	pDst++ = (a1 c1) - (b1 * d1);
	198	pDst++ = (a1 d1) + (b1 * c1);
	199
	200	/* Decrement the numSamples loop counter */
	201	blkCnt--;
	202	}
	203	}
	204
	205	/**
	206	* @} end of CmplxByCmplxMult group
	207	*/