F030C8xx_KBus.git

QuakeGod

2023-02-01 47857ed32cb8737a25f26970b222e29727f1c93b

提交 \| 用户 \| age
bfc108	1	/* ----------------------------------------------------------------------
Q	2	* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
	3	*
	4	* $Date: 19. March 2015
	5	* $Revision: V.1.4.5
	6	*
	7	* Project: CMSIS DSP Library
	8	* Title: arm_cmplx_mult_cmplx_q31.c
	9	*
	10	* Description: Q31 complex-by-complex multiplication
	11	*
	12	* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
	13	*
	14	* Redistribution and use in source and binary forms, with or without
	15	* modification, are permitted provided that the following conditions
	16	* are met:
	17	* - Redistributions of source code must retain the above copyright
	18	* notice, this list of conditions and the following disclaimer.
	19	* - Redistributions in binary form must reproduce the above copyright
	20	* notice, this list of conditions and the following disclaimer in
	21	* the documentation and/or other materials provided with the
	22	* distribution.
	23	* - Neither the name of ARM LIMITED nor the names of its contributors
	24	* may be used to endorse or promote products derived from this
	25	* software without specific prior written permission.
	26	*
	27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	38	* POSSIBILITY OF SUCH DAMAGE.
	39	* -------------------------------------------------------------------- */
	40
	41	#include "arm_math.h"
	42
	43	/**
	44	* @ingroup groupCmplxMath
	45	*/
	46
	47	/**
	48	* @addtogroup CmplxByCmplxMult
	49	* @{
	50	*/
	51
	52
	53	/**
	54	* @brief Q31 complex-by-complex multiplication
	55	* @param[in] *pSrcA points to the first input vector
	56	* @param[in] *pSrcB points to the second input vector
	57	* @param[out] *pDst points to the output vector
	58	* @param[in] numSamples number of complex samples in each vector
	59	* @return none.
	60	*
	61	* <b>Scaling and Overflow Behavior:</b>
	62	* \par
	63	* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
	64	* Input down scaling is not required.
	65	*/
	66
	67	void arm_cmplx_mult_cmplx_q31(
	68	q31_t * pSrcA,
	69	q31_t * pSrcB,
	70	q31_t * pDst,
	71	uint32_t numSamples)
	72	{
	73	q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
	74	uint32_t blkCnt; /* loop counters */
	75	q31_t mul1, mul2, mul3, mul4;
	76	q31_t out1, out2;
	77
	78	#ifndef ARM_MATH_CM0_FAMILY
	79
	80	/* Run the below code for Cortex-M4 and Cortex-M3 */
	81
	82	/* loop Unrolling */
	83	blkCnt = numSamples >> 2u;
	84
	85	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
	86	** a second loop below computes the remaining 1 to 3 samples. */
	87	while(blkCnt > 0u)
	88	{
	89	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	90	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	91	a = *pSrcA++;
	92	b = *pSrcA++;
	93	c = *pSrcB++;
	94	d = *pSrcB++;
	95
	96	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	97	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	98	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	99	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	100
	101	mul1 = (mul1 >> 1);
	102	mul2 = (mul2 >> 1);
	103	mul3 = (mul3 >> 1);
	104	mul4 = (mul4 >> 1);
	105
	106	out1 = mul1 - mul2;
	107	out2 = mul3 + mul4;
	108
	109	/* store the real result in 3.29 format in the destination buffer. */
	110	*pDst++ = out1;
	111	/* store the imag result in 3.29 format in the destination buffer. */
	112	*pDst++ = out2;
	113
	114	a = *pSrcA++;
	115	b = *pSrcA++;
	116	c = *pSrcB++;
	117	d = *pSrcB++;
	118
	119	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	120	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	121	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	122	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	123
	124	mul1 = (mul1 >> 1);
	125	mul2 = (mul2 >> 1);
	126	mul3 = (mul3 >> 1);
	127	mul4 = (mul4 >> 1);
	128
	129	out1 = mul1 - mul2;
	130	out2 = mul3 + mul4;
	131
	132	/* store the real result in 3.29 format in the destination buffer. */
	133	*pDst++ = out1;
	134	/* store the imag result in 3.29 format in the destination buffer. */
	135	*pDst++ = out2;
	136
	137	a = *pSrcA++;
	138	b = *pSrcA++;
	139	c = *pSrcB++;
	140	d = *pSrcB++;
	141
	142	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	143	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	144	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	145	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	146
	147	mul1 = (mul1 >> 1);
	148	mul2 = (mul2 >> 1);
	149	mul3 = (mul3 >> 1);
	150	mul4 = (mul4 >> 1);
	151
	152	out1 = mul1 - mul2;
	153	out2 = mul3 + mul4;
	154
	155	/* store the real result in 3.29 format in the destination buffer. */
	156	*pDst++ = out1;
	157	/* store the imag result in 3.29 format in the destination buffer. */
	158	*pDst++ = out2;
	159
	160	a = *pSrcA++;
	161	b = *pSrcA++;
	162	c = *pSrcB++;
	163	d = *pSrcB++;
	164
	165	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	166	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	167	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	168	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	169
	170	mul1 = (mul1 >> 1);
	171	mul2 = (mul2 >> 1);
	172	mul3 = (mul3 >> 1);
	173	mul4 = (mul4 >> 1);
	174
	175	out1 = mul1 - mul2;
	176	out2 = mul3 + mul4;
	177
	178	/* store the real result in 3.29 format in the destination buffer. */
	179	*pDst++ = out1;
	180	/* store the imag result in 3.29 format in the destination buffer. */
	181	*pDst++ = out2;
	182
	183	/* Decrement the blockSize loop counter */
	184	blkCnt--;
	185	}
	186
	187	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
	188	** No loop unrolling is used. */
	189	blkCnt = numSamples % 0x4u;
	190
	191	while(blkCnt > 0u)
	192	{
	193	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	194	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	195	a = *pSrcA++;
	196	b = *pSrcA++;
	197	c = *pSrcB++;
	198	d = *pSrcB++;
	199
	200	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	201	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	202	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	203	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	204
	205	mul1 = (mul1 >> 1);
	206	mul2 = (mul2 >> 1);
	207	mul3 = (mul3 >> 1);
	208	mul4 = (mul4 >> 1);
	209
	210	out1 = mul1 - mul2;
	211	out2 = mul3 + mul4;
	212
	213	/* store the real result in 3.29 format in the destination buffer. */
	214	*pDst++ = out1;
	215	/* store the imag result in 3.29 format in the destination buffer. */
	216	*pDst++ = out2;
	217
	218	/* Decrement the blockSize loop counter */
	219	blkCnt--;
	220	}
	221
	222	#else
	223
	224	/* Run the below code for Cortex-M0 */
	225
	226	/* loop Unrolling */
	227	blkCnt = numSamples >> 1u;
	228
	229	/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
	230	** a second loop below computes the remaining 1 sample. */
	231	while(blkCnt > 0u)
	232	{
	233	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	234	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	235	a = *pSrcA++;
	236	b = *pSrcA++;
	237	c = *pSrcB++;
	238	d = *pSrcB++;
	239
	240	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	241	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	242	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	243	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	244
	245	mul1 = (mul1 >> 1);
	246	mul2 = (mul2 >> 1);
	247	mul3 = (mul3 >> 1);
	248	mul4 = (mul4 >> 1);
	249
	250	out1 = mul1 - mul2;
	251	out2 = mul3 + mul4;
	252
	253	/* store the real result in 3.29 format in the destination buffer. */
	254	*pDst++ = out1;
	255	/* store the imag result in 3.29 format in the destination buffer. */
	256	*pDst++ = out2;
	257
	258	a = *pSrcA++;
	259	b = *pSrcA++;
	260	c = *pSrcB++;
	261	d = *pSrcB++;
	262
	263	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	264	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	265	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	266	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	267
	268	mul1 = (mul1 >> 1);
	269	mul2 = (mul2 >> 1);
	270	mul3 = (mul3 >> 1);
	271	mul4 = (mul4 >> 1);
	272
	273	out1 = mul1 - mul2;
	274	out2 = mul3 + mul4;
	275
	276	/* store the real result in 3.29 format in the destination buffer. */
	277	*pDst++ = out1;
	278	/* store the imag result in 3.29 format in the destination buffer. */
	279	*pDst++ = out2;
	280
	281	/* Decrement the blockSize loop counter */
	282	blkCnt--;
	283	}
	284
	285	/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
	286	** No loop unrolling is used. */
	287	blkCnt = numSamples % 0x2u;
	288
	289	while(blkCnt > 0u)
	290	{
	291	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
	292	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
	293	a = *pSrcA++;
	294	b = *pSrcA++;
	295	c = *pSrcB++;
	296	d = *pSrcB++;
	297
	298	mul1 = (q31_t) (((q63_t) a * c) >> 32);
	299	mul2 = (q31_t) (((q63_t) b * d) >> 32);
	300	mul3 = (q31_t) (((q63_t) a * d) >> 32);
	301	mul4 = (q31_t) (((q63_t) b * c) >> 32);
	302
	303	mul1 = (mul1 >> 1);
	304	mul2 = (mul2 >> 1);
	305	mul3 = (mul3 >> 1);
	306	mul4 = (mul4 >> 1);
	307
	308	out1 = mul1 - mul2;
	309	out2 = mul3 + mul4;
	310
	311	/* store the real result in 3.29 format in the destination buffer. */
	312	*pDst++ = out1;
	313	/* store the imag result in 3.29 format in the destination buffer. */
	314	*pDst++ = out2;
	315
	316	/* Decrement the blockSize loop counter */
	317	blkCnt--;
	318	}
	319
	320	#endif /* #ifndef ARM_MATH_CM0_FAMILY */
	321
	322	}
	323
	324	/**
	325	* @} end of CmplxByCmplxMult group
	326	*/