F030C8xx_KBus.git

QuakeGod

2021-06-20 bfc108e6097eff2bec73050e261f3b9e5db447b7

提交 \| 用户 \| age
bfc108	1	/* ----------------------------------------------------------------------
Q	2	* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
	3	*
	4	* $Date: 19. March 2015
	5	* $Revision: V.1.4.5
	6	*
	7	* Project: CMSIS DSP Library
	8	* Title: arm_lms_q15.c
	9	*
	10	* Description: Processing function for the Q15 LMS filter.
	11	*
	12	* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
	13	*
	14	* Redistribution and use in source and binary forms, with or without
	15	* modification, are permitted provided that the following conditions
	16	* are met:
	17	* - Redistributions of source code must retain the above copyright
	18	* notice, this list of conditions and the following disclaimer.
	19	* - Redistributions in binary form must reproduce the above copyright
	20	* notice, this list of conditions and the following disclaimer in
	21	* the documentation and/or other materials provided with the
	22	* distribution.
	23	* - Neither the name of ARM LIMITED nor the names of its contributors
	24	* may be used to endorse or promote products derived from this
	25	* software without specific prior written permission.
	26	*
	27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	38	* POSSIBILITY OF SUCH DAMAGE.
	39	* -------------------------------------------------------------------- */
	40
	41	#include "arm_math.h"
	42	/**
	43	* @ingroup groupFilters
	44	*/
	45
	46	/**
	47	* @addtogroup LMS
	48	* @{
	49	*/
	50
	51	/**
	52	* @brief Processing function for Q15 LMS filter.
	53	* @param[in] *S points to an instance of the Q15 LMS filter structure.
	54	* @param[in] *pSrc points to the block of input data.
	55	* @param[in] *pRef points to the block of reference data.
	56	* @param[out] *pOut points to the block of output data.
	57	* @param[out] *pErr points to the block of error data.
	58	* @param[in] blockSize number of samples to process.
	59	* @return none.
	60	*
	61	* \par Scaling and Overflow Behavior:
	62	* The function is implemented using a 64-bit internal accumulator.
	63	* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
	64	* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
	65	* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
	66	* After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
	67	* Lastly, the accumulator is saturated to yield a result in 1.15 format.
	68	*
	69	* \par
	70	* In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
	71	*
	72	*/
	73
	74	void arm_lms_q15(
	75	const arm_lms_instance_q15 * S,
	76	q15_t * pSrc,
	77	q15_t * pRef,
	78	q15_t * pOut,
	79	q15_t * pErr,
	80	uint32_t blockSize)
	81	{
	82	q15_t pState = S->pState; / State pointer */
	83	uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
	84	q15_t pCoeffs = S->pCoeffs; / Coefficient pointer */
	85	q15_t pStateCurnt; / Points to the current sample of the state */
	86	q15_t mu = S->mu; /* Adaptive factor */
	87	q15_t px; / Temporary pointer for state */
	88	q15_t pb; / Temporary pointer for coefficient buffer */
	89	uint32_t tapCnt, blkCnt; /* Loop counters */
	90	q63_t acc; /* Accumulator */
	91	q15_t e = 0; /* error of data sample */
	92	q15_t alpha; /* Intermediate constant for taps update */
	93	q31_t coef; /* Teporary variable for coefficient */
	94	q31_t acc_l, acc_h;
	95	int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
	96	int32_t uShift = (32 - lShift);
	97
	98
	99	#ifndef ARM_MATH_CM0_FAMILY
	100
	101	/* Run the below code for Cortex-M4 and Cortex-M3 */
	102
	103
	104	/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
	105	/* pStateCurnt points to the location where the new input data should be written */
	106	pStateCurnt = &(S->pState[(numTaps - 1u)]);
	107
	108	/* Initializing blkCnt with blockSize */
	109	blkCnt = blockSize;
	110
	111	while(blkCnt > 0u)
	112	{
	113	/* Copy the new input sample into the state buffer */
	114	pStateCurnt++ = pSrc++;
	115
	116	/* Initialize state pointer */
	117	px = pState;
	118
	119	/* Initialize coefficient pointer */
	120	pb = pCoeffs;
	121
	122	/* Set the accumulator to zero */
	123	acc = 0;
	124
	125	/* Loop unrolling. Process 4 taps at a time. */
	126	tapCnt = numTaps >> 2u;
	127
	128	while(tapCnt > 0u)
	129	{
	130	/* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
	131	/* Perform the multiply-accumulate */
	132	#ifndef UNALIGNED_SUPPORT_DISABLE
	133
	134	acc = __SMLALD(__SIMD32(px)++, (__SIMD32(pb)++), acc);
	135	acc = __SMLALD(__SIMD32(px)++, (__SIMD32(pb)++), acc);
	136
	137	#else
	138
	139	acc += (q63_t) (((q31_t) (px++) (*pb++)));
	140	acc += (q63_t) (((q31_t) (px++) (*pb++)));
	141	acc += (q63_t) (((q31_t) (px++) (*pb++)));
	142	acc += (q63_t) (((q31_t) (px++) (*pb++)));
	143
	144
	145	#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
	146
	147	/* Decrement the loop counter */
	148	tapCnt--;
	149	}
	150
	151	/* If the filter length is not a multiple of 4, compute the remaining filter taps */
	152	tapCnt = numTaps % 0x4u;
	153
	154	while(tapCnt > 0u)
	155	{
	156	/* Perform the multiply-accumulate */
	157	acc += (q63_t) (((q31_t) (px++) (*pb++)));
	158
	159	/* Decrement the loop counter */
	160	tapCnt--;
	161	}
	162
	163	/* Calc lower part of acc */
	164	acc_l = acc & 0xffffffff;
	165
	166	/* Calc upper part of acc */
	167	acc_h = (acc >> 32) & 0xffffffff;
	168
	169	/* Apply shift for lower part of acc and upper part of acc */
	170	acc = (uint32_t) acc_l >> lShift \| acc_h << uShift;
	171
	172	/* Converting the result to 1.15 format and saturate the output */
	173	acc = __SSAT(acc, 16);
	174
	175	/* Store the result from accumulator into the destination buffer. */
	176	*pOut++ = (q15_t) acc;
	177
	178	/* Compute and store error */
	179	e = *pRef++ - (q15_t) acc;
	180
	181	*pErr++ = (q15_t) e;
	182
	183	/* Compute alpha i.e. intermediate constant for taps update */
	184	alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
	185
	186	/* Initialize state pointer */
	187	/* Advance state pointer by 1 for the next sample */
	188	px = pState++;
	189
	190	/* Initialize coefficient pointer */
	191	pb = pCoeffs;
	192
	193	/* Loop unrolling. Process 4 taps at a time. */
	194	tapCnt = numTaps >> 2u;
	195
	196	/* Update filter coefficients */
	197	while(tapCnt > 0u)
	198	{
	199	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	200	*pb++ = (q15_t) __SSAT((coef), 16);
	201	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	202	*pb++ = (q15_t) __SSAT((coef), 16);
	203	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	204	*pb++ = (q15_t) __SSAT((coef), 16);
	205	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	206	*pb++ = (q15_t) __SSAT((coef), 16);
	207
	208	/* Decrement the loop counter */
	209	tapCnt--;
	210	}
	211
	212	/* If the filter length is not a multiple of 4, compute the remaining filter taps */
	213	tapCnt = numTaps % 0x4u;
	214
	215	while(tapCnt > 0u)
	216	{
	217	/* Perform the multiply-accumulate */
	218	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	219	*pb++ = (q15_t) __SSAT((coef), 16);
	220
	221	/* Decrement the loop counter */
	222	tapCnt--;
	223	}
	224
	225	/* Decrement the loop counter */
	226	blkCnt--;
	227
	228	}
	229
	230	/* Processing is complete. Now copy the last numTaps - 1 samples to the
	231	satrt of the state buffer. This prepares the state buffer for the
	232	next function call. */
	233
	234	/* Points to the start of the pState buffer */
	235	pStateCurnt = S->pState;
	236
	237	/* Calculation of count for copying integer writes */
	238	tapCnt = (numTaps - 1u) >> 2;
	239
	240	while(tapCnt > 0u)
	241	{
	242
	243	#ifndef UNALIGNED_SUPPORT_DISABLE
	244
	245	__SIMD32(pStateCurnt)++ = __SIMD32(pState)++;
	246	__SIMD32(pStateCurnt)++ = __SIMD32(pState)++;
	247	#else
	248	pStateCurnt++ = pState++;
	249	pStateCurnt++ = pState++;
	250	pStateCurnt++ = pState++;
	251	pStateCurnt++ = pState++;
	252	#endif
	253
	254	tapCnt--;
	255
	256	}
	257
	258	/* Calculation of count for remaining q15_t data */
	259	tapCnt = (numTaps - 1u) % 0x4u;
	260
	261	/* copy data */
	262	while(tapCnt > 0u)
	263	{
	264	pStateCurnt++ = pState++;
	265
	266	/* Decrement the loop counter */
	267	tapCnt--;
	268	}
	269
	270	#else
	271
	272	/* Run the below code for Cortex-M0 */
	273
	274	/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
	275	/* pStateCurnt points to the location where the new input data should be written */
	276	pStateCurnt = &(S->pState[(numTaps - 1u)]);
	277
	278	/* Loop over blockSize number of values */
	279	blkCnt = blockSize;
	280
	281	while(blkCnt > 0u)
	282	{
	283	/* Copy the new input sample into the state buffer */
	284	pStateCurnt++ = pSrc++;
	285
	286	/* Initialize pState pointer */
	287	px = pState;
	288
	289	/* Initialize pCoeffs pointer */
	290	pb = pCoeffs;
	291
	292	/* Set the accumulator to zero */
	293	acc = 0;
	294
	295	/* Loop over numTaps number of values */
	296	tapCnt = numTaps;
	297
	298	while(tapCnt > 0u)
	299	{
	300	/* Perform the multiply-accumulate */
	301	acc += (q63_t) ((q31_t) (px++) (*pb++));
	302
	303	/* Decrement the loop counter */
	304	tapCnt--;
	305	}
	306
	307	/* Calc lower part of acc */
	308	acc_l = acc & 0xffffffff;
	309
	310	/* Calc upper part of acc */
	311	acc_h = (acc >> 32) & 0xffffffff;
	312
	313	/* Apply shift for lower part of acc and upper part of acc */
	314	acc = (uint32_t) acc_l >> lShift \| acc_h << uShift;
	315
	316	/* Converting the result to 1.15 format and saturate the output */
	317	acc = __SSAT(acc, 16);
	318
	319	/* Store the result from accumulator into the destination buffer. */
	320	*pOut++ = (q15_t) acc;
	321
	322	/* Compute and store error */
	323	e = *pRef++ - (q15_t) acc;
	324
	325	*pErr++ = (q15_t) e;
	326
	327	/* Compute alpha i.e. intermediate constant for taps update */
	328	alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
	329
	330	/* Initialize pState pointer */
	331	/* Advance state pointer by 1 for the next sample */
	332	px = pState++;
	333
	334	/* Initialize pCoeffs pointer */
	335	pb = pCoeffs;
	336
	337	/* Loop over numTaps number of values */
	338	tapCnt = numTaps;
	339
	340	while(tapCnt > 0u)
	341	{
	342	/* Perform the multiply-accumulate */
	343	coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
	344	*pb++ = (q15_t) __SSAT((coef), 16);
	345
	346	/* Decrement the loop counter */
	347	tapCnt--;
	348	}
	349
	350	/* Decrement the loop counter */
	351	blkCnt--;
	352
	353	}
	354
	355	/* Processing is complete. Now copy the last numTaps - 1 samples to the
	356	start of the state buffer. This prepares the state buffer for the
	357	next function call. */
	358
	359	/* Points to the start of the pState buffer */
	360	pStateCurnt = S->pState;
	361
	362	/* Copy (numTaps - 1u) samples */
	363	tapCnt = (numTaps - 1u);
	364
	365	/* Copy the data */
	366	while(tapCnt > 0u)
	367	{
	368	pStateCurnt++ = pState++;
	369
	370	/* Decrement the loop counter */
	371	tapCnt--;
	372	}
	373
	374	#endif /* #ifndef ARM_MATH_CM0_FAMILY */
	375
	376	}
	377
	378	/**
	379	* @} end of LMS group
	380	*/