久久精品国产精品国产精品污,男人扒开添女人下部免费视频,一级国产69式性姿势免费视频,夜鲁夜鲁很鲁在线视频 视频,欧美丰满少妇一区二区三区,国产偷国产偷亚洲高清人乐享,中文 在线 日韩 亚洲 欧美,熟妇人妻无乱码中文字幕真矢织江,一区二区三区人妻制服国产

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 人文社科 > 生活经验 >内容正文

生活经验

Neon Intrinsics各函数介绍

發布時間:2023/11/27 生活经验 33 豆豆
生活随笔 收集整理的這篇文章主要介紹了 Neon Intrinsics各函数介绍 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
#ifndef __ARM_NEON__
#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
#endif
/* Instruction categories referred to by the section comments in this listing:
(1) Normal instructions: produce a result vector of the same size, and usually the
    same type, as the operand vectors.
(2) Long instructions: operate on doubleword vector operands and produce a quadword
    vector result. The result elements are generally twice the width of the operand
    elements and of the same type.
(3) Wide instructions: operate on one doubleword vector operand and one quadword
    vector operand and produce a quadword vector result. The result elements and the
    elements of the first operand are twice the width of the second operand's elements.
(4) Narrow instructions: operate on quadword vector operands and produce a doubleword
    vector result. The result elements are generally half the width of the operand
    elements.
(5) Saturating instructions: a result that exceeds the range of the data type is
    automatically clamped to that range. */
/******************************************************Addition*************************/
/*--1、Vector add(正常指令): vadd -> ri = ai + bi; r, a, b have equal lane sizes--*/
int8x8_t vadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_add_epi8
int16x4_t vadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_add_epi16
int32x2_t vadd_s32 (int32x2_t __a, int32x2_t __b);//_mm_add_epi32
int64x1_t vadd_s64 (int64x1_t __a, int64x1_t __b);//_mm_add_epi64
//_mm_add_ps, SSE, use only low 64 bits
float32x2_t vadd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_add_epi8
uint16x4_t vadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_add_epi16
uint32x2_t vadd_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_add_epi32
uint64x1_t vadd_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_add_epi64
int8x16_t vaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_add_epi8
int16x8_t vaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_add_epi16
int32x4_t vaddq_s32 (int32x4_t __a, int32x4_t __b);//_mm_add_epi32
int64x2_t vaddq_s64 (int64x2_t __a, int64x2_t __b);//_mm_add_epi64
float32x4_t vaddq_f32 (float32x4_t __a, float32x4_t __b);//_mm_add_ps
uint8x16_t vaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_add_epi8
uint16x8_t vaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_add_epi16
uint32x4_t vaddq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_add_epi32
uint64x2_t vaddq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_add_epi64
/*--2、Vector long add(長指令): vaddl -> ri = ai + bi; a, b have equal lane sizes, 
result is a 128 bit vector of lanes that are twice the width--*/
int16x8_t vaddl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vaddl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vaddl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vaddl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vaddl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vaddl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3、Vector wide add(寬指令): vaddw -> ri = ai + bi--*/
int16x8_t vaddw_s8 (int16x8_t __a, int8x8_t __b);
int32x4_t vaddw_s16 (int32x4_t __a, int16x4_t __b);
int64x2_t vaddw_s32 (int64x2_t __a, int32x2_t __b);
uint16x8_t vaddw_u8 (uint16x8_t __a, uint8x8_t __b);
uint32x4_t vaddw_u16 (uint32x4_t __a, uint16x4_t __b);
uint64x2_t vaddw_u32 (uint64x2_t __a, uint32x2_t __b);
/*--4、Vector halving add: vhadd -> ri = (ai + bi) >> 1; 
shifts each result right one bit, Results are truncated--*/
int8x8_t vhadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vhadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vhadd_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vhadd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vhadd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vhadd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vhaddq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vhaddq_s16 (int16x8_t __a, int16x8_t __b)
int32x4_t vhaddq_s32 (int32x4_t __a, int32x4_t __b)
uint8x16_t vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
uint16x8_t vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
uint32x4_t vhaddq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--5、Vector rounding halving add: vrhadd -> ri = (ai + bi + 1) >> 1; 
shifts each result right one bit, Results are rounded(四舍五入)--*/
int8x8_t vrhadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vrhadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vrhadd_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vrhadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_avg_epu8
uint16x4_t vrhadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_avg_epu16
uint32x2_t vrhadd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vrhaddq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vrhaddq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vrhaddq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_avg_epu8
uint16x8_t vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_avg_epu16
uint32x4_t vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6、Vector saturating add(飽和指令): vqadd -> ri = sat(ai + bi); 
the results are saturated if they overflow--*/
int8x8_t vqadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_adds_epi8
int16x4_t vqadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_adds_epi16
int32x2_t vqadd_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqadd_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_adds_epu8
uint16x4_t vqadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_adds_epu16
uint32x2_t vqadd_u32 (uint32x2_t __a, uint32x2_t __b);
uint64x1_t vqadd_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vqaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_adds_epi8
int16x8_t vqaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_adds_epi16
int32x4_t vqaddq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqaddq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_adds_epu8
uint16x8_t vqaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_adds_epu16
uint32x4_t vqaddq_u32 (uint32x4_t __a, uint32x4_t __b);
uint64x2_t vqaddq_u64 (uint64x2_t __a, uint64x2_t __b);
/*--7、Vector add high half (narrowing instruction): vaddhn -> ri = ai + bi; 
selecting High half, The results are truncated (not saturated)--*/
int8x8_t vaddhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vaddhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vaddhn_s64 (int64x2_t __a, int64x2_t __b);
uint8x8_t vaddhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vaddhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vaddhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*--8、Vector rounding add high half(窄指令): vraddhn -> ri = ai + bi; 
selecting High half, The results are rounded--*/
int8x8_t vraddhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vraddhn_s32 (int32x4_t __a, int32x4_t __b)
int32x2_t vraddhn_s64 (int64x2_t __a, int64x2_t __b)
uint8x8_t vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
uint16x4_t vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
uint32x2_t vraddhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*******************************************Multiplication******************************/
/*--1、Vector multiply(正常指令): vmul -> ri = ai * bi;--*/
int8x8_t vmul_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vmul_s16 (int16x4_t __a, int16x4_t __b);//_mm_mullo_epi16
int32x2_t vmul_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vmul_f32 (float32x2_t __a, float32x2_t __b);//_mm_mul_ps
uint8x8_t vmul_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vmul_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_mullo_epi16
uint32x2_t vmul_u32 (uint32x2_t __a, uint32x2_t __b);
poly8x8_t vmul_p8 (poly8x8_t __a, poly8x8_t __b);
int8x16_t vmulq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vmulq_s16 (int16x8_t __a, int16x8_t __b);//_mm_mullo_epi16
int32x4_t vmulq_s32 (int32x4_t __a, int32x4_t __b);
float32x4_t vmulq_f32 (float32x4_t __a, float32x4_t __b);//_mm_mul_ps
uint8x16_t vmulq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vmulq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_mullo_epi16
uint32x4_t vmulq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16_t vmulq_p8 (poly8x16_t __a, poly8x16_t __b);
/*--2、Vector multiply accumulate: vmla -> ri = ai + bi * ci; --*/
int8x8_t vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
float32x2_t vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
float32x4_t vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--3、Vector multiply accumulate long: vmlal -> ri = ai + bi * ci --*/
int16x8_t vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/*--4、Vector multiply subtract: vmls -> ri = ai - bi * ci --*/
int8x8_t vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
float32x2_t vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
float32x4_t vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--5、Vector multiply subtract long:vmlsl -> ri = ai - bi * ci --*/
int16x8_t vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/*--6、Vector saturating doubling multiply high: vqdmulh -> ri = sat(ai * bi); 
doubles the results and returns only the high half of the truncated results--*/
int16x4_t vqdmulh_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqdmulh_s32 (int32x2_t __a, int32x2_t __b);
int16x8_t vqdmulhq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqdmulhq_s32 (int32x4_t __a, int32x4_t __b);
/*--7、Vector saturating rounding doubling multiply high: vqrdmulh -> ri = sat(ai * bi);
doubles the results and returns only the high half of the rounded results. 
The results are saturated if they overflow--*/
int16x4_t vqrdmulh_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqrdmulh_s32 (int32x2_t __a, int32x2_t __b);
int16x8_t vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b);
/*--8、Vector saturating doubling multiply accumulate long: vqdmlal -> ri = ai + bi * ci;
multiplies the elements in the second and third vectors, doubles the results and adds the
results to the values in the first vector. The results are saturated if they overflow--*/
int32x4_t vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t  vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
/*--9、Vector saturating doubling multiply subtract long: vqdmlsl -> ri = ai - bi * ci;
multiplies the elements in the second and third vectors, doubles the results and subtracts 
the results from the elements in the first vector. 
The results are saturated if they overflow--*/
int32x4_t vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
/*--10、Vector long multiply(長指令): vmull -> ri = ai * bi;--*/
int16x8_t vmull_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vmull_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vmull_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vmull_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vmull_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vmull_u32 (uint32x2_t __a, uint32x2_t __b);
poly16x8_t vmull_p8 (poly8x8_t __a, poly8x8_t __b);
/*--11、Vector saturating doubling long multiply: vqdmull -> ri = ai * bi;
If any of the results overflow, they are saturated--*/
int32x4_t vqdmull_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vqdmull_s32 (int32x2_t __a, int32x2_t __b);
/*--12、Fused multiply accumulate: vfma -> ri = ai + bi * ci; 
The result of the multiply is not rounded before the accumulation--*/
float32x2_t vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
float32x4_t vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
/*--13、Fused multiply subtract: vfms -> ri = ai - bi * ci; 
The result of the multiply is not rounded before the subtraction--*/
float32x2_t vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
float32x4_t vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
/******************************************************Round to integral****************/
/* Each intrinsic below takes one float32 vector and returns a vector of the same type;
the comment on each pair names the rounding direction.
NOTE(review): the 128-bit names here place the 'q' before the mode letter
(vrndqn/vrndqa/vrndqp/vrndqm). The ACLE spelling is presumably
vrndnq/vrndaq/vrndpq/vrndmq -- confirm against the arm_neon.h of the target
toolchain before relying on these names. */
/*--1、round to nearest, ties to even--*/
float32x2_t vrndn_f32 (float32x2_t __a);
float32x4_t vrndqn_f32 (float32x4_t __a);
/*--2、round to nearest, ties away from zero--*/
float32x2_t vrnda_f32 (float32x2_t __a);
float32x4_t vrndqa_f32 (float32x4_t __a);
/*--3、round towards +Inf--*/
float32x2_t vrndp_f32 (float32x2_t __a);
float32x4_t vrndqp_f32 (float32x4_t __a);
/*--4、round towards -Inf--*/
float32x2_t vrndm_f32 (float32x2_t __a);
float32x4_t vrndqm_f32 (float32x4_t __a);
/*--5、round towards 0--*/
float32x2_t vrnd_f32 (float32x2_t __a);
float32x4_t vrndq_f32 (float32x4_t __a);
/**********************************************Subtraction******************************/
/*--1、Vector subtract(正常指令):vsub -> ri = ai - bi;--*/
int8x8_t vsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_sub_epi8
int16x4_t vsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_sub_epi16
int32x2_t vsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_sub_epi32
int64x1_t vsub_s64 (int64x1_t __a, int64x1_t __b);//_mm_sub_epi64
float32x2_t vsub_f32 (float32x2_t __a, float32x2_t __b);//_mm_sub_ps
uint8x8_t vsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_sub_epi8
uint16x4_t vsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_sub_epi16
uint32x2_t vsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_sub_epi32
uint64x1_t vsub_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_sub_epi64
int8x16_t vsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_sub_epi8
int16x8_t vsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_sub_epi16
int32x4_t vsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_sub_epi32
int64x2_t vsubq_s64 (int64x2_t __a, int64x2_t __b);//_mm_sub_epi64
float32x4_t vsubq_f32 (float32x4_t __a, float32x4_t __b);//_mm_sub_ps
uint8x16_t vsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_sub_epi8
uint16x8_t vsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_sub_epi16
uint32x4_t vsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_sub_epi32
uint64x2_t vsubq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_sub_epi64
/*--2、Vector long subtract(長指令): vsubl -> ri = ai - bi; --*/
int16x8_t vsubl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vsubl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vsubl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vsubl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vsubl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vsubl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3、Vector wide subtract(寬指令): vsubw -> ri = ai - bi;--*/
int16x8_t vsubw_s8 (int16x8_t __a, int8x8_t __b);
int32x4_t vsubw_s16 (int32x4_t __a, int16x4_t __b);
int64x2_t vsubw_s32 (int64x2_t __a, int32x2_t __b);
uint16x8_t vsubw_u8 (uint16x8_t __a, uint8x8_t __b);
uint32x4_t vsubw_u16 (uint32x4_t __a, uint16x4_t __b);
uint64x2_t vsubw_u32 (uint64x2_t __a, uint32x2_t __b);
/*--4、Vector saturating subtract(飽和指令): vqsub -> ri = sat(ai - bi);
If any of the results overflow, they are saturated--*/
int8x8_t vqsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_subs_epi8
int16x4_t vqsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_subs_epi16
int32x2_t vqsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_subs_epi32
int64x1_t vqsub_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_subs_epu8
uint16x4_t vqsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_subs_epu16
uint32x2_t vqsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_subs_epu32
uint64x1_t vqsub_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vqsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_subs_epi8
int16x8_t vqsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_subs_epi16
int32x4_t vqsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_subs_epi32
int64x2_t vqsubq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_subs_epu8
uint16x8_t vqsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_subs_epu16
uint32x4_t vqsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_subs_epu32
uint64x2_t vqsubq_u64 (uint64x2_t __a, uint64x2_t __b);
/*--5、Vector halving subtract: vhsub -> ri = (ai - bi) >> 1; 
shifts each result right one bit, The results are truncated.--*/
int8x8_t vhsub_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vhsub_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vhsub_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vhsub_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vhsub_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vhsub_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vhsubq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vhsubq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vhsubq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vhsubq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vhsubq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vhsubq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6、Vector subtract high half(窄指令): vsubhn -> ri = ai - bi;
It returns the most significant halves of the results. The results are truncated--*/
int8x8_t vsubhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vsubhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vsubhn_s64 (int64x2_t __a, int64x2_t __b);
uint8x8_t vsubhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vsubhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vsubhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*--7、Vector rounding subtract high half(窄指令): vrsubhn -> ai - bi; 
It returns the most significant halves of the results. The results are rounded--*/
int8x8_t vrsubhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vrsubhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
uint8x8_t vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b);
/******************************************************Comparison***********************/
/*--1、Vector compare equal(正常指令): vceq -> ri = ai == bi ? 1...1 : 0...0; 
If they are equal, the corresponding element in the destination vector is set to all ones.
Otherwise, it is set to all zeros--*/
uint8x8_t vceq_s8 (int8x8_t __a, int8x8_t __b);//_mm_cmpeq_epi8
uint16x4_t vceq_s16 (int16x4_t __a, int16x4_t __b);//_mm_cmpeq_epi16
uint32x2_t vceq_s32 (int32x2_t __a, int32x2_t __b);//_mm_cmpeq_epi32
uint32x2_t vceq_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vceq_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_cmpeq_epi8
uint16x4_t vceq_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_cmpeq_epi16
uint32x2_t vceq_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_cmpeq_epi32
uint8x8_t vceq_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_cmpeq_epi8
uint8x16_t vceqq_s8 (int8x16_t __a, int8x16_t __b);//_mm_cmpeq_epi8
uint16x8_t vceqq_s16 (int16x8_t __a, int16x8_t __b);//_mm_cmpeq_epi16
uint32x4_t vceqq_s32 (int32x4_t __a, int32x4_t __b);//_mm_cmpeq_epi32
uint32x4_t vceqq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vceqq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_cmpeq_epi8
uint16x8_t vceqq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_cmpeq_epi16
uint32x4_t vceqq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_cmpeq_epi32
uint8x16_t vceqq_p8 (poly8x16_t __a, poly8x16_t __b);//_mm_cmpeq_epi8
/*--2、Vector compare greater-than or equal(正常指令): vcge-> ri = ai >= bi ? 1...1:0...0;
If it is greater than or equal to it, the corresponding element in the destination 
vector is set to all ones. Otherwise, it is set to all zeros.--*/
uint8x8_t vcge_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcge_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcge_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcge_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcge_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcge_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcge_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcgeq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcgeq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcgeq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcgeq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcgeq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcgeq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcgeq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--3、Vector compare less-than or equal(正常指令): vcle -> ri = ai <= bi ? 1...1:0...0;
If it is less than or equal to it, the corresponding element in the destination vector 
is set to all ones. Otherwise, it is set to all zeros.--*/
uint8x8_t vcle_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcle_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcle_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcle_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcle_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcle_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcle_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcleq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcleq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcleq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcleq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcleq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcleq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcleq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--4、Vector compare greater-than(正常指令): vcgt -> ri = ai > bi ? 1...1:0...0;
If it is greater than it, the corresponding element in the destination vector is
set to all ones. Otherwise, it is set to all zeros--*/
uint8x8_t vcgt_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcgt_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcgt_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcgt_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcgt_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcgt_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcgt_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcgtq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcgtq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcgtq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcgtq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcgtq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcgtq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcgtq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--5、Vector compare less-than(正常指令): vclt -> ri = ai < bi ? 1...1:0...0;
If it is less than it, the corresponding element in the destination vector is set 
to all ones.Otherwise, it is set to all zeros--*/
uint8x8_t vclt_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vclt_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vclt_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vclt_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vclt_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vclt_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vclt_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcltq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcltq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcltq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcltq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcltq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcltq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcltq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6、Vector compare absolute greater-than or equal(正常指令): 
vcage -> ri = |ai| >= |bi| ? 1...1:0...0;
compares the absolute value of each element in a vector with the absolute value of the 
corresponding element of a second vector. If it is greater than or equal to it, 
the corresponding element in the destination vector is set to all ones.
Otherwise, it is set to all zeros.--*/
uint32x2_t vcage_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcageq_f32 (float32x4_t __a, float32x4_t __b);
/*--7、Vector compare absolute less-than or equal(正常指令):
vcale -> ri = |ai| <= |bi| ? 1...1:0...0;
compares the absolute value of each element in a vector with the absolute value of the 
corresponding element of a second vector. If it is less than or equal to it, 
the corresponding element in the destination vector is set to all ones.
Otherwise, it is set to all zeros--*/
uint32x2_t vcale_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcaleq_f32 (float32x4_t __a, float32x4_t __b);
/*--8、Vector compare absolute greater-than (normal instruction):
vcagt -> ri = |ai| > |bi| ? 1...1:0...0;
compares the absolute value of each element in a vector with the absolute value of the
corresponding element of a second vector. If it is greater than it, 
the corresponding element in the destination vector is set to all ones. 
Otherwise, it is set to all zeros.--*/
uint32x2_t vcagt_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcagtq_f32 (float32x4_t __a, float32x4_t __b);
/*--9、Vector compare absolute less-than(正常指令):
vcalt -> ri = |ai| < |bi| ? 1...1:0...0;
compares the absolute value of each element in a vector with the absolute value of the
corresponding element of a second vector.If it is less than it, the corresponding 
element in the destination vector is set to all ones. Otherwise,it is set to all zeros--*/
uint32x2_t vcalt_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcaltq_f32 (float32x4_t __a, float32x4_t __b);
/**********************************************Vector test bits*************************/
/*--正常指令,vtst -> ri = (ai & bi != 0) ? 1...1:0...0;
bitwise logical ANDs each element in a vector with the corresponding element of a second 
vector.If the result is not zero, the corresponding element in the destination vector 
is set to all ones. Otherwise, it is set to all zeros--*/
uint8x8_t vtst_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vtst_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vtst_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vtst_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vtst_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vtst_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x8_t vtst_p8 (poly8x8_t __a, poly8x8_t __b);
uint8x16_t vtstq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vtstq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vtstq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vtstq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vtstq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vtstq_u32 (uint32x4_t __a, uint32x4_t __b);
uint8x16_t vtstq_p8 (poly8x16_t __a, poly8x16_t __b);
/**********************************************Absolute difference**********************/
/*--1、Absolute difference between the arguments(正常指令): vabd -> ri = |ai - bi|;
returns the absolute values of the results--*/
int8x8_t vabd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vabd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vabd_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vabd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vabd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vabd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vabd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vabdq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vabdq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vabdq_s32 (int32x4_t __a, int32x4_t __b);
float32x4_t vabdq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vabdq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vabdq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vabdq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--2、Absolute difference - long(長指令): vabdl -> ri = |ai - bi|; 
The elements in the result vector are wider--*/
int16x8_t vabdl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vabdl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vabdl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vabdl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vabdl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vabdl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3、Absolute difference and accumulate: vaba -> ri = ai + |bi - ci|;--*/
int8x8_t vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
uint8x8_t vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
uint8x16_t vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--4、Absolute difference and accumulate - long: vabal -> ri = ai + |bi - ci|; 
The elements in the result are wider--*/
int16x8_t vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/***********************************************Max*************************************/
/*--正常指令, vmax -> ri = ai >= bi ? ai : bi; returns the larger of each pair--*/
int8x8_t vmax_s8 (int8x8_t __a, int8x8_t __b);//_mm_max_epi8
int16x4_t vmax_s16 (int16x4_t __a, int16x4_t __b);//_mm_max_epi16
int32x2_t vmax_s32 (int32x2_t __a, int32x2_t __b);//_mm_max_epi32
float32x2_t vmax_f32 (float32x2_t __a, float32x2_t __b);//_mm_max_ps
uint8x8_t vmax_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_max_epu8
uint16x4_t vmax_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_max_epu16
uint32x2_t vmax_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_max_epu32
int8x16_t vmaxq_s8 (int8x16_t __a, int8x16_t __b);//_mm_max_epi8
int16x8_t vmaxq_s16 (int16x8_t __a, int16x8_t __b);//_mm_max_epi16
int32x4_t vmaxq_s32 (int32x4_t __a, int32x4_t __b);//_mm_max_epi32
float32x4_t vmaxq_f32 (float32x4_t __a, float32x4_t __b);//_mm_max_ps
uint8x16_t vmaxq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_max_epu8
uint16x8_t vmaxq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_max_epu16
uint32x4_t vmaxq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_max_epu32
/****************************************************Min********************************/
/*--正常指令, vmin -> ri = ai >= bi ? bi : ai; returns the smaller of each pair--*/
int8x8_t vmin_s8 (int8x8_t __a, int8x8_t __b);//_mm_min_epi8
int16x4_t vmin_s16 (int16x4_t __a, int16x4_t __b);//_mm_min_epi16
int32x2_t vmin_s32 (int32x2_t __a, int32x2_t __b);//_mm_min_epi32
float32x2_t vmin_f32 (float32x2_t __a, float32x2_t __b);//_mm_min_ps
uint8x8_t vmin_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_min_epu8
uint16x4_t vmin_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_min_epu16
uint32x2_t vmin_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_min_epu32
int8x16_t vminq_s8 (int8x16_t __a, int8x16_t __b);//_mm_min_epi8
int16x8_t vminq_s16 (int16x8_t __a, int16x8_t __b);//_mm_min_epi16
int32x4_t vminq_s32 (int32x4_t __a, int32x4_t __b);//_mm_min_epi32
float32x4_t vminq_f32 (float32x4_t __a, float32x4_t __b);//_mm_min_ps
uint8x16_t vminq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_min_epu8
uint16x8_t vminq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_min_epu16
uint32x4_t vminq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_min_epu32
/*******************************************Pairwise addition***************************/
/*--1、Pairwise add(正常指令): 
vpadd -> r0 = a0 + a1, ..., r3 = a6 + a7, r4 = b0 + b1, ..., r7 = b6 + b7
adds adjacent pairs of elements of two vectors, 
and places the results in the destination vector.--*/
//r0 = a0 + a1, ...,r3 = a6 + a7, r4 = b0 + b1, ...,r7 = b6 + b7
int8x8_t vpadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpadd_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpadd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpadd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpadd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpadd_u32 (uint32x2_t __a, uint32x2_t __b);
/*--2、Long pairwise add: vpaddl -> r0 = a0 + a1, ..., r3 = a6 + a7;
adds adjacent pairs of elements of a vector. The elements are sign extended or zero
extended to twice their original width, so the sums cannot overflow, and the widened
results are placed in the destination vector (which has half as many lanes)--*/
int16x4_t vpaddl_s8 (int8x8_t __a);
int32x2_t vpaddl_s16 (int16x4_t __a);
int64x1_t vpaddl_s32 (int32x2_t __a);
uint16x4_t vpaddl_u8 (uint8x8_t __a);
uint32x2_t vpaddl_u16 (uint16x4_t __a);
uint64x1_t vpaddl_u32 (uint32x2_t __a);
int16x8_t vpaddlq_s8 (int8x16_t __a);
int32x4_t vpaddlq_s16 (int16x8_t __a);
int64x2_t vpaddlq_s32 (int32x4_t __a);
uint16x8_t vpaddlq_u8 (uint8x16_t __a);
uint32x4_t vpaddlq_u16 (uint16x8_t __a);
uint64x2_t vpaddlq_u32 (uint32x4_t __a);
/*--3、Long pairwise add and accumulate: 
vpadal -> r0 = a0 + (b0 + b1), ..., r3 = a3 + (b6 + b7);
adds adjacent pairs of elements in the second vector, sign extends or zero extends the
results to twice the original width.  It then accumulates this with the corresponding 
element in the first vector and places the final results in the destination vector--*/
int16x4_t vpadal_s8 (int16x4_t __a, int8x8_t __b);
int32x2_t vpadal_s16 (int32x2_t __a, int16x4_t __b);
int64x1_t vpadal_s32 (int64x1_t __a, int32x2_t __b);
uint16x4_t vpadal_u8 (uint16x4_t __a, uint8x8_t __b);
uint32x2_t vpadal_u16 (uint32x2_t __a, uint16x4_t __b);
uint64x1_t vpadal_u32 (uint64x1_t __a, uint32x2_t __b);
int16x8_t vpadalq_s8 (int16x8_t __a, int8x16_t __b);
int32x4_t vpadalq_s16 (int32x4_t __a, int16x8_t __b);
int64x2_t vpadalq_s32 (int64x2_t __a, int32x4_t __b);
uint16x8_t vpadalq_u8 (uint16x8_t __a, uint8x16_t __b);
uint32x4_t vpadalq_u16 (uint32x4_t __a, uint16x8_t __b);
uint64x2_t vpadalq_u32 (uint64x2_t __a, uint32x4_t __b);
/**********************************************Folding maximum**************************/
/*--Normal (non-saturating) instruction, 
vpmax -> r0 = a0 >= a1 ? a0 : a1, ..., r4 = b0 >= b1 ? b0 : b1, ...;
compares adjacent pairs of elements, and copies the larger of each pair into the 
destination vector. The maximums from each pair of the first input vector are stored in 
the lower half of the destination vector. The maximums from each pair of the second input 
vector are stored in the higher half of the destination vector--*/
int8x8_t vpmax_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpmax_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpmax_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpmax_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpmax_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpmax_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpmax_u32 (uint32x2_t __a, uint32x2_t __b);
/***************************************************Folding minimum*********************/
/*--Normal (non-saturating) instruction, 
vpmin -> r0 = a0 >= a1 ? a1 : a0, ..., r4 = b0 >= b1 ? b1 : b0, ...;
compares adjacent pairs of elements, and copies the smaller of each pair into the 
destination vector. The minimums from each pair of the first input vector are stored in 
the lower half of the destination vector. The minimums from each pair of the second 
input vector are stored in the higher half of the destination vector.--*/
int8x8_t vpmin_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpmin_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpmin_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpmin_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpmin_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpmin_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpmin_u32 (uint32x2_t __a, uint32x2_t __b);
/***************************************************Reciprocal**************************/
/*--1、Reciprocal step (floating-point, not a saturating instruction): 
vrecps -> ri = 2 - ai * bi;
performs a Newton-Raphson step for finding the reciprocal. It multiplies the elements of
one vector by the corresponding elements of another vector, subtracts each of the results
from 2, and places the final results into the elements of the destination vector--*/
float32x2_t vrecps_f32 (float32x2_t __a, float32x2_t __b);
float32x4_t vrecpsq_f32 (float32x4_t __a, float32x4_t __b);
/*--2、Reciprocal square root step (floating-point, not a saturating instruction): 
vrsqrts -> ri = (3 - ai * bi) / 2;
performs a Newton-Raphson step for finding the reciprocal square root. 
It multiplies the elements of one vector by the corresponding elements of another vector, 
subtracts each of the results from 3, divides these results by two, and places 
the final results into the elements of the destination vector--*/
float32x2_t vrsqrts_f32 (float32x2_t __a, float32x2_t __b);
float32x4_t vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b);
/************************************************Shifts by signed variable**************/
/*--1、Vector shift left (normal instruction, no saturation): 
vshl -> ri = ai << bi; (negative values shift right)
left shifts each element in a vector by an amount specified in the corresponding element 
in the second input vector. The shift amount is the signed integer value of the least 
significant byte of the element in the second input vector. The bits shifted out of each
element are lost. If the signed integer value is negative, it results in a right shift--*/
int8x8_t vshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--2、Vector saturating shift left (saturating instruction): 
vqshl -> ri = sat(ai << bi);(negative values shift right)
If the shift value is positive, the operation is a left shift. Otherwise, it is a 
truncating right shift. left shifts each element in a vector of integers and places
the results in the destination vector. It is similar to VSHL. 
The difference is that results which overflow the element type are saturated to its
range, and the sticky QC flag is set if saturation occurs--*/
int8x8_t vqshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vqshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vqshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vqshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vqshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vqshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vqshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vqshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vqshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vqshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--3、Vector rounding shift left (normal instruction, no saturation): 
vrshl -> ri = ai << bi;(negative values shift right)
If the shift value is positive, the operation is a left shift. Otherwise, it is a
rounding right shift. left shifts each element in a vector of integers and places
the results in the destination vector. It is similar to VSHL. 
The difference is that the shifted value is then rounded.--*/
int8x8_t vrshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vrshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vrshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vrshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vrshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vrshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vrshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vrshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vrshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vrshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vrshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vrshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vrshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vrshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vrshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vrshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--4、Vector saturating rounding shift left (saturating instruction):
vqrshl -> ri = sat(ai << bi);(negative values shift right, with rounding)
left shifts each element in a vector of integers and places the results in the 
destination vector. It is similar to VSHL. The difference is that the shifted value
is rounded, results that overflow the element type are saturated, and the sticky QC
flag is set if saturation occurs.--*/
int8x8_t vqrshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vqrshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqrshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqrshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqrshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vqrshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vqrshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vqrshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vqrshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vqrshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqrshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqrshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqrshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vqrshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vqrshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vqrshlq_u64 (uint64x2_t __a, int64x2_t __b);
/****************************************Shifts by a constant***************************/
/*--1、Vector shift right by constant: vshr -> ri = ai >> b;The results are truncated.
right shifts each element in a vector by an immediate value, 
and places the results in the destination vector.--*/
int8x8_t vshr_n_s8 (int8x8_t __a, const int __b);
int16x4_t vshr_n_s16 (int16x4_t __a, const int __b);
int32x2_t vshr_n_s32 (int32x2_t __a, const int __b);
int64x1_t vshr_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vshr_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vshr_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vshr_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vshr_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vshrq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vshrq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vshrq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vshrq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vshrq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vshrq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vshrq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vshrq_n_u64 (uint64x2_t __a, const int __b);
/*--2、Vector shift left by constant: vshl -> ri = ai << b;
left shifts each element in a vector by an immediate value, and places the results in the 
destination vector. The bits shifted out of the left of each element are lost--*/
int8x8_t vshl_n_s8 (int8x8_t __a, const int __b);
int16x4_t vshl_n_s16 (int16x4_t __a, const int __b);
int32x2_t vshl_n_s32 (int32x2_t __a, const int __b);
int64x1_t vshl_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vshl_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vshl_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vshl_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vshl_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vshlq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vshlq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vshlq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vshlq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vshlq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vshlq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vshlq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vshlq_n_u64 (uint64x2_t __a, const int __b);
/*--3、Vector rounding shift right by constant: vrshr -> ri = ai >> b;
right shifts each element in a vector by an immediate value, and places the results
in the destination vector. The shifted values are rounded.--*/
int8x8_t vrshr_n_s8 (int8x8_t __a, const int __b);
int16x4_t vrshr_n_s16 (int16x4_t __a, const int __b);
int32x2_t vrshr_n_s32 (int32x2_t __a, const int __b);
int64x1_t vrshr_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vrshr_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vrshr_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vrshr_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vrshr_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vrshrq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vrshrq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vrshrq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vrshrq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vrshrq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vrshrq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vrshrq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vrshrq_n_u64 (uint64x2_t __a, const int __b);
/*--4、Vector shift right by constant and accumulate: vsra -> ri = ai + (bi >> c); 
The shifted values are truncated. right shifts each element in the second vector by an
immediate value, and adds the results to the corresponding elements of the first
vector (the accumulator, which is not shifted).--*/
int8x8_t vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
int8x16_t vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
/*--5、Vector rounding shift right by constant and accumulate: 
vrsra -> ri = ai + round(bi >> c);
The shifted values are rounded. right shifts each element in the second vector by an
immediate value, and adds the rounded results to the corresponding elements of the
first vector (the accumulator, which is not shifted).--*/
int8x8_t vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
int8x16_t vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
/*--6、Vector saturating shift left by constant: vqshl -> ri = sat(ai << b); 
left shifts each element in a vector of integers by an immediate value, and places the 
results in the destination vector,and the sticky QC flag is set if saturation occurs.--*/
int8x8_t vqshl_n_s8 (int8x8_t __a, const int __b);
int16x4_t vqshl_n_s16 (int16x4_t __a, const int __b);
int32x2_t vqshl_n_s32 (int32x2_t __a, const int __b);
int64x1_t vqshl_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vqshl_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vqshl_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vqshl_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vqshl_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vqshlq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vqshlq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vqshlq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vqshlq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vqshlq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vqshlq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vqshlq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vqshlq_n_u64 (uint64x2_t __a, const int __b);
/*--7、Vector signed->unsigned saturating shift left by constant: 
vqshlu -> ri = sat_unsigned(ai << b); 
left shifts each element in a vector of signed integers by an immediate value and places
the results in the destination vector as unsigned values. Results outside the unsigned
range are saturated (negative inputs saturate to 0), and the sticky QC flag is set if
saturation occurs.--*/
uint8x8_t vqshlu_n_s8 (int8x8_t __a, const int __b);
uint16x4_t vqshlu_n_s16 (int16x4_t __a, const int __b);
uint32x2_t vqshlu_n_s32 (int32x2_t __a, const int __b);
uint64x1_t vqshlu_n_s64 (int64x1_t __a, const int __b);
uint8x16_t vqshluq_n_s8 (int8x16_t __a, const int __b);
uint16x8_t vqshluq_n_s16 (int16x8_t __a, const int __b);
uint32x4_t vqshluq_n_s32 (int32x4_t __a, const int __b);
uint64x2_t vqshluq_n_s64 (int64x2_t __a, const int __b);
/*--8、Vector narrowing shift right by constant: vshrn -> ri = ai >> b;
The results are truncated.right shifts each element in the input vector by an 
immediate value. It then narrows the result by storing only the least significant
half of each element into the destination vector.--*/
int8x8_t vshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vshrn_n_u64 (uint64x2_t __a, const int __b);
/*--9、Vector signed->unsigned narrowing saturating shift right by constant: 
vqshrun -> ri = ai >> b; 
Results are truncated. right shifts each element in a quadword vector of integers by an
immediate value, and places the results in a doubleword vector. The results are unsigned, 
although the operands are signed. The sticky QC flag is set if saturation occurs.--*/
uint8x8_t vqshrun_n_s16 (int16x8_t __a, const int __b);
uint16x4_t vqshrun_n_s32 (int32x4_t __a, const int __b);
uint32x2_t vqshrun_n_s64 (int64x2_t __a, const int __b);
/*--10、Vector signed->unsigned rounding narrowing saturating shift right by constant: 
vqrshrun -> ri = ai >> b; Results are rounded. right shifts each element in a quadword 
vector of integers by an immediate value, and places the rounded results in a doubleword 
vector. The results are unsigned, although the operands are signed.--*/
uint8x8_t vqrshrun_n_s16 (int16x8_t __a, const int __b);
uint16x4_t vqrshrun_n_s32 (int32x4_t __a, const int __b);
uint32x2_t vqrshrun_n_s64 (int64x2_t __a, const int __b);
/*--11、Vector narrowing saturating shift right by constant: vqshrn -> ri = ai >> b; 
Results are truncated. right shifts each element in a quadword vector of integers by an 
immediate value, and places the results in a doubleword vector, 
and the sticky QC flag is set if saturation occurs.--*/
int8x8_t vqshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vqshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vqshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vqshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vqshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vqshrn_n_u64 (uint64x2_t __a, const int __b);
/*--12、Vector rounding narrowing shift right by constant: vrshrn -> ri = ai >> b; 
The results are rounded. right shifts each element in a vector by an immediate value,
and places the rounded,narrowed results in the destination vector.--*/
int8x8_t vrshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vrshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vrshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vrshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vrshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vrshrn_n_u64 (uint64x2_t __a, const int __b);
/*--13、Vector rounding narrowing saturating shift right by constant:
vqrshrn -> ri = ai >> b;
Results are rounded. right shifts each element in a quadword vector of integers by an 
immediate value,and places the rounded,narrowed results in a doubleword vector. 
The sticky QC flag is set if saturation occurs.--*/
int8x8_t vqrshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vqrshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vqrshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vqrshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vqrshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vqrshrn_n_u64 (uint64x2_t __a, const int __b);
/*--14、Vector widening shift left by constant: vshll -> ri = ai << b; 
left shifts each element in a vector of integers by an immediate value, 
and place the results in the destination vector. Bits shifted out of the left of each
element are lost and values are sign extended or zero extended.--*/
int16x8_t vshll_n_s8 (int8x8_t __a, const int __b);
int32x4_t vshll_n_s16 (int16x4_t __a, const int __b);
int64x2_t vshll_n_s32 (int32x2_t __a, const int __b);
uint16x8_t vshll_n_u8 (uint8x8_t __a, const int __b);
uint32x4_t vshll_n_u16 (uint16x4_t __a, const int __b);
uint64x2_t vshll_n_u32 (uint32x2_t __a, const int __b);
/********************************************Shifts with insert*************************/
/*--1、Vector shift right and insert: vsri -> the low bits of ri come from (bi >> c),
the c most significant bits of ri are kept from ai (e.g. for c = 2 the two most
significant bits are unchanged). right shifts each element in the second input vector 
by an immediate value, and inserts the results in the destination vector. It does not 
affect the highest c significant bits of the elements in the destination register.
Bits shifted out of the right of each element are lost. The first input vector holds
the elements of the destination vector before the operation is performed.--*/
int8x8_t vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
poly8x16_t vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);
/*--2、Vector shift left and insert: vsli -> the high bits of ri come from (bi << c),
the c least significant bits of ri are kept from ai (e.g. for c = 1 the least
significant bit is unchanged). left shifts each element in the second input 
vector by an immediate value, and inserts the results in the destination vector.
It does not affect the lowest c significant bits of the elements in the destination 
register. Bits shifted out of the left of each element are lost. The first input vector
holds the elements of the destination vector before the operation is performed.--*/
int8x8_t vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
poly8x16_t vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);
/*****************************************Absolute value********************************/
/*--1、Absolute(正常指令): vabs -> ri = |ai|;
returns the absolute value of each element in a vector.--*/
int8x8_t vabs_s8 (int8x8_t __a);//_mm_abs_epi8
int16x4_t vabs_s16 (int16x4_t __a);//_mm_abs_epi16
int32x2_t vabs_s32 (int32x2_t __a);//_mm_abs_epi32
float32x2_t vabs_f32 (float32x2_t __a);
int8x16_t vabsq_s8 (int8x16_t __a);//_mm_abs_epi8
int16x8_t vabsq_s16 (int16x8_t __a);//_mm_abs_epi16
int32x4_t vabsq_s32 (int32x4_t __a);//_mm_abs_epi32
float32x4_t vabsq_f32 (float32x4_t __a);
/*--2、Saturating absolute(飽和指令): vqabs -> ri = sat(|ai|);
returns the absolute value of each element in a vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int8x8_t vqabs_s8 (int8x8_t __a);
int16x4_t vqabs_s16 (int16x4_t __a);
int32x2_t vqabs_s32 (int32x2_t __a);
int8x16_t vqabsq_s8 (int8x16_t __a);
int16x8_t vqabsq_s16 (int16x8_t __a);
int32x4_t vqabsq_s32 (int32x4_t __a);
/***************************************************Negation****************************/
/*--1、Negate(正常指令): vneg -> ri = -ai; negates each element in a vector.--*/
int8x8_t vneg_s8 (int8x8_t __a);
int16x4_t vneg_s16 (int16x4_t __a);
int32x2_t vneg_s32 (int32x2_t __a);
float32x2_t vneg_f32 (float32x2_t __a);
int8x16_t vnegq_s8 (int8x16_t __a);
int16x8_t vnegq_s16 (int16x8_t __a);
int32x4_t vnegq_s32 (int32x4_t __a);
float32x4_t vnegq_f32 (float32x4_t __a);
/*--2、Saturating Negate: vqneg -> ri = sat(-ai);
negates each element in a vector. If any of the results overflow, 
they are saturated and the sticky QC flag is set.--*/
int8x8_t vqneg_s8 (int8x8_t __a);
int16x4_t vqneg_s16 (int16x4_t __a);
int32x2_t vqneg_s32 (int32x2_t __a);
int8x16_t vqnegq_s8 (int8x16_t __a);
int16x8_t vqnegq_s16 (int16x8_t __a);
int32x4_t vqnegq_s32 (int32x4_t __a);
/********************************************Logical operations*************************/
/*--1、Bitwise not(正常指令): vmvn -> ri = ~ai; 
performs a bitwise inversion of each element from the input vector.--*/
int8x8_t vmvn_s8 (int8x8_t __a);
int16x4_t vmvn_s16 (int16x4_t __a);
int32x2_t vmvn_s32 (int32x2_t __a);
uint8x8_t vmvn_u8 (uint8x8_t __a);
uint16x4_t vmvn_u16 (uint16x4_t __a);
uint32x2_t vmvn_u32 (uint32x2_t __a);
poly8x8_t vmvn_p8 (poly8x8_t __a);
int8x16_t vmvnq_s8 (int8x16_t __a);
int16x8_t vmvnq_s16 (int16x8_t __a);
int32x4_t vmvnq_s32 (int32x4_t __a);
uint8x16_t vmvnq_u8 (uint8x16_t __a);
uint16x8_t vmvnq_u16 (uint16x8_t __a);
uint32x4_t vmvnq_u32 (uint32x4_t __a);
poly8x16_t vmvnq_p8 (poly8x16_t __a);
/*--2、Bitwise and(正常指令): vand -> ri = ai & bi; performs a bitwise AND between 
corresponding elements of the input vectors.--*/
int8x8_t vand_s8 (int8x8_t __a, int8x8_t __b);//_mm_and_si128
int16x4_t vand_s16 (int16x4_t __a, int16x4_t __b);//_mm_and_si128
int32x2_t vand_s32 (int32x2_t __a, int32x2_t __b);//_mm_and_si128
uint8x8_t vand_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_and_si128
uint16x4_t vand_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_and_si128
uint32x2_t vand_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_and_si128
int64x1_t vand_s64 (int64x1_t __a, int64x1_t __b);//_mm_and_si128
uint64x1_t vand_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_and_si128
int8x16_t vandq_s8 (int8x16_t __a, int8x16_t __b);//_mm_and_si128
int16x8_t vandq_s16 (int16x8_t __a, int16x8_t __b);//_mm_and_si128
int32x4_t vandq_s32 (int32x4_t __a, int32x4_t __b);//_mm_and_si128
int64x2_t vandq_s64 (int64x2_t __a, int64x2_t __b);//_mm_and_si128
uint8x16_t vandq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_and_si128
uint16x8_t vandq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_and_si128
uint32x4_t vandq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_and_si128
uint64x2_t vandq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_and_si128
/*--3、Bitwise or(正常指令): vorr -> ri = ai | bi; performs a bitwise OR between
corresponding elements of the input vectors.--*/
int8x8_t vorr_s8 (int8x8_t __a, int8x8_t __b);//_mm_or_si128
int16x4_t vorr_s16 (int16x4_t __a, int16x4_t __b);//_mm_or_si128
int32x2_t vorr_s32 (int32x2_t __a, int32x2_t __b);//_mm_or_si128
uint8x8_t vorr_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_or_si128
uint16x4_t vorr_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_or_si128
uint32x2_t vorr_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_or_si128
int64x1_t vorr_s64 (int64x1_t __a, int64x1_t __b);//_mm_or_si128
uint64x1_t vorr_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_or_si128
int8x16_t vorrq_s8 (int8x16_t __a, int8x16_t __b);//_mm_or_si128
int16x8_t vorrq_s16 (int16x8_t __a, int16x8_t __b);//_mm_or_si128
int32x4_t vorrq_s32 (int32x4_t __a, int32x4_t __b);//_mm_or_si128
int64x2_t vorrq_s64 (int64x2_t __a, int64x2_t __b);//_mm_or_si128
uint8x16_t vorrq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_or_si128
uint16x8_t vorrq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_or_si128
uint32x4_t vorrq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_or_si128
uint64x2_t vorrq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_or_si128
/*--4、Bitwise exclusive or (EOR or XOR)(正常指令): veor -> ri = ai ^ bi; 
performs a bitwise exclusive-OR between corresponding elements of the input vectors.--*/
int8x8_t veor_s8 (int8x8_t __a, int8x8_t __b);//_mm_xor_si128
int16x4_t veor_s16 (int16x4_t __a, int16x4_t __b);//_mm_xor_si128
int32x2_t veor_s32 (int32x2_t __a, int32x2_t __b);//_mm_xor_si128
uint8x8_t veor_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_xor_si128
uint16x4_t veor_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_xor_si128
uint32x2_t veor_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_xor_si128
int64x1_t veor_s64 (int64x1_t __a, int64x1_t __b);//_mm_xor_si128
uint64x1_t veor_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_xor_si128
int8x16_t veorq_s8 (int8x16_t __a, int8x16_t __b);//_mm_xor_si128
int16x8_t veorq_s16 (int16x8_t __a, int16x8_t __b);//_mm_xor_si128
int32x4_t veorq_s32 (int32x4_t __a, int32x4_t __b);//_mm_xor_si128
int64x2_t veorq_s64 (int64x2_t __a, int64x2_t __b);//_mm_xor_si128
uint8x16_t veorq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_xor_si128
uint16x8_t veorq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_xor_si128
uint32x4_t veorq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_xor_si128
uint64x2_t veorq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_xor_si128
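/*--5、Bit Clear (normal instruction): vbic -> ri = ai & ~bi;
VBIC (Vector Bitwise Clear) ANDs each element of the first vector with the bitwise
complement of the corresponding element of the second vector, and places the results in
the destination register. Note that the operand order is the reverse of
_mm_andnot_si128(x, y), which computes ~x & y.--*/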
int8x8_t vbic_s8 (int8x8_t __a, int8x8_t __b);//_mm_andnot_si128
int16x4_t vbic_s16 (int16x4_t __a, int16x4_t __b);//_mm_andnot_si128
int32x2_t vbic_s32 (int32x2_t __a, int32x2_t __b);//_mm_andnot_si128
uint8x8_t vbic_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_andnot_si128
uint16x4_t vbic_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_andnot_si128
uint32x2_t vbic_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_andnot_si128
int64x1_t vbic_s64 (int64x1_t __a, int64x1_t __b);//_mm_andnot_si128
uint64x1_t vbic_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_andnot_si128
int8x16_t vbicq_s8 (int8x16_t __a, int8x16_t __b);//_mm_andnot_si128
int16x8_t vbicq_s16 (int16x8_t __a, int16x8_t __b);//_mm_andnot_si128
int32x4_t vbicq_s32 (int32x4_t __a, int32x4_t __b);//_mm_andnot_si128
int64x2_t vbicq_s64 (int64x2_t __a, int64x2_t __b);//_mm_andnot_si128
uint8x16_t vbicq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_andnot_si128
uint16x8_t vbicq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_andnot_si128
uint32x4_t vbicq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_andnot_si128
uint64x2_t vbicq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_andnot_si128
/*--6、Bitwise OR complement(正常指令): vorn -> ri = ai | (~bi); 
performs a bitwise logical OR NOT operation 
between values in two registers, and places the results in the destination register.--*/
int8x8_t vorn_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vorn_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vorn_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vorn_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vorn_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vorn_u32 (uint32x2_t __a, uint32x2_t __b);
int64x1_t vorn_s64 (int64x1_t __a, int64x1_t __b);
uint64x1_t vorn_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vornq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vornq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vornq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vornq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vornq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vornq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vornq_u32 (uint32x4_t __a, uint32x4_t __b);
uint64x2_t vornq_u64 (uint64x2_t __a, uint64x2_t __b);
/****************************************Count leading sign bits************************/
/*--Normal instruction, vcls -> counts, for each element in a vector, the number of
consecutive bits following the most significant (sign) bit that are the same as that
bit, and places the count in the result vector.--*/
int8x8_t vcls_s8 (int8x8_t __a);
int16x4_t vcls_s16 (int16x4_t __a);
int32x2_t vcls_s32 (int32x2_t __a);
int8x16_t vclsq_s8 (int8x16_t __a);
int16x8_t vclsq_s16 (int16x8_t __a);
int32x4_t vclsq_s32 (int32x4_t __a);
/*******************************************Count leading zeros*************************/
/*--正常指令, vclz -> ; counts the number of consecutive zeros, starting from the most
significant bit, in each element in a vector, and places the count in result vector.--*/
int8x8_t vclz_s8 (int8x8_t __a);
int16x4_t vclz_s16 (int16x4_t __a);
int32x2_t vclz_s32 (int32x2_t __a);
uint8x8_t vclz_u8 (uint8x8_t __a);
uint16x4_t vclz_u16 (uint16x4_t __a);
uint32x2_t vclz_u32 (uint32x2_t __a);
int8x16_t vclzq_s8 (int8x16_t __a);
int16x8_t vclzq_s16 (int16x8_t __a);
int32x4_t vclzq_s32 (int32x4_t __a);
uint8x16_t vclzq_u8 (uint8x16_t __a);
uint16x8_t vclzq_u16 (uint16x8_t __a);
uint32x4_t vclzq_u32 (uint32x4_t __a);
/*******************************************Count number of set bits********************/
/*--正常指令, vcnt -> counts the number of bits that are one in each element in a vector, 
and places the count in the result vector.--*/
int8x8_t vcnt_s8 (int8x8_t __a);
uint8x8_t vcnt_u8 (uint8x8_t __a);
poly8x8_t vcnt_p8 (poly8x8_t __a);
int8x16_t vcntq_s8 (int8x16_t __a);
uint8x16_t vcntq_u8 (uint8x16_t __a);
poly8x16_t vcntq_p8 (poly8x16_t __a);
/*****************************************Reciprocal estimate***************************/
/*--正常指令, vrecpe -> ; finds an approximate reciprocal of each element in a vector, 
and places it in the result vector.--*/
float32x2_t vrecpe_f32 (float32x2_t __a);
uint32x2_t vrecpe_u32 (uint32x2_t __a);
float32x4_t vrecpeq_f32 (float32x4_t __a);
uint32x4_t vrecpeq_u32 (uint32x4_t __a);
/****************************************Reciprocal square-root estimate****************/
/*--正常指令, vrsqrte -> ; finds an approximate reciprocal square root of each element
in a vector, and places it in the return vector.--*/
float32x2_t vrsqrte_f32 (float32x2_t __a);
uint32x2_t vrsqrte_u32 (uint32x2_t __a);
float32x4_t vrsqrteq_f32 (float32x4_t __a);
uint32x4_t vrsqrteq_u32 (uint32x4_t __a);
/*******************************************Get lanes from a vector*********************/
/*--vmov -> r = a[b]; returns the value from the specified lane of a vector.
Extract lanes from a vector and put into a register. 
These intrinsics extract a single lane (element) from a vector.--*/
int8_t vget_lane_s8 (int8x8_t __a, const int __b);//_mm_extract_epi8
int16_t vget_lane_s16 (int16x4_t __a, const int __b);//_mm_extract_epi16
int32_t vget_lane_s32 (int32x2_t __a, const int __b);//_mm_extract_epi32
float32_t vget_lane_f32 (float32x2_t __a, const int __b);
uint8_t vget_lane_u8 (uint8x8_t __a, const int __b);//_mm_extract_epi8
uint16_t vget_lane_u16 (uint16x4_t __a, const int __b);//_mm_extract_epi16
uint32_t vget_lane_u32 (uint32x2_t __a, const int __b);//_mm_extract_epi32
poly8_t vget_lane_p8 (poly8x8_t __a, const int __b);//_mm_extract_epi8
poly16_t vget_lane_p16 (poly16x4_t __a, const int __b);//_mm_extract_epi16
int64_t vget_lane_s64 (int64x1_t __a, const int __b);//_mm_extract_epi64
uint64_t vget_lane_u64 (uint64x1_t __a, const int __b);//_mm_extract_epi64
int8_t vgetq_lane_s8 (int8x16_t __a, const int __b);//_mm_extract_epi8
int16_t vgetq_lane_s16 (int16x8_t __a, const int __b);//_mm_extract_epi16
int32_t vgetq_lane_s32 (int32x4_t __a, const int __b);//_mm_extract_epi32
float32_t vgetq_lane_f32 (float32x4_t __a, const int __b);
uint8_t vgetq_lane_u8 (uint8x16_t __a, const int __b);//_mm_extract_epi8
uint16_t vgetq_lane_u16 (uint16x8_t __a, const int __b);//_mm_extract_epi16
uint32_t vgetq_lane_u32 (uint32x4_t __a, const int __b);//_mm_extract_epi32
poly8_t vgetq_lane_p8 (poly8x16_t __a, const int __b);//_mm_extract_epi8
poly16_t vgetq_lane_p16 (poly16x8_t __a, const int __b);//_mm_extract_epi16
int64_t vgetq_lane_s64 (int64x2_t __a, const int __b);//_mm_extract_epi64
uint64_t vgetq_lane_u64 (uint64x2_t __a, const int __b);//_mm_extract_epi64
/*********************************************Set lanes in a vector********************/
/*--vmov -> r = b, r[c] = a; sets the value of the specified lane of a vector. It returns
the vector with the new value. Load a single lane of a vector from a literal. These
intrinsics set a single lane (element) within a vector.--*/
int8x8_t vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c);
int16x4_t vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c);
int32x2_t vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c);
float32x2_t vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c);
uint8x8_t vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c);
uint32x2_t vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c);
poly8x8_t vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c);
int64x1_t vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c);
uint64x1_t vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c);
int8x16_t vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c);
int16x8_t vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c);
int32x4_t vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c);
float32x4_t vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c);
uint8x16_t vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c);
poly8x16_t vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c);
int64x2_t vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c);
uint64x2_t vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c);
/****************************************Create vector from literal bit pattern*********/
/*--vmov -> ; creates a vector from a 64-bit pattern. 
Initialize a vector from a literal bit pattern.--*/
int8x8_t vcreate_s8 (uint64_t __a);//_mm_loadl_epi64
int16x4_t vcreate_s16 (uint64_t __a);//_mm_loadl_epi64
int32x2_t vcreate_s32 (uint64_t __a);//_mm_loadl_epi64
int64x1_t vcreate_s64 (uint64_t __a);//_mm_loadl_epi64
float32x2_t vcreate_f32 (uint64_t __a);
uint8x8_t vcreate_u8 (uint64_t __a);//_mm_loadl_epi64
uint16x4_t vcreate_u16 (uint64_t __a);//_mm_loadl_epi64
uint32x2_t vcreate_u32 (uint64_t __a);//_mm_loadl_epi64
uint64x1_t vcreate_u64 (uint64_t __a);//_mm_loadl_epi64
poly8x8_t vcreate_p8 (uint64_t __a);//_mm_loadl_epi64
poly16x4_t vcreate_p16 (uint64_t __a);//_mm_loadl_epi64
/*****************************************Set all lanes to the same value***************/
/*--1、Load all lanes of vector to the same literal value: vdup/vmov -> ri = a; 
duplicates a scalar into every element of the destination vector. 
Load all lanes of vector to the same literal value--*/
int8x8_t vdup_n_s8 (int8_t __a);//_mm_set1_epi8
int16x4_t vdup_n_s16 (int16_t __a);//_mm_set1_epi16
int32x2_t vdup_n_s32 (int32_t __a);//_mm_set1_epi32
float32x2_t vdup_n_f32 (float32_t __a);//_mm_set1_ps
uint8x8_t vdup_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x4_t vdup_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x2_t vdup_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x8_t vdup_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x4_t vdup_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x1_t vdup_n_s64 (int64_t __a);
uint64x1_t vdup_n_u64 (uint64_t __a);
int8x16_t vdupq_n_s8 (int8_t __a);//_mm_set1_epi8
int16x8_t vdupq_n_s16 (int16_t __a);//_mm_set1_epi16
int32x4_t vdupq_n_s32 (int32_t __a);//_mm_set1_epi32
float32x4_t vdupq_n_f32 (float32_t __a);//_mm_set1_ps
uint8x16_t vdupq_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x8_t vdupq_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x4_t vdupq_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x16_t vdupq_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x8_t vdupq_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x2_t vdupq_n_s64 (int64_t __a);
uint64x2_t vdupq_n_u64 (uint64_t __a);
int8x8_t vmov_n_s8 (int8_t __a);//_mm_set1_epi8
int16x4_t vmov_n_s16 (int16_t __a);//_mm_set1_epi16
int32x2_t vmov_n_s32 (int32_t __a);//_mm_set1_epi32
float32x2_t vmov_n_f32 (float32_t __a);//_mm_set1_ps
uint8x8_t vmov_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x4_t vmov_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x2_t vmov_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x8_t vmov_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x4_t vmov_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x1_t vmov_n_s64 (int64_t __a);
uint64x1_t vmov_n_u64 (uint64_t __a);
int8x16_t vmovq_n_s8 (int8_t __a);//_mm_set1_epi8
int16x8_t vmovq_n_s16 (int16_t __a);//_mm_set1_epi16
int32x4_t vmovq_n_s32 (int32_t __a);//_mm_set1_epi32
float32x4_t vmovq_n_f32 (float32_t __a);//_mm_set1_ps
uint8x16_t vmovq_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x8_t vmovq_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x4_t vmovq_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x16_t vmovq_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x8_t vmovq_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x2_t vmovq_n_s64 (int64_t __a);
uint64x2_t vmovq_n_u64 (uint64_t __a);
/*--2、Load all lanes of the vector to the value of a lane of a vector: 
vdup/vmov -> ri = a[b];
duplicates a scalar into every element of the destination vector.--*/
int8x8_t vdup_lane_s8 (int8x8_t __a, const int __b);
int16x4_t vdup_lane_s16 (int16x4_t __a, const int __b);
int32x2_t vdup_lane_s32 (int32x2_t __a, const int __b);
float32x2_t vdup_lane_f32 (float32x2_t __a, const int __b);
uint8x8_t vdup_lane_u8 (uint8x8_t __a, const int __b);
uint16x4_t vdup_lane_u16 (uint16x4_t __a, const int __b);
uint32x2_t vdup_lane_u32 (uint32x2_t __a, const int __b);
poly8x8_t vdup_lane_p8 (poly8x8_t __a, const int __b);
poly16x4_t vdup_lane_p16 (poly16x4_t __a, const int __b);
int64x1_t vdup_lane_s64 (int64x1_t __a, const int __b);
uint64x1_t vdup_lane_u64 (uint64x1_t __a, const int __b);
int8x16_t vdupq_lane_s8 (int8x8_t __a, const int __b);
int16x8_t vdupq_lane_s16 (int16x4_t __a, const int __b);
int32x4_t vdupq_lane_s32 (int32x2_t __a, const int __b);
float32x4_t vdupq_lane_f32 (float32x2_t __a, const int __b);
uint8x16_t vdupq_lane_u8 (uint8x8_t __a, const int __b);
uint16x8_t vdupq_lane_u16 (uint16x4_t __a, const int __b);
uint32x4_t vdupq_lane_u32 (uint32x2_t __a, const int __b);
poly8x16_t vdupq_lane_p8 (poly8x8_t __a, const int __b);
poly16x8_t vdupq_lane_p16 (poly16x4_t __a, const int __b);
int64x2_t vdupq_lane_s64 (int64x1_t __a, const int __b);//_mm_unpacklo_epi64
uint64x2_t vdupq_lane_u64 (uint64x1_t __a, const int __b);//_mm_unpacklo_epi64
/********************************************Combining vectors**************************/
/*--長指令, -> r0 = a0, ..., r7 = a7, r8 = b0, ..., r15 = b7;
joins two 64-bit vectors into a single 128-bit vector. 
The output vector contains twice the number of elements as each input vector. 
The lower half of the output vector contains the elements of the first input vector.--*/
int8x16_t vcombine_s8 (int8x8_t __a, int8x8_t __b);//_mm_unpacklo_epi64
int16x8_t vcombine_s16 (int16x4_t __a, int16x4_t __b);//_mm_unpacklo_epi64
int32x4_t vcombine_s32 (int32x2_t __a, int32x2_t __b);//_mm_unpacklo_epi64
int64x2_t vcombine_s64 (int64x1_t __a, int64x1_t __b);//_mm_unpacklo_epi64
float32x4_t vcombine_f32 (float32x2_t __a, float32x2_t __b);
uint8x16_t vcombine_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_unpacklo_epi64
uint16x8_t vcombine_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_unpacklo_epi64
uint32x4_t vcombine_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_unpacklo_epi64
uint64x2_t vcombine_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_unpacklo_epi64
poly8x16_t vcombine_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_unpacklo_epi64
poly16x8_t vcombine_p16 (poly16x4_t __a, poly16x4_t __b);//_mm_unpacklo_epi64
/***************************************Splitting vectors*******************************/
/*--1、Narrow instruction, -> ri = a[i + n/2], where n is the number of lanes in the
input; returns the higher half of the 128-bit input vector. The output is a 64-bit
vector that has half the number of elements as the input vector.--*/
int8x8_t vget_high_s8 (int8x16_t __a);//_mm_unpackhi_epi64
int16x4_t vget_high_s16 (int16x8_t __a);//_mm_unpackhi_epi64
int32x2_t vget_high_s32 (int32x4_t __a);//_mm_unpackhi_epi64
int64x1_t vget_high_s64 (int64x2_t __a);//_mm_unpackhi_epi64
float32x2_t vget_high_f32 (float32x4_t __a);
uint8x8_t vget_high_u8 (uint8x16_t __a);//_mm_unpackhi_epi64
uint16x4_t vget_high_u16 (uint16x8_t __a);//_mm_unpackhi_epi64
uint32x2_t vget_high_u32 (uint32x4_t __a);//_mm_unpackhi_epi64
uint64x1_t vget_high_u64 (uint64x2_t __a);//_mm_unpackhi_epi64
poly8x8_t vget_high_p8 (poly8x16_t __a);//_mm_unpackhi_epi64
poly16x4_t vget_high_p16 (poly16x8_t __a);//_mm_unpackhi_epi64
/*--2、窄指令, -> ri = ai; returns the lower half of the 128-bit input vector. The
output is a 64-bit vector that has half the number of elements as the input vector.--*/
int8x8_t vget_low_s8 (int8x16_t __a);
int16x4_t vget_low_s16 (int16x8_t __a);
int32x2_t vget_low_s32 (int32x4_t __a);
float32x2_t vget_low_f32 (float32x4_t __a);
uint8x8_t vget_low_u8 (uint8x16_t __a);
uint16x4_t vget_low_u16 (uint16x8_t __a);
uint32x2_t vget_low_u32 (uint32x4_t __a);
poly8x8_t vget_low_p8 (poly8x16_t __a);
poly16x4_t vget_low_p16 (poly16x8_t __a);
int64x1_t vget_low_s64 (int64x2_t __a);
uint64x1_t vget_low_u64 (uint64x2_t __a);
/****************************************************Conversions************************/
/*--1、Convert from float: vcvt ->, convert from floating-point to integer.--*/
int32x2_t vcvt_s32_f32 (float32x2_t __a);
uint32x2_t vcvt_u32_f32 (float32x2_t __a);
int32x4_t vcvtq_s32_f32 (float32x4_t __a);
uint32x4_t vcvtq_u32_f32 (float32x4_t __a);
int32x2_t vcvt_n_s32_f32 (float32x2_t __a, const int __b);
uint32x2_t vcvt_n_u32_f32 (float32x2_t __a, const int __b);
int32x4_t vcvtq_n_s32_f32 (float32x4_t __a, const int __b);
uint32x4_t vcvtq_n_u32_f32 (float32x4_t __a, const int __b);
/*--2、Convert to float: vcvt ->, convert from integer to floating-point.--*/
float32x2_t vcvt_f32_s32 (int32x2_t __a);
float32x2_t vcvt_f32_u32 (uint32x2_t __a);
float32x4_t vcvtq_f32_s32 (int32x4_t __a);
float32x4_t vcvtq_f32_u32 (uint32x4_t __a);
float32x2_t vcvt_n_f32_s32 (int32x2_t __a, const int __b);
float32x2_t vcvt_n_f32_u32 (uint32x2_t __a, const int __b);
float32x4_t vcvtq_n_f32_s32 (int32x4_t __a, const int __b);
float32x4_t vcvtq_n_f32_u32 (uint32x4_t __a, const int __b);
/*--3、Convert between half-precision and single-precision floating-point numbers: vcvt ->--*/
float16x4_t vcvt_f16_f32(float32x4_t a);
float32x4_t vcvt_f32_f16(float16x4_t a);
/*************************************************Move**********************************/
/*--1、Vector narrow integer (narrow instruction): vmovn -> ri = low half of the bits
of ai; copies the least significant half of each element of a quadword vector into
the corresponding elements of a doubleword vector.--*/
int8x8_t vmovn_s16 (int16x8_t __a);
int16x4_t vmovn_s32 (int32x4_t __a);
int32x2_t vmovn_s64 (int64x2_t __a);
uint8x8_t vmovn_u16 (uint16x8_t __a);
uint16x4_t vmovn_u32 (uint32x4_t __a);
uint32x2_t vmovn_u64 (uint64x2_t __a);
/*--2、Vector long move (long instruction): vmovl -> sign-extends (signed variants) or
zero-extends (unsigned variants) each element in a doubleword vector to twice its
original length, and places the results in a quadword vector.
The trailing note on each prototype names the corresponding SSE4.1 intrinsic.--*/
int16x8_t vmovl_s8 (int8x8_t __a);//_mm_cvtepi8_epi16
int32x4_t vmovl_s16 (int16x4_t __a);//_mm_cvtepi16_epi32
int64x2_t vmovl_s32 (int32x2_t __a);//_mm_cvtepi32_epi64
uint16x8_t vmovl_u8 (uint8x8_t __a);//_mm_cvtepu8_epi16
uint32x4_t vmovl_u16 (uint16x4_t __a);//_mm_cvtepu16_epi32
uint64x2_t vmovl_u32 (uint32x2_t __a);//_mm_cvtepu32_epi64
/*--3、Vector saturating narrow integer(窄指令): vqmovn -> copies each element of the
operand vector to the corresponding element of the destination vector. 
The result element is half the width of 
the operand element, and values are saturated to the result width.
The results are the same type as the operands.--*/
int8x8_t vqmovn_s16 (int16x8_t __a);//_mm_packs_epi16
int16x4_t vqmovn_s32 (int32x4_t __a);//_mm_packs_epi32
int32x2_t vqmovn_s64 (int64x2_t __a);
uint8x8_t vqmovn_u16 (uint16x8_t __a);
uint16x4_t vqmovn_u32 (uint32x4_t __a);
uint32x2_t vqmovn_u64 (uint64x2_t __a);
/*--4、Vector saturating narrow integer signed->unsigned(窄指令): copies each element of
the operand vector to the corresponding element of the destination vector.
The result element is half the width of the operand element,
and values are saturated to the result width.
The elements in the operand are signed and the elements in the result are unsigned.--*/
uint8x8_t vqmovun_s16 (int16x8_t __a);//_mm_packus_epi16
uint16x4_t vqmovun_s32 (int32x4_t __a);//_mm_packus_epi32
uint32x2_t vqmovun_s64 (int64x2_t __a);
/******************************************************Table lookup*********************/
/*--1、Table lookup: vtbl -> uses byte indexes in a control vector to look up byte 
values in a table and generate a new vector. Indexes out of range return 0. 
The table is in Vector1 and uses one(or two or three or four)D registers.--*/
int8x8_t vtbl1_s8 (int8x8_t __a, int8x8_t __b);
uint8x8_t vtbl1_u8 (uint8x8_t __a, uint8x8_t __b);
poly8x8_t vtbl1_p8 (poly8x8_t __a, uint8x8_t __b);
int8x8_t vtbl2_s8 (int8x8x2_t __a, int8x8_t __b);
uint8x8_t vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b);
poly8x8_t vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b);
int8x8_t vtbl3_s8 (int8x8x3_t __a, int8x8_t __b);
uint8x8_t vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b);
poly8x8_t vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b);
int8x8_t vtbl4_s8 (int8x8x4_t __a, int8x8_t __b);
uint8x8_t vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b);
poly8x8_t vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b);
/*--2、Extended table lookup: vtbx -> uses byte indexes in a control vector to look up
byte values in a table and generate a new vector. Indexes out of range leave the
destination element unchanged. The table is in Vector2 and uses one (or two or three or
four) D registers. Vector1 contains the elements of the destination vector.--*/
int8x8_t vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
uint8x8_t vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
poly8x8_t vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c);
int8x8_t vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c);
uint8x8_t vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c);
poly8x8_t vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c);
int8x8_t vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c);
uint8x8_t vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c);
poly8x8_t vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c);
int8x8_t vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c);
uint8x8_t vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c);
poly8x8_t vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c);
/***************************************Multiply, scalar, lane**************************/
/*--1、Vector multiply by scalar: vmul -> ri = ai * b; 
multiplies each element in a vector by a scalar, 
and places the results in the destination vector.--*/
int16x4_t vmul_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vmul_n_s32 (int32x2_t __a, int32_t __b);
float32x2_t vmul_n_f32 (float32x2_t __a, float32_t __b);
uint16x4_t vmul_n_u16 (uint16x4_t __a, uint16_t __b);
uint32x2_t vmul_n_u32 (uint32x2_t __a, uint32_t __b);
int16x8_t vmulq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vmulq_n_s32 (int32x4_t __a, int32_t __b);
float32x4_t vmulq_n_f32 (float32x4_t __a, float32_t __b);
uint16x8_t vmulq_n_u16 (uint16x8_t __a, uint16_t __b);
uint32x4_t vmulq_n_u32 (uint32x4_t __a, uint32_t __b);
/*--2、Vector multiply by scalar: -> ri = ai * b[c]; 
multiplies the first vector by a scalar. 
The scalar is the element in the second vector with index c.--*/
int16x4_t vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
float32x2_t vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c);
uint16x4_t vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
int16x8_t vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
float32x4_t vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c);
uint16x8_t vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c);
uint32x4_t vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c);
/*--3、Vector long multiply with scalar: vmull ->  ri = ai * b;
multiplies a vector by a scalar. 
Elements in the result are wider than elements in input vector.--*/
int32x4_t vmull_n_s16 (int16x4_t __a, int16_t __b);
int64x2_t vmull_n_s32 (int32x2_t __a, int32_t __b);
uint32x4_t vmull_n_u16 (uint16x4_t __a, uint16_t __b);
uint64x2_t vmull_n_u32 (uint32x2_t __a, uint32_t __b);
/*--4、Vector long multiply by scalar: vmull -> ri = ai * b[c];
multiplies the first vector by a scalar. 
The scalar is the element in the second vector with index c. 
The elements in the result are wider than the elements in input vector.--*/
int32x4_t vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int64x2_t vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
uint32x4_t vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint64x2_t vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
/*--5、Vector saturating doubling long multiply with scalar:
vqdmull -> ri = sat(2 * ai * b);
multiplies the elements in the vector by a scalar, and doubles the results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmull_n_s16 (int16x4_t __a, int16_t __b);
int64x2_t vqdmull_n_s32 (int32x2_t __a, int32_t __b);
/*--6、Vector saturating doubling long multiply by scalar:
vqdmull -> ri = sat(2 * ai * b[c]);
multiplies the elements in the first vector by a scalar, and doubles the results.
The scalar has index c in the second vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int64x2_t vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
/*--7、Vector saturating doubling multiply high with scalar:
vqdmulh -> ri = high half of sat(2 * ai * b);
multiplies the elements of the vector by a scalar, and doubles the results.
It then returns only the high half of the results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqdmulh_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vqdmulh_n_s32 (int32x2_t __a, int32_t __b);
int16x8_t vqdmulhq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vqdmulhq_n_s32 (int32x4_t __a, int32_t __b);
/*--8、Vector saturating doubling multiply high by scalar:
vqdmulh -> ri = high half of sat(2 * ai * b[c]);
multiplies the elements of the first vector by a scalar, and doubles the results. It then
returns only the high half of the results. The scalar has index c in the second vector.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int16x8_t vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
/*--9、Vector saturating rounding doubling multiply high with scalar:
vqrdmulh -> ri = rounded high half of sat(2 * ai * b);
multiplies the elements of the vector by a scalar and doubles the results.
It then returns only the high half of the rounded results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqrdmulh_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vqrdmulh_n_s32 (int32x2_t __a, int32_t __b);
int16x8_t vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b);
/*--10、Vector rounding saturating doubling multiply high by scalar:
vqrdmulh -> ri = rounded high half of sat(2 * ai * b[c]);
multiplies the elements of the first vector by a scalar and doubles the results.
It then returns only the high half of the rounded results.
The scalar has index c in the second vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int16x4_t vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int16x8_t vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
/*--11、Vector multiply accumulate with scalar: vmla -> ri = ai + bi * c;
multiplies each element in the second vector by a scalar, 
and adds the results to the corresponding elements of the first vector.--*/
int16x4_t vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);
int32x2_t vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);
float32x2_t vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);
uint16x4_t vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);
uint32x2_t vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);
int16x8_t vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);
int32x4_t vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);
float32x4_t vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);
uint16x8_t vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);
uint32x4_t vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);
/*--12、Vector multiply accumulate by scalar: vmla -> ri = ai + bi * c[d];
multiplies each element in the second vector by a scalar, 
and adds the results to the corresponding elements of the first vector. 
The scalar has index d in the third vector.--*/
int16x4_t vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int32x2_t vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
float32x2_t vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,const int __d);
uint16x4_t vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint32x2_t vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
int16x8_t vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);
int32x4_t vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);
float32x4_t vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,const int __d);
uint16x8_t vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);
uint32x4_t vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);
/*--13、Vector widening multiply accumulate with scalar: vmlal -> ri = ai + bi * c;
multiplies each element in the second vector by a scalar, and adds the results into the
corresponding elements of the first vector.
The scalar is passed directly as the third argument. The elements in the result are
wider than the elements of the second vector.--*/
int32x4_t vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
uint32x4_t vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);
uint64x2_t vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);
/*--14、Vector widening multiply accumulate by scalar: vmlal -> ri = ai + bi * c[d];
multiplies each element in the second vector by a scalar, and adds the results to the 
corresponding elements of the first vector. The scalar has index d in the third vector.
The elements in the result are wider.--*/
int32x4_t vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
uint32x4_t vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint64x2_t vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
/*--15、Vector widening saturating doubling multiply accumulate with scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c);
multiplies the elements in the second vector by a scalar, and doubles the results.
It then adds the results to the elements in the first vector.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
/*--16、Vector widening saturating doubling multiply accumulate by scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c[d]);
multiplies each element in the second vector by a scalar, doubles the results and adds
them to the corresponding elements of the first vector. The scalar has index d in the
third vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
/*--17、Vector multiply subtract with scalar: vmls -> ri = ai - bi * c;
multiplies each element in a vector by a scalar, subtracts the results from the 
corresponding elements of the destination vector, 
and places the final results in the destination vector.--*/
int16x4_t vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);
int32x2_t vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);
float32x2_t vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);
uint16x4_t vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);
uint32x2_t vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);
int16x8_t vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);
int32x4_t vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);
float32x4_t vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);
uint16x8_t vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);
uint32x4_t vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);
/*--18、Vector multiply subtract by scalar: vmls -> ri = ai - bi * c[d];
multiplies each element in the second vector by a scalar, and subtracts them from the
corresponding elements of the first vector.
The scalar has index d in the third vector.--*/
int16x4_t vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int32x2_t vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
float32x2_t vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,const int __d);
uint16x4_t vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint32x2_t vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
int16x8_t vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);
int32x4_t vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);
float32x4_t vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,const int __d);
uint16x8_t vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);
uint32x4_t vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);
/*--19、Vector widening multiply subtract with scalar: vmlsl -> ri = ai - bi * c;
multiplies the elements in the second vector by a scalar, then subtracts the results from
the elements in the first vector. The elements of the result are wider.--*/
int32x4_t vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
uint32x4_t vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);
uint64x2_t vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);
/*--20、Vector widening multiply subtract by scalar: vmlsl -> ri = ai - bi * c[d];
multiplies each element in the second vector by a scalar, 
and subtracts them from the corresponding elements of the first vector. 
The scalar has index d in the third vector. The elements in the result are wider.--*/
int32x4_t vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
uint32x4_t vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
uint64x2_t vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
/*--21、Vector widening saturating doubling multiply subtract with scalar:
vqdmlsl -> ri = sat(ai - 2 * bi * c);
multiplies each element of the second vector (__b) by the scalar __c and doubles the
products. It then subtracts the doubled products from the corresponding elements of
the first vector (__a). The result elements are wider than the elements of __b.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
/*--22、Vector widening saturating doubling multiply subtract by scalar:
vqdmlsl -> ri = sat(ai - 2 * bi * c[d]);
multiplies each element in the second vector by a scalar, doubles the results and subtracts
them from the corresponding elements of the first vector. The scalar is the element at
index d (__d) of the third vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
/*****************************************************Vector extract********************/
/*--Vector extract: vext -> extracts n elements from the lower end of the second operand
vector and the remaining elements from the higher end of the first, and combines them to
form the result vector; n is the last argument (__c). The elements from the second
operand are placed in the most significant part of the result vector. The elements from
the first operand are placed in the least significant part of the result vector. This
intrinsic cycles the elements through the lanes if the two input vectors are the same.--*/
int8x8_t vext_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vext_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vext_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vext_s64 (int64x1_t __a, int64x1_t __b, const int __c);
float32x2_t vext_f32 (float32x2_t __a, float32x2_t __b, const int __c);
uint8x8_t vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c);//_mm_alignr_epi8
int16x8_t vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c);//_mm_alignr_epi8
int32x4_t vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c);//_mm_alignr_epi8
int64x2_t vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c);//_mm_alignr_epi8
float32x4_t vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c);//_mm_alignr_epi8
uint8x16_t vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);//_mm_alignr_epi8
uint16x8_t vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);//_mm_alignr_epi8
uint32x4_t vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);//_mm_alignr_epi8
uint64x2_t vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);//_mm_alignr_epi8
poly8x16_t vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);//_mm_alignr_epi8
poly16x8_t vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);//_mm_alignr_epi8
/****************************************************Reverse elements*******************/
/*--1、Reverse vector elements (swap endianness): vrev64 -> reverses the order of 8-bit, 
16-bit, or 32-bit elements within each doubleword of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev64_s8 (int8x8_t __a);
int16x4_t vrev64_s16 (int16x4_t __a);
int32x2_t vrev64_s32 (int32x2_t __a);
float32x2_t vrev64_f32 (float32x2_t __a);//_mm_shuffle_ps
uint8x8_t vrev64_u8 (uint8x8_t __a);
uint16x4_t vrev64_u16 (uint16x4_t __a);
uint32x2_t vrev64_u32 (uint32x2_t __a);
poly8x8_t vrev64_p8 (poly8x8_t __a);
poly16x4_t vrev64_p16 (poly16x4_t __a);
int8x16_t vrev64q_s8 (int8x16_t __a);
int16x8_t vrev64q_s16 (int16x8_t __a);
int32x4_t vrev64q_s32 (int32x4_t __a);
float32x4_t vrev64q_f32 (float32x4_t __a);//_mm_shuffle_ps
uint8x16_t vrev64q_u8 (uint8x16_t __a);
uint16x8_t vrev64q_u16 (uint16x8_t __a);
uint32x4_t vrev64q_u32 (uint32x4_t __a);
poly8x16_t vrev64q_p8 (poly8x16_t __a);
poly16x8_t vrev64q_p16 (poly16x8_t __a);
/*--2、Reverse vector elements (swap endianness): vrev32 -> reverses the order of 8-bit 
or 16-bit elements within each word of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev32_s8 (int8x8_t __a);
int16x4_t vrev32_s16 (int16x4_t __a);
uint8x8_t vrev32_u8 (uint8x8_t __a);
uint16x4_t vrev32_u16 (uint16x4_t __a);
poly8x8_t vrev32_p8 (poly8x8_t __a);
poly16x4_t vrev32_p16 (poly16x4_t __a);
int8x16_t vrev32q_s8 (int8x16_t __a);
int16x8_t vrev32q_s16 (int16x8_t __a);
uint8x16_t vrev32q_u8 (uint8x16_t __a);
uint16x8_t vrev32q_u16 (uint16x8_t __a);
poly8x16_t vrev32q_p8 (poly8x16_t __a);
poly16x8_t vrev32q_p16 (poly16x8_t __a);
/*--3、Reverse vector elements (swap endianness): vrev16 -> reverses the order 
of 8-bit elements within each halfword of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev16_s8 (int8x8_t __a);
uint8x8_t vrev16_u8 (uint8x8_t __a);
poly8x8_t vrev16_p8 (poly8x8_t __a);
int8x16_t vrev16q_s8 (int8x16_t __a);
uint8x16_t vrev16q_u8 (uint8x16_t __a);
poly8x16_t vrev16q_p8 (poly8x16_t __a);
/**********************************************************Bitwise Select***************/
/*--Bitwise Select: vbsl -> selects each bit for the destination from the first operand
if the corresponding bit of the destination is 1,
or from the second operand if the corresponding bit of the destination is 0.
That wording describes the VBSL instruction, whose destination register initially holds
the selection mask. In the intrinsic prototypes below the mask is passed as the first
argument __a (note its unsigned type in every variant): each result bit is taken from
__b where the corresponding bit of __a is 1, and from __c where it is 0.--*/
int8x8_t vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c);
int64x1_t vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c);
float32x2_t vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
uint64x1_t vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c);
poly8x8_t vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c);
poly16x4_t vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c);
int8x16_t vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c);
int64x2_t vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c);
float32x4_t vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
uint64x2_t vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c);
poly8x16_t vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c);
poly16x8_t vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c);
/************************************Transposition operations***************************/
/*--1、Transpose elements: vtrn -> treats the elements of its input vectors as elements
of 2 x 2 matrices, and transposes the matrices. Essentially, it exchanges the elements 
with odd indices from Vector1 with the elements with even indices from Vector2.--*/
int8x8x2_t vtrn_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vtrn_s16 (int16x4_t __a, int16x4_t __b);
uint8x8x2_t vtrn_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vtrn_u16 (uint16x4_t __a, uint16x4_t __b);
poly8x8x2_t vtrn_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vtrn_p16 (poly16x4_t __a, poly16x4_t __b);
int32x2x2_t vtrn_s32 (int32x2_t __a, int32x2_t __b)
float32x2x2_t vtrn_f32 (float32x2_t __a, float32x2_t __b)
uint32x2x2_t vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
int8x16x2_t vtrnq_s8 (int8x16_t __a, int8x16_t __b)
int16x8x2_t vtrnq_s16 (int16x8_t __a, int16x8_t __b)
int32x4x2_t vtrnq_s32 (int32x4_t __a, int32x4_t __b)
float32x4x2_t vtrnq_f32 (float32x4_t __a, float32x4_t __b)
uint8x16x2_t vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
uint16x8x2_t vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
uint32x4x2_t vtrnq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vtrnq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vtrnq_p16 (poly16x8_t __a, poly16x8_t __b);
/*--2、Interleave elements(Zip elements): 
vzip ->  (Vector Zip) interleaves the elements of two vectors.--*/
int8x8x2_t vzip_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vzip_s16 (int16x4_t __a, int16x4_t __b);
uint8x8x2_t vzip_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vzip_u16 (uint16x4_t __a, uint16x4_t __b);
poly8x8x2_t vzip_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vzip_p16 (poly16x4_t __a, poly16x4_t __b);
int32x2x2_t vzip_s32 (int32x2_t __a, int32x2_t __b);
float32x2x2_t vzip_f32 (float32x2_t __a, float32x2_t __b);
uint32x2x2_t vzip_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16x2_t vzipq_s8 (int8x16_t __a, int8x16_t __b);
int16x8x2_t vzipq_s16 (int16x8_t __a, int16x8_t __b);
int32x4x2_t vzipq_s32 (int32x4_t __a, int32x4_t __b);
float32x4x2_t vzipq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16x2_t vzipq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8x2_t vzipq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4x2_t vzipq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vzipq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vzipq_p16 (poly16x8_t __a, poly16x8_t __b);
/*--3、De-Interleave elements(Unzip elements): 
vuzp -> (Vector Unzip) de-interleaves the elements of two vectors.
De-interleaving is the inverse process of interleaving.--*/
int8x8x2_t vuzp_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vuzp_s16 (int16x4_t __a, int16x4_t __b);
int32x2x2_t vuzp_s32 (int32x2_t __a, int32x2_t __b);
float32x2x2_t vuzp_f32 (float32x2_t __a, float32x2_t __b);
uint8x8x2_t vuzp_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vuzp_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2x2_t vuzp_u32 (uint32x2_t __a, uint32x2_t __b);
poly8x8x2_t vuzp_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vuzp_p16 (poly16x4_t __a, poly16x4_t __b);
int8x16x2_t vuzpq_s8 (int8x16_t __a, int8x16_t __b);
int16x8x2_t vuzpq_s16 (int16x8_t __a, int16x8_t __b);
int32x4x2_t vuzpq_s32 (int32x4_t __a, int32x4_t __b);
float32x4x2_t vuzpq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16x2_t vuzpq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8x2_t vuzpq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4x2_t vuzpq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vuzpq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vuzpq_p16 (poly16x8_t __a, poly16x8_t __b);
/*********************************************************Load**************************/
/*--1、Load a single vector from memory: vld1 -> loads a vector from memory.--*/
int8x8_t vld1_s8 (const int8_t * __a);
int16x4_t vld1_s16 (const int16_t * __a);
int32x2_t vld1_s32 (const int32_t * __a);
int64x1_t vld1_s64 (const int64_t * __a);
float32x2_t vld1_f32 (const float32_t * __a);
uint8x8_t vld1_u8 (const uint8_t * __a);//_mm_loadl_epi64
uint16x4_t vld1_u16 (const uint16_t * __a);//_mm_loadl_epi64
uint32x2_t vld1_u32 (const uint32_t * __a);//_mm_loadl_epi64
uint64x1_t vld1_u64 (const uint64_t * __a);//_mm_loadl_epi64
poly8x8_t vld1_p8 (const poly8_t * __a);
poly16x4_t vld1_p16 (const poly16_t * __a);
int8x16_t vld1q_s8 (const int8_t * __a);
int16x8_t vld1q_s16 (const int16_t * __a);
int32x4_t vld1q_s32 (const int32_t * __a);
int64x2_t vld1q_s64 (const int64_t * __a);
float32x4_t vld1q_f32 (const float32_t * __a);
uint8x16_t vld1q_u8 (const uint8_t * __a);
uint16x8_t vld1q_u16 (const uint16_t * __a);
uint32x4_t vld1q_u32 (const uint32_t * __a);
uint64x2_t vld1q_u64 (const uint64_t * __a);
poly8x16_t vld1q_p8 (const poly8_t * __a);
poly16x8_t vld1q_p16 (const poly16_t * __a);
/*--2、Load a single lane from memory: vld1 -> loads one element of the input vector 
from memory and returns this in the result vector. Elements of the vector that are not
loaded are returned in the result vector unaltered. 
c is the index of the element to load.--*/
int8x8_t vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c);//_mm_insert_epi8
int16x4_t vld1_lane_s16 (const int16_t * __a, int16x4_t __b,const int __c);//_mm_insert_epi16
int32x2_t vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c);//_mm_insert_epi32
float32x2_t vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c);
uint8x8_t vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c);//_mm_insert_epi8
uint16x4_t vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c);//_mm_insert_epi16
uint32x2_t vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c);//_mm_insert_epi32
poly8x8_t vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c);//_mm_insert_epi8
poly16x4_t vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c);//_mm_insert_epi16
int64x1_t vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c);
uint64x1_t vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c);
int8x16_t vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c);//_mm_insert_epi8
int16x8_t vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c);//_mm_insert_epi16
int32x4_t vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c);//_mm_insert_epi32
float32x4_t vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c);
uint8x16_t vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c);//_mm_insert_epi8
uint16x8_t vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c);//_mm_insert_epi16
uint32x4_t vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c);//_mm_insert_epi32
poly8x16_t vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c);//_mm_insert_epi8
poly16x8_t vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c);//_mm_insert_epi16
int64x2_t vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c);//_mm_insert_epi64
uint64x2_t vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c);//_mm_insert_epi64
/*--3、Load all lanes of vector with same value from memory: vld1 -> 
loads one element in a vector from memory. 
The loaded element is copied to all other lanes of the vector.--*/
int8x8_t vld1_dup_s8 (const int8_t * __a);//_mm_set1_epi8
int16x4_t vld1_dup_s16 (const int16_t * __a);//_mm_set1_epi16
int32x2_t vld1_dup_s32 (const int32_t * __a);//_mm_set1_epi32
float32x2_t vld1_dup_f32 (const float32_t * __a);//_mm_set1_ps
uint8x8_t vld1_dup_u8 (const uint8_t * __a);//_mm_set1_epi8
uint16x4_t vld1_dup_u16 (const uint16_t * __a);//_mm_set1_epi16
uint32x2_t vld1_dup_u32 (const uint32_t * __a);//_mm_set1_epi32
poly8x8_t vld1_dup_p8 (const poly8_t * __a);//_mm_set1_epi8
poly16x4_t vld1_dup_p16 (const poly16_t * __a);//_mm_set1_epi16
int64x1_t vld1_dup_s64 (const int64_t * __a);
uint64x1_t vld1_dup_u64 (const uint64_t * __a);
int8x16_t vld1q_dup_s8 (const int8_t * __a);//_mm_set1_epi8
int16x8_t vld1q_dup_s16 (const int16_t * __a);//_mm_set1_epi16
int32x4_t vld1q_dup_s32 (const int32_t * __a);//_mm_set1_epi32
float32x4_t vld1q_dup_f32 (const float32_t * __a);//_mm_set1_ps
uint8x16_t vld1q_dup_u8 (const uint8_t * __a);//_mm_set1_epi8
uint16x8_t vld1q_dup_u16 (const uint16_t * __a);//_mm_set1_epi16
uint32x4_t vld1q_dup_u32 (const uint32_t * __a);//_mm_set1_epi32
poly8x16_t vld1q_dup_p8 (const poly8_t * __a);//_mm_set1_epi8
poly16x8_t vld1q_dup_p16 (const poly16_t * __a);//_mm_set1_epi16
int64x2_t vld1q_dup_s64 (const int64_t * __a);
uint64x2_t vld1q_dup_u64 (const uint64_t * __a);
/*--4、Load 2-element structure from memory: vld2 -> loads 2 vectors from memory. 
It performs a 2-way de-interleave from memory to the vectors.--*/
int8x8x2_t vld2_s8 (const int8_t * __a);
int16x4x2_t vld2_s16 (const int16_t * __a);
int32x2x2_t vld2_s32 (const int32_t * __a);
float32x2x2_t vld2_f32 (const float32_t * __a);
uint8x8x2_t vld2_u8 (const uint8_t * __a);
uint16x4x2_t vld2_u16 (const uint16_t * __a);
uint32x2x2_t vld2_u32 (const uint32_t * __a);
poly8x8x2_t vld2_p8 (const poly8_t * __a);
poly16x4x2_t vld2_p16 (const poly16_t * __a);
int64x1x2_t vld2_s64 (const int64_t * __a);
uint64x1x2_t vld2_u64 (const uint64_t * __a);
int8x16x2_t vld2q_s8 (const int8_t * __a);
int16x8x2_t vld2q_s16 (const int16_t * __a);
int32x4x2_t vld2q_s32 (const int32_t * __a);
float32x4x2_t vld2q_f32 (const float32_t * __a);
uint8x16x2_t vld2q_u8 (const uint8_t * __a);
uint16x8x2_t vld2q_u16 (const uint16_t * __a);
uint32x4x2_t vld2q_u32 (const uint32_t * __a);
poly8x16x2_t vld2q_p8 (const poly8_t * __a);
poly16x8x2_t vld2q_p16 (const poly16_t * __a);
/*--5、Load a single lane of 2-element structure from memory: vld2 -> 
loads two elements in a double-vector structure from memory and returns this in 
the result. The loaded values are from consecutive memory addresses. 
Elements in the structure that are not loaded are returned in the result unaltered. 
c is the index of the elements to load.--*/
int8x8x2_t vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c);
int16x4x2_t vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c);
int32x2x2_t vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c);
float32x2x2_t vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c);
uint8x8x2_t vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c);
uint16x4x2_t vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c);
uint32x2x2_t vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c);
poly8x8x2_t vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c);
poly16x4x2_t vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c);
int16x8x2_t vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c);
int32x4x2_t vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c);
float32x4x2_t vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c);
uint16x8x2_t vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c);
uint32x4x2_t vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c);
poly16x8x2_t vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c);
/*--6、Load all lanes of 2-element structure with same value from memory: vld2 -> 
loads 2 elements from memory and returns a double-vector structure. 
The first element is copied to all lanes of the first vector. 
The second element is copied to all lanes of the second vector.--*/
int8x8x2_t vld2_dup_s8 (const int8_t * __a);
int16x4x2_t vld2_dup_s16 (const int16_t * __a);
int32x2x2_t vld2_dup_s32 (const int32_t * __a);
float32x2x2_t vld2_dup_f32 (const float32_t * __a);
uint8x8x2_t vld2_dup_u8 (const uint8_t * __a);
uint16x4x2_t vld2_dup_u16 (const uint16_t * __a);
uint32x2x2_t vld2_dup_u32 (const uint32_t * __a);
poly8x8x2_t vld2_dup_p8 (const poly8_t * __a);
poly16x4x2_t vld2_dup_p16 (const poly16_t * __a);
int64x1x2_t vld2_dup_s64 (const int64_t * __a);
uint64x1x2_t vld2_dup_u64 (const uint64_t * __a);
/*--7、Load 3-element structure from memory: vld3 -> 
loads 3 vectors from memory. 
It performs a 3-way de-interleave from memory to the vectors.--*/
int8x8x3_t vld3_s8 (const int8_t * __a);
int16x4x3_t vld3_s16 (const int16_t * __a);
int32x2x3_t vld3_s32 (const int32_t * __a);
float32x2x3_t vld3_f32 (const float32_t * __a);
uint8x8x3_t vld3_u8 (const uint8_t * __a);
uint16x4x3_t vld3_u16 (const uint16_t * __a);
uint32x2x3_t vld3_u32 (const uint32_t * __a);
poly8x8x3_t vld3_p8 (const poly8_t * __a);
poly16x4x3_t vld3_p16 (const poly16_t * __a);
int64x1x3_t vld3_s64 (const int64_t * __a);
uint64x1x3_t vld3_u64 (const uint64_t * __a);
int8x16x3_t vld3q_s8 (const int8_t * __a);
int16x8x3_t vld3q_s16 (const int16_t * __a);
int32x4x3_t vld3q_s32 (const int32_t * __a);
float32x4x3_t vld3q_f32 (const float32_t * __a);
uint8x16x3_t vld3q_u8 (const uint8_t * __a);
uint16x8x3_t vld3q_u16 (const uint16_t * __a);
uint32x4x3_t vld3q_u32 (const uint32_t * __a);
poly8x16x3_t vld3q_p8 (const poly8_t * __a);
poly16x8x3_t vld3q_p16 (const poly16_t * __a);
/*--8、Load a single lane of 3-element structure from memory: vld3 -> 
loads three elements in a triple-vector structure from memory and returns this in the
result. The loaded values are from consecutive memory addresses. 
Elements in the structure that are not loaded are returned in the result unaltered.
c is the index of the element to load.--*/
int8x8x3_t vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c);
int16x4x3_t vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c);
int32x2x3_t vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c);
float32x2x3_t vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c);
uint8x8x3_t vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c);
uint16x4x3_t vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c);
uint32x2x3_t vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c);
poly8x8x3_t vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c);
poly16x4x3_t vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c);
int16x8x3_t vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c);
int32x4x3_t vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c);
float32x4x3_t vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c);
uint16x8x3_t vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c);
uint32x4x3_t vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c);
poly16x8x3_t vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c);
/*--9、Load all lanes of 3-element structure with same value from memory: vld3 ->
loads 3 elements from memory and returns a triple-vector structure. The first element
is copied to all lanes of the first vector. And similarly the second and third elements 
are copied to the second and third vectors respectively.--*/
int8x8x3_t vld3_dup_s8 (const int8_t * __a);
int16x4x3_t vld3_dup_s16 (const int16_t * __a);
int32x2x3_t vld3_dup_s32 (const int32_t * __a);
float32x2x3_t vld3_dup_f32 (const float32_t * __a);
uint8x8x3_t vld3_dup_u8 (const uint8_t * __a);
uint16x4x3_t vld3_dup_u16 (const uint16_t * __a);
uint32x2x3_t vld3_dup_u32 (const uint32_t * __a);
poly8x8x3_t vld3_dup_p8 (const poly8_t * __a);
poly16x4x3_t vld3_dup_p16 (const poly16_t * __a);
int64x1x3_t vld3_dup_s64 (const int64_t * __a);
uint64x1x3_t vld3_dup_u64 (const uint64_t * __a);
/*--10、Load 4-element structure from memory: vld4 -> 
loads 4 vectors from memory. 
It performs a 4-way de-interleave from memory to the vectors.--*/
int8x8x4_t vld4_s8 (const int8_t * __a);
int16x4x4_t vld4_s16 (const int16_t * __a);
int32x2x4_t vld4_s32 (const int32_t * __a);
float32x2x4_t vld4_f32 (const float32_t * __a);
uint8x8x4_t  vld4_u8 (const uint8_t * __a);
uint16x4x4_t vld4_u16 (const uint16_t * __a);
uint32x2x4_t vld4_u32 (const uint32_t * __a);
poly8x8x4_t vld4_p8 (const poly8_t * __a);
poly16x4x4_t vld4_p16 (const poly16_t * __a);
int64x1x4_t vld4_s64 (const int64_t * __a);
uint64x1x4_t vld4_u64 (const uint64_t * __a);
int8x16x4_t vld4q_s8 (const int8_t * __a);
int16x8x4_t vld4q_s16 (const int16_t * __a);
int32x4x4_t vld4q_s32 (const int32_t * __a);
float32x4x4_t vld4q_f32 (const float32_t * __a);
uint8x16x4_t vld4q_u8 (const uint8_t * __a);
uint16x8x4_t vld4q_u16 (const uint16_t * __a);
uint32x4x4_t vld4q_u32 (const uint32_t * __a);
poly8x16x4_t vld4q_p8 (const poly8_t * __a);
poly16x8x4_t vld4q_p16 (const poly16_t * __a);
/*--11、Load a single lane of 4-element structure from memory: vld4 -> 
loads four elements in a quad-vector structure from memory and returns this in the result. 
The loaded values are from consecutive memory addresses.
Elements in the structure that are not loaded are returned in the result unaltered. 
c is the index of the element to load.--*/
int8x8x4_t vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c);
int16x4x4_t vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c);
int32x2x4_t vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c);
float32x2x4_t vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c);
uint8x8x4_t vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c);
uint16x4x4_t vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c);
uint32x2x4_t vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c);
poly8x8x4_t vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c);
poly16x4x4_t vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c);
int16x8x4_t vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c);
int32x4x4_t vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c);
float32x4x4_t vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c);
uint16x8x4_t vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c);
uint32x4x4_t vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c);
poly16x8x4_t vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c);
/*--12、Load all lanes of 4-element structure with same value from memory: vld4 ->
loads 4 elements from memory and returns a quad-vector structure. The first element is 
copied to all lanes of the first vector. And similarly the second, third, and fourth 
elements are copied to the second, third, and fourth vectors respectively.--*/
int8x8x4_t vld4_dup_s8 (const int8_t * __a);
int16x4x4_t vld4_dup_s16 (const int16_t * __a);
int32x2x4_t vld4_dup_s32 (const int32_t * __a);
float32x2x4_t vld4_dup_f32 (const float32_t * __a);
uint8x8x4_t vld4_dup_u8 (const uint8_t * __a);
uint16x4x4_t vld4_dup_u16 (const uint16_t * __a);
uint32x2x4_t vld4_dup_u32 (const uint32_t * __a);
poly8x8x4_t vld4_dup_p8 (const poly8_t * __a);
poly16x4x4_t vld4_dup_p16 (const poly16_t * __a);
int64x1x4_t vld4_dup_s64 (const int64_t * __a);
uint64x1x4_t vld4_dup_u64 (const uint64_t * __a);
/*****************************************************Store*****************************/
/*--1、Store a single vector into memory: vst1 -> stores a vector into memory.--*/
void vst1_s8 (int8_t * __a, int8x8_t __b);
void vst1_s16 (int16_t * __a, int16x4_t __b);
void vst1_s32 (int32_t * __a, int32x2_t __b);
void vst1_s64 (int64_t * __a, int64x1_t __b);
void vst1_f32 (float32_t * __a, float32x2_t __b);
void vst1_u8 (uint8_t * __a, uint8x8_t __b);
void vst1_u16 (uint16_t * __a, uint16x4_t __b);
void vst1_u32 (uint32_t * __a, uint32x2_t __b);
void vst1_u64 (uint64_t * __a, uint64x1_t __b);
void vst1_p8 (poly8_t * __a, poly8x8_t __b);
void vst1_p16 (poly16_t * __a, poly16x4_t __b);
void vst1q_s8 (int8_t * __a, int8x16_t __b);
void vst1q_s16 (int16_t * __a, int16x8_t __b);
void vst1q_s32 (int32_t * __a, int32x4_t __b);
void vst1q_s64 (int64_t * __a, int64x2_t __b);
void vst1q_f32 (float32_t * __a, float32x4_t __b);
void vst1q_u8 (uint8_t * __a, uint8x16_t __b);
void vst1q_u16 (uint16_t * __a, uint16x8_t __b);
void vst1q_u32 (uint32_t * __a, uint32x4_t __b);
void vst1q_u64 (uint64_t * __a, uint64x2_t __b);
void vst1q_p8 (poly8_t * __a, poly8x16_t __b);
void vst1q_p16 (poly16_t * __a, poly16x8_t __b);
/*--2、Store a single lane into memory: vst1 ->
stores one element of the vector __b into memory at __a.
__c is the index of the element (lane) in the vector to be stored.
The vst1_lane_* forms take a doubleword vector (e.g. int8x8_t), the
vst1q_lane_* forms a quadword vector (e.g. int8x16_t); both families are
declared here for every element type, including the 64-bit ones.
NOTE(review): __c is declared `const int`; it presumably has to be a
compile-time constant in the range 0 .. (number of lanes - 1) -- confirm
against the ARM NEON intrinsics reference.--*/
void vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c);
void vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c);
void vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c);
void vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c);
void vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c);
void vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c);
void vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c);
void vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c);
void vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c);
void vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c);
void vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c);
void vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c);
void vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c);
void vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c);
void vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c);
void vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c);
void vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c);
void vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c);
void vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c);
void vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c);
void vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c);
void vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c);
/*--3、Store 2 vectors into memory: vst2 ->
stores the 2 vectors held in __b (a ...x2_t structure) into memory at __a.
It interleaves the 2 vectors into memory.
NOTE(review): the interleave order is presumably val[0][0], val[1][0],
val[0][1], val[1][1], ... -- confirm against the ARM NEON intrinsics reference.
64-bit element forms are declared here only for the doubleword variants
(vst2_s64, vst2_u64); this listing has no vst2q_s64 / vst2q_u64.--*/
void vst2_s8 (int8_t * __a, int8x8x2_t __b);
void vst2_s16 (int16_t * __a, int16x4x2_t __b);
void vst2_s32 (int32_t * __a, int32x2x2_t __b);
void vst2_f32 (float32_t * __a, float32x2x2_t __b);
void vst2_u8 (uint8_t * __a, uint8x8x2_t __b);
void vst2_u16 (uint16_t * __a, uint16x4x2_t __b);
void vst2_u32 (uint32_t * __a, uint32x2x2_t __b);
void vst2_p8 (poly8_t * __a, poly8x8x2_t __b);
void vst2_p16 (poly16_t * __a, poly16x4x2_t __b);
void vst2_s64 (int64_t * __a, int64x1x2_t __b);
void vst2_u64 (uint64_t * __a, uint64x1x2_t __b);
void vst2q_s8 (int8_t * __a, int8x16x2_t __b);
void vst2q_s16 (int16_t * __a, int16x8x2_t __b);
void vst2q_s32 (int32_t * __a, int32x4x2_t __b);
void vst2q_f32 (float32_t * __a, float32x4x2_t __b);
void vst2q_u8 (uint8_t * __a, uint8x16x2_t __b);
void vst2q_u16 (uint16_t * __a, uint16x8x2_t __b);
void vst2q_u32 (uint32_t * __a, uint32x4x2_t __b);
void vst2q_p8 (poly8_t * __a, poly8x16x2_t __b);
void vst2q_p16 (poly16_t * __a, poly16x8x2_t __b);
/*--4、Store a lane of two elements into memory: vst2 ->
stores a lane of two elements from a double-vector structure (__b) into
memory at __a. The elements to be stored are from the same lane in the
vectors and their index is __c.
This listing declares the q forms only for 16- and 32-bit element types
(s16, s32, f32, u16, u32, p16) and declares no 64-bit element forms.
NOTE(review): __c is declared `const int`; it presumably has to be a
compile-time constant within the lane count of one vector -- confirm against
the ARM NEON intrinsics reference.--*/
void vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c);
void vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c);
void vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c);
void vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c);
void vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c);
void vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c);
void vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c);
void vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c);
void vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c);
void vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c);
void vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c);
void vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c);
void vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c);
void vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c);
void vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c);
/*--5、Store 3 vectors into memory: vst3 ->
stores the 3 vectors held in __b (a ...x3_t structure) into memory at __a.
It interleaves the 3 vectors into memory.
NOTE(review): the interleave order is presumably val[0][0], val[1][0],
val[2][0], val[0][1], ... -- confirm against the ARM NEON intrinsics reference.
64-bit element forms are declared here only for the doubleword variants
(vst3_s64, vst3_u64); this listing has no vst3q_s64 / vst3q_u64.--*/
void vst3_s8 (int8_t * __a, int8x8x3_t __b);
void vst3_s16 (int16_t * __a, int16x4x3_t __b);
void vst3_s32 (int32_t * __a, int32x2x3_t __b);
void vst3_f32 (float32_t * __a, float32x2x3_t __b);
void  vst3_u8 (uint8_t * __a, uint8x8x3_t __b);
void vst3_u16 (uint16_t * __a, uint16x4x3_t __b);
void vst3_u32 (uint32_t * __a, uint32x2x3_t __b);
void vst3_p8 (poly8_t * __a, poly8x8x3_t __b);
void vst3_p16 (poly16_t * __a, poly16x4x3_t __b);
void vst3_s64 (int64_t * __a, int64x1x3_t __b);
void vst3_u64 (uint64_t * __a, uint64x1x3_t __b);
void vst3q_s8 (int8_t * __a, int8x16x3_t __b);
void vst3q_s16 (int16_t * __a, int16x8x3_t __b);
void vst3q_s32 (int32_t * __a, int32x4x3_t __b);
void vst3q_f32 (float32_t * __a, float32x4x3_t __b);
void vst3q_u8 (uint8_t * __a, uint8x16x3_t __b);
void vst3q_u16 (uint16_t * __a, uint16x8x3_t __b);
void vst3q_u32 (uint32_t * __a, uint32x4x3_t __b);
void vst3q_p8 (poly8_t * __a, poly8x16x3_t __b);
void vst3q_p16 (poly16_t * __a, poly16x8x3_t __b);
/*--6、Store a lane of three elements into memory: vst3 ->
stores a lane of three elements from a triple-vector structure (__b) into
memory at __a. The elements to be stored are from the same lane in the
vectors and their index is __c.
This listing declares the q forms only for 16- and 32-bit element types
(s16, s32, f32, u16, u32, p16) and declares no 64-bit element forms.
NOTE(review): __c is declared `const int`; it presumably has to be a
compile-time constant within the lane count of one vector -- confirm against
the ARM NEON intrinsics reference.--*/
void vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c);
void vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c);
void vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c);
void vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c);
void vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c);
void vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c);
void vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c);
void vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c);
void vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c);
void vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c);
void vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c);
void vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c);
void vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c);
void vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c);
void vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c);
/*--7、Store 4 vectors into memory: vst4 ->
stores the 4 vectors held in __b (a ...x4_t structure) into memory at __a.
It interleaves the 4 vectors into memory.
NOTE(review): the interleave order is presumably val[0][0], val[1][0],
val[2][0], val[3][0], val[0][1], ... -- confirm against the ARM NEON
intrinsics reference.
64-bit element forms are declared here only for the doubleword variants
(vst4_s64, vst4_u64); this listing has no vst4q_s64 / vst4q_u64.--*/
void vst4_s8 (int8_t * __a, int8x8x4_t __b);
void vst4_s16 (int16_t * __a, int16x4x4_t __b);
void vst4_s32 (int32_t * __a, int32x2x4_t __b);
void vst4_f32 (float32_t * __a, float32x2x4_t __b);
void vst4_u8 (uint8_t * __a, uint8x8x4_t __b);
void vst4_u16 (uint16_t * __a, uint16x4x4_t __b);
void vst4_u32 (uint32_t * __a, uint32x2x4_t __b);
void vst4_p8 (poly8_t * __a, poly8x8x4_t __b);
void vst4_p16 (poly16_t * __a, poly16x4x4_t __b);
void vst4_s64 (int64_t * __a, int64x1x4_t __b);
void vst4_u64 (uint64_t * __a, uint64x1x4_t __b);
void vst4q_s8 (int8_t * __a, int8x16x4_t __b);
void vst4q_s16 (int16_t * __a, int16x8x4_t __b);
void vst4q_s32 (int32_t * __a, int32x4x4_t __b);
void  vst4q_f32 (float32_t * __a, float32x4x4_t __b);
void vst4q_u8 (uint8_t * __a, uint8x16x4_t __b);
void vst4q_u16 (uint16_t * __a, uint16x8x4_t __b);
void vst4q_u32 (uint32_t * __a, uint32x4x4_t __b);
void vst4q_p8 (poly8_t * __a, poly8x16x4_t __b);
void vst4q_p16 (poly16_t * __a, poly16x8x4_t __b);
/*--8、Store a lane of four elements into memory: vst4 ->
stores a lane of four elements from a quad-vector structure (__b) into
memory at __a. The elements to be stored are from the same lane in the
vectors and their index is __c.
This listing declares the q forms only for 16- and 32-bit element types
(s16, s32, f32, u16, u32, p16) and declares no 64-bit element forms.
NOTE(review): __c is declared `const int`; it presumably has to be a
compile-time constant within the lane count of one vector -- confirm against
the ARM NEON intrinsics reference.--*/
void vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c);
void vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c);
void vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c);
void vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c);
void vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c);
void vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c);
void vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c);
void vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c);
void vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c);
void vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c);
void vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c);
void vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c);
void vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c);
void vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c);
void vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c);
/*********************************Reinterpret casts (type conversion)********************/
/*--Convert between types: vreinterpret -> treats a vector as having a different
datatype, without changing its value.
Naming: vreinterpret[q]_<dst>_<src> takes a vector with element type <src> and
returns it typed with element type <dst>. The plain forms work on the
doubleword vector types, the q forms on the quadword vector types, so the
argument and the result always have the same total size
(e.g. int16x4_t -> poly8x8_t, int16x8_t -> poly8x16_t).--*/
/* Reinterpret as poly8: results are poly8x8_t (plain) / poly8x16_t (q),
   one prototype for each other element type. */
poly8x8_t vreinterpret_p8_s8 (int8x8_t __a);
poly8x8_t vreinterpret_p8_s16 (int16x4_t __a);
poly8x8_t vreinterpret_p8_s32 (int32x2_t __a);
poly8x8_t vreinterpret_p8_s64 (int64x1_t __a);
poly8x8_t vreinterpret_p8_f32 (float32x2_t __a);
poly8x8_t vreinterpret_p8_u8 (uint8x8_t __a);
poly8x8_t vreinterpret_p8_u16 (uint16x4_t __a);
poly8x8_t vreinterpret_p8_u32 (uint32x2_t __a);
poly8x8_t vreinterpret_p8_u64 (uint64x1_t __a);
poly8x8_t vreinterpret_p8_p16 (poly16x4_t __a);
poly8x16_t vreinterpretq_p8_s8 (int8x16_t __a);
poly8x16_t vreinterpretq_p8_s16 (int16x8_t __a);
poly8x16_t vreinterpretq_p8_s32 (int32x4_t __a);
poly8x16_t vreinterpretq_p8_s64 (int64x2_t __a);
poly8x16_t vreinterpretq_p8_f32 (float32x4_t __a);
poly8x16_t vreinterpretq_p8_u8 (uint8x16_t __a);
poly8x16_t vreinterpretq_p8_u16 (uint16x8_t __a);
poly8x16_t vreinterpretq_p8_u32 (uint32x4_t __a);
poly8x16_t vreinterpretq_p8_u64 (uint64x2_t __a);
poly8x16_t vreinterpretq_p8_p16 (poly16x8_t __a);
/* vreinterpret[q]_p16_<src>: return the argument retyped as poly16x4_t (plain
   forms) or poly16x8_t (q forms), without changing its value; one prototype
   for each other element type. */
poly16x4_t vreinterpret_p16_s8 (int8x8_t __a);
poly16x4_t vreinterpret_p16_s16 (int16x4_t __a);
poly16x4_t vreinterpret_p16_s32 (int32x2_t __a);
poly16x4_t vreinterpret_p16_s64 (int64x1_t __a);
poly16x4_t vreinterpret_p16_f32 (float32x2_t __a);
poly16x4_t vreinterpret_p16_u8 (uint8x8_t __a);
poly16x4_t vreinterpret_p16_u16 (uint16x4_t __a);
poly16x4_t vreinterpret_p16_u32 (uint32x2_t __a);
poly16x4_t vreinterpret_p16_u64 (uint64x1_t __a);
poly16x4_t vreinterpret_p16_p8 (poly8x8_t __a);
poly16x8_t vreinterpretq_p16_s8 (int8x16_t __a);
poly16x8_t vreinterpretq_p16_s16 (int16x8_t __a);
poly16x8_t vreinterpretq_p16_s32 (int32x4_t __a);
poly16x8_t vreinterpretq_p16_s64 (int64x2_t __a);
poly16x8_t vreinterpretq_p16_f32 (float32x4_t __a);
poly16x8_t vreinterpretq_p16_u8 (uint8x16_t __a);
poly16x8_t vreinterpretq_p16_u16 (uint16x8_t __a);
poly16x8_t vreinterpretq_p16_u32 (uint32x4_t __a);
poly16x8_t vreinterpretq_p16_u64 (uint64x2_t __a);
poly16x8_t vreinterpretq_p16_p8 (poly8x16_t __a);
/* vreinterpret[q]_f32_<src>: return the argument retyped as float32x2_t (plain
   forms) or float32x4_t (q forms), without changing its value; one prototype
   for each other element type. */
float32x2_t vreinterpret_f32_s8 (int8x8_t __a);
float32x2_t vreinterpret_f32_s16 (int16x4_t __a);
float32x2_t vreinterpret_f32_s32 (int32x2_t __a);
float32x2_t vreinterpret_f32_s64 (int64x1_t __a);
float32x2_t vreinterpret_f32_u8 (uint8x8_t __a);
float32x2_t vreinterpret_f32_u16 (uint16x4_t __a);
float32x2_t vreinterpret_f32_u32 (uint32x2_t __a);
float32x2_t vreinterpret_f32_u64 (uint64x1_t __a);
float32x2_t vreinterpret_f32_p8 (poly8x8_t __a);
float32x2_t vreinterpret_f32_p16 (poly16x4_t __a);
float32x4_t vreinterpretq_f32_s8 (int8x16_t __a);
float32x4_t vreinterpretq_f32_s16 (int16x8_t __a);
float32x4_t vreinterpretq_f32_s32 (int32x4_t __a);
float32x4_t vreinterpretq_f32_s64 (int64x2_t __a);
float32x4_t vreinterpretq_f32_u8 (uint8x16_t __a);
float32x4_t vreinterpretq_f32_u16 (uint16x8_t __a);
float32x4_t vreinterpretq_f32_u32 (uint32x4_t __a);
float32x4_t vreinterpretq_f32_u64 (uint64x2_t __a);
float32x4_t vreinterpretq_f32_p8 (poly8x16_t __a);
float32x4_t vreinterpretq_f32_p16 (poly16x8_t __a);
/* vreinterpret[q]_s64_<src>: return the argument retyped as int64x1_t (plain
   forms) or int64x2_t (q forms), without changing its value; one prototype
   for each other element type. */
int64x1_t vreinterpret_s64_s8 (int8x8_t __a);
int64x1_t vreinterpret_s64_s16 (int16x4_t __a);
int64x1_t vreinterpret_s64_s32 (int32x2_t __a);
int64x1_t vreinterpret_s64_f32 (float32x2_t __a);
int64x1_t vreinterpret_s64_u8 (uint8x8_t __a);
int64x1_t vreinterpret_s64_u16 (uint16x4_t __a);
int64x1_t vreinterpret_s64_u32 (uint32x2_t __a);
int64x1_t vreinterpret_s64_u64 (uint64x1_t __a);
int64x1_t vreinterpret_s64_p8 (poly8x8_t __a);
int64x1_t vreinterpret_s64_p16 (poly16x4_t __a);
int64x2_t vreinterpretq_s64_s8 (int8x16_t __a);
int64x2_t vreinterpretq_s64_s16 (int16x8_t __a);
int64x2_t vreinterpretq_s64_s32 (int32x4_t __a);
int64x2_t vreinterpretq_s64_f32 (float32x4_t __a);
int64x2_t vreinterpretq_s64_u8 (uint8x16_t __a);
int64x2_t vreinterpretq_s64_u16 (uint16x8_t __a);
int64x2_t vreinterpretq_s64_u32 (uint32x4_t __a);
int64x2_t vreinterpretq_s64_u64 (uint64x2_t __a);
int64x2_t vreinterpretq_s64_p8 (poly8x16_t __a);
int64x2_t vreinterpretq_s64_p16 (poly16x8_t __a);
/* vreinterpret[q]_u64_<src>: return the argument retyped as uint64x1_t (plain
   forms) or uint64x2_t (q forms), without changing its value; one prototype
   for each other element type. */
uint64x1_t vreinterpret_u64_s8 (int8x8_t __a);
uint64x1_t vreinterpret_u64_s16 (int16x4_t __a);
uint64x1_t vreinterpret_u64_s32 (int32x2_t __a);
uint64x1_t vreinterpret_u64_s64 (int64x1_t __a);
uint64x1_t vreinterpret_u64_f32 (float32x2_t __a);
uint64x1_t vreinterpret_u64_u8 (uint8x8_t __a);
uint64x1_t vreinterpret_u64_u16 (uint16x4_t __a);
uint64x1_t vreinterpret_u64_u32 (uint32x2_t __a);
uint64x1_t vreinterpret_u64_p8 (poly8x8_t __a);
uint64x1_t vreinterpret_u64_p16 (poly16x4_t __a);
uint64x2_t vreinterpretq_u64_s8 (int8x16_t __a);
uint64x2_t vreinterpretq_u64_s16 (int16x8_t __a);
uint64x2_t vreinterpretq_u64_s32 (int32x4_t __a);
uint64x2_t vreinterpretq_u64_s64 (int64x2_t __a);
uint64x2_t vreinterpretq_u64_f32 (float32x4_t __a);
uint64x2_t vreinterpretq_u64_u8 (uint8x16_t __a);
uint64x2_t vreinterpretq_u64_u16 (uint16x8_t __a);
uint64x2_t vreinterpretq_u64_u32 (uint32x4_t __a);
uint64x2_t vreinterpretq_u64_p8 (poly8x16_t __a);
uint64x2_t vreinterpretq_u64_p16 (poly16x8_t __a);
/* vreinterpret[q]_s8_<src>: return the argument retyped as int8x8_t (plain
   forms) or int8x16_t (q forms), without changing its value; one prototype
   for each other element type. */
int8x8_t vreinterpret_s8_s16 (int16x4_t __a);
int8x8_t vreinterpret_s8_s32 (int32x2_t __a);
int8x8_t vreinterpret_s8_s64 (int64x1_t __a);
int8x8_t vreinterpret_s8_f32 (float32x2_t __a);
int8x8_t vreinterpret_s8_u8 (uint8x8_t __a);
int8x8_t vreinterpret_s8_u16 (uint16x4_t __a);
int8x8_t vreinterpret_s8_u32 (uint32x2_t __a);
int8x8_t vreinterpret_s8_u64 (uint64x1_t __a);
int8x8_t vreinterpret_s8_p8 (poly8x8_t __a);
int8x8_t vreinterpret_s8_p16 (poly16x4_t __a);
int8x16_t vreinterpretq_s8_s16 (int16x8_t __a);
int8x16_t vreinterpretq_s8_s32 (int32x4_t __a);
int8x16_t vreinterpretq_s8_s64 (int64x2_t __a);
int8x16_t vreinterpretq_s8_f32 (float32x4_t __a);
int8x16_t vreinterpretq_s8_u8 (uint8x16_t __a);
int8x16_t vreinterpretq_s8_u16 (uint16x8_t __a);
int8x16_t vreinterpretq_s8_u32 (uint32x4_t __a);
int8x16_t vreinterpretq_s8_u64 (uint64x2_t __a);
int8x16_t vreinterpretq_s8_p8 (poly8x16_t __a);
int8x16_t vreinterpretq_s8_p16 (poly16x8_t __a);
/* vreinterpret[q]_s16_<src>: return the argument retyped as int16x4_t (plain
   forms) or int16x8_t (q forms), without changing its value; one prototype
   for each other element type. */
int16x4_t vreinterpret_s16_s8 (int8x8_t __a);
int16x4_t vreinterpret_s16_s32 (int32x2_t __a);
int16x4_t vreinterpret_s16_s64 (int64x1_t __a);
int16x4_t vreinterpret_s16_f32 (float32x2_t __a);
int16x4_t vreinterpret_s16_u8 (uint8x8_t __a);
int16x4_t vreinterpret_s16_u16 (uint16x4_t __a);
int16x4_t vreinterpret_s16_u32 (uint32x2_t __a);
int16x4_t vreinterpret_s16_u64 (uint64x1_t __a);
int16x4_t vreinterpret_s16_p8 (poly8x8_t __a);
int16x4_t vreinterpret_s16_p16 (poly16x4_t __a);
int16x8_t vreinterpretq_s16_s8 (int8x16_t __a);
int16x8_t vreinterpretq_s16_s32 (int32x4_t __a);
int16x8_t vreinterpretq_s16_s64 (int64x2_t __a);
int16x8_t vreinterpretq_s16_f32 (float32x4_t __a);
int16x8_t vreinterpretq_s16_u8 (uint8x16_t __a);
int16x8_t vreinterpretq_s16_u16 (uint16x8_t __a);
int16x8_t vreinterpretq_s16_u32 (uint32x4_t __a);
int16x8_t vreinterpretq_s16_u64 (uint64x2_t __a);
int16x8_t vreinterpretq_s16_p8 (poly8x16_t __a);
int16x8_t vreinterpretq_s16_p16 (poly16x8_t __a);
/* vreinterpret[q]_s32_<src>: return the argument retyped as int32x2_t (plain
   forms) or int32x4_t (q forms), without changing its value; one prototype
   for each other element type. */
int32x2_t vreinterpret_s32_s8 (int8x8_t __a);
int32x2_t vreinterpret_s32_s16 (int16x4_t __a);
int32x2_t vreinterpret_s32_s64 (int64x1_t __a);
int32x2_t vreinterpret_s32_f32 (float32x2_t __a);
int32x2_t vreinterpret_s32_u8 (uint8x8_t __a);
int32x2_t vreinterpret_s32_u16 (uint16x4_t __a);
int32x2_t vreinterpret_s32_u32 (uint32x2_t __a);
int32x2_t vreinterpret_s32_u64 (uint64x1_t __a);
int32x2_t vreinterpret_s32_p8 (poly8x8_t __a);
int32x2_t vreinterpret_s32_p16 (poly16x4_t __a);
int32x4_t vreinterpretq_s32_s8 (int8x16_t __a);
int32x4_t vreinterpretq_s32_s16 (int16x8_t __a);
int32x4_t vreinterpretq_s32_s64 (int64x2_t __a);
int32x4_t vreinterpretq_s32_f32 (float32x4_t __a);
int32x4_t vreinterpretq_s32_u8 (uint8x16_t __a);
int32x4_t vreinterpretq_s32_u16 (uint16x8_t __a);
int32x4_t vreinterpretq_s32_u32 (uint32x4_t __a);
int32x4_t vreinterpretq_s32_u64 (uint64x2_t __a);
int32x4_t vreinterpretq_s32_p8 (poly8x16_t __a);
int32x4_t vreinterpretq_s32_p16 (poly16x8_t __a);
/* vreinterpret[q]_u8_<src>: return the argument retyped as uint8x8_t (plain
   forms) or uint8x16_t (q forms), without changing its value; one prototype
   for each other element type. */
uint8x8_t vreinterpret_u8_s8 (int8x8_t __a);
uint8x8_t vreinterpret_u8_s16 (int16x4_t __a);
uint8x8_t vreinterpret_u8_s32 (int32x2_t __a);
uint8x8_t vreinterpret_u8_s64 (int64x1_t __a);
uint8x8_t vreinterpret_u8_f32 (float32x2_t __a);
uint8x8_t vreinterpret_u8_u16 (uint16x4_t __a);
uint8x8_t vreinterpret_u8_u32 (uint32x2_t __a);
uint8x8_t vreinterpret_u8_u64 (uint64x1_t __a);
uint8x8_t vreinterpret_u8_p8 (poly8x8_t __a);
uint8x8_t vreinterpret_u8_p16 (poly16x4_t __a);
uint8x16_t vreinterpretq_u8_s8 (int8x16_t __a);
uint8x16_t vreinterpretq_u8_s16 (int16x8_t __a);
uint8x16_t vreinterpretq_u8_s32 (int32x4_t __a);
uint8x16_t vreinterpretq_u8_s64 (int64x2_t __a);
uint8x16_t vreinterpretq_u8_f32 (float32x4_t __a);
uint8x16_t vreinterpretq_u8_u16 (uint16x8_t __a);
uint8x16_t vreinterpretq_u8_u32 (uint32x4_t __a);
uint8x16_t vreinterpretq_u8_u64 (uint64x2_t __a);
uint8x16_t vreinterpretq_u8_p8 (poly8x16_t __a);
uint8x16_t vreinterpretq_u8_p16 (poly16x8_t __a);
/* vreinterpret[q]_u16_<src>: return the argument retyped as uint16x4_t (plain
   forms) or uint16x8_t (q forms), without changing its value; one prototype
   for each other element type. */
uint16x4_t vreinterpret_u16_s8 (int8x8_t __a);
uint16x4_t vreinterpret_u16_s16 (int16x4_t __a);
uint16x4_t vreinterpret_u16_s32 (int32x2_t __a);
uint16x4_t vreinterpret_u16_s64 (int64x1_t __a);
uint16x4_t vreinterpret_u16_f32 (float32x2_t __a);
uint16x4_t vreinterpret_u16_u8 (uint8x8_t __a);
uint16x4_t vreinterpret_u16_u32 (uint32x2_t __a);
uint16x4_t vreinterpret_u16_u64 (uint64x1_t __a);
uint16x4_t vreinterpret_u16_p8 (poly8x8_t __a);
uint16x4_t vreinterpret_u16_p16 (poly16x4_t __a);
uint16x8_t vreinterpretq_u16_s8 (int8x16_t __a);
uint16x8_t vreinterpretq_u16_s16 (int16x8_t __a);
uint16x8_t vreinterpretq_u16_s32 (int32x4_t __a);
uint16x8_t vreinterpretq_u16_s64 (int64x2_t __a);
uint16x8_t vreinterpretq_u16_f32 (float32x4_t __a);
uint16x8_t vreinterpretq_u16_u8 (uint8x16_t __a);
uint16x8_t vreinterpretq_u16_u32 (uint32x4_t __a);
uint16x8_t vreinterpretq_u16_u64 (uint64x2_t __a);
uint16x8_t vreinterpretq_u16_p8 (poly8x16_t __a);
uint16x8_t vreinterpretq_u16_p16 (poly16x8_t __a);
/* vreinterpret[q]_u32_<src>: return the argument retyped as uint32x2_t (plain
   forms) or uint32x4_t (q forms), without changing its value; one prototype
   for each other element type. */
uint32x2_t vreinterpret_u32_s8 (int8x8_t __a);
uint32x2_t vreinterpret_u32_s16 (int16x4_t __a);
uint32x2_t vreinterpret_u32_s32 (int32x2_t __a);
uint32x2_t vreinterpret_u32_s64 (int64x1_t __a);
uint32x2_t vreinterpret_u32_f32 (float32x2_t __a);
uint32x2_t vreinterpret_u32_u8 (uint8x8_t __a);
uint32x2_t vreinterpret_u32_u16 (uint16x4_t __a);
uint32x2_t vreinterpret_u32_u64 (uint64x1_t __a);
uint32x2_t vreinterpret_u32_p8 (poly8x8_t __a);
uint32x2_t vreinterpret_u32_p16 (poly16x4_t __a);
uint32x4_t vreinterpretq_u32_s8 (int8x16_t __a);
uint32x4_t vreinterpretq_u32_s16 (int16x8_t __a);
uint32x4_t vreinterpretq_u32_s32 (int32x4_t __a);
uint32x4_t vreinterpretq_u32_s64 (int64x2_t __a);
uint32x4_t vreinterpretq_u32_f32 (float32x4_t __a);
uint32x4_t vreinterpretq_u32_u8 (uint8x16_t __a);
uint32x4_t vreinterpretq_u32_u16 (uint16x8_t __a);
uint32x4_t vreinterpretq_u32_u64 (uint64x2_t __a);
uint32x4_t vreinterpretq_u32_p8 (poly8x16_t __a);
uint32x4_t vreinterpretq_u32_p16 (poly16x8_t __a);

總結

以上是生活随笔為你收集整理的Neon Intrinsics各函数介绍的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

歡迎分享!

轉載請說明來源于"生活随笔",并保留原作者的名字。

本文地址:Neon Intrinsics各函数介绍

国产成人人人97超碰超爽8 | 欧美人与禽猛交狂配 | 中文亚洲成a人片在线观看 | 国内精品人妻无码久久久影院 | 亚洲日韩av一区二区三区四区 | 伦伦影院午夜理论片 | 亚洲 日韩 欧美 成人 在线观看 | 亚洲国产精品成人久久蜜臀 | 人妻夜夜爽天天爽三区 | 欧美肥老太牲交大战 | 狠狠色丁香久久婷婷综合五月 | 中文字幕无码免费久久99 | 国产亚av手机在线观看 | 女人被男人躁得好爽免费视频 | 无码午夜成人1000部免费视频 | 亚洲国产综合无码一区 | 国内老熟妇对白xxxxhd | 日本丰满熟妇videos | 青草青草久热国产精品 | 伊人久久大香线焦av综合影院 | 免费播放一区二区三区 | 装睡被陌生人摸出水好爽 | 正在播放东北夫妻内射 | 老头边吃奶边弄进去呻吟 | 精品成人av一区二区三区 | 国产av剧情md精品麻豆 | 久久久国产精品无码免费专区 | 中文字幕+乱码+中文字幕一区 | 欧美日韩一区二区免费视频 | 国产精品无套呻吟在线 | 成人免费视频在线观看 | 日韩视频 中文字幕 视频一区 | 久久精品视频在线看15 | 精品久久久无码人妻字幂 | 99国产欧美久久久精品 | 三级4级全黄60分钟 | 国产精品资源一区二区 | 国语自产偷拍精品视频偷 | 成人毛片一区二区 | 国产欧美熟妇另类久久久 | 日日噜噜噜噜夜夜爽亚洲精品 | 亚洲理论电影在线观看 | 亚洲国产精华液网站w | 精品国产乱码久久久久乱码 | 十八禁视频网站在线观看 | 熟女少妇人妻中文字幕 | 99视频精品全部免费免费观看 | 在线播放免费人成毛片乱码 | 久青草影院在线观看国产 | 欧美变态另类xxxx | √天堂资源地址中文在线 | 亚洲爆乳精品无码一区二区三区 | 99riav国产精品视频 | 国产欧美熟妇另类久久久 | 国产尤物精品视频 | 纯爱无遮挡h肉动漫在线播放 | 最新国产乱人伦偷精品免费网站 | 中文字幕日产无线码一区 | 3d动漫精品啪啪一区二区中 | 荫蒂被男人添的好舒服爽免费视频 | 国产精品香蕉在线观看 | 成人影院yy111111在线观看 | 日本熟妇大屁股人妻 | 久久久久国色av免费观看性色 | 亚洲一区av无码专区在线观看 | 伊人久久大香线蕉午夜 | 中国大陆精品视频xxxx | 亚洲精品综合五月久久小说 | 国产精品久免费的黄网站 | √天堂资源地址中文在线 | 日韩欧美成人免费观看 | 日本精品人妻无码免费大全 | 国产精品18久久久久久麻辣 | 色诱久久久久综合网ywww | 狠狠噜狠狠狠狠丁香五月 | 精品少妇爆乳无码av无码专区 | 日本免费一区二区三区最新 | 久久人人97超碰a片精品 | 国产乱人无码伦av在线a | 熟妇人妻激情偷爽文 | 一个人看的www免费视频在线观看 | 久久精品人人做人人综合试看 | 内射巨臀欧美在线视频 | 综合激情五月综合激情五月激情1 | 中文字幕人妻无码一区二区三区 | 久久综合久久自在自线精品自 | 欧美日韩亚洲国产精品 | 亚洲a无码综合a国产av中文 | 好屌草这里只有精品 | 无码人妻丰满熟妇区五十路百度 | 好男人社区资源 | 国产 精品 自在自线 | 亚洲日本在线电影 | 久久久久久久人妻无码中文字幕爆 | 亚洲午夜福利在线观看 | 亚洲欧洲无卡二区视頻 | 亚洲狠狠色丁香婷婷综合 | 亚洲第一无码av无码专区 | 少妇人妻av毛片在线看 | 欧美丰满熟妇xxxx性ppx人交 | 精品久久久中文字幕人妻 | 久久99久久99精品中文字幕 | 自拍偷自拍亚洲精品被多人伦好爽 | 丝袜足控一区二区三区 | 欧美精品一区二区精品久久 | 国产亚洲精品久久久久久国模美 | 日本大香伊一区二区三区 | 婷婷五月综合缴情在线视频 | 牛和人交xxxx欧美 | 国产成人无码av一区二区 | 成人亚洲精品久久久久 | 欧洲欧美人成视频在线 | 精品 日韩 国产 欧美 视频 | 亚洲国产一区二区三区在线观看 | 亚洲色欲色欲欲www在线 | 亚洲男人av香蕉爽爽爽爽 | 无码人妻av免费一区二区三区 | 天海翼激烈高潮到腰振不止 | 99riav国产精品视频 | 亚洲精品综合一区二区三区在线 | 强开小婷嫩苞又嫩又紧视频 | 一本大道伊人av久久综合 | 久久久www成人免费毛片 | 久久久久人妻一区精品色欧美 | 国产无遮挡吃胸膜奶免费看 | 色婷婷久久一区二区三区麻豆 | 国产在线精品一区二区高清不卡 | 久久精品成人欧美大片 | 日日噜噜噜噜夜夜爽亚洲精品 | 日韩在线不卡免费视频一区 | 露脸叫床粗话东北少妇 | 中文字幕av日韩精品一区二区 | 国产成人午夜福利在线播放 | 无码毛片视频一区二区本码 | 
亚洲熟妇色xxxxx亚洲 | 真人与拘做受免费视频一 | 日韩精品a片一区二区三区妖精 | 天天躁日日躁狠狠躁免费麻豆 | 自拍偷自拍亚洲精品10p | 日本一本二本三区免费 | 久久综合久久自在自线精品自 | 亚洲精品一区国产 | 日本大香伊一区二区三区 | 毛片内射-百度 | 国产精品二区一区二区aⅴ污介绍 | 欧美亚洲国产一区二区三区 | 99久久亚洲精品无码毛片 | 欧洲欧美人成视频在线 | 综合激情五月综合激情五月激情1 | 亚洲人成网站免费播放 | 国产三级精品三级男人的天堂 | 国产乱人无码伦av在线a | 成人无码影片精品久久久 | 久久综合网欧美色妞网 | 日韩av激情在线观看 | 国产免费无码一区二区视频 | 久久综合狠狠综合久久综合88 | 国产色精品久久人妻 | 日产国产精品亚洲系列 | 成人动漫在线观看 | 国产激情无码一区二区 | 成人精品天堂一区二区三区 | √8天堂资源地址中文在线 | 成 人 免费观看网站 | 无码人妻av免费一区二区三区 | 国产精品99久久精品爆乳 | 国产人妻精品午夜福利免费 | 亚洲精品www久久久 | 国产成人精品三级麻豆 | 熟女俱乐部五十路六十路av | 色一情一乱一伦一区二区三欧美 | 免费无码肉片在线观看 | 国产超碰人人爽人人做人人添 | 麻豆国产97在线 | 欧洲 | 久久亚洲精品中文字幕无男同 | 亚洲人成无码网www | 成人无码精品1区2区3区免费看 | 国产精品无码一区二区三区不卡 | 色窝窝无码一区二区三区色欲 | 国产精品自产拍在线观看 | 精品日本一区二区三区在线观看 | 久久国内精品自在自线 | 俺去俺来也www色官网 | 精品欧洲av无码一区二区三区 | 国产香蕉97碰碰久久人人 | 亚洲色欲色欲天天天www | 色综合久久久久综合一本到桃花网 | ass日本丰满熟妇pics | 久久97精品久久久久久久不卡 | 婷婷综合久久中文字幕蜜桃三电影 | 亚洲色欲久久久综合网东京热 | 久久久久久av无码免费看大片 | 男女下面进入的视频免费午夜 | 亚洲日韩中文字幕在线播放 | 国产精品毛片一区二区 | 亚洲精品一区二区三区在线观看 | 爱做久久久久久 | 久久久av男人的天堂 | 偷窥日本少妇撒尿chinese | 久久这里只有精品视频9 | 亚洲精品国产精品乱码不卡 | 波多野42部无码喷潮在线 | 国产精品18久久久久久麻辣 | 高清不卡一区二区三区 | 国产成人无码一二三区视频 | 秋霞成人午夜鲁丝一区二区三区 | 人妻体内射精一区二区三四 | 国产成人av免费观看 | 人妻aⅴ无码一区二区三区 | 中文字幕乱妇无码av在线 | 欧美老熟妇乱xxxxx | 亚洲乱亚洲乱妇50p | 四虎永久在线精品免费网址 | 亚洲国产日韩a在线播放 | 亚洲精品美女久久久久久久 | 欧美丰满老熟妇xxxxx性 | 国产午夜视频在线观看 | 波多野结衣aⅴ在线 | 人人妻人人澡人人爽精品欧美 | 久久综合给合久久狠狠狠97色 | 日本一区二区三区免费高清 | 欧美熟妇另类久久久久久多毛 | 啦啦啦www在线观看免费视频 | 日本饥渴人妻欲求不满 | 国产精品亚洲专区无码不卡 | 成 人影片 免费观看 | 国产精品人妻一区二区三区四 | 免费网站看v片在线18禁无码 | 免费看少妇作爱视频 | 久久久久久国产精品无码下载 | 亚洲日韩中文字幕在线播放 | 国产麻豆精品精东影业av网站 | 免费国产成人高清在线观看网站 | 中文字幕 亚洲精品 第1页 | 国产亚洲欧美在线专区 | 精品无码一区二区三区的天堂 | 性啪啪chinese东北女人 | 欧美精品无码一区二区三区 | 国产片av国语在线观看 | 日本熟妇乱子伦xxxx | 日韩少妇白浆无码系列 | 少妇太爽了在线观看 | 国产无套内射久久久国产 | 青青青爽视频在线观看 | 激情人妻另类人妻伦 | 天天拍夜夜添久久精品 | 内射后入在线观看一区 | 国产va免费精品观看 | 日韩无码专区 | 久久视频在线观看精品 | 国产又爽又黄又刺激的视频 | 伊人色综合久久天天小片 | 国产亚洲视频中文字幕97精品 | 亚洲精品国偷拍自产在线麻豆 | 国产精品久久久久9999小说 | 给我免费的视频在线观看 | 久久99久久99精品中文字幕 | 一本大道伊人av久久综合 | 青青久在线视频免费观看 | 18黄暴禁片在线观看 | 日本欧美一区二区三区乱码 | 免费观看又污又黄的网站 | www国产亚洲精品久久久日本 | 樱花草在线播放免费中文 | 国产高清不卡无码视频 | 国内精品久久毛片一区二区 | 丰满诱人的人妻3 | 日韩欧美中文字幕公布 | 奇米综合四色77777久久 东京无码熟妇人妻av在线网址 | 
精品熟女少妇av免费观看 | 色狠狠av一区二区三区 | 亚洲精品鲁一鲁一区二区三区 | 国产午夜亚洲精品不卡 | 无码人妻av免费一区二区三区 | 白嫩日本少妇做爰 | 久久国产精品_国产精品 | 暴力强奷在线播放无码 | 亚洲男人av天堂午夜在 | 国内老熟妇对白xxxxhd | 国产精品久久久久久久影院 | 天堂无码人妻精品一区二区三区 | 久久精品人人做人人综合试看 | 亚洲无人区一区二区三区 | 色婷婷久久一区二区三区麻豆 | 国产成人综合色在线观看网站 | 亚洲中文字幕乱码av波多ji | 久久综合给久久狠狠97色 | 亚洲综合无码久久精品综合 | 在线看片无码永久免费视频 | 国产精品亚洲а∨无码播放麻豆 | 人妻少妇精品久久 | 少妇无码av无码专区在线观看 | 波多野42部无码喷潮在线 | 国色天香社区在线视频 | 熟妇人妻无码xxx视频 | 国内精品九九久久久精品 | 青青草原综合久久大伊人精品 | 久久精品国产亚洲精品 | 夫妻免费无码v看片 | 2019午夜福利不卡片在线 | 欧美三级a做爰在线观看 | 久久人人爽人人爽人人片av高清 | 六月丁香婷婷色狠狠久久 | 久久久精品成人免费观看 | 国产av一区二区三区最新精品 | 青青草原综合久久大伊人精品 | 国产精品沙发午睡系列 | 久久97精品久久久久久久不卡 | 国产精品久久久av久久久 | 久久99久久99精品中文字幕 | 国产欧美熟妇另类久久久 | 久久99国产综合精品 | 国产午夜福利亚洲第一 | 亚洲国产日韩a在线播放 | 六月丁香婷婷色狠狠久久 | 天堂亚洲2017在线观看 | 亚洲中文字幕无码中字 | 亚洲无人区一区二区三区 | 亚洲中文字幕久久无码 | 亚洲a无码综合a国产av中文 | 国产乱人伦app精品久久 国产在线无码精品电影网 国产国产精品人在线视 | 亚拍精品一区二区三区探花 | 国产麻豆精品精东影业av网站 | 色欲久久久天天天综合网精品 | 亚洲日本一区二区三区在线 | 欧美日韩久久久精品a片 | aa片在线观看视频在线播放 | 久久久国产一区二区三区 | 精品国产青草久久久久福利 | 国产色xx群视频射精 | 特黄特色大片免费播放器图片 | 国产精品无码一区二区三区不卡 | 精品国产乱码久久久久乱码 | 奇米影视7777久久精品人人爽 | 九九在线中文字幕无码 | 欧美精品无码一区二区三区 | 午夜免费福利小电影 | 亚洲精品国产a久久久久久 | 久久久久久久人妻无码中文字幕爆 | 扒开双腿疯狂进出爽爽爽视频 | 日本va欧美va欧美va精品 | 又粗又大又硬又长又爽 | 国产亚av手机在线观看 | 亚洲gv猛男gv无码男同 | 免费人成在线观看网站 | 国产人妻人伦精品 | 全球成人中文在线 | 亚洲另类伦春色综合小说 | 成人亚洲精品久久久久软件 | 内射白嫩少妇超碰 | 亚无码乱人伦一区二区 | 亚洲中文字幕在线观看 | 娇妻被黑人粗大高潮白浆 | 国产精品国产自线拍免费软件 | 国产莉萝无码av在线播放 | 装睡被陌生人摸出水好爽 | 性啪啪chinese东北女人 | 永久免费精品精品永久-夜色 | 又色又爽又黄的美女裸体网站 | 久久久久久久久蜜桃 | 少妇激情av一区二区 | 大色综合色综合网站 | 国产精品高潮呻吟av久久 | а√天堂www在线天堂小说 | 亚洲一区二区三区国产精华液 | 亚洲大尺度无码无码专区 | 野狼第一精品社区 | 搡女人真爽免费视频大全 | 亚洲精品国偷拍自产在线观看蜜桃 | 亚洲中文字幕va福利 | 十八禁视频网站在线观看 | 风流少妇按摩来高潮 | 国产又粗又硬又大爽黄老大爷视 | 天堂亚洲免费视频 | 久久99精品久久久久婷婷 | 丰满人妻被黑人猛烈进入 | 真人与拘做受免费视频 | 97精品人妻一区二区三区香蕉 | 蜜桃av抽搐高潮一区二区 | 国产精品对白交换视频 | 激情亚洲一区国产精品 | 一本色道久久综合狠狠躁 | 中文字幕av日韩精品一区二区 | 国产麻豆精品精东影业av网站 | 日产精品高潮呻吟av久久 | 狠狠亚洲超碰狼人久久 | 水蜜桃av无码 | 国产精品人妻一区二区三区四 | 玩弄少妇高潮ⅹxxxyw | 国产女主播喷水视频在线观看 | 无码福利日韩神码福利片 | 日日碰狠狠丁香久燥 | 亚洲精品综合一区二区三区在线 | 激情内射亚州一区二区三区爱妻 | 亚洲熟熟妇xxxx | 久久久精品欧美一区二区免费 | 一本色道久久综合狠狠躁 | 国产麻豆精品一区二区三区v视界 | 亚洲中文字幕av在天堂 | 日本va欧美va欧美va精品 | 久久精品视频在线看15 | 无码任你躁久久久久久久 | 亚洲综合久久一区二区 | 激情爆乳一区二区三区 | 
亚洲色大成网站www国产 | 国产精品久久久午夜夜伦鲁鲁 | 日韩av无码中文无码电影 | 免费中文字幕日韩欧美 | 免费播放一区二区三区 | 国产综合色产在线精品 | 大肉大捧一进一出视频出来呀 | 亚洲成a人片在线观看无码 | 国产精品美女久久久久av爽李琼 | 黑森林福利视频导航 | yw尤物av无码国产在线观看 | 麻豆果冻传媒2021精品传媒一区下载 | 久久无码中文字幕免费影院蜜桃 | 亚洲精品美女久久久久久久 | 亚洲а∨天堂久久精品2021 | 人人澡人摸人人添 | 99久久精品日本一区二区免费 | 久久 国产 尿 小便 嘘嘘 | 久久久精品成人免费观看 | 日日鲁鲁鲁夜夜爽爽狠狠 | 国产亚洲精品久久久久久久久动漫 | 亚洲乱码中文字幕在线 | 无遮无挡爽爽免费视频 | 亚洲综合另类小说色区 | 天天摸天天透天天添 | 中文字幕人妻无码一夲道 | 午夜无码区在线观看 | 国产精品久久久av久久久 | 国产xxx69麻豆国语对白 | 国产精品人人妻人人爽 | 国产精品a成v人在线播放 | 久久午夜无码鲁丝片午夜精品 | 日韩av无码一区二区三区 | 中文字幕无码热在线视频 | 欧美国产亚洲日韩在线二区 | 国产成人无码一二三区视频 | 牲交欧美兽交欧美 | 日日躁夜夜躁狠狠躁 | 亚洲国产精品成人久久蜜臀 | 久久五月精品中文字幕 | 中国大陆精品视频xxxx | 国产成人一区二区三区在线观看 | 老头边吃奶边弄进去呻吟 | 亚洲精品国偷拍自产在线观看蜜桃 | 久久久婷婷五月亚洲97号色 | 人人妻人人澡人人爽欧美一区九九 | 夫妻免费无码v看片 | 国产网红无码精品视频 | 亚洲色成人中文字幕网站 | 性做久久久久久久免费看 | 国产热a欧美热a在线视频 | 粗大的内捧猛烈进出视频 | 99久久人妻精品免费二区 | 在线成人www免费观看视频 | 300部国产真实乱 | 99精品无人区乱码1区2区3区 | 亚洲欧美精品伊人久久 | 99精品无人区乱码1区2区3区 | 强奷人妻日本中文字幕 | 国产成人精品无码播放 | 精品国产av色一区二区深夜久久 | 香港三级日本三级妇三级 | 人人妻人人澡人人爽人人精品 | 久9re热视频这里只有精品 | 欧美精品免费观看二区 | 久久精品人人做人人综合 | 性色欲网站人妻丰满中文久久不卡 | 国产福利视频一区二区 | 亚洲色偷偷男人的天堂 | 领导边摸边吃奶边做爽在线观看 | 亚洲国产精华液网站w | 内射巨臀欧美在线视频 | 国产精品99爱免费视频 | 一本大道久久东京热无码av | 老熟女重囗味hdxx69 | 久久综合香蕉国产蜜臀av | 亚洲狠狠婷婷综合久久 | 亚洲国产高清在线观看视频 | 亚洲精品久久久久avwww潮水 | 久久99精品久久久久久动态图 | 牲欲强的熟妇农村老妇女视频 | 国产亚洲视频中文字幕97精品 | 99久久精品无码一区二区毛片 | 亚欧洲精品在线视频免费观看 | 中文亚洲成a人片在线观看 | 在线а√天堂中文官网 | 亚洲自偷自偷在线制服 | 日产精品高潮呻吟av久久 | 国产亚洲人成a在线v网站 | 久久久久久a亚洲欧洲av冫 | 亚洲精品国偷拍自产在线观看蜜桃 | 亚洲精品午夜无码电影网 | 中文字幕av日韩精品一区二区 | 国产特级毛片aaaaaaa高清 | 国产片av国语在线观看 | 熟妇人妻激情偷爽文 | 精品欧美一区二区三区久久久 | 久久99热只有频精品8 | 国产精品va在线播放 | 日本一区二区更新不卡 | 乱人伦人妻中文字幕无码 | 天下第一社区视频www日本 | 亚洲人成影院在线无码按摩店 | 国产午夜亚洲精品不卡 | 高清国产亚洲精品自在久久 | 精品无码国产自产拍在线观看蜜 | 亚洲国产欧美国产综合一区 | 纯爱无遮挡h肉动漫在线播放 | 丝袜足控一区二区三区 | 在线播放亚洲第一字幕 | 又黄又爽又色的视频 | 76少妇精品导航 | 亚洲区小说区激情区图片区 | 亚洲欧美国产精品久久 | 真人与拘做受免费视频 | 国内精品一区二区三区不卡 | 国产97人人超碰caoprom | 久久久久久久人妻无码中文字幕爆 | 99麻豆久久久国产精品免费 | 国产免费久久精品国产传媒 | 国产免费久久久久久无码 | 国产人妻久久精品二区三区老狼 | 色妞www精品免费视频 | 国产色精品久久人妻 | 欧美亚洲国产一区二区三区 | 亚洲中文字幕在线观看 | 国产精品国产三级国产专播 | 粉嫩少妇内射浓精videos | 无码成人精品区在线观看 | 奇米综合四色77777久久 东京无码熟妇人妻av在线网址 | 粗大的内捧猛烈进出视频 | 麻豆国产97在线 | 欧洲 | 国产9 9在线 | 中文 | 亚洲欧美中文字幕5发布 
| 亚欧洲精品在线视频免费观看 | 未满小14洗澡无码视频网站 | 精品国产一区二区三区四区在线看 | 扒开双腿吃奶呻吟做受视频 | 国产真实夫妇视频 | 欧美xxxxx精品 | 亚洲欧美日韩国产精品一区二区 | 青春草在线视频免费观看 | 国产精品嫩草久久久久 | 天堂无码人妻精品一区二区三区 | 国产激情精品一区二区三区 | 久久久中文久久久无码 | 国产在线一区二区三区四区五区 | 国产亚洲精品久久久久久国模美 | 亚洲国产精品久久久天堂 | 67194成是人免费无码 | 九月婷婷人人澡人人添人人爽 | 亚洲精品一区二区三区在线观看 | 国产美女精品一区二区三区 | 久久久精品成人免费观看 | 亚洲中文字幕在线无码一区二区 | 妺妺窝人体色www婷婷 | av香港经典三级级 在线 | 成熟女人特级毛片www免费 | 麻豆果冻传媒2021精品传媒一区下载 | 天天拍夜夜添久久精品大 | 亚洲色大成网站www国产 | 亚洲码国产精品高潮在线 | 亚洲精品综合五月久久小说 | 无码一区二区三区在线 | 精品无码成人片一区二区98 | 最新国产乱人伦偷精品免费网站 | 国产区女主播在线观看 | 国产精品亚洲一区二区三区喷水 | 亚洲精品无码国产 | 日日碰狠狠躁久久躁蜜桃 | 国产成人午夜福利在线播放 | 无码人妻出轨黑人中文字幕 | 色老头在线一区二区三区 | av香港经典三级级 在线 | 老熟妇乱子伦牲交视频 | 亚洲成a人一区二区三区 | 国色天香社区在线视频 | 樱花草在线播放免费中文 | 亚洲欧洲日本综合aⅴ在线 | 国产亚洲人成a在线v网站 | www国产亚洲精品久久网站 | 亚洲欧洲日本综合aⅴ在线 | 99久久久无码国产aaa精品 | 欧美三级不卡在线观看 | 天天爽夜夜爽夜夜爽 | 性生交大片免费看女人按摩摩 | 久久久婷婷五月亚洲97号色 | 丁香啪啪综合成人亚洲 | 99久久久无码国产aaa精品 | 亚洲男女内射在线播放 | 日日摸夜夜摸狠狠摸婷婷 | 国产精品沙发午睡系列 | 少妇一晚三次一区二区三区 | 久久久精品国产sm最大网站 | 乱人伦中文视频在线观看 | 激情内射亚州一区二区三区爱妻 | 日韩视频 中文字幕 视频一区 | 宝宝好涨水快流出来免费视频 | 伊人久久大香线焦av综合影院 | 亚洲无人区午夜福利码高清完整版 | 久久人人97超碰a片精品 | 国产明星裸体无码xxxx视频 | 天堂亚洲2017在线观看 | 98国产精品综合一区二区三区 | 夫妻免费无码v看片 | 我要看www免费看插插视频 | 黑人玩弄人妻中文在线 | 99久久精品国产一区二区蜜芽 | 玩弄少妇高潮ⅹxxxyw | 久久精品人妻少妇一区二区三区 | √天堂资源地址中文在线 | 国产亚洲精品久久久久久 | 日韩av无码一区二区三区不卡 | 牲欲强的熟妇农村老妇女视频 | 午夜丰满少妇性开放视频 | 中文字幕中文有码在线 | 国产无遮挡又黄又爽免费视频 | 久久精品女人的天堂av | 98国产精品综合一区二区三区 | 极品尤物被啪到呻吟喷水 | 国产成人精品三级麻豆 | 永久免费观看美女裸体的网站 | 亚洲а∨天堂久久精品2021 | 亚洲精品国产精品乱码视色 | 国产激情无码一区二区 | 老子影院午夜伦不卡 | 国产一区二区三区日韩精品 | 国产激情精品一区二区三区 | 人人澡人人妻人人爽人人蜜桃 | 又大又紧又粉嫩18p少妇 | 性欧美大战久久久久久久 | 成人综合网亚洲伊人 | 好屌草这里只有精品 | 人妻无码久久精品人妻 | 亚洲gv猛男gv无码男同 | 2019nv天堂香蕉在线观看 | 精品国产福利一区二区 | 狠狠色丁香久久婷婷综合五月 | 国产成人无码a区在线观看视频app | 国产婷婷色一区二区三区在线 | 成人欧美一区二区三区黑人 | 欧美人与物videos另类 | 又大又黄又粗又爽的免费视频 | 国产精品久久精品三级 | 国产舌乚八伦偷品w中 | 老司机亚洲精品影院 | 亚洲成av人影院在线观看 | 性生交大片免费看l | 天堂а√在线地址中文在线 | 亚洲 高清 成人 动漫 | 亚洲熟妇自偷自拍另类 | 午夜嘿嘿嘿影院 | 草草网站影院白丝内射 | 免费中文字幕日韩欧美 | 中文字幕乱码亚洲无线三区 | 撕开奶罩揉吮奶头视频 | 国产精品无码一区二区桃花视频 | 麻豆国产丝袜白领秘书在线观看 | 麻豆国产丝袜白领秘书在线观看 | 成人综合网亚洲伊人 | 国产超级va在线观看视频 | 思思久久99热只有频精品66 | 日本大香伊一区二区三区 | 沈阳熟女露脸对白视频 | 亚洲第一网站男人都懂 | 国产欧美精品一区二区三区 | 国产乱人偷精品人妻a片 | 大地资源网第二页免费观看 | 网友自拍区视频精品 
| 婷婷六月久久综合丁香 | 久久伊人色av天堂九九小黄鸭 | 婷婷丁香五月天综合东京热 | 麻豆md0077饥渴少妇 | 蜜桃视频韩日免费播放 | 日日碰狠狠丁香久燥 | 人人妻人人澡人人爽欧美一区 | 日韩欧美成人免费观看 | 国产精品久久久av久久久 | 午夜性刺激在线视频免费 | 日韩av无码一区二区三区 | 麻豆蜜桃av蜜臀av色欲av | 精品久久综合1区2区3区激情 | 天天摸天天碰天天添 | 伊在人天堂亚洲香蕉精品区 | 好爽又高潮了毛片免费下载 | 日韩av无码一区二区三区不卡 | 欧美野外疯狂做受xxxx高潮 | 欧美第一黄网免费网站 | 无遮挡国产高潮视频免费观看 | 成人一区二区免费视频 | 国产精品亚洲综合色区韩国 | 国产麻豆精品一区二区三区v视界 | 在线观看国产一区二区三区 | 红桃av一区二区三区在线无码av | 67194成是人免费无码 | 天堂а√在线中文在线 | 内射爽无广熟女亚洲 | 成人片黄网站色大片免费观看 | 正在播放东北夫妻内射 | 水蜜桃亚洲一二三四在线 | 日产精品高潮呻吟av久久 | 精品无码一区二区三区的天堂 | 中文字幕 人妻熟女 | 1000部夫妻午夜免费 | 中国女人内谢69xxxx | 久久国产精品偷任你爽任你 | 亚洲国产精华液网站w | 好爽又高潮了毛片免费下载 | 国产午夜福利100集发布 | 无码中文字幕色专区 | 日日橹狠狠爱欧美视频 | 六十路熟妇乱子伦 | 国产猛烈高潮尖叫视频免费 | 国产精品久久久久久亚洲影视内衣 | 日本熟妇人妻xxxxx人hd | 日韩亚洲欧美精品综合 | 大乳丰满人妻中文字幕日本 | 欧美人与善在线com | 国内综合精品午夜久久资源 | 久久99精品久久久久久 | 青青久在线视频免费观看 | 俺去俺来也在线www色官网 | 亚洲成av人影院在线观看 | 无码人妻精品一区二区三区不卡 | 精品国产乱码久久久久乱码 | 中文精品久久久久人妻不卡 | 精品久久久无码中文字幕 | 国产精品无码一区二区三区不卡 | 97夜夜澡人人爽人人喊中国片 | 成人女人看片免费视频放人 | 少妇人妻偷人精品无码视频 | 亚洲熟悉妇女xxx妇女av | 97精品国产97久久久久久免费 | 免费人成在线视频无码 | 久久久久av无码免费网 | 亚洲精品成a人在线观看 | 国产性生交xxxxx无码 | 久久久无码中文字幕久... 
| 无码人妻精品一区二区三区下载 | 黑人粗大猛烈进出高潮视频 | 日韩欧美群交p片內射中文 | 中文字幕 亚洲精品 第1页 | 国产小呦泬泬99精品 | 白嫩日本少妇做爰 | 免费无码一区二区三区蜜桃大 | 国产亚洲视频中文字幕97精品 | 麻花豆传媒剧国产免费mv在线 | 久久久久se色偷偷亚洲精品av | 国产亚洲精品久久久久久大师 | 玩弄人妻少妇500系列视频 | 双乳奶水饱满少妇呻吟 | 国产午夜亚洲精品不卡 | 一本久道久久综合狠狠爱 | 亚洲中文字幕在线观看 | 水蜜桃av无码 | 日本饥渴人妻欲求不满 | 成人精品视频一区二区三区尤物 | 国产情侣作爱视频免费观看 | 久久国产精品_国产精品 | 无码国产色欲xxxxx视频 | 99视频精品全部免费免费观看 | 动漫av网站免费观看 | 2020久久超碰国产精品最新 | 国产 精品 自在自线 | 人人超人人超碰超国产 | 无码人妻少妇伦在线电影 | 精品 日韩 国产 欧美 视频 | 国产精品99久久精品爆乳 | 最近中文2019字幕第二页 | 少妇性l交大片欧洲热妇乱xxx | 国产在线精品一区二区三区直播 | 亚洲色在线无码国产精品不卡 | av香港经典三级级 在线 | 亚洲日本va中文字幕 | 欧美野外疯狂做受xxxx高潮 | 日本一卡2卡3卡四卡精品网站 | 青青青手机频在线观看 | 国产欧美亚洲精品a | 久久综合给久久狠狠97色 | 亚洲精品午夜国产va久久成人 | 日韩少妇白浆无码系列 | 日韩成人一区二区三区在线观看 | 午夜理论片yy44880影院 | 小鲜肉自慰网站xnxx | 风流少妇按摩来高潮 | 牲欲强的熟妇农村老妇女视频 | 中文字幕无码人妻少妇免费 | 无码一区二区三区在线观看 | 亚拍精品一区二区三区探花 | 亚洲日韩av一区二区三区四区 | 国产亚洲日韩欧美另类第八页 | 天天摸天天碰天天添 | 久久久久久国产精品无码下载 | 蜜桃av蜜臀av色欲av麻 999久久久国产精品消防器材 | 亚洲s码欧洲m码国产av | 偷窥村妇洗澡毛毛多 | 人人妻人人澡人人爽人人精品浪潮 | 人人妻人人澡人人爽人人精品浪潮 | 又粗又大又硬毛片免费看 | 水蜜桃色314在线观看 | 亚洲日韩一区二区三区 | 成人免费视频一区二区 | 欧美日韩综合一区二区三区 | 国产香蕉97碰碰久久人人 | 中文字幕+乱码+中文字幕一区 | 成人影院yy111111在线观看 | 亚洲国产综合无码一区 | 亚洲一区二区三区在线观看网站 | 亚洲国产午夜精品理论片 | 无码国内精品人妻少妇 | 国产精品亚洲专区无码不卡 | 无码人妻出轨黑人中文字幕 | 日韩少妇白浆无码系列 | 国产美女精品一区二区三区 | 国精产品一区二区三区 | 国产亚洲人成a在线v网站 | 成 人 免费观看网站 | 色婷婷香蕉在线一区二区 | 亚洲综合在线一区二区三区 | 无码人妻丰满熟妇区五十路百度 | 帮老师解开蕾丝奶罩吸乳网站 | а天堂中文在线官网 | 熟妇激情内射com | 欧美激情内射喷水高潮 | 狠狠亚洲超碰狼人久久 | 精品国产aⅴ无码一区二区 | 久久精品国产99精品亚洲 | 亚洲毛片av日韩av无码 | 成人性做爰aaa片免费看不忠 | 对白脏话肉麻粗话av | 又大又硬又爽免费视频 | 久久精品视频在线看15 | 亚洲日韩精品欧美一区二区 | 人妻少妇精品无码专区二区 | 日韩 欧美 动漫 国产 制服 | 欧美成人高清在线播放 | 鲁一鲁av2019在线 | 国产精品亚洲一区二区三区喷水 | 中文字幕无码人妻少妇免费 | 88国产精品欧美一区二区三区 | 强辱丰满人妻hd中文字幕 | 午夜福利试看120秒体验区 | 高清不卡一区二区三区 | 久久久久免费精品国产 | 精品一区二区不卡无码av | 亚洲欧洲中文日韩av乱码 | 成熟妇人a片免费看网站 | 精品午夜福利在线观看 | 漂亮人妻洗澡被公强 日日躁 | 狠狠色噜噜狠狠狠7777奇米 | 精品国产福利一区二区 | 日本www一道久久久免费榴莲 | 丝袜人妻一区二区三区 | 丰满人妻精品国产99aⅴ | 亚洲人成无码网www | 少妇激情av一区二区 | 99久久精品国产一区二区蜜芽 | 1000部夫妻午夜免费 | 人人澡人人妻人人爽人人蜜桃 | 国产精品va在线观看无码 | 狠狠亚洲超碰狼人久久 | 男人的天堂2018无码 | 欧美zoozzooz性欧美 | 一二三四在线观看免费视频 | 国产精品亚洲а∨无码播放麻豆 | 内射白嫩少妇超碰 | 午夜精品一区二区三区在线观看 | 色婷婷香蕉在线一区二区 | 国内综合精品午夜久久资源 | 亚洲区欧美区综合区自拍区 | av无码电影一区二区三区 | 
久久伊人色av天堂九九小黄鸭 | 少妇性荡欲午夜性开放视频剧场 | 午夜精品久久久久久久久 | 国产黑色丝袜在线播放 | 欧美日韩一区二区免费视频 | 领导边摸边吃奶边做爽在线观看 | 欧美日韩精品 | 欧美国产日韩久久mv | 超碰97人人射妻 | 亚洲一区二区三区国产精华液 | 欧美丰满少妇xxxx性 | 亚洲精品综合一区二区三区在线 | 澳门永久av免费网站 | 最近中文2019字幕第二页 | 亚洲 日韩 欧美 成人 在线观看 | 77777熟女视频在线观看 а天堂中文在线官网 | 国产精品资源一区二区 | 国产精品香蕉在线观看 | 在线精品国产一区二区三区 | 欧美精品免费观看二区 | 图片区 小说区 区 亚洲五月 | 伊人久久婷婷五月综合97色 | 亚洲七七久久桃花影院 | 久久精品人人做人人综合 | 性生交大片免费看女人按摩摩 | 精品国产一区二区三区四区在线看 | 亚洲色欲久久久综合网东京热 | 亚洲国产综合无码一区 | 帮老师解开蕾丝奶罩吸乳网站 | 国产绳艺sm调教室论坛 | 精品成人av一区二区三区 | 99riav国产精品视频 | 全黄性性激高免费视频 | 最近的中文字幕在线看视频 | 在线看片无码永久免费视频 | 国产乱人伦av在线无码 | 国产精品久久久久久久9999 | www成人国产高清内射 | 欧美丰满熟妇xxxx | 国模大胆一区二区三区 | 欧美性黑人极品hd | 欧美第一黄网免费网站 | 自拍偷自拍亚洲精品被多人伦好爽 | 天干天干啦夜天干天2017 | 亚洲一区二区观看播放 | 无码av免费一区二区三区试看 | 在线播放无码字幕亚洲 | 日本肉体xxxx裸交 | 性色欲情网站iwww九文堂 | 中文字幕日韩精品一区二区三区 | 亚洲国产精华液网站w | 伊人色综合久久天天小片 | 一本加勒比波多野结衣 | 精品久久久中文字幕人妻 | 色欲久久久天天天综合网精品 | av无码久久久久不卡免费网站 | 最新国产乱人伦偷精品免费网站 | 玩弄少妇高潮ⅹxxxyw | 久久99精品国产麻豆蜜芽 | 最新国产麻豆aⅴ精品无码 | 亚洲精品一区三区三区在线观看 | 蜜桃av蜜臀av色欲av麻 999久久久国产精品消防器材 | 日本成熟视频免费视频 | 人妻少妇精品视频专区 | 国产免费无码一区二区视频 | 亚洲一区二区三区香蕉 | 青青久在线视频免费观看 | 人妻少妇精品久久 | 欧美人与禽猛交狂配 | 国产特级毛片aaaaaa高潮流水 | 特级做a爰片毛片免费69 | 精品一区二区三区波多野结衣 | 澳门永久av免费网站 | 精品无码国产自产拍在线观看蜜 | 国产精品人人爽人人做我的可爱 | 亚洲成av人片在线观看无码不卡 | 四十如虎的丰满熟妇啪啪 | 午夜精品一区二区三区的区别 | 美女黄网站人色视频免费国产 | 欧美成人家庭影院 | 亚洲精品成a人在线观看 | 少女韩国电视剧在线观看完整 | 国内揄拍国内精品少妇国语 | 国产亚洲精品精品国产亚洲综合 | 欧美 日韩 人妻 高清 中文 | 中文字幕 亚洲精品 第1页 | 丰满少妇熟乱xxxxx视频 | 欧美兽交xxxx×视频 | 亚洲自偷自拍另类第1页 | 日韩视频 中文字幕 视频一区 | 300部国产真实乱 | 5858s亚洲色大成网站www | 色噜噜亚洲男人的天堂 | 国产熟妇另类久久久久 | 亚洲中文无码av永久不收费 | 久久99精品久久久久久动态图 | 四虎影视成人永久免费观看视频 | 国产99久久精品一区二区 | 精品亚洲成av人在线观看 | 97夜夜澡人人双人人人喊 | 欧美第一黄网免费网站 | 亚洲男人av天堂午夜在 | 亚洲综合精品香蕉久久网 | 色爱情人网站 | 欧美 丝袜 自拍 制服 另类 | 中文字幕av日韩精品一区二区 | 内射老妇bbwx0c0ck | 午夜福利不卡在线视频 | 俄罗斯老熟妇色xxxx | 欧美日韩色另类综合 | 久久99热只有频精品8 | 乱码午夜-极国产极内射 | 中文字幕亚洲情99在线 | 亚洲综合无码久久精品综合 | 性做久久久久久久免费看 | 麻花豆传媒剧国产免费mv在线 | 性做久久久久久久免费看 | 久精品国产欧美亚洲色aⅴ大片 | 波多野结衣乳巨码无在线观看 | 亚洲国产精品无码一区二区三区 | 激情人妻另类人妻伦 | 免费看少妇作爱视频 | 亚洲国产av精品一区二区蜜芽 | 国产熟女一区二区三区四区五区 | 亚洲精品久久久久久久久久久 | 中文字幕无码免费久久99 | 久久久久亚洲精品男人的天堂 | 强开小婷嫩苞又嫩又紧视频 | 色欲综合久久中文字幕网 | 熟妇人妻无乱码中文字幕 | 久久久精品国产sm最大网站 | 漂亮人妻洗澡被公强 日日躁 | 精品无码一区二区三区爱欲 | 
国产又爽又猛又粗的视频a片 | 中文字幕无码乱人伦 | 国产一区二区三区四区五区加勒比 | 国产成人精品无码播放 | 色综合久久久无码中文字幕 | 亚洲性无码av中文字幕 | 午夜精品久久久内射近拍高清 | 久激情内射婷内射蜜桃人妖 | 无码国内精品人妻少妇 | 天天做天天爱天天爽综合网 | 亚洲人成网站色7799 | 国产无av码在线观看 | 麻豆果冻传媒2021精品传媒一区下载 | 国产午夜精品一区二区三区嫩草 | 国产成人无码av片在线观看不卡 | 蜜臀aⅴ国产精品久久久国产老师 | 图片区 小说区 区 亚洲五月 | 国产国语老龄妇女a片 | 亚洲日韩av一区二区三区四区 | 日欧一片内射va在线影院 | 欧美兽交xxxx×视频 | 国产成人无码av在线影院 | 欧美怡红院免费全部视频 | 国产又爽又猛又粗的视频a片 | 色欲综合久久中文字幕网 | 亚洲中文字幕在线观看 | 国产av一区二区三区最新精品 | 大地资源中文第3页 | 国产sm调教视频在线观看 | 人妻有码中文字幕在线 | 色综合久久88色综合天天 | 一本加勒比波多野结衣 | 永久免费观看美女裸体的网站 | 欧洲vodafone精品性 | 一本加勒比波多野结衣 | 色婷婷综合中文久久一本 | 四虎国产精品免费久久 | 亚洲国产欧美在线成人 | 又紧又大又爽精品一区二区 | 99国产欧美久久久精品 | 色欲综合久久中文字幕网 | 国产精品久久精品三级 | 欧美人与物videos另类 | 3d动漫精品啪啪一区二区中 | 在线观看国产一区二区三区 | 国产成人精品视频ⅴa片软件竹菊 | 久久久久成人精品免费播放动漫 | 日韩av激情在线观看 | 丰满人妻一区二区三区免费视频 | 乱人伦人妻中文字幕无码 | 俺去俺来也www色官网 | 欧美人与禽猛交狂配 | 男人的天堂av网站 | 日本精品少妇一区二区三区 | 亚洲熟妇色xxxxx亚洲 | 性欧美熟妇videofreesex | 无码人妻丰满熟妇区毛片18 | 日韩精品久久久肉伦网站 | 国产精品怡红院永久免费 | 国产成人无码av在线影院 | 国产精品手机免费 | 国产欧美熟妇另类久久久 | 亚洲精品国产精品乱码不卡 | 婷婷五月综合缴情在线视频 | 精品水蜜桃久久久久久久 | 日本爽爽爽爽爽爽在线观看免 | 好男人社区资源 | 亚洲日韩av一区二区三区中文 | 最近免费中文字幕中文高清百度 | 久久综合给久久狠狠97色 | 婷婷综合久久中文字幕蜜桃三电影 | 色婷婷av一区二区三区之红樱桃 | 亚洲乱亚洲乱妇50p | 亚洲一区二区三区偷拍女厕 | 无码av岛国片在线播放 | 一本大道伊人av久久综合 | 国产精品久久久午夜夜伦鲁鲁 | 国产精品高潮呻吟av久久4虎 | 大屁股大乳丰满人妻 | 国产亚洲精品久久久闺蜜 | 国产成人精品三级麻豆 | 国内精品久久久久久中文字幕 | 黑人粗大猛烈进出高潮视频 | 国产午夜亚洲精品不卡下载 | 狠狠色噜噜狠狠狠狠7777米奇 | 久久视频在线观看精品 | 少女韩国电视剧在线观看完整 | 网友自拍区视频精品 | 国产精品亚洲专区无码不卡 | aⅴ亚洲 日韩 色 图网站 播放 | 国产亚洲人成在线播放 | 国产精品久久久久久无码 | 欧美猛少妇色xxxxx | 一个人看的视频www在线 | 欧洲熟妇色 欧美 | 嫩b人妻精品一区二区三区 | 麻豆果冻传媒2021精品传媒一区下载 | 亚洲国产精品无码一区二区三区 | 亚洲国产精品美女久久久久 | 波多野结衣av在线观看 | 国产成人精品三级麻豆 | 国产av无码专区亚洲awww | 久久午夜无码鲁丝片秋霞 | 欧美日本精品一区二区三区 | 对白脏话肉麻粗话av | 荫蒂被男人添的好舒服爽免费视频 | 中文无码精品a∨在线观看不卡 | 两性色午夜视频免费播放 | 国内精品人妻无码久久久影院 | 性啪啪chinese东北女人 | 全黄性性激高免费视频 | 人妻少妇精品视频专区 | 欧美高清在线精品一区 | 内射巨臀欧美在线视频 | 无码人妻精品一区二区三区下载 | 欧美人与动性行为视频 | 中国大陆精品视频xxxx | 一本久道久久综合婷婷五月 | 图片小说视频一区二区 | 精品一区二区不卡无码av | 亚洲精品国偷拍自产在线观看蜜桃 | 小sao货水好多真紧h无码视频 | 亚洲日韩av一区二区三区四区 | 无码一区二区三区在线 | 大胆欧美熟妇xx | 欧美国产日韩久久mv | 欧洲精品码一区二区三区免费看 | 国产精品福利视频导航 | 日韩av激情在线观看 | 欧美熟妇另类久久久久久多毛 | 久久天天躁狠狠躁夜夜免费观看 | 亚洲精品无码人妻无码 | 国产精品二区一区二区aⅴ污介绍 | 一个人看的视频www在线 | 
亚洲狠狠婷婷综合久久 | 久久精品人人做人人综合试看 | 国产成人人人97超碰超爽8 | 日韩 欧美 动漫 国产 制服 | 国产午夜亚洲精品不卡下载 | 亚洲色欲色欲天天天www | 成人性做爰aaa片免费看不忠 | av无码久久久久不卡免费网站 | 成人aaa片一区国产精品 | 国产av无码专区亚洲a∨毛片 | 亚洲毛片av日韩av无码 | 5858s亚洲色大成网站www | 国产精品久久久久久亚洲影视内衣 | 疯狂三人交性欧美 | 国产高清不卡无码视频 | 日本精品久久久久中文字幕 | 久久精品中文字幕大胸 | 久久精品人妻少妇一区二区三区 | 精品久久久无码人妻字幂 | 麻豆精品国产精华精华液好用吗 | 香蕉久久久久久av成人 | 亚洲精品久久久久久久久久久 | 中文字幕无线码免费人妻 | 国产偷抇久久精品a片69 | 超碰97人人做人人爱少妇 | 日本护士xxxxhd少妇 | 国产一区二区不卡老阿姨 | 色婷婷欧美在线播放内射 | 一本精品99久久精品77 | 日日干夜夜干 | 亚洲中文字幕在线观看 | 精品人妻中文字幕有码在线 | 欧美熟妇另类久久久久久多毛 | 亚洲熟妇自偷自拍另类 | 亚洲综合无码久久精品综合 | 人妻中文无码久热丝袜 | 丝袜 中出 制服 人妻 美腿 | 国产舌乚八伦偷品w中 | 九月婷婷人人澡人人添人人爽 | 亚洲人成网站免费播放 | 国产乱人无码伦av在线a | 亚洲欧美精品aaaaaa片 | 欧美日韩亚洲国产精品 | 99久久人妻精品免费一区 | 人人妻在人人 | 日本熟妇人妻xxxxx人hd | 99久久精品日本一区二区免费 | 熟妇人妻无乱码中文字幕 | 乱码av麻豆丝袜熟女系列 | yw尤物av无码国产在线观看 | 国精产品一品二品国精品69xx | 国产无套内射久久久国产 | 特黄特色大片免费播放器图片 | 亚洲中文字幕va福利 | 国产激情无码一区二区 | 亚洲国产精品美女久久久久 | 亚洲呦女专区 | 国产一区二区三区精品视频 | 最近中文2019字幕第二页 | 乱码午夜-极国产极内射 | 亚洲成a人片在线观看无码3d | 131美女爱做视频 | 一本久道久久综合狠狠爱 | 中文字幕av无码一区二区三区电影 | 一本一道久久综合久久 | 亚洲精品国偷拍自产在线观看蜜桃 | 狠狠色色综合网站 | 日韩在线不卡免费视频一区 | 无码av中文字幕免费放 | 免费人成在线视频无码 | 亚洲爆乳精品无码一区二区三区 | 亚洲 激情 小说 另类 欧美 | 老太婆性杂交欧美肥老太 | 久久99精品久久久久婷婷 | 99久久精品国产一区二区蜜芽 | 成人影院yy111111在线观看 | 欧美日韩亚洲国产精品 | 麻豆av传媒蜜桃天美传媒 | 国产亚洲精品久久久久久久久动漫 | 国产亚洲精品久久久久久久久动漫 | 中国女人内谢69xxxx | 大肉大捧一进一出视频出来呀 | 亚洲综合伊人久久大杳蕉 | 99re在线播放 | 又大又硬又爽免费视频 | 日韩精品a片一区二区三区妖精 | 国产精品亚洲а∨无码播放麻豆 | 少妇性l交大片欧洲热妇乱xxx | 久久久久久亚洲精品a片成人 | 色欲人妻aaaaaaa无码 | 少妇性l交大片欧洲热妇乱xxx | 国产片av国语在线观看 | 好男人www社区 | 天天拍夜夜添久久精品大 | 精品人妻人人做人人爽 | 人妻与老人中文字幕 | 精品成在人线av无码免费看 | 国产色视频一区二区三区 | 精品无码av一区二区三区 | √天堂中文官网8在线 | 性开放的女人aaa片 | 亚洲一区二区观看播放 | 国产精品永久免费视频 | 蜜桃av蜜臀av色欲av麻 999久久久国产精品消防器材 | 青草青草久热国产精品 | 窝窝午夜理论片影院 | 国产在线精品一区二区三区直播 | 十八禁真人啪啪免费网站 | 国产精品久久久久无码av色戒 | 国产av人人夜夜澡人人爽麻豆 | 人妻互换免费中文字幕 | 国内少妇偷人精品视频免费 | 天堂无码人妻精品一区二区三区 | 国产色在线 | 国产 | 红桃av一区二区三区在线无码av | 秋霞成人午夜鲁丝一区二区三区 | 国产手机在线αⅴ片无码观看 | 久久天天躁狠狠躁夜夜免费观看 | 久久精品女人的天堂av | 日日天日日夜日日摸 | 国产香蕉尹人视频在线 | 国产乱人偷精品人妻a片 | 亚洲熟妇自偷自拍另类 | 国产猛烈高潮尖叫视频免费 | 亚洲日韩av一区二区三区四区 | 欧美日韩精品 | 久久午夜无码鲁丝片午夜精品 | 一本加勒比波多野结衣 | 熟女少妇人妻中文字幕 | 无码人妻丰满熟妇区五十路百度 | 中文字幕乱码人妻无码久久 | 男女下面进入的视频免费午夜 | 国产av无码专区亚洲awww | 亚洲中文字幕在线观看 
| 国产av无码专区亚洲a∨毛片 | 中文字幕中文有码在线 | 丰满肥臀大屁股熟妇激情视频 | 亚洲综合精品香蕉久久网 | 日韩精品一区二区av在线 | 国产午夜福利亚洲第一 | 成 人 网 站国产免费观看 | 国产无遮挡吃胸膜奶免费看 | 欧洲精品码一区二区三区免费看 | 欧美肥老太牲交大战 | 女人和拘做爰正片视频 | 真人与拘做受免费视频 | 国产亚洲精品久久久久久大师 | 999久久久国产精品消防器材 | 成人免费无码大片a毛片 | 中文字幕无码av波多野吉衣 | 一本久久伊人热热精品中文字幕 | 3d动漫精品啪啪一区二区中 | 国产精品久久久久久亚洲影视内衣 | 99久久久国产精品无码免费 | 日产国产精品亚洲系列 | 我要看www免费看插插视频 | 无码一区二区三区在线观看 | 一本加勒比波多野结衣 | 99久久久国产精品无码免费 | 又粗又大又硬又长又爽 | 少妇一晚三次一区二区三区 | 老头边吃奶边弄进去呻吟 | 蜜桃视频插满18在线观看 | 国内精品久久毛片一区二区 | 国产香蕉尹人视频在线 | 日韩欧美中文字幕在线三区 | 综合激情五月综合激情五月激情1 | 兔费看少妇性l交大片免费 | 国产口爆吞精在线视频 | 亚洲国产精华液网站w | 久久国产精品精品国产色婷婷 | 欧美高清在线精品一区 | 日产精品99久久久久久 | 麻豆精品国产精华精华液好用吗 | 亚洲欧美日韩成人高清在线一区 | 日产精品99久久久久久 | 色一情一乱一伦一区二区三欧美 | 日日夜夜撸啊撸 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 免费观看激色视频网站 | 亚洲成av人在线观看网址 | 日本熟妇人妻xxxxx人hd | 无码毛片视频一区二区本码 | 午夜无码人妻av大片色欲 | 亚洲天堂2017无码 | 久久久久久国产精品无码下载 | 国产区女主播在线观看 | 国产乱人伦av在线无码 | 在线观看国产午夜福利片 | 四虎永久在线精品免费网址 | 国产午夜手机精彩视频 | 日本又色又爽又黄的a片18禁 | 国产精品沙发午睡系列 | 午夜免费福利小电影 | 精品无码av一区二区三区 | 国产欧美精品一区二区三区 | 内射爽无广熟女亚洲 | 欧美日韩在线亚洲综合国产人 | 一二三四社区在线中文视频 | 撕开奶罩揉吮奶头视频 | 人妻天天爽夜夜爽一区二区 | 少妇性l交大片欧洲热妇乱xxx | 亚洲男人av香蕉爽爽爽爽 | 无码乱肉视频免费大全合集 | 亚洲高清偷拍一区二区三区 | 女人被男人躁得好爽免费视频 | 97色伦图片97综合影院 | 77777熟女视频在线观看 а天堂中文在线官网 | 亚洲理论电影在线观看 | 久久99精品久久久久久 | av小次郎收藏 | 两性色午夜免费视频 | 大屁股大乳丰满人妻 | 中文字幕中文有码在线 | 亚洲自偷自偷在线制服 | 欧美自拍另类欧美综合图片区 | 欧美乱妇无乱码大黄a片 | 国产精品久久国产精品99 | 男人扒开女人内裤强吻桶进去 | 亚洲色偷偷偷综合网 | 色五月丁香五月综合五月 | 三级4级全黄60分钟 | 国内揄拍国内精品人妻 | 骚片av蜜桃精品一区 | 宝宝好涨水快流出来免费视频 | 久久精品国产大片免费观看 | 少妇的肉体aa片免费 | 天堂无码人妻精品一区二区三区 | 国产精品毛片一区二区 | 中文字幕无码日韩欧毛 | 领导边摸边吃奶边做爽在线观看 | 色婷婷欧美在线播放内射 | 东京热无码av男人的天堂 | 狠狠cao日日穞夜夜穞av | 国产高清不卡无码视频 | 亚洲综合无码一区二区三区 | 日日夜夜撸啊撸 | 色五月丁香五月综合五月 | 狂野欧美性猛交免费视频 | 东京无码熟妇人妻av在线网址 | 日欧一片内射va在线影院 | 丰满少妇弄高潮了www | 亚拍精品一区二区三区探花 | 漂亮人妻洗澡被公强 日日躁 | 日本欧美一区二区三区乱码 | 激情内射亚州一区二区三区爱妻 | 亚洲精品成人av在线 | 日本乱偷人妻中文字幕 | 九一九色国产 | 亚洲高清偷拍一区二区三区 | 亚洲の无码国产の无码步美 | 亚洲成a人片在线观看无码 | 午夜精品久久久久久久 | 亚洲精品综合五月久久小说 | 一本久道高清无码视频 | 男女下面进入的视频免费午夜 | 久久五月精品中文字幕 | 国产成人无码区免费内射一片色欲 | 国产偷国产偷精品高清尤物 | 人妻中文无码久热丝袜 | 国产综合色产在线精品 | 小sao货水好多真紧h无码视频 | 老司机亚洲精品影院无码 | 国产色视频一区二区三区 | 97色伦图片97综合影院 | 熟妇女人妻丰满少妇中文字幕 | 精品人妻人人做人人爽 | 在线播放免费人成毛片乱码 | 性色av无码免费一区二区三区 | 亚洲熟妇色xxxxx亚洲 
| 精品人妻人人做人人爽夜夜爽 | 亚洲精品国偷拍自产在线麻豆 | 67194成是人免费无码 | 精品一区二区三区波多野结衣 | 丰满少妇熟乱xxxxx视频 | 国产美女精品一区二区三区 | 图片小说视频一区二区 | 亚洲综合无码一区二区三区 | 97无码免费人妻超级碰碰夜夜 | 精品欧美一区二区三区久久久 | 国产成人精品无码播放 | 久久国产精品二国产精品 | 麻豆精品国产精华精华液好用吗 | 久久99精品国产麻豆蜜芽 | 亚洲精品鲁一鲁一区二区三区 | 永久免费观看美女裸体的网站 | 午夜时刻免费入口 | 国产无遮挡吃胸膜奶免费看 | 天天做天天爱天天爽综合网 | 国产亚洲精品精品国产亚洲综合 | 色欲久久久天天天综合网精品 | 小泽玛莉亚一区二区视频在线 | 国内揄拍国内精品少妇国语 | 日日碰狠狠丁香久燥 | 一本久久伊人热热精品中文字幕 | 99精品视频在线观看免费 | 亚洲国产日韩a在线播放 | 亚洲国产av美女网站 | 亚洲午夜无码久久 | 欧美亚洲日韩国产人成在线播放 | 婷婷丁香五月天综合东京热 | 国产成人综合在线女婷五月99播放 | 性欧美熟妇videofreesex | 99视频精品全部免费免费观看 | 日韩av无码中文无码电影 | 亚洲熟妇色xxxxx欧美老妇 | 欧美日韩人成综合在线播放 | 国产高潮视频在线观看 | 国内揄拍国内精品人妻 | 国产免费久久久久久无码 | 亚洲中文字幕无码一久久区 | 麻豆人妻少妇精品无码专区 | 亚洲一区av无码专区在线观看 | 久久99精品久久久久久动态图 | 伊在人天堂亚洲香蕉精品区 | 国产热a欧美热a在线视频 | 夫妻免费无码v看片 | 日韩无码专区 | 日韩无套无码精品 | 免费观看激色视频网站 | 蜜桃av蜜臀av色欲av麻 999久久久国产精品消防器材 | 美女黄网站人色视频免费国产 | 国产美女精品一区二区三区 | 国产精品无码成人午夜电影 | 亚洲小说图区综合在线 | 国产区女主播在线观看 | 国产亚洲美女精品久久久2020 | 日本va欧美va欧美va精品 | 131美女爱做视频 | 亚洲综合在线一区二区三区 | 高中生自慰www网站 | 人人妻人人澡人人爽精品欧美 | 亚洲一区二区观看播放 | 精品aⅴ一区二区三区 | 国产一区二区三区四区五区加勒比 | 丝袜人妻一区二区三区 | 天干天干啦夜天干天2017 | www国产亚洲精品久久久日本 | 特级做a爰片毛片免费69 | 久久这里只有精品视频9 | 亚洲人成影院在线无码按摩店 | 国内少妇偷人精品视频 | 丰满少妇女裸体bbw | 国产人妻大战黑人第1集 | 国产人妻久久精品二区三区老狼 | 久久天天躁狠狠躁夜夜免费观看 | 国产精品亚洲一区二区三区喷水 | 一二三四在线观看免费视频 | 国产成人亚洲综合无码 | 天天爽夜夜爽夜夜爽 | 国内精品人妻无码久久久影院蜜桃 | 在线精品国产一区二区三区 | 久久99国产综合精品 | 人人爽人人澡人人人妻 | 亚洲阿v天堂在线 | 2020最新国产自产精品 | 国产97人人超碰caoprom | 国产成人综合美国十次 | 亚洲 高清 成人 动漫 | 老太婆性杂交欧美肥老太 | 性生交片免费无码看人 | 亚洲国产精品久久人人爱 | 女人被男人爽到呻吟的视频 | 荫蒂添的好舒服视频囗交 | 国产亚洲精品久久久久久久 | 99久久婷婷国产综合精品青草免费 | 高中生自慰www网站 | 久久午夜无码鲁丝片秋霞 | 精品无码一区二区三区的天堂 | 国产欧美精品一区二区三区 | 日本熟妇乱子伦xxxx | 欧美大屁股xxxxhd黑色 | 成人影院yy111111在线观看 | www一区二区www免费 | 中文字幕av无码一区二区三区电影 | 中文字幕无码人妻少妇免费 | 国产精品无码成人午夜电影 | 野狼第一精品社区 | 东京无码熟妇人妻av在线网址 | 国产精品久久久久久久9999 | 少妇性l交大片欧洲热妇乱xxx | 免费看男女做好爽好硬视频 | 色综合久久久无码中文字幕 | 女人被男人躁得好爽免费视频 | 天堂无码人妻精品一区二区三区 | 97久久国产亚洲精品超碰热 | 国产69精品久久久久app下载 | 乱中年女人伦av三区 | 精品国产一区av天美传媒 | 亚洲成色www久久网站 | 亚洲欧美精品aaaaaa片 | 1000部夫妻午夜免费 | 欧美阿v高清资源不卡在线播放 | 久久精品国产99精品亚洲 | 午夜福利一区二区三区在线观看 | 欧美日本免费一区二区三区 | 午夜丰满少妇性开放视频 | 久久午夜无码鲁丝片 | 久久无码中文字幕免费影院蜜桃 | 无码成人精品区在线观看 | 欧美日韩人成综合在线播放 | 男人扒开女人内裤强吻桶进去 | 
无码av免费一区二区三区试看 | 精品熟女少妇av免费观看 | 国产在线无码精品电影网 | 51国偷自产一区二区三区 | 狠狠躁日日躁夜夜躁2020 | 国产成人精品久久亚洲高清不卡 | 亚洲色欲色欲欲www在线 | 亚洲熟妇色xxxxx欧美老妇y | 精品久久久久久人妻无码中文字幕 | 国产激情综合五月久久 | 性欧美牲交在线视频 | 亚洲成色www久久网站 | 国产肉丝袜在线观看 | 国产无遮挡吃胸膜奶免费看 | 欧美放荡的少妇 | 国产成人综合色在线观看网站 | 丰满岳乱妇在线观看中字无码 | 最新国产乱人伦偷精品免费网站 | 亚洲精品中文字幕久久久久 | 久久精品中文字幕一区 | 99久久精品国产一区二区蜜芽 | 国产精品高潮呻吟av久久4虎 | 久久久中文字幕日本无吗 | 天海翼激烈高潮到腰振不止 | 国产精品无套呻吟在线 | 人妻少妇精品无码专区二区 | 麻豆人妻少妇精品无码专区 | 熟妇女人妻丰满少妇中文字幕 | 精品久久久久久亚洲精品 | 亚洲中文字幕无码一久久区 | 国产黑色丝袜在线播放 | 亚洲成av人片在线观看无码不卡 | 国产午夜福利100集发布 | 蜜臀aⅴ国产精品久久久国产老师 | 少妇性l交大片欧洲热妇乱xxx | 久久综合香蕉国产蜜臀av | 国产成人无码av在线影院 | 国产精品久久久久久久影院 | 两性色午夜免费视频 | 乱人伦中文视频在线观看 | 亚洲の无码国产の无码步美 | 无码吃奶揉捏奶头高潮视频 | 人妻人人添人妻人人爱 | 亚洲熟妇色xxxxx欧美老妇y | 国产三级久久久精品麻豆三级 | 亚洲熟妇色xxxxx欧美老妇y | 性做久久久久久久久 | 女人被男人躁得好爽免费视频 | 无人区乱码一区二区三区 | 曰韩无码二三区中文字幕 | 国产精品亚洲五月天高清 | 欧洲精品码一区二区三区免费看 | 亚洲色大成网站www国产 | 亚洲精品久久久久久一区二区 | 极品嫩模高潮叫床 | 牛和人交xxxx欧美 | 成在人线av无码免观看麻豆 | 免费男性肉肉影院 | 国产乱人无码伦av在线a | 国产美女精品一区二区三区 | 无码免费一区二区三区 | 99国产精品白浆在线观看免费 | 在线观看国产午夜福利片 | 精品人妻人人做人人爽 | 天堂一区人妻无码 | 亚洲一区二区三区无码久久 | 国产精品久久国产精品99 | 爽爽影院免费观看 | 欧美激情综合亚洲一二区 | 99久久无码一区人妻 | 亚洲国产精品久久久久久 | 亚洲毛片av日韩av无码 | 老熟女乱子伦 | 国产婷婷色一区二区三区在线 | 亚洲乱码中文字幕在线 | 无码任你躁久久久久久久 | 国产成人无码av一区二区 | 无码av岛国片在线播放 | 精品久久久久香蕉网 | 激情五月综合色婷婷一区二区 | 露脸叫床粗话东北少妇 | 国产精品无码久久av | 免费网站看v片在线18禁无码 | 亚洲天堂2017无码中文 | 无码国模国产在线观看 | 亚洲国产av美女网站 | 成人欧美一区二区三区黑人免费 | 亚洲人成影院在线观看 | 精品人妻人人做人人爽夜夜爽 | 国精产品一品二品国精品69xx | 亚洲人成网站色7799 | 国产真人无遮挡作爱免费视频 | 老熟妇乱子伦牲交视频 | 亚洲中文字幕无码一久久区 | 精品国产乱码久久久久乱码 | 无码人妻av免费一区二区三区 | 精品国产一区二区三区av 性色 | 国产成人无码一二三区视频 | 亚洲精品成a人在线观看 | 久久精品国产99久久6动漫 | 精品欧美一区二区三区久久久 | 国产xxx69麻豆国语对白 | 97资源共享在线视频 | 亚洲精品国产精品乱码不卡 | 日本一区二区三区免费播放 | 97精品国产97久久久久久免费 | 全黄性性激高免费视频 | 欧美35页视频在线观看 | 国产小呦泬泬99精品 | 国内精品久久毛片一区二区 | 久久伊人色av天堂九九小黄鸭 | 撕开奶罩揉吮奶头视频 | 最新版天堂资源中文官网 | 久青草影院在线观看国产 | 国产精品高潮呻吟av久久 | 97人妻精品一区二区三区 | 精品乱码久久久久久久 | 丰满肥臀大屁股熟妇激情视频 | 亚洲综合在线一区二区三区 | 色婷婷久久一区二区三区麻豆 | 老子影院午夜精品无码 | 国产莉萝无码av在线播放 | 国产亚洲精品久久久久久 | 亚洲一区二区三区播放 | 欧美日韩久久久精品a片 | 日本一卡2卡3卡4卡无卡免费网站 国产一区二区三区影院 | 色一情一乱一伦 | 久久99精品久久久久久动态图 | 中国大陆精品视频xxxx | √8天堂资源地址中文在线 | 精品国产一区二区三区四区在线看 | 97无码免费人妻超级碰碰夜夜 |