久久精品国产精品国产精品污,男人扒开添女人下部免费视频,一级国产69式性姿势免费视频,夜鲁夜鲁很鲁在线视频 视频,欧美丰满少妇一区二区三区,国产偷国产偷亚洲高清人乐享,中文 在线 日韩 亚洲 欧美,熟妇人妻无乱码中文字幕真矢织江,一区二区三区人妻制服国产

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 运维知识 > Android >内容正文

Android

clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明

發布時間:2025/3/15 Android 32 豆豆
生活随笔 收集整理的這篇文章主要介紹了 clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

這里是官網的指令說明:

http://infocenter.arm.com/help/basic/help.jsp?topic=/com.arm.doc.dui0204ic/

下面是對應arm_neon頭文件中,對應的ABI說明。

//后期不斷維護更新

//2016_1_5 14:30

#ifndef __ARM_NEON__

#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h

#endif

/*(1)、正常指令:生成大小相同且類型通常與操作數向量相同的結果向量;

(2)、長指令:對雙字向量操作數執行運算,生成四字向量的結果。所生成的元素一般是操作數元素寬度的兩倍,

并屬于同一類型;

(3)、寬指令:一個雙字向量操作數和一個四字向量操作數執行運算,生成四字向量結果。所生成的元素和第一個

操作數的元素是第二個操作數元素寬度的兩倍;

(4)、窄指令:四字向量操作數執行運算,并生成雙字向量結果,所生成的元素一般是操作數元素寬度的一半;

(5)、飽和指令:當超過數據類型指定的范圍則自動限制在該范圍內。*/

/******************************************************Addition*************************/

/*--1、Vector add(正常指令): vadd -> ri = ai + bi; r, a, b have equal lane sizes--*/

int8x8_t vadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_add_epi8

int16x4_t vadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_add_epi16

int32x2_t vadd_s32 (int32x2_t __a, int32x2_t __b);//_mm_add_epi32

int64x1_t vadd_s64 (int64x1_t __a, int64x1_t __b);//_mm_add_epi64

//_mm_add_ps, SSE, use only low 64 bits

float32x2_t vadd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_add_epi8

uint16x4_t vadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_add_epi16

uint32x2_t vadd_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_add_epi32

uint64x1_t vadd_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_add_epi64

int8x16_t vaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_add_epi8

int16x8_t vaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_add_epi16

int32x4_t vaddq_s32 (int32x4_t __a, int32x4_t __b);//_mm_add_epi32

int64x2_t vaddq_s64 (int64x2_t __a, int64x2_t __b);//_mm_add_epi64

float32x4_t vaddq_f32 (float32x4_t __a, float32x4_t __b);//_mm_add_ps

uint8x16_t vaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_add_epi8

uint16x8_t vaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_add_epi16

uint32x4_t vaddq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_add_epi32

uint64x2_t vaddq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_add_epi64

/*--2、Vector long add(長指令): vaddl -> ri = ai + bi; a, b have equal lane sizes,

result is a 128 bit vector of lanes that are twice the width--*/

int16x8_t vaddl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vaddl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vaddl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vaddl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vaddl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vaddl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3、Vector wide add(寬指令): vaddw -> ri = ai + bi--*/

int16x8_t vaddw_s8 (int16x8_t __a, int8x8_t __b);

int32x4_t vaddw_s16 (int32x4_t __a, int16x4_t __b);

int64x2_t vaddw_s32 (int64x2_t __a, int32x2_t __b);

uint16x8_t vaddw_u8 (uint16x8_t __a, uint8x8_t __b);

uint32x4_t vaddw_u16 (uint32x4_t __a, uint16x4_t __b);

uint64x2_t vaddw_u32 (uint64x2_t __a, uint32x2_t __b);

/*--4、Vector halving add: vhadd -> ri = (ai + bi) >> 1;

shifts each result right one bit, Results are truncated--*/

int8x8_t vhadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vhadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vhadd_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vhadd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vhadd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vhadd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vhaddq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vhaddq_s16 (int16x8_t __a, int16x8_t __b)

int32x4_t vhaddq_s32 (int32x4_t __a, int32x4_t __b)

uint8x16_t vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)

uint16x8_t vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)

uint32x4_t vhaddq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--5、Vector rounding halving add: vrhadd -> ri = (ai + bi + 1) >> 1;

shifts each result right one bit, Results are rounded(四舍五入)--*/

int8x8_t vrhadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vrhadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vrhadd_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vrhadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_avg_epu8

uint16x4_t vrhadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_avg_epu16

uint32x2_t vrhadd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vrhaddq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vrhaddq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vrhaddq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_avg_epu8

uint16x8_t vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_avg_epu16

uint32x4_t vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6、Vector saturating add(飽和指令): vqadd -> ri = sat(ai + bi);

the results are saturated if they overflow--*/

int8x8_t vqadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_adds_epi8

int16x4_t vqadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_adds_epi16

int32x2_t vqadd_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqadd_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_adds_epu8

uint16x4_t vqadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_adds_epu16

uint32x2_t vqadd_u32 (uint32x2_t __a, uint32x2_t __b);

uint64x1_t vqadd_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vqaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_adds_epi8

int16x8_t vqaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_adds_epi16

int32x4_t vqaddq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqaddq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_adds_epu8

uint16x8_t vqaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_adds_epu16

uint32x4_t vqaddq_u32 (uint32x4_t __a, uint32x4_t __b);

uint64x2_t vqaddq_u64 (uint64x2_t __a, uint64x2_t __b);

/*--7、Vector add high half(窄指令): vaddhn -> ri = ai + bi;

selecting High half, The results are truncated--*/

int8x8_t vaddhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vaddhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vaddhn_s64 (int64x2_t __a, int64x2_t __b);

uint8x8_t vaddhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vaddhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vaddhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*--8、Vector rounding add high half(窄指令): vraddhn -> ri = ai + bi;

selecting High half, The results are rounded--*/

int8x8_t vraddhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vraddhn_s32 (int32x4_t __a, int32x4_t __b)

int32x2_t vraddhn_s64 (int64x2_t __a, int64x2_t __b)

uint8x8_t vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)

uint16x4_t vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)

uint32x2_t vraddhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*******************************************Multiplication******************************/

/*--1、Vector multiply(正常指令): vmul -> ri = ai * bi;--*/

int8x8_t vmul_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vmul_s16 (int16x4_t __a, int16x4_t __b);//_mm_mullo_epi16

int32x2_t vmul_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vmul_f32 (float32x2_t __a, float32x2_t __b);//_mm_mul_ps

uint8x8_t vmul_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vmul_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_mullo_epi16

uint32x2_t vmul_u32 (uint32x2_t __a, uint32x2_t __b);

poly8x8_t vmul_p8 (poly8x8_t __a, poly8x8_t __b);

int8x16_t vmulq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vmulq_s16 (int16x8_t __a, int16x8_t __b);//_mm_mullo_epi16

int32x4_t vmulq_s32 (int32x4_t __a, int32x4_t __b);

float32x4_t vmulq_f32 (float32x4_t __a, float32x4_t __b);//_mm_mul_ps

uint8x16_t vmulq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vmulq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_mullo_epi16

uint32x4_t vmulq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16_t vmulq_p8 (poly8x16_t __a, poly8x16_t __b);

/*--2、Vector multiply accumulate: vmla -> ri = ai + bi * ci; --*/

int8x8_t vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

float32x2_t vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

float32x4_t vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--3、Vector multiply accumulate long: vmlal -> ri = ai + bi * ci --*/

int16x8_t vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/*--4、Vector multiply subtract: vmls -> ri = ai - bi * ci --*/

int8x8_t vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

float32x2_t vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

float32x4_t vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--5、Vector multiply subtract long:vmlsl -> ri = ai - bi * ci --*/

int16x8_t vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/*--6、Vector saturating doubling multiply high: vqdmulh -> ri = sat(2 * ai * bi);

doubles the results and returns only the high half of the truncated results--*/

int16x4_t vqdmulh_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqdmulh_s32 (int32x2_t __a, int32x2_t __b);

int16x8_t vqdmulhq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqdmulhq_s32 (int32x4_t __a, int32x4_t __b);

/*--7、Vector saturating rounding doubling multiply high: vqrdmulh -> ri = sat(2 * ai * bi);

doubles the results and returns only the high half of the rounded results.

The results are saturated if they overflow--*/

int16x4_t vqrdmulh_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqrdmulh_s32 (int32x2_t __a, int32x2_t __b);

int16x8_t vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b);

/*--8、Vector saturating doubling multiply accumulate long: vqdmlal -> ri = sat(ai + sat(2 * bi * ci));

multiplies the elements in the second and third vectors, doubles the results and adds the

results to the values in the first vector. The results are saturated if they overflow--*/

int32x4_t vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

/*--9、Vector saturating doubling multiply subtract long: vqdmlsl -> ri = sat(ai - sat(2 * bi * ci));

multiplies the elements in the second and third vectors, doubles the results and subtracts

the results from the elements in the first vector.

The results are saturated if they overflow--*/

int32x4_t vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

/*--10、Vector long multiply(長指令): vmull -> ri = ai * bi;--*/

int16x8_t vmull_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vmull_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vmull_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vmull_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vmull_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vmull_u32 (uint32x2_t __a, uint32x2_t __b);

poly16x8_t vmull_p8 (poly8x8_t __a, poly8x8_t __b);

/*--11、Vector saturating doubling long multiply: vqdmull -> ri = sat(2 * ai * bi);

If any of the results overflow, they are saturated--*/

int32x4_t vqdmull_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vqdmull_s32 (int32x2_t __a, int32x2_t __b);

/*--12、Fused multiply accumulate: vfma -> ri = ai + bi * ci;

The result of the multiply is not rounded before the accumulation--*/

float32x2_t vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)

float32x4_t vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

/*--13、Fused multiply subtract: vfms -> ri = ai - bi * ci;

The result of the multiply is not rounded before the subtraction--*/

float32x2_t vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

float32x4_t vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

/******************************************************Round to integral****************/

/*--1、to nearest, ties to even--*/

float32x2_t vrndn_f32 (float32x2_t __a);

float32x4_t vrndqn_f32 (float32x4_t __a);

/*--2、to nearest, ties away from zero--*/

float32x2_t vrnda_f32 (float32x2_t __a);

float32x4_t vrndqa_f32 (float32x4_t __a);

/*--3、towards +Inf--*/

float32x2_t vrndp_f32 (float32x2_t __a);

float32x4_t vrndqp_f32 (float32x4_t __a);

/*--4、towards -Inf--*/

float32x2_t vrndm_f32 (float32x2_t __a);

float32x4_t vrndqm_f32 (float32x4_t __a);

/*--5、towards 0--*/

float32x2_t vrnd_f32 (float32x2_t __a);

float32x4_t vrndq_f32 (float32x4_t __a);

/**********************************************Subtraction******************************/

/*--1、Vector subtract(正常指令):vsub -> ri = ai - bi;--*/

int8x8_t vsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_sub_epi8

int16x4_t vsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_sub_epi16

int32x2_t vsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_sub_epi32

int64x1_t vsub_s64 (int64x1_t __a, int64x1_t __b);//_mm_sub_epi64

float32x2_t vsub_f32 (float32x2_t __a, float32x2_t __b);//_mm_sub_ps

uint8x8_t vsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_sub_epi8

uint16x4_t vsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_sub_epi16

uint32x2_t vsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_sub_epi32

uint64x1_t vsub_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_sub_epi64

int8x16_t vsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_sub_epi8

int16x8_t vsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_sub_epi16

int32x4_t vsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_sub_epi32

int64x2_t vsubq_s64 (int64x2_t __a, int64x2_t __b);//_mm_sub_epi64

float32x4_t vsubq_f32 (float32x4_t __a, float32x4_t __b);//_mm_sub_ps

uint8x16_t vsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_sub_epi8

uint16x8_t vsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_sub_epi16

uint32x4_t vsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_sub_epi32

uint64x2_t vsubq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_sub_epi64

/*--2、Vector long subtract(長指令): vsubl -> ri = ai - bi; --*/

int16x8_t vsubl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vsubl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vsubl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vsubl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vsubl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vsubl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3、Vector wide subtract(寬指令): vsubw -> ri = ai - bi;--*/

int16x8_t vsubw_s8 (int16x8_t __a, int8x8_t __b);

int32x4_t vsubw_s16 (int32x4_t __a, int16x4_t __b);

int64x2_t vsubw_s32 (int64x2_t __a, int32x2_t __b);

uint16x8_t vsubw_u8 (uint16x8_t __a, uint8x8_t __b);

uint32x4_t vsubw_u16 (uint32x4_t __a, uint16x4_t __b);

uint64x2_t vsubw_u32 (uint64x2_t __a, uint32x2_t __b);

/*--4、Vector saturating subtract (saturating instruction): vqsub -> ri = sat(ai - bi);

If any of the results overflow, they are saturated.

The trailing comments name the matching x86 SSE intrinsic where one exists;
SSE provides saturating subtraction for 8- and 16-bit lanes only--*/

int8x8_t vqsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_subs_epi8

int16x4_t vqsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_subs_epi16

int32x2_t vqsub_s32 (int32x2_t __a, int32x2_t __b);// no SSE equivalent (no 32-bit _mm_subs_*)

int64x1_t vqsub_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_subs_epu8

uint16x4_t vqsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_subs_epu16

uint32x2_t vqsub_u32 (uint32x2_t __a, uint32x2_t __b);// no SSE equivalent (no 32-bit _mm_subs_*)

uint64x1_t vqsub_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vqsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_subs_epi8

int16x8_t vqsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_subs_epi16

int32x4_t vqsubq_s32 (int32x4_t __a, int32x4_t __b);// no SSE equivalent (no 32-bit _mm_subs_*)

int64x2_t vqsubq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_subs_epu8

uint16x8_t vqsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_subs_epu16

uint32x4_t vqsubq_u32 (uint32x4_t __a, uint32x4_t __b);// no SSE equivalent (no 32-bit _mm_subs_*)

uint64x2_t vqsubq_u64 (uint64x2_t __a, uint64x2_t __b);

/*--5、Vector halving subtract: vhsub -> ri = (ai - bi) >> 1;

shifts each result right one bit, The results are truncated.--*/

int8x8_t vhsub_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vhsub_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vhsub_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vhsub_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vhsub_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vhsub_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vhsubq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vhsubq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vhsubq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vhsubq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vhsubq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vhsubq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6、Vector subtract high half(窄指令): vsubhn -> ri = ai - bi;

It returns the most significant halves of the results. The results are truncated--*/

int8x8_t vsubhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vsubhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vsubhn_s64 (int64x2_t __a, int64x2_t __b);

uint8x8_t vsubhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vsubhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vsubhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*--7、Vector rounding subtract high half(窄指令): vrsubhn -> ai - bi;

It returns the most significant halves of the results. The results are rounded--*/

int8x8_t vrsubhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vrsubhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vrsubhn_s64 (int64x2_t __a, int64x2_t __b)

uint8x8_t vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b);

/******************************************************Comparison***********************/

/*--1、Vector compare equal(正常指令): vceq -> ri = ai == bi ? 1...1 : 0...0;

If they are equal, the corresponding element in the destination vector is set to all ones.

Otherwise, it is set to all zeros--*/

uint8x8_t vceq_s8 (int8x8_t __a, int8x8_t __b);//_mm_cmpeq_epi8

uint16x4_t vceq_s16 (int16x4_t __a, int16x4_t __b);//_mm_cmpeq_epi16

uint32x2_t vceq_s32 (int32x2_t __a, int32x2_t __b);//_mm_cmpeq_epi32

uint32x2_t vceq_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vceq_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_cmpeq_epi8

uint16x4_t vceq_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_cmpeq_epi16

uint32x2_t vceq_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_cmpeq_epi32

uint8x8_t vceq_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_cmpeq_epi8

uint8x16_t vceqq_s8 (int8x16_t __a, int8x16_t __b);//_mm_cmpeq_epi8

uint16x8_t vceqq_s16 (int16x8_t __a, int16x8_t __b);//_mm_cmpeq_epi16

uint32x4_t vceqq_s32 (int32x4_t __a, int32x4_t __b);//_mm_cmpeq_epi32

uint32x4_t vceqq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vceqq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_cmpeq_epi8

uint16x8_t vceqq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_cmpeq_epi16

uint32x4_t vceqq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_cmpeq_epi32

uint8x16_t vceqq_p8 (poly8x16_t __a, poly8x16_t __b);//_mm_cmpeq_epi8

/*--2、Vector compare greater-than or equal(正常指令): vcge-> ri = ai >= bi ? 1...1:0...0;

If it is greater than or equal to it, the corresponding element in the destination

vector is set to all ones. Otherwise, it is set to all zeros.--*/

uint8x8_t vcge_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcge_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcge_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcge_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcge_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcge_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcge_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcgeq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcgeq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcgeq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcgeq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcgeq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcgeq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcgeq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--3、Vector compare less-than or equal(正常指令): vcle -> ri = ai <= bi ? 1...1:0...0;

If it is less than or equal to it, the corresponding element in the destination vector

is set to all ones. Otherwise, it is set to all zeros.--*/

uint8x8_t vcle_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcle_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcle_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcle_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcle_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcle_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcle_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcleq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcleq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcleq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcleq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcleq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcleq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcleq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--4、Vector compare greater-than(正常指令): vcgt -> ri = ai > bi ? 1...1:0...0;

If it is greater than it, the corresponding element in the destination vector is

set to all ones. Otherwise, it is set to all zeros--*/

uint8x8_t vcgt_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcgt_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcgt_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcgt_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcgt_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcgt_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcgt_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcgtq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcgtq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcgtq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcgtq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcgtq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcgtq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcgtq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--5、Vector compare less-than(正常指令): vclt -> ri = ai < bi ? 1...1:0...0;

If it is less than it, the corresponding element in the destination vector is set

to all ones.Otherwise, it is set to all zeros--*/

uint8x8_t vclt_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vclt_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vclt_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vclt_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vclt_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vclt_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vclt_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcltq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcltq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcltq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcltq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcltq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcltq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcltq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6、Vector compare absolute greater-than or equal(正常指令):

vcage -> ri = |ai| >= |bi| ? 1...1:0...0;

compares the absolute value of each element in a vector with the absolute value of the

corresponding element of a second vector. If it is greater than or equal to it,

the corresponding element in the destination vector is set to all ones.

Otherwise, it is set to all zeros.--*/

uint32x2_t vcage_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcageq_f32 (float32x4_t __a, float32x4_t __b);

/*--7、Vector compare absolute less-than or equal(正常指令):

vcale -> ri = |ai| <= |bi| ? 1...1:0...0;

compares the absolute value of each element in a vector with the absolute value of the

corresponding element of a second vector. If it is less than or equal to it,

the corresponding element in the destination vector is set to all ones.

Otherwise, it is set to all zeros--*/

uint32x2_t vcale_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcaleq_f32 (float32x4_t __a, float32x4_t __b);

/*--8、Vector compare absolute greater-than(正常指令):

vcagt -> ri = |ai| > |bi| ? 1...1:0...0;

compares the absolute value of each element in a vector with the absolute value of the

corresponding element of a second vector. If it is greater than it,

the corresponding element in the destination vector is set to all ones.

Otherwise, it is set to all zeros.--*/

uint32x2_t vcagt_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcagtq_f32 (float32x4_t __a, float32x4_t __b);

/*--9、Vector compare absolute less-than(正常指令):

vcalt -> ri = |ai| < |bi| ? 1...1:0...0;

compares the absolute value of each element in a vector with the absolute value of the

corresponding element of a second vector.If it is less than it, the corresponding

element in the destination vector is set to all ones. Otherwise,it is set to all zeros--*/

uint32x2_t vcalt_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcaltq_f32 (float32x4_t __a, float32x4_t __b);

/**********************************************Vector test bits*************************/

/*--正常指令,vtst -> ri = ((ai & bi) != 0) ? 1...1:0...0;

bitwise logical ANDs each element in a vector with the corresponding element of a second

vector.If the result is not zero, the corresponding element in the destination vector

is set to all ones. Otherwise, it is set to all zeros--*/

uint8x8_t vtst_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vtst_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vtst_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vtst_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vtst_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vtst_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x8_t vtst_p8 (poly8x8_t __a, poly8x8_t __b);

uint8x16_t vtstq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vtstq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vtstq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vtstq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vtstq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vtstq_u32 (uint32x4_t __a, uint32x4_t __b);

uint8x16_t vtstq_p8 (poly8x16_t __a, poly8x16_t __b);

/**********************************************Absolute difference**********************/

/*--1、Absolute difference between the arguments(正常指令): vabd -> ri = |ai - bi|;

returns the absolute values of the results--*/

int8x8_t vabd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vabd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vabd_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vabd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vabd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vabd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vabd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vabdq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vabdq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vabdq_s32 (int32x4_t __a, int32x4_t __b);

float32x4_t vabdq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vabdq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vabdq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vabdq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--2、Absolute difference - long(長指令): vabdl -> ri = |ai - bi|;

The elements in the result vector are wider--*/

int16x8_t vabdl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vabdl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vabdl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vabdl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vabdl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vabdl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3、Absolute difference and accumulate: vaba -> ri = ai + |bi - ci|;--*/

int8x8_t vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

uint8x8_t vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

uint8x16_t vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--4、Absolute difference and accumulate - long: vabal -> ri = ai + |bi - ci|;

The elements in the result are wider--*/

int16x8_t vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/***********************************************Max*************************************/

/*--正常指令, vmax -> ri = ai >= bi ? ai : bi; returns the larger of each pair--*/

int8x8_t vmax_s8 (int8x8_t __a, int8x8_t __b);//_mm_max_epi8

int16x4_t vmax_s16 (int16x4_t __a, int16x4_t __b);//_mm_max_epi16

int32x2_t vmax_s32 (int32x2_t __a, int32x2_t __b);//_mm_max_epi32

float32x2_t vmax_f32 (float32x2_t __a, float32x2_t __b);//_mm_max_ps

uint8x8_t vmax_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_max_epu8

uint16x4_t vmax_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_max_epu16

uint32x2_t vmax_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_max_epu32

int8x16_t vmaxq_s8 (int8x16_t __a, int8x16_t __b);//_mm_max_epi8

int16x8_t vmaxq_s16 (int16x8_t __a, int16x8_t __b);//_mm_max_epi16

int32x4_t vmaxq_s32 (int32x4_t __a, int32x4_t __b);//_mm_max_epi32

float32x4_t vmaxq_f32 (float32x4_t __a, float32x4_t __b);//_mm_max_ps

uint8x16_t vmaxq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_max_epu8

uint16x8_t vmaxq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_max_epu16

uint32x4_t vmaxq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_max_epu32

/****************************************************Min********************************/

/*--正常指令, vmin -> ri = ai >= bi ? bi : ai; returns the smaller of each pair--*/

int8x8_t vmin_s8 (int8x8_t __a, int8x8_t __b);//_mm_min_epi8

int16x4_t vmin_s16 (int16x4_t __a, int16x4_t __b);//_mm_min_epi16

int32x2_t vmin_s32 (int32x2_t __a, int32x2_t __b);//_mm_min_epi32

float32x2_t vmin_f32 (float32x2_t __a, float32x2_t __b);//_mm_min_ps

uint8x8_t vmin_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_min_epu8

uint16x4_t vmin_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_min_epu16

uint32x2_t vmin_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_min_epu32

int8x16_t vminq_s8 (int8x16_t __a, int8x16_t __b);//_mm_min_epi8

int16x8_t vminq_s16 (int16x8_t __a, int16x8_t __b);//_mm_min_epi16

int32x4_t vminq_s32 (int32x4_t __a, int32x4_t __b);//_mm_min_epi32

float32x4_t vminq_f32 (float32x4_t __a, float32x4_t __b);//_mm_min_ps

uint8x16_t vminq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_min_epu8

uint16x8_t vminq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_min_epu16

uint32x4_t vminq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_min_epu32

/*******************************************Pairwise addition***************************/

/*--1、Pairwise add(正常指令):

vpadd -> r0 = a0 + a1, ..., r3 = a6 + a7, r4 = b0 + b1, ..., r7 = b6 + b7

adds adjacent pairs of elements of two vectors,

and places the results in the destination vector.--*/

//r0 = a0 + a1, ...,r3 = a6 + a7, r4 = b0 + b1, ...,r7 = b6 + b7

int8x8_t vpadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpadd_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpadd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpadd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpadd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpadd_u32 (uint32x2_t __a, uint32x2_t __b);

/*--2、Long pairwise add: vpaddl -> r0 = a0 + a1, ..., r3 = a6 + a7;

adds adjacent pairs of elements of a vector, sign extends or zero extends the results to

twice their original width, and places the final results in the destination vector--*/

int16x4_t vpaddl_s8 (int8x8_t __a);

int32x2_t vpaddl_s16 (int16x4_t __a);

int64x1_t vpaddl_s32 (int32x2_t __a);

uint16x4_t vpaddl_u8 (uint8x8_t __a);

uint32x2_t vpaddl_u16 (uint16x4_t __a);

uint64x1_t vpaddl_u32 (uint32x2_t __a);

int16x8_t vpaddlq_s8 (int8x16_t __a);

int32x4_t vpaddlq_s16 (int16x8_t __a);

int64x2_t vpaddlq_s32 (int32x4_t __a);

uint16x8_t vpaddlq_u8 (uint8x16_t __a);

uint32x4_t vpaddlq_u16 (uint16x8_t __a);

uint64x2_t vpaddlq_u32 (uint32x4_t __a);

/*--3、Long pairwise add and accumulate:

vpadal -> r0 = a0 + (b0 + b1), ..., r3 = a3 + (b6 + b7);

adds adjacent pairs of elements in the second vector, sign extends or zero extends the

results to twice the original width. It then accumulates this with the corresponding

element in the first vector and places the final results in the destination vector--*/

int16x4_t vpadal_s8 (int16x4_t __a, int8x8_t __b);

int32x2_t vpadal_s16 (int32x2_t __a, int16x4_t __b);

int64x1_t vpadal_s32 (int64x1_t __a, int32x2_t __b);

uint16x4_t vpadal_u8 (uint16x4_t __a, uint8x8_t __b);

uint32x2_t vpadal_u16 (uint32x2_t __a, uint16x4_t __b);

uint64x1_t vpadal_u32 (uint64x1_t __a, uint32x2_t __b);

int16x8_t vpadalq_s8 (int16x8_t __a, int8x16_t __b);

int32x4_t vpadalq_s16 (int32x4_t __a, int16x8_t __b);

int64x2_t vpadalq_s32 (int64x2_t __a, int32x4_t __b);

uint16x8_t vpadalq_u8 (uint16x8_t __a, uint8x16_t __b);

uint32x4_t vpadalq_u16 (uint32x4_t __a, uint16x8_t __b);

uint64x2_t vpadalq_u32 (uint64x2_t __a, uint32x4_t __b);

/**********************************************Folding maximum**************************/

/*--Folding (pairwise) maximum, normal instruction (results never leave the element

range, so nothing is saturated):

vpmax -> r0 = a0 >= a1 ? a0 : a1, ..., r4 = b0 >= b1 ? b0 : b1, ...;

compares adjacent pairs of elements, and copies the larger of each pair into the

destination vector. The maximums from each pair of the first input vector are stored in

the lower half of the destination vector. The maximums from each pair of the second input

vector are stored in the higher half of the destination vector.

(The r4 index above applies to the 8-lane variants; narrower vectors have fewer pairs.)--*/

int8x8_t vpmax_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpmax_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpmax_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpmax_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpmax_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpmax_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpmax_u32 (uint32x2_t __a, uint32x2_t __b);

/***************************************************Folding minimum*********************/

/*--Folding (pairwise) minimum, normal instruction (results never leave the element

range, so nothing is saturated):

vpmin -> r0 = a0 >= a1 ? a1 : a0, ..., r4 = b0 >= b1 ? b1 : b0, ...;

compares adjacent pairs of elements, and copies the smaller of each pair into the

destination vector. The minimums from each pair of the first input vector are stored in

the lower half of the destination vector. The minimums from each pair of the second

input vector are stored in the higher half of the destination vector.

(The r4 index above applies to the 8-lane variants; narrower vectors have fewer pairs.)--*/

int8x8_t vpmin_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpmin_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpmin_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpmin_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpmin_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpmin_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpmin_u32 (uint32x2_t __a, uint32x2_t __b);

/***************************************************Reciprocal**************************/

/*--1、Reciprocal step, Newton-Raphson iteration (normal floating-point instruction,

not a saturating one): vrecps -> ri = 2 - ai * bi;

performs a Newton-Raphson step for finding the reciprocal. It multiplies the elements of

one vector by the corresponding elements of another vector, subtracts each of the results

from 2, and places the final results into the elements of the destination vector--*/

float32x2_t vrecps_f32 (float32x2_t __a, float32x2_t __b);

float32x4_t vrecpsq_f32 (float32x4_t __a, float32x4_t __b);

/*--2、Reciprocal square root step (normal floating-point instruction, not a saturating

one): vrsqrts -> ri = (3 - ai * bi) / 2;

performs a Newton-Raphson step for finding the reciprocal square root.

It multiplies the elements of one vector by the corresponding elements of another vector,

subtracts each of the results from 3, divides these results by two, and places

the final results into the elements of the destination vector--*/

float32x2_t vrsqrts_f32 (float32x2_t __a, float32x2_t __b);

float32x4_t vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b);

/************************************************Shifts by signed variable**************/

/*--1、Vector shift left (normal instruction): vshl -> ri = ai << bi; (negative values shift right)

left shifts each element in a vector by an amount specified in the corresponding element

in the second input vector. The shift amount is the signed integer value of the least

significant byte of the element in the second input vector. The bits shifted out of each

element are lost.If the signed integer value is negative, it results in a right shift--*/

int8x8_t vshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--2、Vector saturating shift left(飽和指令):

vqshl -> ri = sat(ai << bi);(negative values shift right)

If the shift value is positive, the operation is a left shift. Otherwise, it is a

truncating right shift. left shifts each element in a vector of integers and places

the results in the destination vector. It is similar to VSHL.

The difference is that the sticky QC flag is set if saturation occurs--*/

int8x8_t vqshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vqshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vqshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vqshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vqshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vqshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vqshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vqshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vqshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vqshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--3、Vector rounding shift left (normal instruction):

vrshl -> ri = ai << bi;(negative values shift right)

If the shift value is positive, the operation is a left shift. Otherwise, it is a

rounding right shift. left shifts each element in a vector of integers and places

the results in the destination vector. It is similar to VSHL.

The difference is that the shifted value is then rounded.--*/

int8x8_t vrshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vrshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vrshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vrshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vrshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vrshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vrshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vrshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vrshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vrshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vrshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vrshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vrshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vrshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vrshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vrshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--4、Vector saturating rounding shift left(飽和指令):

vqrshl -> ri = sat(ai << bi);(negative values shift right)

left shifts each element in a vector of integers and places the results in the

destination vector.It is similar to VSHL. The difference is that the shifted value

is rounded, and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqrshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vqrshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqrshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqrshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqrshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vqrshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vqrshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vqrshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vqrshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vqrshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqrshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqrshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqrshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vqrshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vqrshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vqrshlq_u64 (uint64x2_t __a, int64x2_t __b);

/****************************************Shifts by a constant***************************/

/*--1、Vector shift right by constant: vshr -> ri = ai >> b;The results are truncated.

right shifts each element in a vector by an immediate value,

and places the results in the destination vector.--*/

int8x8_t vshr_n_s8 (int8x8_t __a, const int __b);

int16x4_t vshr_n_s16 (int16x4_t __a, const int __b);

int32x2_t vshr_n_s32 (int32x2_t __a, const int __b);

int64x1_t vshr_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vshr_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vshr_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vshr_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vshr_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vshrq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vshrq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vshrq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vshrq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vshrq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vshrq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vshrq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vshrq_n_u64 (uint64x2_t __a, const int __b);

/*--2、Vector shift left by constant: vshl -> ri = ai << b;

left shifts each element in a vector by an immediate value, and places the results in the

destination vector. The bits shifted out of the left of each element are lost--*/

int8x8_t vshl_n_s8 (int8x8_t __a, const int __b);

int16x4_t vshl_n_s16 (int16x4_t __a, const int __b);

int32x2_t vshl_n_s32 (int32x2_t __a, const int __b);

int64x1_t vshl_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vshl_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vshl_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vshl_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vshl_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vshlq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vshlq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vshlq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vshlq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vshlq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vshlq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vshlq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vshlq_n_u64 (uint64x2_t __a, const int __b);

/*--3、Vector rounding shift right by constant: vrshr -> ri = ai >> b;

right shifts each element in a vector by an immediate value, and places the results

in the destination vector. The shifted values are rounded.--*/

int8x8_t vrshr_n_s8 (int8x8_t __a, const int __b);

int16x4_t vrshr_n_s16 (int16x4_t __a, const int __b);

int32x2_t vrshr_n_s32 (int32x2_t __a, const int __b);

int64x1_t vrshr_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vrshr_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vrshr_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vrshr_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vrshr_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vrshrq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vrshrq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vrshrq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vrshrq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vrshrq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vrshrq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vrshrq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vrshrq_n_u64 (uint64x2_t __a, const int __b);

/*--4、Vector shift right by constant and accumulate: vsra -> ri = ai + (bi >> c);

The results are truncated. right shifts each element in a vector by an immediate value,

and accumulates the results into the destination vector.--*/

int8x8_t vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

int8x16_t vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

/*--5、Vector rounding shift right by constant and accumulate:

vrsra -> ri = ai + (bi >> c);

The results are rounded.right shifts each element in a vector by an immediate value,

and accumulates the rounded results into the destination vector.--*/

int8x8_t vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

int8x16_t vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

/*--6、Vector saturating shift left by constant: vqshl -> ri = sat(ai << b);

left shifts each element in a vector of integers by an immediate value, and places the

results in the destination vector,and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqshl_n_s8 (int8x8_t __a, const int __b);

int16x4_t vqshl_n_s16 (int16x4_t __a, const int __b);

int32x2_t vqshl_n_s32 (int32x2_t __a, const int __b);

int64x1_t vqshl_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vqshl_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vqshl_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vqshl_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vqshl_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vqshlq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vqshlq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vqshlq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vqshlq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vqshlq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vqshlq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vqshlq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vqshlq_n_u64 (uint64x2_t __a, const int __b);

/*--7、Vector signed->unsigned saturating shift left by constant: vqshlu -> ri = sat(ai << b);

left shifts each element in a vector of integers by an immediate value, places the

results in the destination vector, the sticky QC flag is set if saturation occurs,

and indicates that the results are unsigned even though the operands are signed.--*/

uint8x8_t vqshlu_n_s8 (int8x8_t __a, const int __b);

uint16x4_t vqshlu_n_s16 (int16x4_t __a, const int __b);

uint32x2_t vqshlu_n_s32 (int32x2_t __a, const int __b);

uint64x1_t vqshlu_n_s64 (int64x1_t __a, const int __b);

uint8x16_t vqshluq_n_s8 (int8x16_t __a, const int __b);

uint16x8_t vqshluq_n_s16 (int16x8_t __a, const int __b);

uint32x4_t vqshluq_n_s32 (int32x4_t __a, const int __b);

uint64x2_t vqshluq_n_s64 (int64x2_t __a, const int __b);

/*--8、Vector narrowing shift right by constant: vshrn -> ri = ai >> b;

The results are truncated.right shifts each element in the input vector by an

immediate value. It then narrows the result by storing only the least significant

half of each element into the destination vector.--*/

int8x8_t vshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vshrn_n_u64 (uint64x2_t __a, const int __b);

/*--9、Vector signed->unsigned narrowing saturating shift right by constant:

vqshrun -> ri = sat(ai >> b);

Results are truncated. right shifts each element in a quadword vector of integers by an

immediate value, and places the results in a doubleword vector. The results are unsigned,

although the operands are signed. The sticky QC flag is set if saturation occurs.--*/

uint8x8_t vqshrun_n_s16 (int16x8_t __a, const int __b);

uint16x4_t vqshrun_n_s32 (int32x4_t __a, const int __b);

uint32x2_t vqshrun_n_s64 (int64x2_t __a, const int __b);

/*--10、Vector signed->unsigned rounding narrowing saturating shift right by constant:

vqrshrun -> ri = sat(ai >> b); Results are rounded. right shifts each element in a quadword

vector of integers by an immediate value, and places the rounded results in a doubleword

vector. The results are unsigned, although the operands are signed.--*/

uint8x8_t vqrshrun_n_s16 (int16x8_t __a, const int __b);

uint16x4_t vqrshrun_n_s32 (int32x4_t __a, const int __b);

uint32x2_t vqrshrun_n_s64 (int64x2_t __a, const int __b);

/*--11、Vector narrowing saturating shift right by constant: vqshrn -> ri = sat(ai >> b);

Results are truncated. right shifts each element in a quadword vector of integers by an

immediate value, and places the results in a doubleword vector,

and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vqshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vqshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vqshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vqshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vqshrn_n_u64 (uint64x2_t __a, const int __b);

/*--12、Vector rounding narrowing shift right by constant: vrshrn -> ri = ai >> b;

The results are rounded. right shifts each element in a vector by an immediate value,

and places the rounded,narrowed results in the destination vector.--*/

int8x8_t vrshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vrshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vrshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vrshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vrshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vrshrn_n_u64 (uint64x2_t __a, const int __b);

/*--13、Vector rounding narrowing saturating shift right by constant:

vqrshrn -> ri = sat(ai >> b);

Results are rounded. right shifts each element in a quadword vector of integers by an

immediate value,and places the rounded,narrowed results in a doubleword vector.

The sticky QC flag is set if saturation occurs.--*/

int8x8_t vqrshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vqrshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vqrshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vqrshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vqrshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vqrshrn_n_u64 (uint64x2_t __a, const int __b);

/*--14、Vector widening shift left by constant: vshll -> ri = ai << b;

left shifts each element in a vector of integers by an immediate value,

and places the results in the destination vector. Each element is sign extended or zero

extended to twice its original width before the shift, so no bits are lost.--*/

int16x8_t vshll_n_s8 (int8x8_t __a, const int __b);

int32x4_t vshll_n_s16 (int16x4_t __a, const int __b);

int64x2_t vshll_n_s32 (int32x2_t __a, const int __b);

uint16x8_t vshll_n_u8 (uint8x8_t __a, const int __b);

uint32x4_t vshll_n_u16 (uint16x4_t __a, const int __b);

uint64x2_t vshll_n_u32 (uint32x2_t __a, const int __b);

/********************************************Shifts with insert*************************/

/*--1、Vector shift right and insert: vsri -> the c most significant bits of each element in the

destination vector are unchanged. right shifts each element in the second input vector

by an immediate value, and inserts the results in the destination vector. It does not

affect the highest n significant bits of the elements in the destination register.

Bits shifted out of the right of each element are lost.The first input vector holds

the elements of the destination vector before the operation is performed.--*/

int8x8_t vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

poly8x16_t vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);

/*--2、Vector shift left and insert: vsli -> the c least significant bits of each element

in the destination vector are unchanged. left shifts each element in the second input

vector by an immediate value, and inserts the results in the destination vector.

It does not affect the lowest n significant bits of the elements in the destination

register. Bits shifted out of the left of each element are lost. The first input vector

holds the elements of the destination vector before the operation is performed.--*/

int8x8_t vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

poly8x16_t vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);

/*****************************************Absolute value********************************/

/*--1、Absolute(正常指令): vabs -> ri = |ai|;

returns the absolute value of each element in a vector.--*/

int8x8_t vabs_s8 (int8x8_t __a);//_mm_abs_epi8

int16x4_t vabs_s16 (int16x4_t __a);//_mm_abs_epi16

int32x2_t vabs_s32 (int32x2_t __a);//_mm_abs_epi32

float32x2_t vabs_f32 (float32x2_t __a);

int8x16_t vabsq_s8 (int8x16_t __a);//_mm_abs_epi8

int16x8_t vabsq_s16 (int16x8_t __a);//_mm_abs_epi16

int32x4_t vabsq_s32 (int32x4_t __a);//_mm_abs_epi32

float32x4_t vabsq_f32 (float32x4_t __a);

/*--2、Saturating absolute(飽和指令): vqabs -> ri = sat(|ai|);

returns the absolute value of each element in a vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int8x8_t vqabs_s8 (int8x8_t __a);

int16x4_t vqabs_s16 (int16x4_t __a);

int32x2_t vqabs_s32 (int32x2_t __a);

int8x16_t vqabsq_s8 (int8x16_t __a);

int16x8_t vqabsq_s16 (int16x8_t __a);

int32x4_t vqabsq_s32 (int32x4_t __a);

/***************************************************Negation****************************/

/*--1、Negate(正常指令): vneg -> ri = -ai; negates each element in a vector.--*/

int8x8_t vneg_s8 (int8x8_t __a);

int16x4_t vneg_s16 (int16x4_t __a);

int32x2_t vneg_s32 (int32x2_t __a);

float32x2_t vneg_f32 (float32x2_t __a);

int8x16_t vnegq_s8 (int8x16_t __a);

int16x8_t vnegq_s16 (int16x8_t __a);

int32x4_t vnegq_s32 (int32x4_t __a);

float32x4_t vnegq_f32 (float32x4_t __a);

/*--2、Saturating Negate: vqneg -> ri = sat(-ai);

negates each element in a vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int8x8_t vqneg_s8 (int8x8_t __a);

int16x4_t vqneg_s16 (int16x4_t __a);

int32x2_t vqneg_s32 (int32x2_t __a);

int8x16_t vqnegq_s8 (int8x16_t __a);

int16x8_t vqnegq_s16 (int16x8_t __a);

int32x4_t vqnegq_s32 (int32x4_t __a);

/********************************************Logical operations*************************/

/*--1、Bitwise not(正常指令): vmvn -> ri = ~ai;

performs a bitwise inversion of each element from the input vector.--*/

int8x8_t vmvn_s8 (int8x8_t __a);

int16x4_t vmvn_s16 (int16x4_t __a);

int32x2_t vmvn_s32 (int32x2_t __a);

uint8x8_t vmvn_u8 (uint8x8_t __a);

uint16x4_t vmvn_u16 (uint16x4_t __a);

uint32x2_t vmvn_u32 (uint32x2_t __a);

poly8x8_t vmvn_p8 (poly8x8_t __a);

int8x16_t vmvnq_s8 (int8x16_t __a);

int16x8_t vmvnq_s16 (int16x8_t __a);

int32x4_t vmvnq_s32 (int32x4_t __a);

uint8x16_t vmvnq_u8 (uint8x16_t __a);

uint16x8_t vmvnq_u16 (uint16x8_t __a);

uint32x4_t vmvnq_u32 (uint32x4_t __a);

poly8x16_t vmvnq_p8 (poly8x16_t __a);

/*--2、Bitwise and(正常指令): vand -> ri = ai & bi; performs a bitwise AND between

corresponding elements of the input vectors.--*/

int8x8_t vand_s8 (int8x8_t __a, int8x8_t __b);//_mm_and_si128

int16x4_t vand_s16 (int16x4_t __a, int16x4_t __b);//_mm_and_si128

int32x2_t vand_s32 (int32x2_t __a, int32x2_t __b);//_mm_and_si128

uint8x8_t vand_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_and_si128

uint16x4_t vand_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_and_si128

uint32x2_t vand_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_and_si128

int64x1_t vand_s64 (int64x1_t __a, int64x1_t __b);//_mm_and_si128

uint64x1_t vand_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_and_si128

int8x16_t vandq_s8 (int8x16_t __a, int8x16_t __b);//_mm_and_si128

int16x8_t vandq_s16 (int16x8_t __a, int16x8_t __b);//_mm_and_si128

int32x4_t vandq_s32 (int32x4_t __a, int32x4_t __b);//_mm_and_si128

int64x2_t vandq_s64 (int64x2_t __a, int64x2_t __b);//_mm_and_si128

uint8x16_t vandq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_and_si128

uint16x8_t vandq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_and_si128

uint32x4_t vandq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_and_si128

uint64x2_t vandq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_and_si128

/*--3、Bitwise or(正常指令): vorr -> ri = ai | bi; performs a bitwise OR between

corresponding elements of the input vectors.--*/

int8x8_t vorr_s8 (int8x8_t __a, int8x8_t __b);//_mm_or_si128

int16x4_t vorr_s16 (int16x4_t __a, int16x4_t __b);//_mm_or_si128

int32x2_t vorr_s32 (int32x2_t __a, int32x2_t __b);//_mm_or_si128

uint8x8_t vorr_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_or_si128

uint16x4_t vorr_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_or_si128

uint32x2_t vorr_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_or_si128

int64x1_t vorr_s64 (int64x1_t __a, int64x1_t __b);//_mm_or_si128

uint64x1_t vorr_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_or_si128

int8x16_t vorrq_s8 (int8x16_t __a, int8x16_t __b);//_mm_or_si128

int16x8_t vorrq_s16 (int16x8_t __a, int16x8_t __b);//_mm_or_si128

int32x4_t vorrq_s32 (int32x4_t __a, int32x4_t __b);//_mm_or_si128

int64x2_t vorrq_s64 (int64x2_t __a, int64x2_t __b);//_mm_or_si128

uint8x16_t vorrq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_or_si128

uint16x8_t vorrq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_or_si128

uint32x4_t vorrq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_or_si128

uint64x2_t vorrq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_or_si128

/*--4、Bitwise exclusive or (EOR or XOR)(正常指令): veor -> ri = ai ^ bi;

performs a bitwise exclusive-OR between corresponding elements of the input vectors.--*/

int8x8_t veor_s8 (int8x8_t __a, int8x8_t __b);//_mm_xor_si128

int16x4_t veor_s16 (int16x4_t __a, int16x4_t __b);//_mm_xor_si128

int32x2_t veor_s32 (int32x2_t __a, int32x2_t __b);//_mm_xor_si128

uint8x8_t veor_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_xor_si128

uint16x4_t veor_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_xor_si128

uint32x2_t veor_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_xor_si128

int64x1_t veor_s64 (int64x1_t __a, int64x1_t __b);//_mm_xor_si128

uint64x1_t veor_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_xor_si128

int8x16_t veorq_s8 (int8x16_t __a, int8x16_t __b);//_mm_xor_si128

int16x8_t veorq_s16 (int16x8_t __a, int16x8_t __b);//_mm_xor_si128

int32x4_t veorq_s32 (int32x4_t __a, int32x4_t __b);//_mm_xor_si128

int64x2_t veorq_s64 (int64x2_t __a, int64x2_t __b);//_mm_xor_si128

uint8x16_t veorq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_xor_si128

uint16x8_t veorq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_xor_si128

uint32x4_t veorq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_xor_si128

uint64x2_t veorq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_xor_si128

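/*--5、Bit Clear(正常指令): vbic -> ri = ai & ~bi;

VBIC (Vector Bitwise Clear) ANDs each element of the first operand with the bitwise

complement of the corresponding element of the second operand, and places the results

in the destination register. Note: _mm_andnot_si128(x, y) computes ~x & y, so the SSE

equivalent is _mm_andnot_si128(__b, __a), i.e. with the operands swapped.--*/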

int8x8_t vbic_s8 (int8x8_t __a, int8x8_t __b);//_mm_andnot_si128

int16x4_t vbic_s16 (int16x4_t __a, int16x4_t __b);//_mm_andnot_si128

int32x2_t vbic_s32 (int32x2_t __a, int32x2_t __b);//_mm_andnot_si128

uint8x8_t vbic_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_andnot_si128

uint16x4_t vbic_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_andnot_si128

uint32x2_t vbic_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_andnot_si128

int64x1_t vbic_s64 (int64x1_t __a, int64x1_t __b);//_mm_andnot_si128

uint64x1_t vbic_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_andnot_si128

int8x16_t vbicq_s8 (int8x16_t __a, int8x16_t __b);//_mm_andnot_si128

int16x8_t vbicq_s16 (int16x8_t __a, int16x8_t __b);//_mm_andnot_si128

int32x4_t vbicq_s32 (int32x4_t __a, int32x4_t __b);//_mm_andnot_si128

int64x2_t vbicq_s64 (int64x2_t __a, int64x2_t __b);//_mm_andnot_si128

uint8x16_t vbicq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_andnot_si128

uint16x8_t vbicq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_andnot_si128

uint32x4_t vbicq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_andnot_si128

uint64x2_t vbicq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_andnot_si128

/*--6、Bitwise OR complement(正常指令): vorn -> ri = ai | (~bi);

performs a bitwise logical OR NOT operation

between values in two registers, and places the results in the destination register.--*/

int8x8_t vorn_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vorn_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vorn_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vorn_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vorn_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vorn_u32 (uint32x2_t __a, uint32x2_t __b);

int64x1_t vorn_s64 (int64x1_t __a, int64x1_t __b);

uint64x1_t vorn_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vornq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vornq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vornq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vornq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vornq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vornq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vornq_u32 (uint32x4_t __a, uint32x4_t __b);

uint64x2_t vornq_u64 (uint64x2_t __a, uint64x2_t __b);

/****************************************Count leading sign bits************************/

/*--正常指令, vcls -> ; counts the number of consecutive bits following the most

significant (sign) bit that are the same as that bit, in each element in a

vector, and places the count in the result vector.--*/

int8x8_t vcls_s8 (int8x8_t __a);

int16x4_t vcls_s16 (int16x4_t __a);

int32x2_t vcls_s32 (int32x2_t __a);

int8x16_t vclsq_s8 (int8x16_t __a);

int16x8_t vclsq_s16 (int16x8_t __a);

int32x4_t vclsq_s32 (int32x4_t __a);

/*******************************************Count leading zeros*************************/

/*--正常指令, vclz -> ; counts the number of consecutive zeros, starting from the most

significant bit, in each element in a vector, and places the count in result vector.--*/

int8x8_t vclz_s8 (int8x8_t __a);

int16x4_t vclz_s16 (int16x4_t __a);

int32x2_t vclz_s32 (int32x2_t __a);

uint8x8_t vclz_u8 (uint8x8_t __a);

uint16x4_t vclz_u16 (uint16x4_t __a);

uint32x2_t vclz_u32 (uint32x2_t __a);

int8x16_t vclzq_s8 (int8x16_t __a);

int16x8_t vclzq_s16 (int16x8_t __a);

int32x4_t vclzq_s32 (int32x4_t __a);

uint8x16_t vclzq_u8 (uint8x16_t __a);

uint16x8_t vclzq_u16 (uint16x8_t __a);

uint32x4_t vclzq_u32 (uint32x4_t __a);

/*******************************************Count number of set bits********************/

/*--正常指令, vcnt -> counts the number of bits that are one in each element in a vector,

and places the count in the result vector.--*/

int8x8_t vcnt_s8 (int8x8_t __a);

uint8x8_t vcnt_u8 (uint8x8_t __a);

poly8x8_t vcnt_p8 (poly8x8_t __a);

int8x16_t vcntq_s8 (int8x16_t __a);

uint8x16_t vcntq_u8 (uint8x16_t __a);

poly8x16_t vcntq_p8 (poly8x16_t __a);

/*****************************************Reciprocal estimate***************************/

/*--正常指令, vrecpe -> ; finds an approximate reciprocal of each element in a vector,

and places it in the result vector.--*/

float32x2_t vrecpe_f32 (float32x2_t __a);

uint32x2_t vrecpe_u32 (uint32x2_t __a);

float32x4_t vrecpeq_f32 (float32x4_t __a);

uint32x4_t vrecpeq_u32 (uint32x4_t __a);

/****************************************Reciprocal square-root estimate****************/

/*--正常指令, vrsqrte -> ; finds an approximate reciprocal square root of each element

in a vector, and places it in the return vector.--*/

float32x2_t vrsqrte_f32 (float32x2_t __a);

uint32x2_t vrsqrte_u32 (uint32x2_t __a);

float32x4_t vrsqrteq_f32 (float32x4_t __a);

uint32x4_t vrsqrteq_u32 (uint32x4_t __a);

/*******************************************Get lanes from a vector*********************/

/*--vmov -> r = a[b]; returns the value from the specified lane of a vector.

Extract lanes from a vector and put into a register.

These intrinsics extract a single lane (element) from a vector.--*/

int8_t vget_lane_s8 (int8x8_t __a, const int __b);//_mm_extract_epi8

int16_t vget_lane_s16 (int16x4_t __a, const int __b);//_mm_extract_epi16

int32_t vget_lane_s32 (int32x2_t __a, const int __b);//_mm_extract_epi32

float32_t vget_lane_f32 (float32x2_t __a, const int __b);

uint8_t vget_lane_u8 (uint8x8_t __a, const int __b);//_mm_extract_epi8

uint16_t vget_lane_u16 (uint16x4_t __a, const int __b);//_mm_extract_epi16

uint32_t vget_lane_u32 (uint32x2_t __a, const int __b);//_mm_extract_epi32

poly8_t vget_lane_p8 (poly8x8_t __a, const int __b);//_mm_extract_epi8

poly16_t vget_lane_p16 (poly16x4_t __a, const int __b);//_mm_extract_epi16

int64_t vget_lane_s64 (int64x1_t __a, const int __b);//_mm_extract_epi64

uint64_t vget_lane_u64 (uint64x1_t __a, const int __b);//_mm_extract_epi64

int8_t vgetq_lane_s8 (int8x16_t __a, const int __b);//_mm_extract_epi8

int16_t vgetq_lane_s16 (int16x8_t __a, const int __b);//_mm_extract_epi16

int32_t vgetq_lane_s32 (int32x4_t __a, const int __b);//_mm_extract_epi32

float32_t vgetq_lane_f32 (float32x4_t __a, const int __b);

uint8_t vgetq_lane_u8 (uint8x16_t __a, const int __b);//_mm_extract_epi8

uint16_t vgetq_lane_u16 (uint16x8_t __a, const int __b);//_mm_extract_epi16

uint32_t vgetq_lane_u32 (uint32x4_t __a, const int __b);//_mm_extract_epi32

poly8_t vgetq_lane_p8 (poly8x16_t __a, const int __b);//_mm_extract_epi8

poly16_t vgetq_lane_p16 (poly16x8_t __a, const int __b);//_mm_extract_epi16

int64_t vgetq_lane_s64 (int64x2_t __a, const int __b);//_mm_extract_epi64

uint64_t vgetq_lane_u64 (uint64x2_t __a, const int __b);//_mm_extract_epi64

/*********************************************Set lanes in a vector********************/

/*--vmov -> ; sets the value of the specified lane of a vector. It returns the vector

with the new value. Load a single lane of a vector from a literal. These intrinsics set

a single lane (element) within a vector.--*/

int8x8_t vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c);

int16x4_t vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c);

int32x2_t vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c);

float32x2_t vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c);

uint8x8_t vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c);

uint32x2_t vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c);

poly8x8_t vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c);

int64x1_t vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c);

uint64x1_t vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c);

int8x16_t vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c);

int16x8_t vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c);

int32x4_t vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c);

float32x4_t vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c);

uint8x16_t vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c);

poly8x16_t vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c);

int64x2_t vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c);

uint64x2_t vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c);

/****************************************Create vector from literal bit pattern*********/

/*--vmov -> ; creates a vector from a 64-bit pattern.

Initialize a vector from a literal bit pattern.--*/

int8x8_t vcreate_s8 (uint64_t __a);//_mm_loadl_epi64

int16x4_t vcreate_s16 (uint64_t __a);//_mm_loadl_epi64

int32x2_t vcreate_s32 (uint64_t __a);//_mm_loadl_epi64

int64x1_t vcreate_s64 (uint64_t __a);//_mm_loadl_epi64

float32x2_t vcreate_f32 (uint64_t __a);

uint8x8_t vcreate_u8 (uint64_t __a);//_mm_loadl_epi64

uint16x4_t vcreate_u16 (uint64_t __a);//_mm_loadl_epi64

uint32x2_t vcreate_u32 (uint64_t __a);//_mm_loadl_epi64

uint64x1_t vcreate_u64 (uint64_t __a);//_mm_loadl_epi64

poly8x8_t vcreate_p8 (uint64_t __a);//_mm_loadl_epi64

poly16x4_t vcreate_p16 (uint64_t __a);//_mm_loadl_epi64

/*****************************************Set all lanes to the same value***************/

/*--1、Load all lanes of vector to the same literal value: vdup/vmov -> ri = a;

duplicates a scalar into every element of the destination vector.

Load all lanes of vector to the same literal value--*/

int8x8_t vdup_n_s8 (int8_t __a);//_mm_set1_epi8

int16x4_t vdup_n_s16 (int16_t __a);//_mm_set1_epi16

int32x2_t vdup_n_s32 (int32_t __a);//_mm_set1_epi32

float32x2_t vdup_n_f32 (float32_t __a);//_mm_set1_ps

uint8x8_t vdup_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x4_t vdup_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x2_t vdup_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x8_t vdup_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x4_t vdup_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x1_t vdup_n_s64 (int64_t __a);

uint64x1_t vdup_n_u64 (uint64_t __a);

int8x16_t vdupq_n_s8 (int8_t __a);//_mm_set1_epi8

int16x8_t vdupq_n_s16 (int16_t __a);//_mm_set1_epi16

int32x4_t vdupq_n_s32 (int32_t __a);//_mm_set1_epi32

float32x4_t vdupq_n_f32 (float32_t __a);//_mm_set1_ps

uint8x16_t vdupq_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x8_t vdupq_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x4_t vdupq_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x16_t vdupq_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x8_t vdupq_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x2_t vdupq_n_s64 (int64_t __a);

uint64x2_t vdupq_n_u64 (uint64_t __a);

int8x8_t vmov_n_s8 (int8_t __a);//_mm_set1_epi8

int16x4_t vmov_n_s16 (int16_t __a);//_mm_set1_epi16

int32x2_t vmov_n_s32 (int32_t __a);//_mm_set1_epi32

float32x2_t vmov_n_f32 (float32_t __a);//_mm_set1_ps

uint8x8_t vmov_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x4_t vmov_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x2_t vmov_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x8_t vmov_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x4_t vmov_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x1_t vmov_n_s64 (int64_t __a);

uint64x1_t vmov_n_u64 (uint64_t __a);

int8x16_t vmovq_n_s8 (int8_t __a);//_mm_set1_epi8

int16x8_t vmovq_n_s16 (int16_t __a);//_mm_set1_epi16

int32x4_t vmovq_n_s32 (int32_t __a);//_mm_set1_epi32

float32x4_t vmovq_n_f32 (float32_t __a);//_mm_set1_ps

uint8x16_t vmovq_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x8_t vmovq_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x4_t vmovq_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x16_t vmovq_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x8_t vmovq_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x2_t vmovq_n_s64 (int64_t __a);

uint64x2_t vmovq_n_u64 (uint64_t __a);

/*--2、Load all lanes of the vector to the value of a lane of a vector:

vdup/vmov -> ri = a[b];

duplicates a scalar into every element of the destination vector.--*/

int8x8_t vdup_lane_s8 (int8x8_t __a, const int __b);

int16x4_t vdup_lane_s16 (int16x4_t __a, const int __b);

int32x2_t vdup_lane_s32 (int32x2_t __a, const int __b);

float32x2_t vdup_lane_f32 (float32x2_t __a, const int __b);

uint8x8_t vdup_lane_u8 (uint8x8_t __a, const int __b);

uint16x4_t vdup_lane_u16 (uint16x4_t __a, const int __b);

uint32x2_t vdup_lane_u32 (uint32x2_t __a, const int __b);

poly8x8_t vdup_lane_p8 (poly8x8_t __a, const int __b);

poly16x4_t vdup_lane_p16 (poly16x4_t __a, const int __b);

int64x1_t vdup_lane_s64 (int64x1_t __a, const int __b);

uint64x1_t vdup_lane_u64 (uint64x1_t __a, const int __b);

int8x16_t vdupq_lane_s8 (int8x8_t __a, const int __b);

int16x8_t vdupq_lane_s16 (int16x4_t __a, const int __b);

int32x4_t vdupq_lane_s32 (int32x2_t __a, const int __b);

float32x4_t vdupq_lane_f32 (float32x2_t __a, const int __b);

uint8x16_t vdupq_lane_u8 (uint8x8_t __a, const int __b);

uint16x8_t vdupq_lane_u16 (uint16x4_t __a, const int __b);

uint32x4_t vdupq_lane_u32 (uint32x2_t __a, const int __b);

poly8x16_t vdupq_lane_p8 (poly8x8_t __a, const int __b);

poly16x8_t vdupq_lane_p16 (poly16x4_t __a, const int __b);

int64x2_t vdupq_lane_s64 (int64x1_t __a, const int __b);//_mm_unpacklo_epi64

uint64x2_t vdupq_lane_u64 (uint64x1_t __a, const int __b);//_mm_unpacklo_epi64

/********************************************Combining vectors**************************/

/*--長指令, -> r0 = a0, ..., r7 = a7, r8 = b0, ..., r15 = b7;

joins two 64-bit vectors into a single 128-bit vector.

The output vector contains twice the number of elements as each input vector.

The lower half of the output vector contains the elements of the first input vector.--*/

int8x16_t vcombine_s8 (int8x8_t __a, int8x8_t __b);//_mm_unpacklo_epi64

int16x8_t vcombine_s16 (int16x4_t __a, int16x4_t __b);//_mm_unpacklo_epi64

int32x4_t vcombine_s32 (int32x2_t __a, int32x2_t __b);//_mm_unpacklo_epi64

int64x2_t vcombine_s64 (int64x1_t __a, int64x1_t __b);//_mm_unpacklo_epi64

float32x4_t vcombine_f32 (float32x2_t __a, float32x2_t __b);

uint8x16_t vcombine_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_unpacklo_epi64

uint16x8_t vcombine_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_unpacklo_epi64

uint32x4_t vcombine_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_unpacklo_epi64

uint64x2_t vcombine_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_unpacklo_epi64

poly8x16_t vcombine_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_unpacklo_epi64

poly16x8_t vcombine_p16 (poly16x4_t __a, poly16x4_t __b);//_mm_unpacklo_epi64

/***************************************Splitting vectors*******************************/

/*--1、窄指令, -> ri = a(i+4); returns the higher half of the 128-bit input vector. The

output is a 64-bit vector that has half the number of elements as the input vector.--*/

int8x8_t vget_high_s8 (int8x16_t __a);//_mm_unpackhi_epi64

int16x4_t vget_high_s16 (int16x8_t __a);//_mm_unpackhi_epi64

int32x2_t vget_high_s32 (int32x4_t __a);//_mm_unpackhi_epi64

int64x1_t vget_high_s64 (int64x2_t __a);//_mm_unpackhi_epi64

float32x2_t vget_high_f32 (float32x4_t __a);

uint8x8_t vget_high_u8 (uint8x16_t __a);//_mm_unpackhi_epi64

uint16x4_t vget_high_u16 (uint16x8_t __a);//_mm_unpackhi_epi64

uint32x2_t vget_high_u32 (uint32x4_t __a);//_mm_unpackhi_epi64

uint64x1_t vget_high_u64 (uint64x2_t __a);//_mm_unpackhi_epi64

poly8x8_t vget_high_p8 (poly8x16_t __a);//_mm_unpackhi_epi64

poly16x4_t vget_high_p16 (poly16x8_t __a);//_mm_unpackhi_epi64

/*--2、窄指令, -> ri = ai; returns the lower half of the 128-bit input vector. The

output is a 64-bit vector that has half the number of elements as the input vector.--*/

int8x8_t vget_low_s8 (int8x16_t __a);

int16x4_t vget_low_s16 (int16x8_t __a);

int32x2_t vget_low_s32 (int32x4_t __a);

float32x2_t vget_low_f32 (float32x4_t __a);

uint8x8_t vget_low_u8 (uint8x16_t __a);

uint16x4_t vget_low_u16 (uint16x8_t __a);

uint32x2_t vget_low_u32 (uint32x4_t __a);

poly8x8_t vget_low_p8 (poly8x16_t __a);

poly16x4_t vget_low_p16 (poly16x8_t __a);

int64x1_t vget_low_s64 (int64x2_t __a);

uint64x1_t vget_low_u64 (uint64x2_t __a);

/****************************************************Conversions************************/

/*--1、Convert from float: vcvt ->, convert from floating-point to integer.--*/

int32x2_t vcvt_s32_f32 (float32x2_t __a);

uint32x2_t vcvt_u32_f32 (float32x2_t __a);

int32x4_t vcvtq_s32_f32 (float32x4_t __a);

uint32x4_t vcvtq_u32_f32 (float32x4_t __a);

int32x2_t vcvt_n_s32_f32 (float32x2_t __a, const int __b);

uint32x2_t vcvt_n_u32_f32 (float32x2_t __a, const int __b);

int32x4_t vcvtq_n_s32_f32 (float32x4_t __a, const int __b);

uint32x4_t vcvtq_n_u32_f32 (float32x4_t __a, const int __b);

/*--2、Convert to float: vcvt ->, convert from integer to floating-point.--*/

float32x2_t vcvt_f32_s32 (int32x2_t __a);

float32x2_t vcvt_f32_u32 (uint32x2_t __a);

float32x4_t vcvtq_f32_s32 (int32x4_t __a);

float32x4_t vcvtq_f32_u32 (uint32x4_t __a);

float32x2_t vcvt_n_f32_s32 (int32x2_t __a, const int __b);

float32x2_t vcvt_n_f32_u32 (uint32x2_t __a, const int __b);

float32x4_t vcvtq_n_f32_s32 (int32x4_t __a, const int __b);

float32x4_t vcvtq_n_f32_u32 (uint32x4_t __a, const int __b);

/*--3、Convert between half-precision and single-precision floats: vcvt ->--*/

float16x4_t vcvt_f16_f32(float32x4_t a);

float32x4_t vcvt_f32_f16(float16x4_t a);

/*************************************************Move**********************************/

/*--1、Vector narrow integer(窄指令): vmovn -> ri = ai[0...8]; copies the least

significant half of each element of a quadword vector into

the corresponding elements of a doubleword vector.--*/

int8x8_t vmovn_s16 (int16x8_t __a);

int16x4_t vmovn_s32 (int32x4_t __a);

int32x2_t vmovn_s64 (int64x2_t __a);

uint8x8_t vmovn_u16 (uint16x8_t __a);

uint16x4_t vmovn_u32 (uint32x4_t __a);

uint32x2_t vmovn_u64 (uint64x2_t __a);

/*--2、Vector long move(長指令): vmovl -> sign extends or zero extends each element

in a doubleword vector to twice its original length,

and places the results in a quadword vector.--*/

int16x8_t vmovl_s8 (int8x8_t __a);//_mm_cvtepi8_epi16

int32x4_t vmovl_s16 (int16x4_t __a);//_mm_cvtepi16_epi32

int64x2_t vmovl_s32 (int32x2_t __a);//_mm_cvtepi32_epi64

uint16x8_t vmovl_u8 (uint8x8_t __a);//_mm_cvtepu8_epi16

uint32x4_t vmovl_u16 (uint16x4_t __a);//_mm_cvtepu16_epi32

/* Unsigned 32-bit -> 64-bit long move; the trailing note names the SSE counterpart. */
uint64x2_t vmovl_u32 (uint32x2_t __a);//_mm_cvtepu32_epi64

/*--3、Vector saturating narrow integer(窄指令): vqmovn -> copies each element of the

operand vector to the corresponding element of the destination vector.

The result element is half the width of

the operand element, and values are saturated to the result width.

The results are the same type as the operands.--*/

int8x8_t vqmovn_s16 (int16x8_t __a);//_mm_packs_epi16

int16x4_t vqmovn_s32 (int32x4_t __a);//_mm_packs_epi32

int32x2_t vqmovn_s64 (int64x2_t __a);

uint8x8_t vqmovn_u16 (uint16x8_t __a);

uint16x4_t vqmovn_u32 (uint32x4_t __a);

uint32x2_t vqmovn_u64 (uint64x2_t __a);

/*--4、Vector saturating narrow integer signed->unsigned(窄指令): copies each element of

the operand vector to the corresponding element of the destination vector.

The result element is half the width of the operand element,

and values are saturated to the result width.

The elements in the operand are signed and the elements in the result are unsigned.--*/

uint8x8_t vqmovun_s16 (int16x8_t __a);//_mm_packus_epi16

uint16x4_t vqmovun_s32 (int32x4_t __a);//_mm_packus_epi32

uint32x2_t vqmovun_s64 (int64x2_t __a);

/******************************************************Table lookup*********************/

/*--1、Table lookup: vtbl -> uses byte indexes in a control vector to look up byte

values in a table and generate a new vector. Indexes out of range return 0.

The table is in Vector1 and uses one(or two or three or four)D registers.--*/

int8x8_t vtbl1_s8 (int8x8_t __a, int8x8_t __b);

uint8x8_t vtbl1_u8 (uint8x8_t __a, uint8x8_t __b);

poly8x8_t vtbl1_p8 (poly8x8_t __a, uint8x8_t __b);

int8x8_t vtbl2_s8 (int8x8x2_t __a, int8x8_t __b);

uint8x8_t vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b);

poly8x8_t vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b);

int8x8_t vtbl3_s8 (int8x8x3_t __a, int8x8_t __b);

uint8x8_t vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b);

poly8x8_t vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b);

int8x8_t vtbl4_s8 (int8x8x4_t __a, int8x8_t __b);

uint8x8_t vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b);

poly8x8_t vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b);

/*--2、Extended table lookup: vtbx -> uses byte indexes in a control vector to look up

byte values in a table and generate a new vector. Indexes out of range leave the

destination element unchanged. The table is in Vector2 and uses one (or two or three or

four) D register. Vector1 contains the elements of the destination vector.--*/

int8x8_t vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

uint8x8_t vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

poly8x8_t vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c);

int8x8_t vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c);

uint8x8_t vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c);

poly8x8_t vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c);

int8x8_t vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c);

uint8x8_t vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c);

poly8x8_t vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c);

int8x8_t vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c);

uint8x8_t vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c);

poly8x8_t vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c);

/***************************************Multiply, scalar, lane**************************/

/*--1、Vector multiply by scalar: vmul -> ri = ai * b;

multiplies each element in a vector by a scalar,

and places the results in the destination vector.--*/

int16x4_t vmul_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vmul_n_s32 (int32x2_t __a, int32_t __b);

float32x2_t vmul_n_f32 (float32x2_t __a, float32_t __b);

uint16x4_t vmul_n_u16 (uint16x4_t __a, uint16_t __b);

uint32x2_t vmul_n_u32 (uint32x2_t __a, uint32_t __b);

int16x8_t vmulq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vmulq_n_s32 (int32x4_t __a, int32_t __b);

float32x4_t vmulq_n_f32 (float32x4_t __a, float32_t __b);

uint16x8_t vmulq_n_u16 (uint16x8_t __a, uint16_t __b);

uint32x4_t vmulq_n_u32 (uint32x4_t __a, uint32_t __b);

/*--2、Vector multiply by scalar: -> ri = ai * b[c];

multiplies the first vector by a scalar.

The scalar is the element in the second vector with index c.--*/

int16x4_t vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

float32x2_t vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c);

uint16x4_t vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

int16x8_t vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

float32x4_t vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c);

uint16x8_t vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c);

uint32x4_t vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c);

/*--3、Vector long multiply with scalar: vmull -> ri = ai * b;

multiplies a vector by a scalar.

Elements in the result are wider than elements in input vector.--*/

int32x4_t vmull_n_s16 (int16x4_t __a, int16_t __b);

int64x2_t vmull_n_s32 (int32x2_t __a, int32_t __b);

uint32x4_t vmull_n_u16 (uint16x4_t __a, uint16_t __b);

uint64x2_t vmull_n_u32 (uint32x2_t __a, uint32_t __b);

/*--4、Vector long multiply by scalar: vmull -> ri = ai * b[c];

multiplies the first vector by a scalar.

The scalar is the element in the second vector with index c.

The elements in the result are wider than the elements in input vector.--*/

int32x4_t vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int64x2_t vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

uint32x4_t vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint64x2_t vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

/*--5、Vector saturating doubling long multiply with scalar: vqdmull -> ri = sat(ai * b);

multiplies the elements in the vector by a scalar, and doubles the results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmull_n_s16 (int16x4_t __a, int16_t __b);

int64x2_t vqdmull_n_s32 (int32x2_t __a, int32_t __b);

/*--6、Vector saturating doubling long multiply by scalar: vqdmull -> ri = sat(ai * b[c]);

multiplies the elements in the first vector by a scalar, and doubles the results.

The scalar has index c in the second vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int64x2_t vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

/*--7、Vector saturating doubling multiply high with scalar: vqdmulh -> ri = sat(ai * b)

multiplies the elements of the vector by a scalar, and doubles the results.

It then returns only the high half of the results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqdmulh_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vqdmulh_n_s32 (int32x2_t __a, int32_t __b);

int16x8_t vqdmulhq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vqdmulhq_n_s32 (int32x4_t __a, int32_t __b);

/*--8、Vector saturating doubling multiply high by scalar:

vqdmulh -> ri = sat(ai * b[c]);

multiplies the elements of the first vector by a scalar, and doubles the results. It then

returns only the high half of the results. The scalar has index c in the second vector.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int16x8_t vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

/*--9、Vector saturating rounding doubling multiply high with scalar:

vqrdmulh -> ri = sat(ai * b);

multiplies the elements of the vector by a scalar and doubles the results.

It then returns only the high half of the rounded results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqrdmulh_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vqrdmulh_n_s32 (int32x2_t __a, int32_t __b);

int16x8_t vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b);

/*--10、Vector rounding saturating doubling multiply high by scalar:

vqrdmulh -> ri = sat(ai * b[c]);

multiplies the elements of the first vector by a scalar and doubles the results.

It then returns only the high half of the rounded results.

The scalar has index c in the second vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int16x4_t vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int16x8_t vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

/*--11、Vector multiply accumulate with scalar: vmla -> ri = ai + bi * c;

multiplies each element in the second vector by a scalar,

and adds the results to the corresponding elements of the first vector.--*/

int16x4_t vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);

int32x2_t vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);

float32x2_t vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);

uint16x4_t vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);

uint32x2_t vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);

int16x8_t vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);

int32x4_t vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);

float32x4_t vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);

uint16x8_t vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);

uint32x4_t vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);

/*--12. Vector multiply accumulate by scalar: vmla -> ri = ai + bi * c[d];
multiplies each element of the second vector (__b) by a scalar and adds the
products to the corresponding elements of the first vector (__a).
The scalar is the element with index __d of the third vector (__c).
Note that the q variants still take a doubleword (64-bit) vector for __c.--*/

int16x4_t vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int32x2_t vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

float32x2_t vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,

const int __d);

uint16x4_t vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint32x2_t vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

int16x8_t vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);

int32x4_t vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);

float32x4_t vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,

const int __d);

uint16x8_t vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);

uint32x4_t vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);

/*--13. Vector widening multiply accumulate with scalar: vmlal -> ri = ai + bi * c;
multiplies each element of the second vector (__b) by the scalar __c and adds
the products to the corresponding elements of the first vector (__a).
The scalar is passed directly as a value (there is no lane index in this form).
The accumulator and the result are wider than __b: their elements are twice
the width of the elements of __b.--*/

int32x4_t vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

uint32x4_t vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);

uint64x2_t vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);

/*--14. Vector widening multiply accumulate by scalar: vmlal -> ri = ai + bi * c[d];
multiplies each element of the second vector (__b) by a scalar and adds the
products to the corresponding elements of the first vector (__a).
The scalar is the element with index __d of the third vector (__c).
The accumulator and the result are wider than __b: their elements are twice
the width of the elements of __b.--*/

int32x4_t vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

uint32x4_t vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint64x2_t vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

/*--15. Vector widening saturating doubling multiply accumulate with scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c);
multiplies the elements of the second vector (__b) by the scalar __c and
doubles the products, then adds them to the elements of the first vector (__a).
If any of the results overflow, they are saturated and the sticky QC flag is set.
Only signed variants are listed; the result elements are twice as wide as
the elements of __b.--*/

int32x4_t vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

/*--16. Vector widening saturating doubling multiply accumulate by scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c[d]);
multiplies each element of the second vector (__b) by a scalar, doubles the
products and adds them to the corresponding elements of the first vector (__a).
The scalar is the element with index __d of the third vector (__c).
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

/*--17. Vector multiply subtract with scalar: vmls -> ri = ai - bi * c;
multiplies each element of the second vector (__b) by the scalar __c and
subtracts the products from the corresponding elements of the first
vector (__a); the differences form the result vector.--*/

int16x4_t vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);

int32x2_t vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);

float32x2_t vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);

uint16x4_t vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);

uint32x2_t vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);

int16x8_t vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);

int32x4_t vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);

float32x4_t vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);

uint16x8_t vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);

uint32x4_t vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);

/*--18. Vector multiply subtract by scalar: vmls -> ri = ai - bi * c[d];
multiplies each element of the second vector (__b) by a scalar and subtracts
the products from the corresponding elements of the first vector (__a).
The scalar is the element with index __d of the third vector (__c).
Note that the q variants still take a doubleword (64-bit) vector for __c.--*/

int16x4_t vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int32x2_t vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

float32x2_t vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,

const int __d);

uint16x4_t vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint32x2_t vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

int16x8_t vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);

int32x4_t vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);

float32x4_t vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,

const int __d);

uint16x8_t vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);

uint32x4_t vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);

/*--19. Vector widening multiply subtract with scalar: vmlsl -> ri = ai - bi * c;
multiplies the elements of the second vector (__b) by the scalar __c, then
subtracts the products from the elements of the first vector (__a).
The accumulator and the result are wider than __b: their elements are twice
the width of the elements of __b.--*/

int32x4_t vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

uint32x4_t vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);

uint64x2_t vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);

/*--20、Vector widening multiply subtract by scalar: vmlsl -> ri = ai - bi * c[d];

multiplies each element in the second vector by a scalar,

and subtracts them from the corresponding elements of the first vector.

The scalar has index d in the third vector. The elements in the result are wider.--*/

int32x4_t vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

uint32x4_t vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)

uint64x2_t vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

/*--21. Vector widening saturating doubling multiply subtract with scalar:
vqdmlsl -> ri = sat(ai - 2 * bi * c);
multiplies the elements of the second vector (__b) by the scalar __c and
doubles the products, then subtracts them from the elements of the first
vector (__a).
If any of the results overflow, they are saturated and the sticky QC flag is set.
Only signed variants are listed; the result elements are twice as wide as
the elements of __b.--*/

int32x4_t vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

/*--22. Vector widening saturating doubling multiply subtract by scalar:
vqdmlsl -> ri = sat(ai - 2 * bi * c[d]);
multiplies each element of the second vector (__b) by a scalar, doubles the
products and subtracts them from the corresponding elements of the first
vector (__a). The scalar is the element with index __d of the third
vector (__c). If any of the results overflow, they are saturated and the
sticky QC flag is set.--*/

int32x4_t vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

/*****************************************************Vector extract********************/

/*--Vector extract: vext -> extracts n elements from the lower end of the second
operand vector (__b) and the remaining elements from the higher end of the
first (__a), and combines them to form the result vector; n is the constant __c.
The elements from the second operand are placed in the most significant part of
the result vector. The elements from the first operand are placed in the least
significant part of the result vector. When both input vectors are the same,
this intrinsic rotates (cycles) the elements through the lanes.
The trailing //_mm_* notes name the x86 SSE intrinsic the author associates
with the prototype.--*/

int8x8_t vext_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vext_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vext_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vext_s64 (int64x1_t __a, int64x1_t __b, const int __c);

float32x2_t vext_f32 (float32x2_t __a, float32x2_t __b, const int __c);

uint8x8_t vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c);//_mm_alignr_epi8

int16x8_t vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c);//_mm_alignr_epi8

int32x4_t vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c);//_mm_alignr_epi8

int64x2_t vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c);//_mm_alignr_epi8

float32x4_t vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c);//_mm_alignr_epi8

uint8x16_t vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);//_mm_alignr_epi8

uint16x8_t vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);//_mm_alignr_epi8

uint32x4_t vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);//_mm_alignr_epi8

uint64x2_t vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);//_mm_alignr_epi8

poly8x16_t vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);//_mm_alignr_epi8

poly16x8_t vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);//_mm_alignr_epi8

/****************************************************Reverse elements*******************/

/*--1. Reverse vector elements (swap endianness): vrev64 -> reverses the order
of the 8-bit, 16-bit, or 32-bit elements within each doubleword (64 bits) of
the vector and returns the result as a vector of the same type.
The trailing //_mm_* notes name the x86 SSE intrinsic the author associates
with the prototype.--*/

int8x8_t vrev64_s8 (int8x8_t __a);

int16x4_t vrev64_s16 (int16x4_t __a);

int32x2_t vrev64_s32 (int32x2_t __a);

float32x2_t vrev64_f32 (float32x2_t __a);//_mm_shuffle_ps

uint8x8_t vrev64_u8 (uint8x8_t __a);

uint16x4_t vrev64_u16 (uint16x4_t __a);

uint32x2_t vrev64_u32 (uint32x2_t __a);

poly8x8_t vrev64_p8 (poly8x8_t __a);

poly16x4_t vrev64_p16 (poly16x4_t __a);

int8x16_t vrev64q_s8 (int8x16_t __a);

int16x8_t vrev64q_s16 (int16x8_t __a);

int32x4_t vrev64q_s32 (int32x4_t __a);

float32x4_t vrev64q_f32 (float32x4_t __a);//_mm_shuffle_ps

uint8x16_t vrev64q_u8 (uint8x16_t __a);

uint16x8_t vrev64q_u16 (uint16x8_t __a);

uint32x4_t vrev64q_u32 (uint32x4_t __a);

poly8x16_t vrev64q_p8 (poly8x16_t __a);

poly16x8_t vrev64q_p16 (poly16x8_t __a);

/*--2. Reverse vector elements (swap endianness): vrev32 -> reverses the order
of the 8-bit or 16-bit elements within each word (32 bits) of the vector and
returns the result as a vector of the same type.--*/

int8x8_t vrev32_s8 (int8x8_t __a);

int16x4_t vrev32_s16 (int16x4_t __a);

uint8x8_t vrev32_u8 (uint8x8_t __a);

uint16x4_t vrev32_u16 (uint16x4_t __a);

poly8x8_t vrev32_p8 (poly8x8_t __a);

poly16x4_t vrev32_p16 (poly16x4_t __a);

int8x16_t vrev32q_s8 (int8x16_t __a);

int16x8_t vrev32q_s16 (int16x8_t __a);

uint8x16_t vrev32q_u8 (uint8x16_t __a);

uint16x8_t vrev32q_u16 (uint16x8_t __a);

poly8x16_t vrev32q_p8 (poly8x16_t __a);

poly16x8_t vrev32q_p16 (poly16x8_t __a);

/*--3. Reverse vector elements (swap endianness): vrev16 -> reverses the order
of the 8-bit elements within each halfword (16 bits) of the vector and
returns the result as a vector of the same type.--*/

int8x8_t vrev16_s8 (int8x8_t __a);

uint8x8_t vrev16_u8 (uint8x8_t __a);

poly8x8_t vrev16_p8 (poly8x8_t __a);

int8x16_t vrev16q_s8 (int8x16_t __a);

uint8x16_t vrev16q_u8 (uint8x16_t __a);

poly8x16_t vrev16q_p8 (poly8x16_t __a);

/**********************************************************Bitwise Select***************/

/*--Bitwise Select: vbsl -> builds the result bit by bit: a result bit is taken
from the first data operand where the corresponding selection-mask bit is 1,
and from the second data operand where the mask bit is 0.
In these prototypes the mask is the first argument __a (always an unsigned
vector); __b and __c are the two data operands and share the result type,
i.e. ri = (ai & bi) | (~ai & ci), evaluated bitwise.
(The instruction-level wording "bit of the destination" refers to the register
that holds the mask before the operation.)--*/

int8x8_t vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c);

int64x1_t vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c);

float32x2_t vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

uint64x1_t vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c);

poly8x8_t vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c);

poly16x4_t vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c);

int8x16_t vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c);

int64x2_t vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c);

float32x4_t vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

uint64x2_t vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c);

poly8x16_t vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c);

poly16x8_t vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c);

/************************************Transposition operations***************************/

/*--1、Transpose elements: vtrn -> treats the elements of its input vectors as elements

of 2 x 2 matrices, and transposes the matrices. Essentially, it exchanges the elements

with odd indices from Vector1 with the elements with even indices from Vector2.--*/

int8x8x2_t vtrn_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vtrn_s16 (int16x4_t __a, int16x4_t __b);

uint8x8x2_t vtrn_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vtrn_u16 (uint16x4_t __a, uint16x4_t __b);

poly8x8x2_t vtrn_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vtrn_p16 (poly16x4_t __a, poly16x4_t __b);

int32x2x2_t vtrn_s32 (int32x2_t __a, int32x2_t __b)

float32x2x2_t vtrn_f32 (float32x2_t __a, float32x2_t __b)

uint32x2x2_t vtrn_u32 (uint32x2_t __a, uint32x2_t __b)

int8x16x2_t vtrnq_s8 (int8x16_t __a, int8x16_t __b)

int16x8x2_t vtrnq_s16 (int16x8_t __a, int16x8_t __b)

int32x4x2_t vtrnq_s32 (int32x4_t __a, int32x4_t __b)

float32x4x2_t vtrnq_f32 (float32x4_t __a, float32x4_t __b)

uint8x16x2_t vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)

uint16x8x2_t vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)

uint32x4x2_t vtrnq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vtrnq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vtrnq_p16 (poly16x8_t __a, poly16x8_t __b);

/*--2. Interleave elements (Zip elements):
vzip -> (Vector Zip) interleaves the elements of the two vectors __a and __b.
The two resulting vectors are returned together in a two-vector structure
(the ...x2_t return types).--*/

int8x8x2_t vzip_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vzip_s16 (int16x4_t __a, int16x4_t __b);

uint8x8x2_t vzip_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vzip_u16 (uint16x4_t __a, uint16x4_t __b);

poly8x8x2_t vzip_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vzip_p16 (poly16x4_t __a, poly16x4_t __b);

int32x2x2_t vzip_s32 (int32x2_t __a, int32x2_t __b);

float32x2x2_t vzip_f32 (float32x2_t __a, float32x2_t __b);

uint32x2x2_t vzip_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16x2_t vzipq_s8 (int8x16_t __a, int8x16_t __b);

int16x8x2_t vzipq_s16 (int16x8_t __a, int16x8_t __b);

int32x4x2_t vzipq_s32 (int32x4_t __a, int32x4_t __b);

float32x4x2_t vzipq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16x2_t vzipq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8x2_t vzipq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4x2_t vzipq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vzipq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vzipq_p16 (poly16x8_t __a, poly16x8_t __b);

/*--3. De-interleave elements (Unzip elements):
vuzp -> (Vector Unzip) de-interleaves the elements of the two vectors __a and __b.
De-interleaving is the inverse process of interleaving (see vzip).
The two resulting vectors are returned together in a two-vector structure
(the ...x2_t return types).--*/

int8x8x2_t vuzp_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vuzp_s16 (int16x4_t __a, int16x4_t __b);

int32x2x2_t vuzp_s32 (int32x2_t __a, int32x2_t __b);

float32x2x2_t vuzp_f32 (float32x2_t __a, float32x2_t __b);

uint8x8x2_t vuzp_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vuzp_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2x2_t vuzp_u32 (uint32x2_t __a, uint32x2_t __b);

poly8x8x2_t vuzp_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vuzp_p16 (poly16x4_t __a, poly16x4_t __b);

int8x16x2_t vuzpq_s8 (int8x16_t __a, int8x16_t __b);

int16x8x2_t vuzpq_s16 (int16x8_t __a, int16x8_t __b);

int32x4x2_t vuzpq_s32 (int32x4_t __a, int32x4_t __b);

float32x4x2_t vuzpq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16x2_t vuzpq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8x2_t vuzpq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4x2_t vuzpq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vuzpq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vuzpq_p16 (poly16x8_t __a, poly16x8_t __b);

/*********************************************************Load**************************/

/*--1. Load a single vector from memory: vld1 -> loads a vector from the memory
pointed to by __a and returns it. The pointer is const-qualified, so the
source memory is only read.
The trailing //_mm_* notes name the x86 SSE intrinsic the author associates
with the prototype.--*/

int8x8_t vld1_s8 (const int8_t * __a);

int16x4_t vld1_s16 (const int16_t * __a);

int32x2_t vld1_s32 (const int32_t * __a);

int64x1_t vld1_s64 (const int64_t * __a);

float32x2_t vld1_f32 (const float32_t * __a);

uint8x8_t vld1_u8 (const uint8_t * __a);//_mm_loadl_epi64

uint16x4_t vld1_u16 (const uint16_t * __a);//_mm_loadl_epi64

uint32x2_t vld1_u32 (const uint32_t * __a);//_mm_loadl_epi64

uint64x1_t vld1_u64 (const uint64_t * __a);//_mm_loadl_epi64

poly8x8_t vld1_p8 (const poly8_t * __a);

poly16x4_t vld1_p16 (const poly16_t * __a);

int8x16_t vld1q_s8 (const int8_t * __a);

int16x8_t vld1q_s16 (const int16_t * __a);

int32x4_t vld1q_s32 (const int32_t * __a);

int64x2_t vld1q_s64 (const int64_t * __a);

float32x4_t vld1q_f32 (const float32_t * __a);

uint8x16_t vld1q_u8 (const uint8_t * __a);

uint16x8_t vld1q_u16 (const uint16_t * __a);

uint32x4_t vld1q_u32 (const uint32_t * __a);

uint64x2_t vld1q_u64 (const uint64_t * __a);

poly8x16_t vld1q_p8 (const poly8_t * __a);

poly16x8_t vld1q_p16 (const poly16_t * __a);

/*--2. Load a single lane from memory: vld1 -> loads one element from the memory
pointed to by __a into lane __c of the input vector __b and returns the
resulting vector. The elements of __b that are not loaded are returned in the
result vector unaltered.
__c is the index of the element to load.
The trailing //_mm_* notes name the x86 SSE intrinsic the author associates
with the prototype.--*/

int8x8_t vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c);//_mm_insert_epi8

int16x4_t vld1_lane_s16 (const int16_t * __a, int16x4_t __b,

const int __c);//_mm_insert_epi16

int32x2_t vld1_lane_s32 (const int32_t * __a, int32x2_t __b,

const int __c);//_mm_insert_epi32

float32x2_t vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c);

uint8x8_t vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b,

const int __c);//_mm_insert_epi8

uint16x4_t vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b,

const int __c);//_mm_insert_epi16

uint32x2_t vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b,

const int __c);//_mm_insert_epi32

poly8x8_t vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b,

const int __c);//_mm_insert_epi8

poly16x4_t vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b,

const int __c);//_mm_insert_epi16

int64x1_t vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c);

uint64x1_t vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c);

int8x16_t vld1q_lane_s8 (const int8_t * __a, int8x16_t __b,

const int __c);//_mm_insert_epi8

int16x8_t vld1q_lane_s16 (const int16_t * __a, int16x8_t __b,

const int __c);//_mm_insert_epi16

int32x4_t vld1q_lane_s32 (const int32_t * __a, int32x4_t __b,

const int __c);//_mm_insert_epi32

float32x4_t vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c);

uint8x16_t vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b,

const int __c);//_mm_insert_epi8

uint16x8_t vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b,

const int __c);//_mm_insert_epi16

uint32x4_t vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b,

const int __c);//_mm_insert_epi32

poly8x16_t vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b,

const int __c);//_mm_insert_epi8

poly16x8_t vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b,

const int __c);//_mm_insert_epi16

int64x2_t vld1q_lane_s64 (const int64_t * __a, int64x2_t __b,

const int __c);//_mm_insert_epi64

uint64x2_t vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b,

const int __c);//_mm_insert_epi64

/*--3. Load all lanes of a vector with the same value from memory: vld1 ->
loads one element from the memory pointed to by __a and copies it to
every lane of the returned vector.
The trailing //_mm_* notes name the x86 SSE intrinsic the author associates
with the prototype.--*/

int8x8_t vld1_dup_s8 (const int8_t * __a);//_mm_set1_epi8

int16x4_t vld1_dup_s16 (const int16_t * __a);//_mm_set1_epi16

int32x2_t vld1_dup_s32 (const int32_t * __a);//_mm_set1_epi32

float32x2_t vld1_dup_f32 (const float32_t * __a);//_mm_set1_ps

uint8x8_t vld1_dup_u8 (const uint8_t * __a);//_mm_set1_epi8

uint16x4_t vld1_dup_u16 (const uint16_t * __a);//_mm_set1_epi16

uint32x2_t vld1_dup_u32 (const uint32_t * __a);//_mm_set1_epi32

poly8x8_t vld1_dup_p8 (const poly8_t * __a);//_mm_set1_epi8

poly16x4_t vld1_dup_p16 (const poly16_t * __a);//_mm_set1_epi16

int64x1_t vld1_dup_s64 (const int64_t * __a);

uint64x1_t vld1_dup_u64 (const uint64_t * __a);

int8x16_t vld1q_dup_s8 (const int8_t * __a);//_mm_set1_epi8

int16x8_t vld1q_dup_s16 (const int16_t * __a);//_mm_set1_epi16

int32x4_t vld1q_dup_s32 (const int32_t * __a);//_mm_set1_epi32

float32x4_t vld1q_dup_f32 (const float32_t * __a);//_mm_set1_ps

uint8x16_t vld1q_dup_u8 (const uint8_t * __a);//_mm_set1_epi8

uint16x8_t vld1q_dup_u16 (const uint16_t * __a);//_mm_set1_epi16

uint32x4_t vld1q_dup_u32 (const uint32_t * __a);//_mm_set1_epi32

poly8x16_t vld1q_dup_p8 (const poly8_t * __a);//_mm_set1_epi8

poly16x8_t vld1q_dup_p16 (const poly16_t * __a);//_mm_set1_epi16

int64x2_t vld1q_dup_s64 (const int64_t * __a);

uint64x2_t vld1q_dup_u64 (const uint64_t * __a);

/*--4. Load 2-element structures from memory: vld2 -> loads 2 vectors from the
memory pointed to by __a, performing a 2-way de-interleave from memory
into the vectors. Both vectors are returned together in a two-vector
structure (the ...x2_t return types).--*/

int8x8x2_t vld2_s8 (const int8_t * __a);

int16x4x2_t vld2_s16 (const int16_t * __a);

int32x2x2_t vld2_s32 (const int32_t * __a);

float32x2x2_t vld2_f32 (const float32_t * __a);

uint8x8x2_t vld2_u8 (const uint8_t * __a);

uint16x4x2_t vld2_u16 (const uint16_t * __a);

uint32x2x2_t vld2_u32 (const uint32_t * __a);

poly8x8x2_t vld2_p8 (const poly8_t * __a);

poly16x4x2_t vld2_p16 (const poly16_t * __a);

int64x1x2_t vld2_s64 (const int64_t * __a);

uint64x1x2_t vld2_u64 (const uint64_t * __a);

int8x16x2_t vld2q_s8 (const int8_t * __a);

int16x8x2_t vld2q_s16 (const int16_t * __a);

int32x4x2_t vld2q_s32 (const int32_t * __a);

float32x4x2_t vld2q_f32 (const float32_t * __a);

uint8x16x2_t vld2q_u8 (const uint8_t * __a);

uint16x8x2_t vld2q_u16 (const uint16_t * __a);

uint32x4x2_t vld2q_u32 (const uint32_t * __a);

poly8x16x2_t vld2q_p8 (const poly8_t * __a);

poly16x8x2_t vld2q_p16 (const poly16_t * __a);

/*--5. Load a single lane of a 2-element structure from memory: vld2 ->
loads two elements from consecutive memory addresses starting at __a into
lane __c of the two vectors of the double-vector structure __b and returns
the resulting structure.
Elements of the structure that are not loaded are returned unaltered.
__c is the index of the elements to load.
This list contains no quadword (q) variants for 8-bit element types.--*/

int8x8x2_t vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c);

int16x4x2_t vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c);

int32x2x2_t vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c);

float32x2x2_t vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c);

uint8x8x2_t vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c);

uint16x4x2_t vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c);

uint32x2x2_t vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c);

poly8x8x2_t vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c);

poly16x4x2_t vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c);

int16x8x2_t vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c);

int32x4x2_t vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c);

float32x4x2_t vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c);

uint16x8x2_t vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c);

uint32x4x2_t vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c);

poly16x8x2_t vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c);

/*--6. Load all lanes of a 2-element structure with the same values from
memory: vld2 -> loads 2 elements from the memory pointed to by __a and
returns a double-vector structure.
The first element is copied to all lanes of the first vector.
The second element is copied to all lanes of the second vector.
Only doubleword (non-q) variants are listed here.--*/

int8x8x2_t vld2_dup_s8 (const int8_t * __a);

int16x4x2_t vld2_dup_s16 (const int16_t * __a);

int32x2x2_t vld2_dup_s32 (const int32_t * __a);

float32x2x2_t vld2_dup_f32 (const float32_t * __a);

uint8x8x2_t vld2_dup_u8 (const uint8_t * __a);

uint16x4x2_t vld2_dup_u16 (const uint16_t * __a);

uint32x2x2_t vld2_dup_u32 (const uint32_t * __a);

poly8x8x2_t vld2_dup_p8 (const poly8_t * __a);

poly16x4x2_t vld2_dup_p16 (const poly16_t * __a);

int64x1x2_t vld2_dup_s64 (const int64_t * __a);

uint64x1x2_t vld2_dup_u64 (const uint64_t * __a);

/*--7. Load 3-element structures from memory: vld3 -> loads 3 vectors from the
memory pointed to by __a, performing a 3-way de-interleave from memory
into the vectors. The three vectors are returned together in a
triple-vector structure (the ...x3_t return types).--*/

int8x8x3_t vld3_s8 (const int8_t * __a);

int16x4x3_t vld3_s16 (const int16_t * __a);

int32x2x3_t vld3_s32 (const int32_t * __a);

float32x2x3_t vld3_f32 (const float32_t * __a);

uint8x8x3_t vld3_u8 (const uint8_t * __a);

uint16x4x3_t vld3_u16 (const uint16_t * __a);

uint32x2x3_t vld3_u32 (const uint32_t * __a);

poly8x8x3_t vld3_p8 (const poly8_t * __a);

poly16x4x3_t vld3_p16 (const poly16_t * __a);

int64x1x3_t vld3_s64 (const int64_t * __a);

uint64x1x3_t vld3_u64 (const uint64_t * __a);

int8x16x3_t vld3q_s8 (const int8_t * __a);

int16x8x3_t vld3q_s16 (const int16_t * __a);

int32x4x3_t vld3q_s32 (const int32_t * __a);

float32x4x3_t vld3q_f32 (const float32_t * __a);

uint8x16x3_t vld3q_u8 (const uint8_t * __a);

uint16x8x3_t vld3q_u16 (const uint16_t * __a);

uint32x4x3_t vld3q_u32 (const uint32_t * __a);

poly8x16x3_t vld3q_p8 (const poly8_t * __a);

poly16x8x3_t vld3q_p16 (const poly16_t * __a);

/*--8. Load a single lane of a 3-element structure from memory: vld3 ->
loads three elements from consecutive memory addresses starting at __a into
lane __c of the three vectors of the triple-vector structure __b and returns
the resulting structure.
Elements of the structure that are not loaded are returned unaltered.
__c is the index of the elements to load.
This list contains no quadword (q) variants for 8-bit element types.--*/

int8x8x3_t vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c);

int16x4x3_t vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c);

int32x2x3_t vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c);

float32x2x3_t vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c);

uint8x8x3_t vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c);

uint16x4x3_t vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c);

uint32x2x3_t vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c);

poly8x8x3_t vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c);

poly16x4x3_t vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c);

int16x8x3_t vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c);

int32x4x3_t vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c);

float32x4x3_t vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c);

uint16x8x3_t vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c);

uint32x4x3_t vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c);

poly16x8x3_t vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c);

/*--9. Load all lanes of a 3-element structure with the same values from
memory: vld3 -> loads 3 elements from the memory pointed to by __a and
returns a triple-vector structure. The first element is copied to all lanes
of the first vector; likewise, the second and third elements are copied to
all lanes of the second and third vectors respectively.
Only doubleword (non-q) variants are listed here.--*/

int8x8x3_t vld3_dup_s8 (const int8_t * __a);

int16x4x3_t vld3_dup_s16 (const int16_t * __a);

int32x2x3_t vld3_dup_s32 (const int32_t * __a);

float32x2x3_t vld3_dup_f32 (const float32_t * __a);

uint8x8x3_t vld3_dup_u8 (const uint8_t * __a);

uint16x4x3_t vld3_dup_u16 (const uint16_t * __a);

uint32x2x3_t vld3_dup_u32 (const uint32_t * __a);

poly8x8x3_t vld3_dup_p8 (const poly8_t * __a);

poly16x4x3_t vld3_dup_p16 (const poly16_t * __a);

int64x1x3_t vld3_dup_s64 (const int64_t * __a);

uint64x1x3_t vld3_dup_u64 (const uint64_t * __a);

/*--10. Load 4-element structures from memory: vld4 -> loads 4 vectors from the
memory pointed to by __a, performing a 4-way de-interleave from memory
into the vectors. The four vectors are returned together in a quad-vector
structure (the ...x4_t return types).--*/

int8x8x4_t vld4_s8 (const int8_t * __a);

int16x4x4_t vld4_s16 (const int16_t * __a);

int32x2x4_t vld4_s32 (const int32_t * __a);

float32x2x4_t vld4_f32 (const float32_t * __a);

uint8x8x4_t vld4_u8 (const uint8_t * __a);

uint16x4x4_t vld4_u16 (const uint16_t * __a);

uint32x2x4_t vld4_u32 (const uint32_t * __a);

poly8x8x4_t vld4_p8 (const poly8_t * __a);

poly16x4x4_t vld4_p16 (const poly16_t * __a);

int64x1x4_t vld4_s64 (const int64_t * __a);

uint64x1x4_t vld4_u64 (const uint64_t * __a);

int8x16x4_t vld4q_s8 (const int8_t * __a);

int16x8x4_t vld4q_s16 (const int16_t * __a);

int32x4x4_t vld4q_s32 (const int32_t * __a);

float32x4x4_t vld4q_f32 (const float32_t * __a);

uint8x16x4_t vld4q_u8 (const uint8_t * __a);

uint16x8x4_t vld4q_u16 (const uint16_t * __a);

uint32x4x4_t vld4q_u32 (const uint32_t * __a);

poly8x16x4_t vld4q_p8 (const poly8_t * __a);

poly16x8x4_t vld4q_p16 (const poly16_t * __a);

/*--11. Load a single lane of a 4-element structure from memory: vld4 ->
loads four elements from consecutive memory addresses starting at __a into
lane __c of the four vectors of the quad-vector structure __b and returns
the resulting structure.
Elements of the structure that are not loaded are returned unaltered.
__c is the index of the elements to load.
This list contains no quadword (q) variants for 8-bit element types.--*/

int8x8x4_t vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c);

int16x4x4_t vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c);

int32x2x4_t vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c);

float32x2x4_t vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c);

uint8x8x4_t vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c);

uint16x4x4_t vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c);

uint32x2x4_t vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c);

poly8x8x4_t vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c);

poly16x4x4_t vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c);

int16x8x4_t vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c);

int32x4x4_t vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c);

float32x4x4_t vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c);

uint16x8x4_t vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c);

uint32x4x4_t vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c);

poly16x8x4_t vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c);

/*--12. Load all lanes of a 4-element structure with the same values from
memory: vld4 -> loads 4 elements from the memory pointed to by __a and
returns a quad-vector structure. The first element is copied to all lanes of
the first vector; likewise, the second, third, and fourth elements are
copied to all lanes of the second, third, and fourth vectors respectively.
Only doubleword (non-q) variants are listed here.--*/

int8x8x4_t vld4_dup_s8 (const int8_t * __a);

int16x4x4_t vld4_dup_s16 (const int16_t * __a);

int32x2x4_t vld4_dup_s32 (const int32_t * __a);

float32x2x4_t vld4_dup_f32 (const float32_t * __a);

uint8x8x4_t vld4_dup_u8 (const uint8_t * __a);

uint16x4x4_t vld4_dup_u16 (const uint16_t * __a);

uint32x2x4_t vld4_dup_u32 (const uint32_t * __a);

poly8x8x4_t vld4_dup_p8 (const poly8_t * __a);

poly16x4x4_t vld4_dup_p16 (const poly16_t * __a);

int64x1x4_t vld4_dup_s64 (const int64_t * __a);

uint64x1x4_t vld4_dup_u64 (const uint64_t * __a);

/*****************************************************Store*****************************/

/*--1. Store a single vector into memory: vst1 -> stores the vector __b into
the memory pointed to by __a. Nothing is returned; the destination pointer
is not const-qualified because the memory is written.--*/

void vst1_s8 (int8_t * __a, int8x8_t __b);

void vst1_s16 (int16_t * __a, int16x4_t __b);

void vst1_s32 (int32_t * __a, int32x2_t __b);

void vst1_s64 (int64_t * __a, int64x1_t __b);

void vst1_f32 (float32_t * __a, float32x2_t __b);

void vst1_u8 (uint8_t * __a, uint8x8_t __b);

void vst1_u16 (uint16_t * __a, uint16x4_t __b);

void vst1_u32 (uint32_t * __a, uint32x2_t __b);

void vst1_u64 (uint64_t * __a, uint64x1_t __b);

void vst1_p8 (poly8_t * __a, poly8x8_t __b);

void vst1_p16 (poly16_t * __a, poly16x4_t __b);

void vst1q_s8 (int8_t * __a, int8x16_t __b);

void vst1q_s16 (int16_t * __a, int16x8_t __b);

void vst1q_s32 (int32_t * __a, int32x4_t __b);

void vst1q_s64 (int64_t * __a, int64x2_t __b);

void vst1q_f32 (float32_t * __a, float32x4_t __b);

void vst1q_u8 (uint8_t * __a, uint8x16_t __b);

void vst1q_u16 (uint16_t * __a, uint16x8_t __b);

void vst1q_u32 (uint32_t * __a, uint32x4_t __b);

void vst1q_u64 (uint64_t * __a, uint64x2_t __b);

void vst1q_p8 (poly8_t * __a, poly8x16_t __b);

void vst1q_p16 (poly16_t * __a, poly16x8_t __b);

/*-- 2. Store a single lane into memory: vst1_lane
 *
 * Stores one element of the vector to memory.
 *
 *   __a  destination pointer; exactly one element is written
 *   __b  the source vector
 *   __c  index of the lane of __b to store
 *
 * Per ARM's intrinsics reference, __c has to be a compile-time constant in
 * the range 0 .. (number of lanes in __b) - 1, e.g. 0..7 for int8x8_t and
 * 0..15 for int8x16_t. --*/

void vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c);

void vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c);

void vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c);

void vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c);

void vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c);

void vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c);

void vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c);

void vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c);

void vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c);

void vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c);

void vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c);

void vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c);

void vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c);

void vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c);

void vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c);

void vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c);

void vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c);

void vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c);

void vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c);

void vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c);

void vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c);

void vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c);

/*-- 3. Store 2 vectors into memory: vst2
 *
 * Stores the 2 vectors of a double-vector structure to memory, interleaving
 * them: element 0 of the first vector, element 0 of the second vector,
 * element 1 of the first vector, element 1 of the second vector, and so on.
 *
 *   __a  destination pointer (written through)
 *   __b  the 2-vector structure to store
 *
 * 64-bit element types (s64/u64) are listed only for the non-q forms. --*/

void vst2_s8 (int8_t * __a, int8x8x2_t __b);

void vst2_s16 (int16_t * __a, int16x4x2_t __b);

void vst2_s32 (int32_t * __a, int32x2x2_t __b);

void vst2_f32 (float32_t * __a, float32x2x2_t __b);

void vst2_u8 (uint8_t * __a, uint8x8x2_t __b);

void vst2_u16 (uint16_t * __a, uint16x4x2_t __b);

void vst2_u32 (uint32_t * __a, uint32x2x2_t __b);

void vst2_p8 (poly8_t * __a, poly8x8x2_t __b);

void vst2_p16 (poly16_t * __a, poly16x4x2_t __b);

void vst2_s64 (int64_t * __a, int64x1x2_t __b);

void vst2_u64 (uint64_t * __a, uint64x1x2_t __b);

void vst2q_s8 (int8_t * __a, int8x16x2_t __b);

void vst2q_s16 (int16_t * __a, int16x8x2_t __b);

void vst2q_s32 (int32_t * __a, int32x4x2_t __b);

void vst2q_f32 (float32_t * __a, float32x4x2_t __b);

void vst2q_u8 (uint8_t * __a, uint8x16x2_t __b);

void vst2q_u16 (uint16_t * __a, uint16x8x2_t __b);

void vst2q_u32 (uint32_t * __a, uint32x4x2_t __b);

void vst2q_p8 (poly8_t * __a, poly8x16x2_t __b);

void vst2q_p16 (poly16_t * __a, poly16x8x2_t __b);

/*-- 4. Store a lane of two elements into memory: vst2_lane
 *
 * Stores one lane of a double-vector structure to memory: two elements,
 * one taken from the same lane index of each of the two vectors.
 *
 *   __a  destination pointer; two elements are written
 *   __b  the 2-vector structure to read from
 *   __c  lane index selecting the element in each vector
 *
 * Per ARM's intrinsics reference, __c has to be a compile-time constant in
 * the range 0 .. (lanes per vector) - 1.
 * The q forms are listed for 16-bit and 32-bit element types only. --*/

void vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c);

void vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c);

void vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c);

void vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c);

void vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c);

void vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c);

void vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c);

void vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c);

void vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c);

void vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c);

void vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c);

void vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c);

void vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c);

void vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c);

void vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c);

/*-- 5. Store 3 vectors into memory: vst3
 *
 * Stores the 3 vectors of a triple-vector structure to memory, interleaving
 * them: element 0 of each vector in turn, then element 1 of each vector,
 * and so on.
 *
 *   __a  destination pointer (written through)
 *   __b  the 3-vector structure to store
 *
 * 64-bit element types (s64/u64) are listed only for the non-q forms. --*/

void vst3_s8 (int8_t * __a, int8x8x3_t __b);

void vst3_s16 (int16_t * __a, int16x4x3_t __b);

void vst3_s32 (int32_t * __a, int32x2x3_t __b);

void vst3_f32 (float32_t * __a, float32x2x3_t __b);

void vst3_u8 (uint8_t * __a, uint8x8x3_t __b);

void vst3_u16 (uint16_t * __a, uint16x4x3_t __b);

void vst3_u32 (uint32_t * __a, uint32x2x3_t __b);

void vst3_p8 (poly8_t * __a, poly8x8x3_t __b);

void vst3_p16 (poly16_t * __a, poly16x4x3_t __b);

void vst3_s64 (int64_t * __a, int64x1x3_t __b);

void vst3_u64 (uint64_t * __a, uint64x1x3_t __b);

void vst3q_s8 (int8_t * __a, int8x16x3_t __b);

void vst3q_s16 (int16_t * __a, int16x8x3_t __b);

void vst3q_s32 (int32_t * __a, int32x4x3_t __b);

void vst3q_f32 (float32_t * __a, float32x4x3_t __b);

void vst3q_u8 (uint8_t * __a, uint8x16x3_t __b);

void vst3q_u16 (uint16_t * __a, uint16x8x3_t __b);

void vst3q_u32 (uint32_t * __a, uint32x4x3_t __b);

void vst3q_p8 (poly8_t * __a, poly8x16x3_t __b);

void vst3q_p16 (poly16_t * __a, poly16x8x3_t __b);

/*-- 6. Store a lane of three elements into memory: vst3_lane
 *
 * Stores one lane of a triple-vector structure to memory: three elements,
 * one taken from the same lane index of each of the three vectors.
 *
 *   __a  destination pointer; three elements are written
 *   __b  the 3-vector structure to read from
 *   __c  lane index selecting the element in each vector
 *
 * Per ARM's intrinsics reference, __c has to be a compile-time constant in
 * the range 0 .. (lanes per vector) - 1.
 * The q forms are listed for 16-bit and 32-bit element types only. --*/

void vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c);

void vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c);

void vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c);

void vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c);

void vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c);

void vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c);

void vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c);

void vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c);

void vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c);

void vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c);

void vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c);

void vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c);

void vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c);

void vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c);

void vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c);

/*-- 7. Store 4 vectors into memory: vst4
 *
 * Stores the 4 vectors of a quad-vector structure to memory, interleaving
 * them: element 0 of each vector in turn, then element 1 of each vector,
 * and so on.
 *
 *   __a  destination pointer (written through)
 *   __b  the 4-vector structure to store
 *
 * 64-bit element types (s64/u64) are listed only for the non-q forms. --*/

void vst4_s8 (int8_t * __a, int8x8x4_t __b);

void vst4_s16 (int16_t * __a, int16x4x4_t __b);

void vst4_s32 (int32_t * __a, int32x2x4_t __b);

void vst4_f32 (float32_t * __a, float32x2x4_t __b);

void vst4_u8 (uint8_t * __a, uint8x8x4_t __b);

void vst4_u16 (uint16_t * __a, uint16x4x4_t __b);

void vst4_u32 (uint32_t * __a, uint32x2x4_t __b);

void vst4_p8 (poly8_t * __a, poly8x8x4_t __b);

void vst4_p16 (poly16_t * __a, poly16x4x4_t __b);

void vst4_s64 (int64_t * __a, int64x1x4_t __b);

void vst4_u64 (uint64_t * __a, uint64x1x4_t __b);

void vst4q_s8 (int8_t * __a, int8x16x4_t __b);

void vst4q_s16 (int16_t * __a, int16x8x4_t __b);

void vst4q_s32 (int32_t * __a, int32x4x4_t __b);

void vst4q_f32 (float32_t * __a, float32x4x4_t __b);

void vst4q_u8 (uint8_t * __a, uint8x16x4_t __b);

void vst4q_u16 (uint16_t * __a, uint16x8x4_t __b);

void vst4q_u32 (uint32_t * __a, uint32x4x4_t __b);

void vst4q_p8 (poly8_t * __a, poly8x16x4_t __b);

void vst4q_p16 (poly16_t * __a, poly16x8x4_t __b);

/*-- 8. Store a lane of four elements into memory: vst4_lane
 *
 * Stores one lane of a quad-vector structure to memory: four elements,
 * one taken from the same lane index of each of the four vectors.
 *
 *   __a  destination pointer; four elements are written
 *   __b  the 4-vector structure to read from
 *   __c  lane index selecting the element in each vector
 *
 * Per ARM's intrinsics reference, __c has to be a compile-time constant in
 * the range 0 .. (lanes per vector) - 1.
 * The q forms are listed for 16-bit and 32-bit element types only. --*/

void vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c);

void vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c);

void vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c);

void vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c);

void vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c);

void vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c);

void vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c);

void vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c);

void vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c);

void vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c);

void vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c);

void vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c);

void vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c);

void vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c);

void vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c);

/*********************************Reinterpret casts(type conversion)********************/

/*-- Convert between types: vreinterpret
 *
 * Treats a vector as having a different datatype, without changing its
 * value.
 *
 * Naming: vreinterpret_<dst>_<src> (64-bit vectors) and
 * vreinterpretq_<dst>_<src> (128-bit vectors); the parameter has the <src>
 * vector type and the result has the <dst> vector type of the same total
 * width. --*/

/* Destination poly8: results are poly8x8_t / poly8x16_t. */
poly8x8_t vreinterpret_p8_s8 (int8x8_t __a);

poly8x8_t vreinterpret_p8_s16 (int16x4_t __a);

poly8x8_t vreinterpret_p8_s32 (int32x2_t __a);

poly8x8_t vreinterpret_p8_s64 (int64x1_t __a);

poly8x8_t vreinterpret_p8_f32 (float32x2_t __a);

poly8x8_t vreinterpret_p8_u8 (uint8x8_t __a);

poly8x8_t vreinterpret_p8_u16 (uint16x4_t __a);

poly8x8_t vreinterpret_p8_u32 (uint32x2_t __a);

poly8x8_t vreinterpret_p8_u64 (uint64x1_t __a);

poly8x8_t vreinterpret_p8_p16 (poly16x4_t __a);

poly8x16_t vreinterpretq_p8_s8 (int8x16_t __a);

poly8x16_t vreinterpretq_p8_s16 (int16x8_t __a);

poly8x16_t vreinterpretq_p8_s32 (int32x4_t __a);

poly8x16_t vreinterpretq_p8_s64 (int64x2_t __a);

poly8x16_t vreinterpretq_p8_f32 (float32x4_t __a);

poly8x16_t vreinterpretq_p8_u8 (uint8x16_t __a);

poly8x16_t vreinterpretq_p8_u16 (uint16x8_t __a);

poly8x16_t vreinterpretq_p8_u32 (uint32x4_t __a);

poly8x16_t vreinterpretq_p8_u64 (uint64x2_t __a);

poly8x16_t vreinterpretq_p8_p16 (poly16x8_t __a);

/* vreinterpret[q]_p16_<src>: view a vector of type <src> as a poly16 vector
   of the same total width (poly16x4_t for 64-bit, poly16x8_t for the
   128-bit q forms).  One entry per other element type. */
poly16x4_t vreinterpret_p16_s8 (int8x8_t __a);

poly16x4_t vreinterpret_p16_s16 (int16x4_t __a);

poly16x4_t vreinterpret_p16_s32 (int32x2_t __a);

poly16x4_t vreinterpret_p16_s64 (int64x1_t __a);

poly16x4_t vreinterpret_p16_f32 (float32x2_t __a);

poly16x4_t vreinterpret_p16_u8 (uint8x8_t __a);

poly16x4_t vreinterpret_p16_u16 (uint16x4_t __a);

poly16x4_t vreinterpret_p16_u32 (uint32x2_t __a);

poly16x4_t vreinterpret_p16_u64 (uint64x1_t __a);

poly16x4_t vreinterpret_p16_p8 (poly8x8_t __a);

poly16x8_t vreinterpretq_p16_s8 (int8x16_t __a);

poly16x8_t vreinterpretq_p16_s16 (int16x8_t __a);

poly16x8_t vreinterpretq_p16_s32 (int32x4_t __a);

poly16x8_t vreinterpretq_p16_s64 (int64x2_t __a);

poly16x8_t vreinterpretq_p16_f32 (float32x4_t __a);

poly16x8_t vreinterpretq_p16_u8 (uint8x16_t __a);

poly16x8_t vreinterpretq_p16_u16 (uint16x8_t __a);

poly16x8_t vreinterpretq_p16_u32 (uint32x4_t __a);

poly16x8_t vreinterpretq_p16_u64 (uint64x2_t __a);

poly16x8_t vreinterpretq_p16_p8 (poly8x16_t __a);

/* vreinterpret[q]_f32_<src>: view a vector of type <src> as a float32 vector
   of the same total width (float32x2_t for 64-bit, float32x4_t for the
   128-bit q forms).  This is a type reinterpretation, not a numeric
   integer-to-float conversion. */
float32x2_t vreinterpret_f32_s8 (int8x8_t __a);

float32x2_t vreinterpret_f32_s16 (int16x4_t __a);

float32x2_t vreinterpret_f32_s32 (int32x2_t __a);

float32x2_t vreinterpret_f32_s64 (int64x1_t __a);

float32x2_t vreinterpret_f32_u8 (uint8x8_t __a);

float32x2_t vreinterpret_f32_u16 (uint16x4_t __a);

float32x2_t vreinterpret_f32_u32 (uint32x2_t __a);

float32x2_t vreinterpret_f32_u64 (uint64x1_t __a);

float32x2_t vreinterpret_f32_p8 (poly8x8_t __a);

float32x2_t vreinterpret_f32_p16 (poly16x4_t __a);

float32x4_t vreinterpretq_f32_s8 (int8x16_t __a);

float32x4_t vreinterpretq_f32_s16 (int16x8_t __a);

float32x4_t vreinterpretq_f32_s32 (int32x4_t __a);

float32x4_t vreinterpretq_f32_s64 (int64x2_t __a);

float32x4_t vreinterpretq_f32_u8 (uint8x16_t __a);

float32x4_t vreinterpretq_f32_u16 (uint16x8_t __a);

float32x4_t vreinterpretq_f32_u32 (uint32x4_t __a);

float32x4_t vreinterpretq_f32_u64 (uint64x2_t __a);

float32x4_t vreinterpretq_f32_p8 (poly8x16_t __a);

float32x4_t vreinterpretq_f32_p16 (poly16x8_t __a);

/* vreinterpret[q]_s64_<src>: view a vector of type <src> as a signed 64-bit
   vector of the same total width (int64x1_t for 64-bit, int64x2_t for the
   128-bit q forms). */
int64x1_t vreinterpret_s64_s8 (int8x8_t __a);

int64x1_t vreinterpret_s64_s16 (int16x4_t __a);

int64x1_t vreinterpret_s64_s32 (int32x2_t __a);

int64x1_t vreinterpret_s64_f32 (float32x2_t __a);

int64x1_t vreinterpret_s64_u8 (uint8x8_t __a);

int64x1_t vreinterpret_s64_u16 (uint16x4_t __a);

int64x1_t vreinterpret_s64_u32 (uint32x2_t __a);

int64x1_t vreinterpret_s64_u64 (uint64x1_t __a);

int64x1_t vreinterpret_s64_p8 (poly8x8_t __a);

int64x1_t vreinterpret_s64_p16 (poly16x4_t __a);

int64x2_t vreinterpretq_s64_s8 (int8x16_t __a);

int64x2_t vreinterpretq_s64_s16 (int16x8_t __a);

int64x2_t vreinterpretq_s64_s32 (int32x4_t __a);

int64x2_t vreinterpretq_s64_f32 (float32x4_t __a);

int64x2_t vreinterpretq_s64_u8 (uint8x16_t __a);

int64x2_t vreinterpretq_s64_u16 (uint16x8_t __a);

int64x2_t vreinterpretq_s64_u32 (uint32x4_t __a);

int64x2_t vreinterpretq_s64_u64 (uint64x2_t __a);

int64x2_t vreinterpretq_s64_p8 (poly8x16_t __a);

int64x2_t vreinterpretq_s64_p16 (poly16x8_t __a);

/* vreinterpret[q]_u64_<src>: view a vector of type <src> as an unsigned
   64-bit vector of the same total width (uint64x1_t for 64-bit, uint64x2_t
   for the 128-bit q forms). */
uint64x1_t vreinterpret_u64_s8 (int8x8_t __a);

uint64x1_t vreinterpret_u64_s16 (int16x4_t __a);

uint64x1_t vreinterpret_u64_s32 (int32x2_t __a);

uint64x1_t vreinterpret_u64_s64 (int64x1_t __a);

uint64x1_t vreinterpret_u64_f32 (float32x2_t __a);

uint64x1_t vreinterpret_u64_u8 (uint8x8_t __a);

uint64x1_t vreinterpret_u64_u16 (uint16x4_t __a);

uint64x1_t vreinterpret_u64_u32 (uint32x2_t __a);

uint64x1_t vreinterpret_u64_p8 (poly8x8_t __a);

uint64x1_t vreinterpret_u64_p16 (poly16x4_t __a);

uint64x2_t vreinterpretq_u64_s8 (int8x16_t __a);

uint64x2_t vreinterpretq_u64_s16 (int16x8_t __a);

uint64x2_t vreinterpretq_u64_s32 (int32x4_t __a);

uint64x2_t vreinterpretq_u64_s64 (int64x2_t __a);

uint64x2_t vreinterpretq_u64_f32 (float32x4_t __a);

uint64x2_t vreinterpretq_u64_u8 (uint8x16_t __a);

uint64x2_t vreinterpretq_u64_u16 (uint16x8_t __a);

uint64x2_t vreinterpretq_u64_u32 (uint32x4_t __a);

uint64x2_t vreinterpretq_u64_p8 (poly8x16_t __a);

uint64x2_t vreinterpretq_u64_p16 (poly16x8_t __a);

/* vreinterpret[q]_s8_<src>: view a vector of type <src> as a signed 8-bit
   vector of the same total width (int8x8_t for 64-bit, int8x16_t for the
   128-bit q forms). */
int8x8_t vreinterpret_s8_s16 (int16x4_t __a);

int8x8_t vreinterpret_s8_s32 (int32x2_t __a);

int8x8_t vreinterpret_s8_s64 (int64x1_t __a);

int8x8_t vreinterpret_s8_f32 (float32x2_t __a);

int8x8_t vreinterpret_s8_u8 (uint8x8_t __a);

int8x8_t vreinterpret_s8_u16 (uint16x4_t __a);

int8x8_t vreinterpret_s8_u32 (uint32x2_t __a);

int8x8_t vreinterpret_s8_u64 (uint64x1_t __a);

int8x8_t vreinterpret_s8_p8 (poly8x8_t __a);

int8x8_t vreinterpret_s8_p16 (poly16x4_t __a);

int8x16_t vreinterpretq_s8_s16 (int16x8_t __a);

int8x16_t vreinterpretq_s8_s32 (int32x4_t __a);

int8x16_t vreinterpretq_s8_s64 (int64x2_t __a);

int8x16_t vreinterpretq_s8_f32 (float32x4_t __a);

int8x16_t vreinterpretq_s8_u8 (uint8x16_t __a);

int8x16_t vreinterpretq_s8_u16 (uint16x8_t __a);

int8x16_t vreinterpretq_s8_u32 (uint32x4_t __a);

int8x16_t vreinterpretq_s8_u64 (uint64x2_t __a);

int8x16_t vreinterpretq_s8_p8 (poly8x16_t __a);

int8x16_t vreinterpretq_s8_p16 (poly16x8_t __a);

/* vreinterpret[q]_s16_<src>: view a vector of type <src> as a signed 16-bit
   vector of the same total width (int16x4_t for 64-bit, int16x8_t for the
   128-bit q forms). */
int16x4_t vreinterpret_s16_s8 (int8x8_t __a);

int16x4_t vreinterpret_s16_s32 (int32x2_t __a);

int16x4_t vreinterpret_s16_s64 (int64x1_t __a);

int16x4_t vreinterpret_s16_f32 (float32x2_t __a);

int16x4_t vreinterpret_s16_u8 (uint8x8_t __a);

int16x4_t vreinterpret_s16_u16 (uint16x4_t __a);

int16x4_t vreinterpret_s16_u32 (uint32x2_t __a);

int16x4_t vreinterpret_s16_u64 (uint64x1_t __a);

int16x4_t vreinterpret_s16_p8 (poly8x8_t __a);

int16x4_t vreinterpret_s16_p16 (poly16x4_t __a);

int16x8_t vreinterpretq_s16_s8 (int8x16_t __a);

int16x8_t vreinterpretq_s16_s32 (int32x4_t __a);

int16x8_t vreinterpretq_s16_s64 (int64x2_t __a);

int16x8_t vreinterpretq_s16_f32 (float32x4_t __a);

int16x8_t vreinterpretq_s16_u8 (uint8x16_t __a);

int16x8_t vreinterpretq_s16_u16 (uint16x8_t __a);

int16x8_t vreinterpretq_s16_u32 (uint32x4_t __a);

int16x8_t vreinterpretq_s16_u64 (uint64x2_t __a);

int16x8_t vreinterpretq_s16_p8 (poly8x16_t __a);

int16x8_t vreinterpretq_s16_p16 (poly16x8_t __a);

/* vreinterpret[q]_s32_<src>: view a vector of type <src> as a signed 32-bit
   vector of the same total width (int32x2_t for 64-bit, int32x4_t for the
   128-bit q forms).  The f32 entries reinterpret the type only; they do not
   perform a numeric float-to-integer conversion. */
int32x2_t vreinterpret_s32_s8 (int8x8_t __a);

int32x2_t vreinterpret_s32_s16 (int16x4_t __a);

int32x2_t vreinterpret_s32_s64 (int64x1_t __a);

int32x2_t vreinterpret_s32_f32 (float32x2_t __a);

int32x2_t vreinterpret_s32_u8 (uint8x8_t __a);

int32x2_t vreinterpret_s32_u16 (uint16x4_t __a);

int32x2_t vreinterpret_s32_u32 (uint32x2_t __a);

int32x2_t vreinterpret_s32_u64 (uint64x1_t __a);

int32x2_t vreinterpret_s32_p8 (poly8x8_t __a);

int32x2_t vreinterpret_s32_p16 (poly16x4_t __a);

int32x4_t vreinterpretq_s32_s8 (int8x16_t __a);

int32x4_t vreinterpretq_s32_s16 (int16x8_t __a);

int32x4_t vreinterpretq_s32_s64 (int64x2_t __a);

int32x4_t vreinterpretq_s32_f32 (float32x4_t __a);

int32x4_t vreinterpretq_s32_u8 (uint8x16_t __a);

int32x4_t vreinterpretq_s32_u16 (uint16x8_t __a);

int32x4_t vreinterpretq_s32_u32 (uint32x4_t __a);

int32x4_t vreinterpretq_s32_u64 (uint64x2_t __a);

int32x4_t vreinterpretq_s32_p8 (poly8x16_t __a);

int32x4_t vreinterpretq_s32_p16 (poly16x8_t __a);

/* vreinterpret[q]_u8_<src>: view a vector of type <src> as an unsigned 8-bit
   vector of the same total width (uint8x8_t for 64-bit, uint8x16_t for the
   128-bit q forms). */
uint8x8_t vreinterpret_u8_s8 (int8x8_t __a);

uint8x8_t vreinterpret_u8_s16 (int16x4_t __a);

uint8x8_t vreinterpret_u8_s32 (int32x2_t __a);

uint8x8_t vreinterpret_u8_s64 (int64x1_t __a);

uint8x8_t vreinterpret_u8_f32 (float32x2_t __a);

uint8x8_t vreinterpret_u8_u16 (uint16x4_t __a);

uint8x8_t vreinterpret_u8_u32 (uint32x2_t __a);

uint8x8_t vreinterpret_u8_u64 (uint64x1_t __a);

uint8x8_t vreinterpret_u8_p8 (poly8x8_t __a);

uint8x8_t vreinterpret_u8_p16 (poly16x4_t __a);

uint8x16_t vreinterpretq_u8_s8 (int8x16_t __a);

uint8x16_t vreinterpretq_u8_s16 (int16x8_t __a);

uint8x16_t vreinterpretq_u8_s32 (int32x4_t __a);

uint8x16_t vreinterpretq_u8_s64 (int64x2_t __a);

uint8x16_t vreinterpretq_u8_f32 (float32x4_t __a);

uint8x16_t vreinterpretq_u8_u16 (uint16x8_t __a);

uint8x16_t vreinterpretq_u8_u32 (uint32x4_t __a);

uint8x16_t vreinterpretq_u8_u64 (uint64x2_t __a);

uint8x16_t vreinterpretq_u8_p8 (poly8x16_t __a);

uint8x16_t vreinterpretq_u8_p16 (poly16x8_t __a);

/* vreinterpret[q]_u16_<src>: view a vector of type <src> as an unsigned
   16-bit vector of the same total width (uint16x4_t for 64-bit, uint16x8_t
   for the 128-bit q forms). */
uint16x4_t vreinterpret_u16_s8 (int8x8_t __a);

uint16x4_t vreinterpret_u16_s16 (int16x4_t __a);

uint16x4_t vreinterpret_u16_s32 (int32x2_t __a);

uint16x4_t vreinterpret_u16_s64 (int64x1_t __a);

uint16x4_t vreinterpret_u16_f32 (float32x2_t __a);

uint16x4_t vreinterpret_u16_u8 (uint8x8_t __a);

uint16x4_t vreinterpret_u16_u32 (uint32x2_t __a);

uint16x4_t vreinterpret_u16_u64 (uint64x1_t __a);

uint16x4_t vreinterpret_u16_p8 (poly8x8_t __a);

uint16x4_t vreinterpret_u16_p16 (poly16x4_t __a);

uint16x8_t vreinterpretq_u16_s8 (int8x16_t __a);

uint16x8_t vreinterpretq_u16_s16 (int16x8_t __a);

uint16x8_t vreinterpretq_u16_s32 (int32x4_t __a);

uint16x8_t vreinterpretq_u16_s64 (int64x2_t __a);

uint16x8_t vreinterpretq_u16_f32 (float32x4_t __a);

uint16x8_t vreinterpretq_u16_u8 (uint8x16_t __a);

uint16x8_t vreinterpretq_u16_u32 (uint32x4_t __a);

uint16x8_t vreinterpretq_u16_u64 (uint64x2_t __a);

uint16x8_t vreinterpretq_u16_p8 (poly8x16_t __a);

uint16x8_t vreinterpretq_u16_p16 (poly16x8_t __a);

/* vreinterpret[q]_u32_<src>: view a vector of type <src> as an unsigned
   32-bit vector of the same total width (uint32x2_t for 64-bit, uint32x4_t
   for the 128-bit q forms).  The f32 entries reinterpret the type only; they
   do not perform a numeric float-to-integer conversion. */
uint32x2_t vreinterpret_u32_s8 (int8x8_t __a);

uint32x2_t vreinterpret_u32_s16 (int16x4_t __a);

uint32x2_t vreinterpret_u32_s32 (int32x2_t __a);

uint32x2_t vreinterpret_u32_s64 (int64x1_t __a);

uint32x2_t vreinterpret_u32_f32 (float32x2_t __a);

uint32x2_t vreinterpret_u32_u8 (uint8x8_t __a);

uint32x2_t vreinterpret_u32_u16 (uint16x4_t __a);

uint32x2_t vreinterpret_u32_u64 (uint64x1_t __a);

uint32x2_t vreinterpret_u32_p8 (poly8x8_t __a);

uint32x2_t vreinterpret_u32_p16 (poly16x4_t __a);

uint32x4_t vreinterpretq_u32_s8 (int8x16_t __a);

uint32x4_t vreinterpretq_u32_s16 (int16x8_t __a);

uint32x4_t vreinterpretq_u32_s32 (int32x4_t __a);

uint32x4_t vreinterpretq_u32_s64 (int64x2_t __a);

uint32x4_t vreinterpretq_u32_f32 (float32x4_t __a);

uint32x4_t vreinterpretq_u32_u8 (uint8x16_t __a);

uint32x4_t vreinterpretq_u32_u16 (uint16x8_t __a);

uint32x4_t vreinterpretq_u32_u64 (uint64x2_t __a);

uint32x4_t vreinterpretq_u32_p8 (poly8x16_t __a);

uint32x4_t vreinterpretq_u32_p16 (poly16x8_t __a);

總結

以上是生活随笔為你收集整理的clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

亚洲中文无码av永久不收费 | 久久伊人色av天堂九九小黄鸭 | 无码av最新清无码专区吞精 | 国语精品一区二区三区 | 欧美兽交xxxx×视频 | 午夜无码区在线观看 | 亚洲欧美精品伊人久久 | 日韩精品无码一区二区中文字幕 | 欧美猛少妇色xxxxx | 精品一区二区不卡无码av | 国产免费无码一区二区视频 | 成人欧美一区二区三区黑人免费 | 精品成在人线av无码免费看 | 国产成人无码专区 | 撕开奶罩揉吮奶头视频 | 精品无码成人片一区二区98 | 国产av一区二区精品久久凹凸 | 精品无码一区二区三区的天堂 | 欧美激情综合亚洲一二区 | 无码人妻精品一区二区三区不卡 | 漂亮人妻洗澡被公强 日日躁 | 娇妻被黑人粗大高潮白浆 | av无码电影一区二区三区 | a片免费视频在线观看 | 国产尤物精品视频 | 在线观看欧美一区二区三区 | 欧美日本日韩 | 国产精品久久国产三级国 | 国产精品久久久久7777 | 国产精品毛多多水多 | 人妻中文无码久热丝袜 | 成人免费视频在线观看 | 一本大道久久东京热无码av | 人人妻人人澡人人爽欧美精品 | 麻豆国产人妻欲求不满 | аⅴ资源天堂资源库在线 | 亚洲人亚洲人成电影网站色 | 午夜成人1000部免费视频 | 久在线观看福利视频 | 小sao货水好多真紧h无码视频 | 香蕉久久久久久av成人 | 国产亚洲视频中文字幕97精品 | 丰满少妇弄高潮了www | 亚洲精品国偷拍自产在线麻豆 | 精品久久久无码人妻字幂 | 中文字幕无码视频专区 | 亚洲中文字幕av在天堂 | 久久精品人人做人人综合 | 国产乱子伦视频在线播放 | 亚洲熟妇色xxxxx亚洲 | 强辱丰满人妻hd中文字幕 | 荫蒂添的好舒服视频囗交 | 精品成在人线av无码免费看 | 红桃av一区二区三区在线无码av | 网友自拍区视频精品 | 小鲜肉自慰网站xnxx | 亚洲熟妇色xxxxx欧美老妇 | 久久久久久久人妻无码中文字幕爆 | 亚洲s码欧洲m码国产av | 亚洲成av人综合在线观看 | 老熟女重囗味hdxx69 | 亚洲阿v天堂在线 | 无码人妻丰满熟妇区毛片18 | 午夜时刻免费入口 | 麻豆精品国产精华精华液好用吗 | 亚洲一区二区三区四区 | 国产成人一区二区三区在线观看 | 亚洲色偷偷男人的天堂 | 熟妇人妻无乱码中文字幕 | 日日干夜夜干 | 免费人成在线视频无码 | 99久久久无码国产精品免费 | 国产九九九九九九九a片 | 午夜无码区在线观看 | 精品国产精品久久一区免费式 | 国产精品无码一区二区桃花视频 | 午夜福利试看120秒体验区 | 国产suv精品一区二区五 | 国产精品久久国产精品99 | 97久久精品无码一区二区 | 亚洲欧洲中文日韩av乱码 | 色爱情人网站 | 人人爽人人爽人人片av亚洲 | 激情爆乳一区二区三区 | 成人免费无码大片a毛片 | 国产精品毛片一区二区 | 大胆欧美熟妇xx | 精品国产福利一区二区 | 少妇性l交大片 | 丰满岳乱妇在线观看中字无码 | 全黄性性激高免费视频 | 国产精品理论片在线观看 | 国精产品一品二品国精品69xx | 欧美成人免费全部网站 | 国产av一区二区精品久久凹凸 | 久久精品99久久香蕉国产色戒 | 日本精品高清一区二区 | 欧美 亚洲 国产 另类 | 亚洲国产欧美在线成人 | 日韩精品乱码av一区二区 | 成年美女黄网站色大免费全看 | 精品偷自拍另类在线观看 | 亚洲日韩乱码中文无码蜜桃臀网站 | 亚洲 另类 在线 欧美 制服 | 国产人妻精品一区二区三区不卡 | 无码国产色欲xxxxx视频 | 小泽玛莉亚一区二区视频在线 | 在线天堂新版最新版在线8 | 夫妻免费无码v看片 | 国产精品美女久久久网av | 真人与拘做受免费视频 | 成人无码精品一区二区三区 | 中文字幕av日韩精品一区二区 | 爱做久久久久久 | 最近中文2019字幕第二页 | 亚洲综合无码久久精品综合 | 无码成人精品区在线观看 | 日韩精品乱码av一区二区 | 黑人巨大精品欧美一区二区 | 免费无码的av片在线观看 | 国产成人一区二区三区在线观看 | 99riav国产精品视频 | 国产亚洲精品久久久ai换 | 色欲av亚洲一区无码少妇 | 性做久久久久久久久 | 男人扒开女人内裤强吻桶进去 | 成在人线av无码免费 | 亚洲一区av无码专区在线观看 | 特大黑人娇小亚洲女 | 鲁一鲁av2019在线 | 婷婷丁香五月天综合东京热 | 亚洲 激情 小说 另类 欧美 | 狠狠色丁香久久婷婷综合五月 | 国产成人久久精品流白浆 | 国产午夜无码视频在线观看 | 欧美人与物videos另类 | 亚无码乱人伦一区二区 | 
久久久久久a亚洲欧洲av冫 | 狠狠色欧美亚洲狠狠色www | 欧美人与物videos另类 | 欧美熟妇另类久久久久久不卡 | 粉嫩少妇内射浓精videos | 欧美人与动性行为视频 | 国产人妻精品一区二区三区不卡 | 中文亚洲成a人片在线观看 | 一个人看的视频www在线 | 蜜桃臀无码内射一区二区三区 | 日韩人妻少妇一区二区三区 | 国产精品久久久久9999小说 | 夜夜夜高潮夜夜爽夜夜爰爰 | 97精品国产97久久久久久免费 | 精品国产成人一区二区三区 | 欧美性猛交内射兽交老熟妇 | 国产 精品 自在自线 | 大胆欧美熟妇xx | 波多野结衣一区二区三区av免费 | 国产麻豆精品精东影业av网站 | 久久午夜无码鲁丝片秋霞 | 日韩人妻无码中文字幕视频 | 国产国产精品人在线视 | 亚洲 高清 成人 动漫 | 伊人久久婷婷五月综合97色 | 熟妇人妻激情偷爽文 | 久久亚洲a片com人成 | 亚洲天堂2017无码中文 | 麻豆人妻少妇精品无码专区 | 精品aⅴ一区二区三区 | 真人与拘做受免费视频一 | 亚洲成av人影院在线观看 | 国产人妻人伦精品1国产丝袜 | 男女猛烈xx00免费视频试看 | 亚洲乱码日产精品bd | 99国产精品白浆在线观看免费 | 又湿又紧又大又爽a视频国产 | 中文字幕无码日韩欧毛 | 自拍偷自拍亚洲精品10p | 亚洲色成人中文字幕网站 | 夫妻免费无码v看片 | 久久久久人妻一区精品色欧美 | 国产精品久久久久久亚洲毛片 | 国产无遮挡吃胸膜奶免费看 | 丝袜美腿亚洲一区二区 | 国产精品内射视频免费 | 无码人妻精品一区二区三区不卡 | 国产乱码精品一品二品 | 久久久久99精品成人片 | 国产极品美女高潮无套在线观看 | 乌克兰少妇xxxx做受 | 国产av久久久久精东av | 国产亚洲精品精品国产亚洲综合 | 久久久久国色av免费观看性色 | 日本精品人妻无码免费大全 | 国产另类ts人妖一区二区 | 熟妇人妻激情偷爽文 | 久久亚洲日韩精品一区二区三区 | 色综合天天综合狠狠爱 | 成 人 免费观看网站 | 欧美人与禽猛交狂配 | 欧美性生交xxxxx久久久 | 亚洲春色在线视频 | 欧美日韩色另类综合 | 久久久精品成人免费观看 | 久久 国产 尿 小便 嘘嘘 | 精品欧美一区二区三区久久久 | 成人av无码一区二区三区 | 精品国产一区二区三区av 性色 | 国产精品久久精品三级 | 天堂а√在线地址中文在线 | 久久国产精品精品国产色婷婷 | 亚洲aⅴ无码成人网站国产app | 在线精品亚洲一区二区 | 亚洲欧美精品伊人久久 | 一区二区三区高清视频一 | 伊人久久大香线焦av综合影院 | 中文字幕色婷婷在线视频 | 麻豆md0077饥渴少妇 | 老司机亚洲精品影院无码 | 人人爽人人澡人人高潮 | 性欧美熟妇videofreesex | 乱码午夜-极国产极内射 | 男女超爽视频免费播放 | 欧美三级a做爰在线观看 | 日韩精品久久久肉伦网站 | 好爽又高潮了毛片免费下载 | 久久精品国产99精品亚洲 | 岛国片人妻三上悠亚 | av无码久久久久不卡免费网站 | 色一情一乱一伦一区二区三欧美 | 大肉大捧一进一出视频出来呀 | 3d动漫精品啪啪一区二区中 | 欧美野外疯狂做受xxxx高潮 | 欧美 日韩 亚洲 在线 | 欧美老妇与禽交 | 日本丰满护士爆乳xxxx | 狠狠躁日日躁夜夜躁2020 | 99精品久久毛片a片 | 俄罗斯老熟妇色xxxx | 亚洲精品午夜国产va久久成人 | 亚洲中文字幕在线观看 | 麻豆国产丝袜白领秘书在线观看 | 国产婷婷色一区二区三区在线 | 樱花草在线播放免费中文 | 97资源共享在线视频 | 欧美老人巨大xxxx做受 | 图片小说视频一区二区 | 成人综合网亚洲伊人 | 激情内射亚州一区二区三区爱妻 | 九九久久精品国产免费看小说 | 高潮毛片无遮挡高清免费 | 爽爽影院免费观看 | 无遮挡国产高潮视频免费观看 | 精品亚洲成av人在线观看 | 欧美成人午夜精品久久久 | 亚洲人亚洲人成电影网站色 | 精品国产aⅴ无码一区二区 | 久久久久久av无码免费看大片 | 国产尤物精品视频 | 午夜福利电影 | 国产熟妇高潮叫床视频播放 | 日韩精品无码一区二区中文字幕 | 亚洲色无码一区二区三区 | 兔费看少妇性l交大片免费 | 免费看男女做好爽好硬视频 | 欧美亚洲日韩国产人成在线播放 | 九月婷婷人人澡人人添人人爽 | 中国女人内谢69xxxxxa片 | 久久国产精品偷任你爽任你 | 久久精品女人的天堂av | 精品国产一区av天美传媒 | 亚洲另类伦春色综合小说 | av小次郎收藏 | 国产精品永久免费视频 | 
国产亚洲精品久久久久久久 | 亚洲午夜无码久久 | 99国产精品白浆在线观看免费 | 99久久精品日本一区二区免费 | 成人无码精品1区2区3区免费看 | 久久国产精品精品国产色婷婷 | 精品国产福利一区二区 | 一本无码人妻在中文字幕免费 | 精品国产一区av天美传媒 | 国产免费无码一区二区视频 | 亚洲第一无码av无码专区 | 婷婷丁香五月天综合东京热 | 色欲综合久久中文字幕网 | 亚洲午夜无码久久 | 日韩精品a片一区二区三区妖精 | 日本一卡2卡3卡四卡精品网站 | 国内精品人妻无码久久久影院 | 午夜精品一区二区三区在线观看 | 精品国偷自产在线视频 | 国产人妻久久精品二区三区老狼 | 国产97人人超碰caoprom | 青青草原综合久久大伊人精品 | 欧美自拍另类欧美综合图片区 | 永久黄网站色视频免费直播 | 嫩b人妻精品一区二区三区 | 国产人妖乱国产精品人妖 | 奇米影视888欧美在线观看 | 欧美大屁股xxxxhd黑色 | 国产情侣作爱视频免费观看 | 窝窝午夜理论片影院 | 最近的中文字幕在线看视频 | 免费男性肉肉影院 | 亚洲国产av美女网站 | 99国产精品白浆在线观看免费 | 中文字幕日韩精品一区二区三区 | 波多野42部无码喷潮在线 | 欧洲精品码一区二区三区免费看 | 成人片黄网站色大片免费观看 | 内射白嫩少妇超碰 | 97se亚洲精品一区 | 四虎国产精品一区二区 | 国产无遮挡吃胸膜奶免费看 | 成人精品视频一区二区 | 正在播放老肥熟妇露脸 | 国产超级va在线观看视频 | 乱人伦人妻中文字幕无码久久网 | 欧美老妇交乱视频在线观看 | 亚洲色欲久久久综合网东京热 | 自拍偷自拍亚洲精品10p | 国产精品久久久久7777 | 丰满诱人的人妻3 | 国产97人人超碰caoprom | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 偷窥村妇洗澡毛毛多 | 人妻无码久久精品人妻 | 熟妇人妻激情偷爽文 | 波多野结衣高清一区二区三区 | 一本大道久久东京热无码av | 日日碰狠狠丁香久燥 | 国产内射老熟女aaaa | 日韩少妇内射免费播放 | 思思久久99热只有频精品66 | 色五月丁香五月综合五月 | 1000部啪啪未满十八勿入下载 | 动漫av一区二区在线观看 | 天堂无码人妻精品一区二区三区 | 亚洲综合色区中文字幕 | 国产精品第一区揄拍无码 | 欧美刺激性大交 | 高清不卡一区二区三区 | 国内精品久久毛片一区二区 | 国产 精品 自在自线 | 国产精品美女久久久网av | 国产人妻人伦精品1国产丝袜 | 久久久婷婷五月亚洲97号色 | 牲欲强的熟妇农村老妇女视频 | 免费观看激色视频网站 | 亚洲欧美色中文字幕在线 | 久久无码专区国产精品s | 国产极品视觉盛宴 | 亚洲色偷偷偷综合网 | 国产亚洲tv在线观看 | 亚洲欧洲中文日韩av乱码 | 国产农村妇女高潮大叫 | 国产成人一区二区三区在线观看 | 婷婷六月久久综合丁香 | 欧美 亚洲 国产 另类 | 日本一卡2卡3卡4卡无卡免费网站 国产一区二区三区影院 | 国产精品成人av在线观看 | 国产成人无码a区在线观看视频app | 未满成年国产在线观看 | 国产sm调教视频在线观看 | 欧美性猛交xxxx富婆 | 精品无码av一区二区三区 | 两性色午夜视频免费播放 | 国产麻豆精品一区二区三区v视界 | 亚洲区小说区激情区图片区 | 国产在热线精品视频 | 日日干夜夜干 | 日韩精品无码一区二区中文字幕 | 伊人久久大香线蕉av一区二区 | av无码不卡在线观看免费 | 国产人妖乱国产精品人妖 | 国产69精品久久久久app下载 | 免费播放一区二区三区 | 扒开双腿疯狂进出爽爽爽视频 | 天下第一社区视频www日本 | 久久精品国产精品国产精品污 | 97人妻精品一区二区三区 | 精品国产一区二区三区av 性色 | 日本护士xxxxhd少妇 | 熟妇激情内射com | 无码av最新清无码专区吞精 | 色综合天天综合狠狠爱 | 无码人妻丰满熟妇区毛片18 | 色综合久久久无码中文字幕 | 2020久久超碰国产精品最新 | 99久久精品国产一区二区蜜芽 | 纯爱无遮挡h肉动漫在线播放 | 色婷婷久久一区二区三区麻豆 | 国产精品内射视频免费 | 综合激情五月综合激情五月激情1 | 帮老师解开蕾丝奶罩吸乳网站 | 欧美性猛交xxxx富婆 | 国产成人无码一二三区视频 | 久久久久久亚洲精品a片成人 | 97久久精品无码一区二区 | 亚洲精品成a人在线观看 | 色偷偷人人澡人人爽人人模 | 国产美女极度色诱视频www | 色情久久久av熟女人妻网站 | 牲欲强的熟妇农村老妇女 | 
四虎国产精品一区二区 | 欧美xxxx黑人又粗又长 | 国产农村妇女高潮大叫 | 人妻尝试又大又粗久久 | 88国产精品欧美一区二区三区 | 俄罗斯老熟妇色xxxx | 免费无码肉片在线观看 | 国产精品国产三级国产专播 | 国产美女精品一区二区三区 | 国产凸凹视频一区二区 | 人妻天天爽夜夜爽一区二区 | 特大黑人娇小亚洲女 | 国产成人一区二区三区别 | 狠狠色噜噜狠狠狠狠7777米奇 | 国产精品毛片一区二区 | 在线a亚洲视频播放在线观看 | 国产艳妇av在线观看果冻传媒 | 久久综合网欧美色妞网 | 亚洲国产精品一区二区第一页 | 东京热无码av男人的天堂 | 红桃av一区二区三区在线无码av | 日韩视频 中文字幕 视频一区 | 色 综合 欧美 亚洲 国产 | 亚洲日本va中文字幕 | 内射老妇bbwx0c0ck | 日日碰狠狠躁久久躁蜜桃 | 7777奇米四色成人眼影 | 国产成人精品必看 | 久久久久久a亚洲欧洲av冫 | 国产免费久久久久久无码 | 欧美激情内射喷水高潮 | 激情内射亚州一区二区三区爱妻 | 夫妻免费无码v看片 | 亚洲色欲色欲欲www在线 | 国产亚洲精品久久久久久 | 久久99精品久久久久久动态图 | 亚洲精品中文字幕久久久久 | 一本久久伊人热热精品中文字幕 | 女人被男人躁得好爽免费视频 | 国产va免费精品观看 | 精品久久久无码中文字幕 | 日韩人妻无码中文字幕视频 | 男人扒开女人内裤强吻桶进去 | 精品一区二区不卡无码av | 波多野42部无码喷潮在线 | 在教室伦流澡到高潮hnp视频 | 亚洲色成人中文字幕网站 | 人妻少妇精品无码专区二区 | 少妇人妻大乳在线视频 | 国产成人综合在线女婷五月99播放 | av人摸人人人澡人人超碰下载 | 欧美 丝袜 自拍 制服 另类 | 帮老师解开蕾丝奶罩吸乳网站 | 国产精品无码一区二区桃花视频 | 六十路熟妇乱子伦 | 国产无套粉嫩白浆在线 | 亚洲欧美日韩综合久久久 | 亚洲成av人综合在线观看 | 在线观看免费人成视频 | 啦啦啦www在线观看免费视频 | 欧美精品一区二区精品久久 | 久久亚洲中文字幕精品一区 | 久久综合激激的五月天 | 狠狠色丁香久久婷婷综合五月 | 国产性生交xxxxx无码 | 老头边吃奶边弄进去呻吟 | 性欧美疯狂xxxxbbbb | 亚洲国产成人av在线观看 | 成 人影片 免费观看 | 国产精品99爱免费视频 | 俺去俺来也www色官网 | 无码一区二区三区在线 | 综合网日日天干夜夜久久 | 精品一区二区三区无码免费视频 | 97色伦图片97综合影院 | 亚洲精品鲁一鲁一区二区三区 | 色婷婷av一区二区三区之红樱桃 | 国产乱人伦偷精品视频 | 无码纯肉视频在线观看 | 亚洲熟妇色xxxxx亚洲 | 无码纯肉视频在线观看 | 黑人粗大猛烈进出高潮视频 | 97久久超碰中文字幕 | 久久五月精品中文字幕 | 日日天干夜夜狠狠爱 | 亚洲欧美综合区丁香五月小说 | 无人区乱码一区二区三区 | 动漫av一区二区在线观看 | 亚洲国产精品久久久天堂 | 欧美国产日产一区二区 | 国产疯狂伦交大片 | 一本大道伊人av久久综合 | 色欲久久久天天天综合网精品 | 亚洲人成人无码网www国产 | 无码av中文字幕免费放 | 亚洲爆乳精品无码一区二区三区 | 骚片av蜜桃精品一区 | 午夜无码人妻av大片色欲 | 国产成人综合色在线观看网站 | 无码人妻丰满熟妇区五十路百度 | 偷窥村妇洗澡毛毛多 | 中文字幕久久久久人妻 | 性做久久久久久久免费看 | 欧美人与物videos另类 | 欧美zoozzooz性欧美 | 日本精品久久久久中文字幕 | 国产成人精品久久亚洲高清不卡 | 久久国产36精品色熟妇 | 中文字幕无码人妻少妇免费 | 亚洲人成影院在线无码按摩店 | 国产欧美熟妇另类久久久 | 国产又爽又猛又粗的视频a片 | 国产精品美女久久久久av爽李琼 | 精品偷拍一区二区三区在线看 | 日韩 欧美 动漫 国产 制服 | 大乳丰满人妻中文字幕日本 | 亚洲 另类 在线 欧美 制服 | 狠狠色欧美亚洲狠狠色www | 国产欧美精品一区二区三区 | 日韩人妻无码一区二区三区久久99 | 免费观看的无遮挡av | 一个人看的www免费视频在线观看 | 伊在人天堂亚洲香蕉精品区 | 99在线 | 亚洲 | 伊在人天堂亚洲香蕉精品区 | 亚洲欧美国产精品专区久久 | 成 人 网 站国产免费观看 | 日本xxxx色视频在线观看免费 | 又大又黄又粗又爽的免费视频 | 欧美成人免费全部网站 | 国产高清不卡无码视频 | 日韩人妻无码中文字幕视频 | 东京一本一道一二三区 | 国产成人精品久久亚洲高清不卡 | 
久久久www成人免费毛片 | 亚洲国产精品无码一区二区三区 | 牲交欧美兽交欧美 | 超碰97人人做人人爱少妇 | 四虎4hu永久免费 | 宝宝好涨水快流出来免费视频 | 国产精品99爱免费视频 | 荫蒂添的好舒服视频囗交 | 久久精品国产日本波多野结衣 | 久久99久久99精品中文字幕 | 国产偷自视频区视频 | 377p欧洲日本亚洲大胆 | 色婷婷欧美在线播放内射 | 麻花豆传媒剧国产免费mv在线 | 免费人成在线观看网站 | 国产精品资源一区二区 | 无码播放一区二区三区 | 亚洲无人区午夜福利码高清完整版 | 中文精品无码中文字幕无码专区 | 亚洲欧美日韩国产精品一区二区 | 亚洲日本在线电影 | 无码播放一区二区三区 | 成人三级无码视频在线观看 | 人妻少妇精品久久 | www国产亚洲精品久久久日本 | aa片在线观看视频在线播放 | 久久zyz资源站无码中文动漫 | 97精品人妻一区二区三区香蕉 | 欧美三级不卡在线观看 | 一本一道久久综合久久 | 中文字幕无码日韩专区 | 国产精品igao视频网 | 久久午夜无码鲁丝片午夜精品 | 欧美乱妇无乱码大黄a片 | 白嫩日本少妇做爰 | 精品久久久无码人妻字幂 | 欧美性生交活xxxxxdddd | 又粗又大又硬毛片免费看 | 黑人巨大精品欧美黑寡妇 | 国产一区二区三区影院 | 国产va免费精品观看 | 国产成人综合在线女婷五月99播放 | 精品亚洲韩国一区二区三区 | 精品国产一区二区三区av 性色 | 久激情内射婷内射蜜桃人妖 | 国内揄拍国内精品少妇国语 | 国产69精品久久久久app下载 | 亚洲精品综合一区二区三区在线 | 欧美人与禽zoz0性伦交 | 中文字幕无码热在线视频 | 人妻体内射精一区二区三四 | 漂亮人妻洗澡被公强 日日躁 | 麻豆果冻传媒2021精品传媒一区下载 | 久精品国产欧美亚洲色aⅴ大片 | 欧洲熟妇精品视频 | 国产成人久久精品流白浆 | 久久婷婷五月综合色国产香蕉 | 伊人色综合久久天天小片 | 乌克兰少妇xxxx做受 | 国产午夜无码精品免费看 | 国产精品18久久久久久麻辣 | 中文字幕无码人妻少妇免费 | 人妻少妇精品无码专区动漫 | 无码乱肉视频免费大全合集 | 99久久久无码国产精品免费 | 亚洲成a人片在线观看无码3d | 成人无码影片精品久久久 | 久久天天躁狠狠躁夜夜免费观看 | 日韩欧美中文字幕在线三区 | av在线亚洲欧洲日产一区二区 | 偷窥村妇洗澡毛毛多 | 人人妻人人澡人人爽欧美一区 | 99国产欧美久久久精品 | 麻花豆传媒剧国产免费mv在线 | 蜜桃无码一区二区三区 | 免费无码肉片在线观看 | 国产精品亚洲а∨无码播放麻豆 | 又大又硬又黄的免费视频 | 亚洲中文无码av永久不收费 | 国产一区二区三区影院 | 国产福利视频一区二区 | 99麻豆久久久国产精品免费 | 性欧美大战久久久久久久 | 午夜精品一区二区三区的区别 | 扒开双腿吃奶呻吟做受视频 | 无码国模国产在线观看 | 天天摸天天碰天天添 | 国产精品亚洲а∨无码播放麻豆 | 一本色道久久综合狠狠躁 | 成熟妇人a片免费看网站 | 人人超人人超碰超国产 | 亚洲gv猛男gv无码男同 | 中文字幕无线码免费人妻 | 亚洲高清偷拍一区二区三区 | 无码国产乱人伦偷精品视频 | 国产综合色产在线精品 | 大地资源网第二页免费观看 | 久久人妻内射无码一区三区 | 日本在线高清不卡免费播放 | 正在播放东北夫妻内射 | 国产成人无码av片在线观看不卡 | 狠狠色噜噜狠狠狠7777奇米 | 麻豆国产97在线 | 欧洲 | 强辱丰满人妻hd中文字幕 | 国产亚洲精品久久久ai换 | 免费网站看v片在线18禁无码 | 久久久久久国产精品无码下载 | 国内精品久久久久久中文字幕 | 国产乱人伦app精品久久 国产在线无码精品电影网 国产国产精品人在线视 | 成 人 网 站国产免费观看 | 成人aaa片一区国产精品 | 国产肉丝袜在线观看 | 成人无码视频免费播放 | 欧美激情综合亚洲一二区 | 97精品国产97久久久久久免费 | 欧美人妻一区二区三区 | 久久天天躁狠狠躁夜夜免费观看 | 日本丰满熟妇videos | 国产办公室秘书无码精品99 | 无码精品人妻一区二区三区av | 欧美成人高清在线播放 | 色偷偷人人澡人人爽人人模 | 人妻与老人中文字幕 | 国产香蕉尹人综合在线观看 | 成人无码视频免费播放 | 精品久久久久久亚洲精品 | 中文亚洲成a人片在线观看 | 国产无遮挡又黄又爽免费视频 | 国产69精品久久久久app下载 | 国产性生大片免费观看性 | 少妇激情av一区二区 | 色综合久久88色综合天天 | 
日韩av无码一区二区三区不卡 | 久久久精品成人免费观看 | 欧美喷潮久久久xxxxx | 欧美一区二区三区视频在线观看 | 1000部啪啪未满十八勿入下载 | 亚洲s色大片在线观看 | 亚洲国产精品久久久天堂 | 一二三四在线观看免费视频 | 噜噜噜亚洲色成人网站 | 67194成是人免费无码 | 亚洲va欧美va天堂v国产综合 | 学生妹亚洲一区二区 | 久久久久成人精品免费播放动漫 | 玩弄人妻少妇500系列视频 | 亚洲中文字幕无码一久久区 | 樱花草在线播放免费中文 | 中文字幕av日韩精品一区二区 | 又大又硬又爽免费视频 | 未满成年国产在线观看 | 老子影院午夜精品无码 | 性生交大片免费看女人按摩摩 | 无码国产激情在线观看 | 日欧一片内射va在线影院 | 久久婷婷五月综合色国产香蕉 | 乱中年女人伦av三区 | 精品无码一区二区三区的天堂 | 波多野结衣av在线观看 | 香港三级日本三级妇三级 | 亚洲午夜久久久影院 | 亚洲国产高清在线观看视频 | 黑人玩弄人妻中文在线 | 亚洲一区二区三区香蕉 | 天下第一社区视频www日本 | 牲欲强的熟妇农村老妇女 | 少妇无码吹潮 | 人人妻人人澡人人爽欧美一区 | 精品成在人线av无码免费看 | 成人免费视频一区二区 | 精品人妻人人做人人爽 | 精品乱码久久久久久久 | 波多野结衣高清一区二区三区 | 国产超碰人人爽人人做人人添 | 欧美zoozzooz性欧美 | 无码人妻丰满熟妇区毛片18 | 免费人成在线视频无码 | 18禁黄网站男男禁片免费观看 | 日韩在线不卡免费视频一区 | 午夜福利一区二区三区在线观看 | 亚洲 另类 在线 欧美 制服 | 久久99精品国产.久久久久 | 帮老师解开蕾丝奶罩吸乳网站 | 精品偷自拍另类在线观看 | www成人国产高清内射 | 学生妹亚洲一区二区 | 成人动漫在线观看 | 欧美三级a做爰在线观看 | 国产乱人偷精品人妻a片 | 又大又硬又黄的免费视频 | 亚洲精品午夜无码电影网 | 少妇无码av无码专区在线观看 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 麻豆国产丝袜白领秘书在线观看 | 久久精品国产日本波多野结衣 | 国模大胆一区二区三区 | 中文久久乱码一区二区 | 露脸叫床粗话东北少妇 | 国产美女极度色诱视频www | 六十路熟妇乱子伦 | 午夜精品一区二区三区在线观看 | 99riav国产精品视频 | 亚洲熟妇色xxxxx欧美老妇y | 午夜福利不卡在线视频 | 桃花色综合影院 | 国产成人无码区免费内射一片色欲 | 久久久久99精品成人片 | 久久www免费人成人片 | 欧美老人巨大xxxx做受 | 国产精品亚洲一区二区三区喷水 | 西西人体www44rt大胆高清 | 西西人体www44rt大胆高清 | 国产热a欧美热a在线视频 | 伊人久久大香线蕉亚洲 | 国产莉萝无码av在线播放 | 欧洲vodafone精品性 | 国产舌乚八伦偷品w中 | 激情爆乳一区二区三区 | 亚洲欧美日韩综合久久久 | 男女性色大片免费网站 | 色婷婷综合中文久久一本 | 成人欧美一区二区三区 | 亚洲国产综合无码一区 | 国产精品99久久精品爆乳 | 亚洲一区av无码专区在线观看 | 日日摸日日碰夜夜爽av | 国产成人精品久久亚洲高清不卡 | 一个人看的视频www在线 | √天堂中文官网8在线 | 最新版天堂资源中文官网 | 中文字幕无码免费久久99 | 九月婷婷人人澡人人添人人爽 | 欧美亚洲国产一区二区三区 | 亚拍精品一区二区三区探花 | 日本一区二区三区免费播放 | 人妻aⅴ无码一区二区三区 | 亚洲一区二区三区四区 | 亚洲国产精品久久久天堂 | 免费无码午夜福利片69 | 婷婷综合久久中文字幕蜜桃三电影 | 成人无码视频免费播放 | 久久亚洲中文字幕精品一区 | 天天综合网天天综合色 | 亚洲乱码中文字幕在线 | 亚洲综合无码久久精品综合 | 久久婷婷五月综合色国产香蕉 | 少妇无套内谢久久久久 | 天下第一社区视频www日本 | 性生交大片免费看女人按摩摩 | 性开放的女人aaa片 | 国内精品久久久久久中文字幕 | 欧美日韩一区二区免费视频 | 中文字幕日产无线码一区 | 亚洲午夜久久久影院 | 十八禁真人啪啪免费网站 | 亚洲精品成人av在线 | 日韩精品无码免费一区二区三区 | 日本欧美一区二区三区乱码 | 丰满少妇人妻久久久久久 | 中文字幕精品av一区二区五区 | 婷婷六月久久综合丁香 | 久久久精品456亚洲影院 | 真人与拘做受免费视频 | 欧美国产日韩久久mv | 国内老熟妇对白xxxxhd | 无码帝国www无码专区色综合 | 蜜桃av抽搐高潮一区二区 
| a国产一区二区免费入口 | 国产精品永久免费视频 | 国产在线aaa片一区二区99 | a在线亚洲男人的天堂 | 日产国产精品亚洲系列 | 久久97精品久久久久久久不卡 | 亚洲成色在线综合网站 | 九月婷婷人人澡人人添人人爽 | 兔费看少妇性l交大片免费 | 丰满肥臀大屁股熟妇激情视频 | 色综合久久久无码网中文 | 国产人妻大战黑人第1集 | 国产高清av在线播放 | 四虎永久在线精品免费网址 | 图片区 小说区 区 亚洲五月 | 亚洲一区二区三区播放 | 国产艳妇av在线观看果冻传媒 | 亚洲精品一区三区三区在线观看 | 学生妹亚洲一区二区 | 7777奇米四色成人眼影 | 久久久久久av无码免费看大片 | 欧美精品在线观看 | 九九久久精品国产免费看小说 | 午夜丰满少妇性开放视频 | 欧洲欧美人成视频在线 | 久久国产精品二国产精品 | 综合激情五月综合激情五月激情1 | 十八禁真人啪啪免费网站 | 精品国产aⅴ无码一区二区 | 久激情内射婷内射蜜桃人妖 | 亚洲小说春色综合另类 | 日本一区二区三区免费高清 | 久久久精品欧美一区二区免费 | 一二三四在线观看免费视频 | 男女超爽视频免费播放 | 欧美日韩久久久精品a片 | 欧美老熟妇乱xxxxx | 中文字幕中文有码在线 | 欧美人与善在线com | 成熟女人特级毛片www免费 | 精品亚洲韩国一区二区三区 | 55夜色66夜色国产精品视频 | 免费人成网站视频在线观看 | 亚洲一区二区三区无码久久 | 欧美日本免费一区二区三区 | 国产真实乱对白精彩久久 | 蜜桃av抽搐高潮一区二区 | 亚洲精品国偷拍自产在线观看蜜桃 | 日本一区二区更新不卡 | 少妇高潮一区二区三区99 | 欧美丰满熟妇xxxx | 亚洲精品午夜国产va久久成人 | 国产精品美女久久久网av | 日韩av无码一区二区三区不卡 | 日日鲁鲁鲁夜夜爽爽狠狠 | 无码人妻av免费一区二区三区 | 一本加勒比波多野结衣 | 中文字幕av无码一区二区三区电影 | 中文字幕av伊人av无码av | 亚洲色欲色欲天天天www | 欧美丰满熟妇xxxx性ppx人交 | 精品无人区无码乱码毛片国产 | 精品亚洲成av人在线观看 | 性色欲情网站iwww九文堂 | 高清国产亚洲精品自在久久 | 中文字幕无码视频专区 | 久久久久久久人妻无码中文字幕爆 | 色婷婷av一区二区三区之红樱桃 | 亚洲熟妇色xxxxx亚洲 | www一区二区www免费 | 国产成人一区二区三区在线观看 | 狠狠躁日日躁夜夜躁2020 | 国产精品毛多多水多 | аⅴ资源天堂资源库在线 | 丰满肥臀大屁股熟妇激情视频 | 日本一卡二卡不卡视频查询 | 动漫av一区二区在线观看 | 免费国产成人高清在线观看网站 | 国产精品内射视频免费 | 中文字幕无线码免费人妻 | 亚洲成av人片在线观看无码不卡 | 丰满妇女强制高潮18xxxx | 国内精品久久久久久中文字幕 | 狠狠噜狠狠狠狠丁香五月 | 欧美精品在线观看 | 亚洲日韩av片在线观看 | 嫩b人妻精品一区二区三区 | 亚洲国产av精品一区二区蜜芽 | 欧美日韩综合一区二区三区 | 欧美阿v高清资源不卡在线播放 | 九月婷婷人人澡人人添人人爽 | 欧美 丝袜 自拍 制服 另类 | 亚洲精品一区三区三区在线观看 | 国产成人久久精品流白浆 | 乱中年女人伦av三区 | 好爽又高潮了毛片免费下载 | 精品国产一区二区三区四区 | 中文字幕av伊人av无码av | 色婷婷欧美在线播放内射 | 无码纯肉视频在线观看 | 国产乱码精品一品二品 | 性做久久久久久久免费看 | 午夜熟女插插xx免费视频 | 欧美性猛交xxxx富婆 | 亚洲男人av香蕉爽爽爽爽 | 精品国产成人一区二区三区 | 国色天香社区在线视频 | 国产黄在线观看免费观看不卡 | 少妇被粗大的猛进出69影院 | 国产明星裸体无码xxxx视频 | 亚洲色偷偷偷综合网 | 真人与拘做受免费视频 | 亚洲综合精品香蕉久久网 | 乱人伦人妻中文字幕无码久久网 | 啦啦啦www在线观看免费视频 | 国産精品久久久久久久 | 综合激情五月综合激情五月激情1 | 亚洲成av人片在线观看无码不卡 | 九月婷婷人人澡人人添人人爽 | 永久免费精品精品永久-夜色 | 久青草影院在线观看国产 | 亚洲中文字幕在线无码一区二区 | 黄网在线观看免费网站 | 欧美丰满少妇xxxx性 | 大屁股大乳丰满人妻 | 亚洲爆乳精品无码一区二区三区 | 麻豆蜜桃av蜜臀av色欲av | 亚洲中文字幕无码中字 | 亚洲综合另类小说色区 | 久久这里只有精品视频9 | aa片在线观看视频在线播放 | 大色综合色综合网站 | 少妇性l交大片欧洲热妇乱xxx | 
久久99久久99精品中文字幕 | 人妻有码中文字幕在线 | 久久精品人人做人人综合 | 日韩在线不卡免费视频一区 | 在线亚洲高清揄拍自拍一品区 | 狠狠色丁香久久婷婷综合五月 | 欧美日本精品一区二区三区 | 高清不卡一区二区三区 | 麻豆md0077饥渴少妇 | 丰满人妻翻云覆雨呻吟视频 | 大屁股大乳丰满人妻 | 成年女人永久免费看片 | 亚洲欧美国产精品专区久久 | 久久人人爽人人人人片 | 国产精品第一区揄拍无码 | 亚洲精品一区国产 | 国产精品无码成人午夜电影 | 久久久久se色偷偷亚洲精品av | 国产人妻久久精品二区三区老狼 | 一本久道高清无码视频 | 强辱丰满人妻hd中文字幕 | 少妇人妻av毛片在线看 | 狠狠亚洲超碰狼人久久 | 国产成人综合色在线观看网站 | 国产色视频一区二区三区 | 国产乱人伦app精品久久 国产在线无码精品电影网 国产国产精品人在线视 | 欧美精品无码一区二区三区 | 国产精品久久国产精品99 | 亚洲精品国偷拍自产在线麻豆 | 午夜肉伦伦影院 | 亚洲成av人片天堂网无码】 | 日本精品久久久久中文字幕 | 在线欧美精品一区二区三区 | 扒开双腿疯狂进出爽爽爽视频 | 乱人伦人妻中文字幕无码久久网 | 国产av一区二区三区最新精品 | 国产区女主播在线观看 | 国产精品久久久久9999小说 | 国产精品.xx视频.xxtv | 少妇的肉体aa片免费 | 色 综合 欧美 亚洲 国产 | 亚洲精品一区二区三区在线 | 狂野欧美激情性xxxx | 国产精品美女久久久网av | 国产精品99爱免费视频 | 熟女少妇在线视频播放 | 亚洲国产欧美在线成人 | 成人无码影片精品久久久 | 中文字幕乱码中文乱码51精品 | 男人的天堂av网站 | 色综合久久久久综合一本到桃花网 | 久久伊人色av天堂九九小黄鸭 | 国产亚洲精品久久久久久 | 久久久久久国产精品无码下载 | 亚洲精品一区二区三区婷婷月 | 77777熟女视频在线观看 а天堂中文在线官网 | 亚洲成a人片在线观看无码 | 东京热一精品无码av | 精品国产成人一区二区三区 | 成人精品一区二区三区中文字幕 | 蜜臀aⅴ国产精品久久久国产老师 | а天堂中文在线官网 | 无码人中文字幕 | 亚洲日韩一区二区 | 欧美性生交xxxxx久久久 | 国产极品美女高潮无套在线观看 | 亚洲呦女专区 | aⅴ在线视频男人的天堂 | 亚洲午夜无码久久 | 中文字幕无码日韩专区 | 久久亚洲日韩精品一区二区三区 | 欧美人与善在线com | 国产婷婷色一区二区三区在线 | 国产亚洲视频中文字幕97精品 | 久久99精品久久久久婷婷 | 久久精品国产日本波多野结衣 | 亚洲综合无码一区二区三区 | 狠狠色丁香久久婷婷综合五月 | 99久久精品无码一区二区毛片 | 99久久久无码国产精品免费 | 国产高清av在线播放 | 久久久精品欧美一区二区免费 | 欧美日韩视频无码一区二区三 | 一本久久伊人热热精品中文字幕 | 亚洲天堂2017无码中文 | 美女扒开屁股让男人桶 | 精品aⅴ一区二区三区 | 亚洲s色大片在线观看 | 日产精品99久久久久久 | 午夜精品一区二区三区的区别 | 久久精品女人的天堂av | 性欧美熟妇videofreesex | 熟妇人妻无码xxx视频 | 久久久久久亚洲精品a片成人 | 天下第一社区视频www日本 | 黑人粗大猛烈进出高潮视频 | 高清国产亚洲精品自在久久 | 在线成人www免费观看视频 | 日韩视频 中文字幕 视频一区 | 国产日产欧产精品精品app | 国内精品一区二区三区不卡 | 国产成人久久精品流白浆 | 亚洲国产成人av在线观看 | 中文字幕日产无线码一区 | 内射白嫩少妇超碰 | 亚洲精品鲁一鲁一区二区三区 | 成人无码视频在线观看网站 | 水蜜桃av无码 | 好屌草这里只有精品 | 国产免费无码一区二区视频 | 国产艳妇av在线观看果冻传媒 | 亚洲精品一区国产 | 亚洲人成无码网www | 中文字幕无码免费久久99 | 欧美丰满少妇xxxx性 | 99精品无人区乱码1区2区3区 | 亚洲狠狠婷婷综合久久 | 一本一道久久综合久久 | 国产精品美女久久久网av | 永久免费观看美女裸体的网站 | 天堂亚洲2017在线观看 | 99久久人妻精品免费二区 | www国产亚洲精品久久网站 | 最近的中文字幕在线看视频 | 丰满少妇弄高潮了www | 人人妻人人澡人人爽精品欧美 | 在线亚洲高清揄拍自拍一品区 | 麻豆精品国产精华精华液好用吗 | 国产亚洲欧美在线专区 | 玩弄中年熟妇正在播放 | 欧美精品无码一区二区三区 | 高清国产亚洲精品自在久久 | 日本va欧美va欧美va精品 | 
亚洲一区二区三区偷拍女厕 | 久久精品国产一区二区三区 | 色欲综合久久中文字幕网 | 老熟女重囗味hdxx69 | 免费人成网站视频在线观看 | 无码一区二区三区在线 | 国产成人精品久久亚洲高清不卡 | 国产精品亚洲а∨无码播放麻豆 | 国内少妇偷人精品视频免费 | 国产莉萝无码av在线播放 | 亚洲国产午夜精品理论片 | 麻豆成人精品国产免费 | 99riav国产精品视频 | 成人一在线视频日韩国产 | 鲁鲁鲁爽爽爽在线视频观看 | 伊人久久大香线蕉午夜 | 丝袜 中出 制服 人妻 美腿 | 少妇被黑人到高潮喷出白浆 | 在线精品亚洲一区二区 | 51国偷自产一区二区三区 | 青青青爽视频在线观看 | 色窝窝无码一区二区三区色欲 | 国产xxx69麻豆国语对白 | 欧美激情内射喷水高潮 | 国产成人无码a区在线观看视频app | 成人三级无码视频在线观看 | 任你躁在线精品免费 | 欧美精品在线观看 | 99久久99久久免费精品蜜桃 | 国产成人无码午夜视频在线观看 | 99精品无人区乱码1区2区3区 | 对白脏话肉麻粗话av | 国内精品久久毛片一区二区 | 激情爆乳一区二区三区 | 国产精品无码成人午夜电影 | 亚洲国产精品成人久久蜜臀 | 动漫av网站免费观看 | 丰满人妻一区二区三区免费视频 | 一本久道久久综合狠狠爱 | 久久久久久久久888 | 99久久99久久免费精品蜜桃 | 国产精品久久久久9999小说 | 领导边摸边吃奶边做爽在线观看 | 麻豆果冻传媒2021精品传媒一区下载 | 亚洲 激情 小说 另类 欧美 | 蜜臀aⅴ国产精品久久久国产老师 | 丰满人妻翻云覆雨呻吟视频 | 日韩 欧美 动漫 国产 制服 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 娇妻被黑人粗大高潮白浆 | 水蜜桃色314在线观看 | а天堂中文在线官网 | 美女扒开屁股让男人桶 | 又大又黄又粗又爽的免费视频 | 最新国产麻豆aⅴ精品无码 | 国产莉萝无码av在线播放 | 狠狠色丁香久久婷婷综合五月 | 久久99精品国产麻豆 | 国产成人无码av在线影院 | 无码av最新清无码专区吞精 | 欧美丰满少妇xxxx性 | 亚洲精品国产精品乱码不卡 | 欧美真人作爱免费视频 | 日韩精品一区二区av在线 | 欧美丰满少妇xxxx性 | 欧美激情综合亚洲一二区 | 国产在线一区二区三区四区五区 | 色综合天天综合狠狠爱 | 中文字幕av无码一区二区三区电影 | 中文字幕人成乱码熟女app | 图片小说视频一区二区 | 日韩欧美中文字幕在线三区 | 美女黄网站人色视频免费国产 | 丰满人妻翻云覆雨呻吟视频 | 亚洲区欧美区综合区自拍区 | 丰满少妇弄高潮了www | 2019nv天堂香蕉在线观看 | 国产精品第一区揄拍无码 | 在线天堂新版最新版在线8 | 国产精品亚洲专区无码不卡 | av在线亚洲欧洲日产一区二区 | 性生交片免费无码看人 | 久久精品女人天堂av免费观看 | 色老头在线一区二区三区 | 玩弄人妻少妇500系列视频 | 又大又紧又粉嫩18p少妇 | 国产色xx群视频射精 | 天天拍夜夜添久久精品 | 亚洲成a人片在线观看无码 | 日本一区二区三区免费高清 | 亚洲色成人中文字幕网站 | 四虎国产精品免费久久 | 精品无码av一区二区三区 | 人人妻人人澡人人爽人人精品浪潮 | 国产特级毛片aaaaaa高潮流水 | 欧美日本免费一区二区三区 | 国内老熟妇对白xxxxhd | 亚洲天堂2017无码 | 中文字幕无线码免费人妻 | 精品夜夜澡人妻无码av蜜桃 | 国产精品自产拍在线观看 | aⅴ亚洲 日韩 色 图网站 播放 | 久久久久久亚洲精品a片成人 | 国产成人综合色在线观看网站 | 波多野结衣高清一区二区三区 | 久久久国产精品无码免费专区 | 久久无码专区国产精品s | 色一情一乱一伦一区二区三欧美 | 亚洲国产精品毛片av不卡在线 | 国产色在线 | 国产 | 免费网站看v片在线18禁无码 | 老太婆性杂交欧美肥老太 | 国产精品久久久久久无码 | 又大又紧又粉嫩18p少妇 | 日韩精品成人一区二区三区 | 激情五月综合色婷婷一区二区 | 国产精品国产三级国产专播 | 中文字幕av伊人av无码av | 又粗又大又硬毛片免费看 | 性生交大片免费看l | 日韩精品久久久肉伦网站 | 欧美日韩在线亚洲综合国产人 | 2020久久香蕉国产线看观看 | 97se亚洲精品一区 | 国产真人无遮挡作爱免费视频 | 国产网红无码精品视频 | 无码纯肉视频在线观看 | 亚洲中文字幕无码中字 | 日韩成人一区二区三区在线观看 | 久久精品人妻少妇一区二区三区 | 大屁股大乳丰满人妻 | 
粉嫩少妇内射浓精videos | 精品人人妻人人澡人人爽人人 | 中文字幕无码av激情不卡 | 狠狠亚洲超碰狼人久久 | 国产精品久久福利网站 | 又粗又大又硬又长又爽 | 欧美丰满熟妇xxxx | 图片小说视频一区二区 | 四虎国产精品免费久久 | 精品久久久无码人妻字幂 | 性色av无码免费一区二区三区 | 性欧美熟妇videofreesex | 精品水蜜桃久久久久久久 | 少妇性l交大片欧洲热妇乱xxx | 水蜜桃色314在线观看 | 人人妻人人澡人人爽欧美一区 | 福利一区二区三区视频在线观看 | 国产xxx69麻豆国语对白 | 美女毛片一区二区三区四区 | 精品久久久无码人妻字幂 | 久久精品人人做人人综合 | 少妇厨房愉情理9仑片视频 | 亚洲精品久久久久久一区二区 | 少妇高潮一区二区三区99 | 亚洲国产精品一区二区美利坚 | 激情内射日本一区二区三区 | 狠狠综合久久久久综合网 | 久久精品国产一区二区三区肥胖 | 人妻熟女一区 | 无码精品国产va在线观看dvd | 亚洲の无码国产の无码步美 | 亚洲日韩精品欧美一区二区 | 国产一精品一av一免费 | 亚洲日韩中文字幕在线播放 | 成人无码视频在线观看网站 | 亚洲综合无码一区二区三区 | 国产午夜亚洲精品不卡下载 | 国产激情无码一区二区app | 亚洲 激情 小说 另类 欧美 | 99精品无人区乱码1区2区3区 | 婷婷色婷婷开心五月四房播播 | 人人妻人人澡人人爽欧美精品 | 国产在线精品一区二区三区直播 | 无套内谢老熟女 | 性欧美疯狂xxxxbbbb | 久精品国产欧美亚洲色aⅴ大片 | 天天综合网天天综合色 | 一区二区三区高清视频一 | 欧美黑人巨大xxxxx | 欧美老妇与禽交 | 国产亚洲欧美在线专区 | 久久视频在线观看精品 | 日韩 欧美 动漫 国产 制服 | 精品国产精品久久一区免费式 | 日韩无套无码精品 | 日日麻批免费40分钟无码 | 18黄暴禁片在线观看 | 在线播放免费人成毛片乱码 | 亚洲熟女一区二区三区 | 国产人妻人伦精品 | 亚洲第一无码av无码专区 | 人人妻人人澡人人爽精品欧美 | 永久免费观看美女裸体的网站 | 亚洲欧美综合区丁香五月小说 | www国产精品内射老师 | 欧美一区二区三区 | 麻豆蜜桃av蜜臀av色欲av | 日韩精品成人一区二区三区 | 熟女体下毛毛黑森林 | 婷婷丁香五月天综合东京热 | 久久精品国产精品国产精品污 | 一本色道久久综合狠狠躁 | 无码av最新清无码专区吞精 | 波多野42部无码喷潮在线 | 欧美大屁股xxxxhd黑色 | 亚洲国产精品毛片av不卡在线 | 国产成人综合色在线观看网站 | 久久久精品成人免费观看 | 国内精品一区二区三区不卡 | 2019午夜福利不卡片在线 | a片在线免费观看 | 国产成人一区二区三区别 | 国产97人人超碰caoprom | 国产9 9在线 | 中文 | 亚洲成av人综合在线观看 | 亚洲国产精品久久久天堂 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 无码免费一区二区三区 | 国内老熟妇对白xxxxhd | 亚洲精品一区三区三区在线观看 | 人人超人人超碰超国产 | 国产色视频一区二区三区 | 亚洲成av人片天堂网无码】 | 一本精品99久久精品77 | 又湿又紧又大又爽a视频国产 | 欧美肥老太牲交大战 | 亚洲国产成人av在线观看 | 无套内射视频囯产 | 久久久久久国产精品无码下载 | 国产精品99爱免费视频 | 成人无码影片精品久久久 | 国产精品人人爽人人做我的可爱 | 久久精品中文闷骚内射 | 国产精品多人p群无码 | 日本一卡二卡不卡视频查询 | 四十如虎的丰满熟妇啪啪 | 日本va欧美va欧美va精品 | 亚洲乱码国产乱码精品精 | 久久国语露脸国产精品电影 | 四十如虎的丰满熟妇啪啪 | 亚洲成a人片在线观看日本 | 欧美人与禽猛交狂配 | 亚洲性无码av中文字幕 | 亚洲色大成网站www | a片在线免费观看 | 成人影院yy111111在线观看 | 亚洲午夜福利在线观看 | 伊人久久大香线蕉av一区二区 | 亚洲一区av无码专区在线观看 | 色一情一乱一伦 | 欧美猛少妇色xxxxx | 一区二区三区高清视频一 | 色欲综合久久中文字幕网 | 人人妻人人澡人人爽欧美精品 | 丰满肥臀大屁股熟妇激情视频 | 中文字幕无线码免费人妻 | 无遮无挡爽爽免费视频 | 亚洲成a人一区二区三区 | 国产精品第一区揄拍无码 | 国产国语老龄妇女a片 | 99久久99久久免费精品蜜桃 | 久久国产精品二国产精品 | 国产无遮挡又黄又爽又色 | 午夜福利试看120秒体验区 | 宝宝好涨水快流出来免费视频 | 
狠狠色噜噜狠狠狠7777奇米 | 又黄又爽又色的视频 | 欧美三级不卡在线观看 | 日本熟妇乱子伦xxxx | 天天摸天天碰天天添 | 精品熟女少妇av免费观看 | 精品国产一区二区三区四区 | 久久久久久国产精品无码下载 | 99国产欧美久久久精品 | 亚洲一区二区三区含羞草 | 国产人妻久久精品二区三区老狼 | 麻豆果冻传媒2021精品传媒一区下载 | 亚洲一区二区三区在线观看网站 | 精品成在人线av无码免费看 | 国产福利视频一区二区 | av在线亚洲欧洲日产一区二区 | 国产欧美精品一区二区三区 | 中文字幕无码av波多野吉衣 | 久久久久成人精品免费播放动漫 | 无码人妻丰满熟妇区毛片18 | 亚洲综合伊人久久大杳蕉 | 特黄特色大片免费播放器图片 | 久久久精品欧美一区二区免费 | 无码人妻丰满熟妇区五十路百度 | 1000部啪啪未满十八勿入下载 | 在线看片无码永久免费视频 | 荫蒂被男人添的好舒服爽免费视频 | 精品久久久无码中文字幕 | 亚洲精品久久久久久久久久久 | 亚洲高清偷拍一区二区三区 | 伊人久久大香线蕉av一区二区 | 欧美日韩在线亚洲综合国产人 | 久久久久成人精品免费播放动漫 | 午夜熟女插插xx免费视频 | 国产精品香蕉在线观看 | 国产亚洲人成在线播放 | 国产亚洲欧美在线专区 | 澳门永久av免费网站 | 久久久久成人精品免费播放动漫 | 亚洲国产av美女网站 | 纯爱无遮挡h肉动漫在线播放 | 国产av久久久久精东av | 日本精品久久久久中文字幕 | 亚洲精品中文字幕乱码 | 男人扒开女人内裤强吻桶进去 | 又大又黄又粗又爽的免费视频 | 亚洲 另类 在线 欧美 制服 | 国产偷国产偷精品高清尤物 | 中文字幕色婷婷在线视频 | 精品国产一区二区三区av 性色 | 欧美日韩在线亚洲综合国产人 | 国产精品美女久久久久av爽李琼 | 日本一区二区更新不卡 | www国产精品内射老师 | 精品国产一区二区三区四区 | 丰满肥臀大屁股熟妇激情视频 | 狠狠色欧美亚洲狠狠色www | 亚洲熟妇自偷自拍另类 | 夜夜躁日日躁狠狠久久av | 久久亚洲精品中文字幕无男同 | 色婷婷香蕉在线一区二区 | 奇米影视7777久久精品人人爽 | 精品久久久无码中文字幕 | 欧洲熟妇色 欧美 | 欧美性猛交xxxx富婆 | 国产成人一区二区三区别 | 无码人妻精品一区二区三区不卡 | 午夜精品一区二区三区在线观看 | 亚洲综合在线一区二区三区 | 国产精品无套呻吟在线 | 撕开奶罩揉吮奶头视频 | 国产午夜亚洲精品不卡 | 色综合久久88色综合天天 | 国产婷婷色一区二区三区在线 | 亚洲精品久久久久久久久久久 | 久久亚洲精品中文字幕无男同 | 精品国产aⅴ无码一区二区 | 色婷婷欧美在线播放内射 | 亚洲精品综合五月久久小说 | 中文无码伦av中文字幕 | 亚洲狠狠婷婷综合久久 | 亚洲精品成a人在线观看 | 国产又爽又猛又粗的视频a片 | 在线播放亚洲第一字幕 | 综合激情五月综合激情五月激情1 | 亚洲区欧美区综合区自拍区 | 九九在线中文字幕无码 | 真人与拘做受免费视频 | 97夜夜澡人人爽人人喊中国片 | 国产激情一区二区三区 | 一本久久伊人热热精品中文字幕 | 日本熟妇浓毛 | 日本精品高清一区二区 | 久久无码中文字幕免费影院蜜桃 | 鲁大师影院在线观看 | 亚洲精品久久久久avwww潮水 | 欧美放荡的少妇 | 波多野结衣一区二区三区av免费 | 久青草影院在线观看国产 | 久久99精品久久久久久动态图 | 中文字幕乱码中文乱码51精品 | 国产又爽又黄又刺激的视频 | 国产成人综合美国十次 | 国产成人精品三级麻豆 | 熟妇人妻无码xxx视频 | 国产 浪潮av性色四虎 | 婷婷丁香六月激情综合啪 | 粗大的内捧猛烈进出视频 | 亚洲色成人中文字幕网站 | 亚洲日韩av一区二区三区中文 | 国产成人综合在线女婷五月99播放 | 久久综合色之久久综合 | 久久精品无码一区二区三区 | 人妻体内射精一区二区三四 | 国产无套内射久久久国产 | 欧美阿v高清资源不卡在线播放 | 国产激情精品一区二区三区 | 在线播放无码字幕亚洲 | 久久国产精品二国产精品 | 国产国产精品人在线视 | 亚洲成a人一区二区三区 | 国产女主播喷水视频在线观看 | 久久国产精品二国产精品 | 欧美freesex黑人又粗又大 | 永久免费观看国产裸体美女 | 欧美精品免费观看二区 | 成人无码精品1区2区3区免费看 | 啦啦啦www在线观看免费视频 | 国产又爽又猛又粗的视频a片 | 熟女俱乐部五十路六十路av | 丰满少妇高潮惨叫视频 | 日本饥渴人妻欲求不满 | 中文精品久久久久人妻不卡 | 
欧美性猛交内射兽交老熟妇 | 强伦人妻一区二区三区视频18 | 强开小婷嫩苞又嫩又紧视频 | 亚洲人成影院在线观看 | 一本久道高清无码视频 | 欧美变态另类xxxx | a在线观看免费网站大全 | 日韩人妻系列无码专区 | 性开放的女人aaa片 | 狠狠综合久久久久综合网 | 无码人妻丰满熟妇区毛片18 | 国产精品人人妻人人爽 | 成人精品一区二区三区中文字幕 | 国产午夜精品一区二区三区嫩草 | 欧美人妻一区二区三区 | 国内综合精品午夜久久资源 | 波多野结衣一区二区三区av免费 | 无码av岛国片在线播放 | 极品尤物被啪到呻吟喷水 | 四虎国产精品一区二区 | 水蜜桃av无码 | 偷窥日本少妇撒尿chinese | 国产精品多人p群无码 | 精品国产av色一区二区深夜久久 | 成人片黄网站色大片免费观看 | 高潮毛片无遮挡高清免费 | 久久久中文久久久无码 | 国产口爆吞精在线视频 | 日日橹狠狠爱欧美视频 | 无码精品国产va在线观看dvd | 亚洲男人av天堂午夜在 | 亚洲精品鲁一鲁一区二区三区 | 亚洲 日韩 欧美 成人 在线观看 | 日本精品少妇一区二区三区 | 久久亚洲中文字幕精品一区 | 人人妻人人澡人人爽精品欧美 | 国产9 9在线 | 中文 | 牲交欧美兽交欧美 | 无码av最新清无码专区吞精 | 成熟妇人a片免费看网站 | 日本一本二本三区免费 | 国产97色在线 | 免 | 国产内射老熟女aaaa | 久久国产精品萌白酱免费 | 亚洲无人区午夜福利码高清完整版 | 未满小14洗澡无码视频网站 | 成人亚洲精品久久久久软件 | 亚洲色www成人永久网址 | 老头边吃奶边弄进去呻吟 | 婷婷综合久久中文字幕蜜桃三电影 | 樱花草在线播放免费中文 | 国产乱人伦av在线无码 | 国产福利视频一区二区 | 亚洲国产精品美女久久久久 | 国产午夜精品一区二区三区嫩草 | 性欧美videos高清精品 | 国产乱人伦av在线无码 | 爆乳一区二区三区无码 | 久久97精品久久久久久久不卡 | 国产精品va在线播放 | 国产精品无套呻吟在线 | 在线天堂新版最新版在线8 | 久久精品女人的天堂av | 亚洲色偷偷男人的天堂 | 日本精品人妻无码77777 天堂一区人妻无码 | 亚洲综合伊人久久大杳蕉 | 色婷婷香蕉在线一区二区 | 欧美性生交活xxxxxdddd | 国产极品美女高潮无套在线观看 | 大胆欧美熟妇xx | 最新国产麻豆aⅴ精品无码 | 国产精品人人爽人人做我的可爱 | 激情五月综合色婷婷一区二区 | 人人澡人人透人人爽 | av人摸人人人澡人人超碰下载 | 国产欧美精品一区二区三区 | 久久99精品国产麻豆 | 成人免费无码大片a毛片 | 无遮无挡爽爽免费视频 | 国产成人人人97超碰超爽8 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 婷婷综合久久中文字幕蜜桃三电影 | 久久久久成人片免费观看蜜芽 | 国产又爽又黄又刺激的视频 | 国产舌乚八伦偷品w中 | 亚洲日韩精品欧美一区二区 | 午夜成人1000部免费视频 | 亚洲无人区午夜福利码高清完整版 | 偷窥日本少妇撒尿chinese | 国产成人无码区免费内射一片色欲 | 55夜色66夜色国产精品视频 | 水蜜桃亚洲一二三四在线 | 97久久超碰中文字幕 | 国产精品久久久av久久久 | 精品国产一区二区三区四区在线看 | 中文字幕人妻无码一区二区三区 | 中文字幕无码av激情不卡 | 久久99热只有频精品8 | 精品无人区无码乱码毛片国产 | 亚洲精品久久久久中文第一幕 | 久在线观看福利视频 | 欧美 亚洲 国产 另类 | 国模大胆一区二区三区 | 国产精品办公室沙发 | 奇米影视7777久久精品 | 色婷婷综合中文久久一本 | 国产亲子乱弄免费视频 | 成人毛片一区二区 | 国产精品久久久av久久久 | 伦伦影院午夜理论片 | 国产无套内射久久久国产 | 日本肉体xxxx裸交 | 精品国产一区二区三区四区 | 亚洲熟妇色xxxxx欧美老妇 | 99久久精品无码一区二区毛片 | 亚洲 高清 成人 动漫 | 久久99精品国产麻豆蜜芽 | 精品日本一区二区三区在线观看 | 99国产精品白浆在线观看免费 | 亚洲精品中文字幕 | 免费乱码人妻系列无码专区 | 亚洲综合无码久久精品综合 | 人人妻人人澡人人爽欧美一区 | 曰韩无码二三区中文字幕 | 国产日产欧产精品精品app | 夜精品a片一区二区三区无码白浆 | 俄罗斯老熟妇色xxxx | 97色伦图片97综合影院 | 国产综合色产在线精品 | 国产精品多人p群无码 | 黑森林福利视频导航 | 5858s亚洲色大成网站www | 亚洲国产精品成人久久蜜臀 
| 人人妻人人澡人人爽欧美一区九九 | 欧美老妇与禽交 | 最近中文2019字幕第二页 | 欧美一区二区三区视频在线观看 | 久久久久久国产精品无码下载 | 国内揄拍国内精品少妇国语 | 动漫av网站免费观看 | 久久久亚洲欧洲日产国码αv | 日本www一道久久久免费榴莲 | 性啪啪chinese东北女人 | 国产精品igao视频网 | 成年美女黄网站色大免费视频 | 2019午夜福利不卡片在线 | 亚洲人成网站色7799 | 欧美日本精品一区二区三区 | 国产精品亚洲lv粉色 | 久久久久亚洲精品中文字幕 | 亚洲一区二区三区国产精华液 | 亚洲色在线无码国产精品不卡 | 少妇高潮一区二区三区99 | 亚洲日本va中文字幕 | 欧美兽交xxxx×视频 | 麻豆蜜桃av蜜臀av色欲av | 一本大道伊人av久久综合 | 高清无码午夜福利视频 | 无套内谢老熟女 | 欧美 亚洲 国产 另类 | 97色伦图片97综合影院 | 青草青草久热国产精品 | 熟妇人妻无码xxx视频 | 人妻插b视频一区二区三区 | 欧洲熟妇色 欧美 | 国语自产偷拍精品视频偷 | 99久久人妻精品免费一区 | 色欲av亚洲一区无码少妇 | 人人澡人人妻人人爽人人蜜桃 | 国内精品一区二区三区不卡 | 永久免费观看美女裸体的网站 | 亚洲精品中文字幕久久久久 | 亚洲の无码国产の无码影院 | 澳门永久av免费网站 | 亚洲 高清 成人 动漫 | 欧美阿v高清资源不卡在线播放 | 久久97精品久久久久久久不卡 | 亚洲日韩av片在线观看 | 麻豆国产人妻欲求不满谁演的 | 国产农村乱对白刺激视频 | 国产亚洲精品久久久久久久 | 一本精品99久久精品77 | 300部国产真实乱 | 免费观看黄网站 | 极品尤物被啪到呻吟喷水 | 欧美性猛交内射兽交老熟妇 | 鲁鲁鲁爽爽爽在线视频观看 | 国产精品久久久久久无码 | 日韩精品无码一区二区中文字幕 | 国产精品高潮呻吟av久久4虎 | 亚洲s色大片在线观看 | 国产无套粉嫩白浆在线 | 久久亚洲精品成人无码 | 麻花豆传媒剧国产免费mv在线 | 中文亚洲成a人片在线观看 | 久久99精品国产麻豆蜜芽 | 亚洲精品国产a久久久久久 | 沈阳熟女露脸对白视频 | 奇米影视7777久久精品人人爽 | 日本精品久久久久中文字幕 | 天堂一区人妻无码 | 99久久99久久免费精品蜜桃 | 丝袜人妻一区二区三区 | 亚洲日韩av一区二区三区四区 | 国产精品永久免费视频 | 日韩在线不卡免费视频一区 |