avx指令+openmp多线程实现一个基本算法作业 c++
生活随笔
收集整理的這篇文章主要介紹了
avx指令+openmp多线程实现一个基本算法作业 c++
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
原代碼
數學思路
point類
// File 1: class definition, point.h
#ifndef POINT_H
#define POINT_H

/// A 2-D sample point with single-precision coordinates.
class Point {
public:
    /// Builds a point at (x, y); both coordinates default to 0.
    /// Deliberately not `explicit`, so existing implicit/braced
    /// constructions of Point keep compiling.
    Point(float x = 0, float y = 0) : x(x), y(y) {}

    /// Returns the x coordinate.
    float GetX() const { return x; }

    /// Returns the y coordinate.
    float GetY() const { return y; }

private:
    float x, y;
};

#endif  // POINT_H

// Section: main function
//主函數(shù),main.cpp #include "point.h" #include <iostream> #include <cmath> using namespace std;//直線線性擬合,points 為各點(diǎn),nPoint 為點(diǎn)數(shù) float lineFit(const Point points[], int nPoint) //友元函數(shù)體 {float avgX=0,avgY=0; //定義變量float lxx=0,lyy=0,lxy=0;for(int i=0;i<nPoint;i++) //計(jì)算X、Y的平均值{avgX+=points[i].GetX()/nPoint;avgY+=points[i].GetY()/nPoint;}for(int i=0;i<nPoint;i++) //計(jì)算Lxx、Lyy和Lxy{lxx+=(points[i].GetX()-avgX)*(points[i].GetX()-avgX);lyy+=(points[i].GetY()-avgY)*(points[i].GetY()-avgX);lxy+=(points[i].GetX()-avgX)*(points[i].GetY()-avgY);}cout<<"This line can be fitted by y=ax+b."<<endl;cout<<"a="<<lxy/lxx; //輸出回歸系數(shù)acout<<" b="<<avgY-lxy*avgX/lxx<<endl; //輸出回歸系數(shù)breturn float(lxy/sqrt(lxx*lyy)); //返回相關(guān)系數(shù)r }int main() {Point p[10]={Point(6,10),Point(14,20),Point(26,30),Point(33,40),Point(46,50),Point(54,60),Point(67,70),Point(75,80),Point(84,90),Point(100,100)}; //初始化數(shù)據(jù)點(diǎn)float r=lineFit(p,10); //進(jìn)行線性回歸計(jì)算cout<<"Line coefficient r="<<r<<endl; //輸出相關(guān)系數(shù)return 0; }輸出結(jié)果 This line can be fitted by y=ax+b. 
// (Rest of the sample output of the original program shown above:)
//   a=0.97223 b=5.90237
//   Line coefficient r=0.998193
//   Program ended with exit code: 0
//
// The part above is reproduced from
// https://www.jianshu.com/p/128924b2c846?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation
// The part below is the author's own rewrite of that code using AVX
// intrinsics and OpenMP multithreading.
//
// Build note: the AVX intrinsics need AVX code generation enabled and the
// OpenMP pragmas need OpenMP enabled (e.g. g++ -O2 -mavx -fopenmp). Without
// OpenMP the pragmas are ignored and the program runs single-threaded.

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <immintrin.h>
#ifdef _OPENMP
#include <omp.h>
#endif

/// A 2-D sample point with double-precision coordinates.
/// (Previously the members and getters were float although the constructor
/// took double, which silently truncated the generated data.)
class Point {
public:
    Point(double x = 0, double y = 0) : x(x), y(y) {}
    double GetX() const { return x; }
    double GetY() const { return y; }

private:
    double x, y;
};

/// Packs the x coordinates of p[i..i+3] into one vector (lane k = p[i+k]).
static inline __m256d load_x4(const std::vector<Point>& p, int i) {
    return _mm256_set_pd(p[i + 3].GetX(), p[i + 2].GetX(), p[i + 1].GetX(), p[i].GetX());
}

/// Packs the y coordinates of p[i..i+3] into one vector (lane k = p[i+k]).
static inline __m256d load_y4(const std::vector<Point>& p, int i) {
    return _mm256_set_pd(p[i + 3].GetY(), p[i + 2].GetY(), p[i + 1].GetY(), p[i].GetY());
}

/// Finishes a vectorised accumulation: adds the four lanes of `num`, then
/// adds the scalar contribution of the tail elements p[size/4*4 .. size-1]
/// that the 4-wide loop did not cover.
///
/// @param num   4-lane partial sums produced by the vector loop
/// @param type  which quantity is being accumulated:
///              0 = mean of x, 1 = mean of y (result is divided by size),
///              2 = Lxx, 3 = Lyy, any other value = Lxy (raw sums)
/// @param size  number of points in p that take part
/// @param p     the sample points
/// @param avgx  mean of x (only used for types other than 0 and 1)
/// @param avgy  mean of y (only used for types other than 0 and 1)
/// @return the mean (types 0/1) or the sum of deviation products (otherwise)
double add_help(__m256d num, int type, int size, const std::vector<Point>& p,
                double avgx, double avgy) {
    double lanes[4];
    _mm256_storeu_pd(lanes, num);
    double res = 0;
    for (int i = 0; i < 4; i++) {
        res += lanes[i];
    }

    for (int i = size / 4 * 4; i < size; i++) {
        const double dx = p[i].GetX() - avgx;
        const double dy = p[i].GetY() - avgy;
        switch (type) {
            case 0:  res += p[i].GetX(); break;
            case 1:  res += p[i].GetY(); break;
            case 2:  res += dx * dx;     break;  // was (x - avgx) * (x - avgy)
            case 3:  res += dy * dy;     break;  // was (y - avgy) * (y - avgx)
            default: res += dx * dy;     break;
        }
    }

    return (type == 0 || type == 1) ? res / size : res;
}

int main() {
    // Alternative fixed data set from the original program:
    // vector<Point> p(
    //     {Point(6, 10), Point(14, 20), Point(26, 30), Point(33, 40), Point(46, 50),
    //      Point(54, 60), Point(67, 70), Point(75, 80), Point(84, 90), Point(100, 100)});

    // Ground-truth line y = aa * x + bb used to synthesise the data.
    const int aa = 15, bb = 45;
    // Number of (x, y) pairs to generate.
    const int size = 300;

    std::vector<Point> p;
    p.reserve(size);
    for (int i = 0; i < size; i++) {
        // x: random integer in 0..99
        const double a = std::rand() % 100 * 1.0;
        // noise: random value in [0, 1]
        const double random_01 = std::rand() / double(RAND_MAX);
        p.emplace_back(a, a * aa + bb + random_01);
    }

    // Largest multiple of 4 not exceeding size; the vector loops stop here.
    const int vec_end = size / 4 * 4;

    double avgx = 0, avgy = 0;
    // A double is 64 bits, so one 256-bit register holds 4 of them.
    __m256d sumx = _mm256_setzero_pd();
    __m256d sumy = _mm256_setzero_pd();

    // Sum x and y on up to 8 threads. Each thread accumulates into its own
    // private registers and merges once under a critical section; updating
    // the shared accumulators directly from the loop body is a data race.
#pragma omp parallel num_threads(8)
    {
        __m256d local_x = _mm256_setzero_pd();
        __m256d local_y = _mm256_setzero_pd();
#pragma omp for nowait
        for (int i = 0; i < vec_end; i += 4) {
            local_x = _mm256_add_pd(load_x4(p, i), local_x);
            local_y = _mm256_add_pd(load_y4(p, i), local_y);
        }
#pragma omp critical
        {
            sumx = _mm256_add_pd(sumx, local_x);
            sumy = _mm256_add_pd(sumy, local_y);
        }
    }

    // Add the (fewer than 4) leftover elements and divide by size.
    avgx = add_help(sumx, 0, size, p, avgx, avgy);
    avgy = add_help(sumy, 1, size, p, avgx, avgy);
    std::cout << "avgx = " << avgx << std::endl;
    std::cout << "avgy = " << avgy << std::endl;

    // Broadcast the means to all four lanes.
    const __m256d avgx_256 = _mm256_set1_pd(avgx);
    const __m256d avgy_256 = _mm256_set1_pd(avgy);

    double lxx = 0, lyy = 0, lxy = 0;
    __m256d lxx_256 = _mm256_setzero_pd();
    __m256d lyy_256 = _mm256_setzero_pd();
    __m256d lxy_256 = _mm256_setzero_pd();

    // Accumulate Lxx, Lyy and Lxy with the same private-partials scheme.
#pragma omp parallel num_threads(8)
    {
        __m256d local_xx = _mm256_setzero_pd();
        __m256d local_yy = _mm256_setzero_pd();
        __m256d local_xy = _mm256_setzero_pd();
#pragma omp for nowait
        for (int i = 0; i < vec_end; i += 4) {
            const __m256d dx = _mm256_sub_pd(load_x4(p, i), avgx_256);
            const __m256d dy = _mm256_sub_pd(load_y4(p, i), avgy_256);
            // Multiply, then accumulate. Lxx and Lyy previously multiplied a
            // deviation from one mean by a deviation from the other mean.
            local_xx = _mm256_add_pd(_mm256_mul_pd(dx, dx), local_xx);
            local_yy = _mm256_add_pd(_mm256_mul_pd(dy, dy), local_yy);
            local_xy = _mm256_add_pd(_mm256_mul_pd(dx, dy), local_xy);
        }
#pragma omp critical
        {
            lxx_256 = _mm256_add_pd(lxx_256, local_xx);
            lyy_256 = _mm256_add_pd(lyy_256, local_yy);
            lxy_256 = _mm256_add_pd(lxy_256, local_xy);
        }
    }

    lxx = add_help(lxx_256, 2, size, p, avgx, avgy);
    lxy = add_help(lxy_256, 4, size, p, avgx, avgy);
    lyy = add_help(lyy_256, 3, size, p, avgx, avgy);

    std::cout << "a=" << lxy / lxx;                              // regression coefficient a
    std::cout << " b=" << avgy - lxy * avgx / lxx << std::endl;  // regression coefficient b
    std::cout << "Line coefficient r=" << static_cast<float>(lxy / std::sqrt(lxx * lyy))
              << std::endl;                                      // correlation coefficient r
    return 0;
}

// Section: Summary
以上是生活随笔為你收集整理的avx指令+openmp多线程实现一个基本算法作业 c++的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Protues 8.8 SP1 无需破解
- 下一篇: 主动轮廓线模型Snake模型简介open