项目优化之:GPU编程
1GPU編程,依賴于顯卡
2GPU編程依賴于OpenGL和DirectX
3CPU的特點是:頻率比較快,GPU的特點是寄存器非常非常的多。
4如果電腦是Windows 7,沒法直接調試GPU。Windows 8可以直接調試
5用VS2013新建一個項目,命名:GPU
6調試GPU的方式是VS中的:打斷點→運行項目→調試→窗口→GPU線程(通過這種方式實現調試GPU項目)
8.修改項目屬性:右擊項目→屬性→配置屬性→常規,修改調試器類型為僅GPU
修改Amp默認加速器(Amp Default Accelerator):可以選擇運行時默認(Use C++ AMP runtime default)的方式,也可以使用軟件加速器(WARP software accelerator)的方式,截圖
9.代碼:
#include <iostream>
#include <amp.h> // header required for C++ AMP (GPU) programming
using namespace concurrency;
?
int main()
{
??? int v[11] = { 'G', 'd', 'k', 'k', 'n', 31, 'v', 'n', 'q', 'k', 'c' };
??? array_view<int> av(11, v);//array_view是GPU計算結構,av存儲到GPU顯存
?
??? //=表示直接操作AV
??? //(index<1> idx)操作每一個元素
??? //restrict(amp)定位GPU執行
??? parallel_for_each(av.extent, [=](index<1> idx) restrict(amp)
??? {
??????? av[idx] += 1;//加完后變成了hello world
??? });
??? for (unsigned int i = 0; i < 11; i++)
??? {
??????? std::cout << static_cast<char>(av[i]);
??? }
??? std::cin.get();
?
??? return 0;
}
10.CPU,GPU單值計算效率測試
案例:
#include <iostream>
#include <amp.h>
#include <WinBase.h>
// Number of elements in each of the global float buffers below.
#define COUNT 100000

// Global float buffers of COUNT elements each; as namespace-scope objects
// they start out zero-initialized.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// Sums the integers 0 .. num-1 in double precision.
// restrict(amp) limits this function to accelerator (GPU) code, which is
// where data-parallel work has the advantage.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }

    return temp;
}
// Sums the integers 0 .. num-1 in double precision.
// restrict(cpu) limits this function to host (CPU) code; the CPU has the
// advantage for a single-value, single-point computation like this one.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double runcpu(int num) restrict(cpu)
{
    // The whole loop operates on one single accumulator value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// Sums the integers 0 .. num-1 in double precision.
// restrict(amp, cpu) allows the function to be used from both accelerator
// (GPU) code and host (CPU) code.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
?
//測試單值計算的運行效率
int main()
{
??? LARGE_INTEGER freq;
??? LARGE_INTEGER strt;
??? LARGE_INTEGER ed;
??? QueryPerformanceFrequency(&freq);
??? QueryPerformanceCounter(&strt);
??? double dx[1] = { 0.0 };
??? double? db = 0.0;
?
??? concurrency::array_view<double> myview(1, dx);
??? parallel_for_each(myview.extent,
??????? [=](concurrency::index<1> idx) restrict(amp)
??? {
??????? myview[idx] += rungpu(1000000);
??? });
?
??? myview.synchronize();//顯式等待GPU計算完成并將數據打回內存
??? printf("%f\n", dx[0]);
?
??? QueryPerformanceCounter(&ed);
??? printf("GPU耗時: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? QueryPerformanceCounter(&strt);
?
??? printf("%f\n", runcpu(1000000));
?
??? QueryPerformanceCounter(&ed);
??? printf("CPU耗時: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? puts("測試結束");
?
??? getchar();
??? return 0;
}
運行結果:
案例2:
#include <iostream>
#include <amp.h>
#include <WinBase.h>
// Number of elements in each of the global float buffers below.
#define COUNT 3000

// Global float buffers of COUNT elements each; as namespace-scope objects
// they start out zero-initialized.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// Sums the integers 0 .. num-1 in double precision.
// restrict(amp) limits this function to accelerator (GPU) code, which is
// where data-parallel work has the advantage.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }

    return temp;
}
// Sums the integers 0 .. num-1 in double precision.
// restrict(cpu) limits this function to host (CPU) code; the CPU has the
// advantage for a single-value, single-point computation like this one.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double runcpu(int num) restrict(cpu)
{
    // The whole loop operates on one single accumulator value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// Sums the integers 0 .. num-1 in double precision.
// restrict(amp, cpu) allows the function to be used from both accelerator
// (GPU) code and host (CPU) code.
// @param num  exclusive upper bound of the summed range; num <= 0 yields 0.
// @return     0 + 1 + ... + (num - 1) as a double.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
?
int main()
{
??? LARGE_INTEGER freq;
??? LARGE_INTEGER strt;
??? LARGE_INTEGER ed;
??? QueryPerformanceFrequency(&freq);
??? QueryPerformanceCounter(&strt);
?
??? concurrency::array_view<float> myView(COUNT, nickName_GPU); //將數據打入顯存?
?
??? concurrency::parallel_for_each(myView.extent, [=](concurrency::index<1> idx) restrict(amp)
??? {
??????? for (int i = 0; i < COUNT / 10; i++)
??????? {
??????????? myView[idx] = (myView[idx] + 0.1f) / 2.3f;
??????? }
??? });
?
??? myView.synchronize();//顯式等待GPU計算完成并將數據打回內存?
?
??? QueryPerformanceCounter(&ed);
??? printf("GPU耗時: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? QueryPerformanceCounter(&strt);
?
??? for (int idx = 0; idx < COUNT; idx++)
??? {
??????? for (int i = 0; i < COUNT / 10; i++)
??????? {
??????????? nickName_CPU[idx] = (nickName_CPU[idx] + 0.1f) /2.3f;
??????? }
??? }
??? QueryPerformanceCounter(&ed);
??? printf("CPU耗時: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
?
??? for (int idx = 0; idx < COUNT; idx++)
??? {
??????? if (nickName_CPU[idx] != nickName_GPU[idx])
??????? {
??????????? puts("CPU和GPU的計算結果不相符!");
??????????? getchar();
??????????? return 0;
??????? }
??? }
??? puts("測試結束");
?
??? getchar();
??? return 0;
}
運行結果:
總結
以上是生活随笔為你收集整理的项目优化之:GPU编程的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: STL之multiset中equal_r
- 下一篇: 征信报告去哪里打