當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

【opencv450-samples】digits_svm 手写数字识别SVM vs KNearest （SVM and KNearest digit recognition）

發布時間：2023/12/10 编程问答 31 豆豆

生活随笔收集整理的這篇文章主要介紹了【opencv450-samples】digits_svm 手写数字识别SVM vs KNearest （SVM and KNearest digit recognition）小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

digits.png 樣本數據

SVM 和 KNearest 數字識別。

示例從“digits.png”加載手寫數字數據集。
然后訓練 SVM 和 KNearest 兩個分類器，并評估
它們的準確性。

以下預處理應用于數據集：
 - 基于矩的圖像去偏斜（見 deskew()）
 - 數字圖像被分為 4 個 10x10 的單元（cell），
   并為每個單元計算 16 個 bin 的
   定向梯度直方圖
 - 使用 Hellinger 度量將直方圖轉換到相應空間（參見 [1] (RootSIFT)）

[1] R. Arandjelovic, A. Zisserman

    "Three things everyone should know to improve object retrieval"

    http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf

#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/ml.hpp"

#include <algorithm>
#include <iostream>
#include <vector>

using namespace cv;
using namespace std;

const int SZ = 20;                     // every digit tile is SZ x SZ pixels
const int CLASS_N = 10;                // number of digit classes (0..9)
const char* DIGITS_FN = "digits.png";  // sheet image holding all sample digits

// Prints the sample description and usage line to stdout.
static void help(char** argv)
{
    cout <<
        "\n"
        "SVM 和 KNearest 數字識別SVM and KNearest digit recognition.\n"
        "\n"
        "示例從“digits.png”加載手寫數字數據集。Sample loads a dataset of handwritten digits from 'digits.png'.\n"
        "Then it trains a SVM and KNearest classifiers on it and evaluates\n"
        "their accuracy.然后它訓練一個 SVM 和 KNearest 分類器并評估它們的準確性。\n"
        "\n"
        "以下預處理應用于數據集：Following preprocessing is applied to the dataset:\n"
        " - 基于矩的圖像偏斜Moment-based image deskew (see deskew())\n"
        " - Digit images are split into 4 10x10 cells and 16-bin\n"
        " histogram of oriented gradients is computed for each\n"
        " cell數字圖像是被分成 4 個區域的 10x10 單元格，并為每個單元格計算 16 位定向梯度直方圖\n"
        " - 使用 Hellinger 度量將直方圖轉換到空間Transform histograms to space with Hellinger metric (see [1] (RootSIFT))\n"
        "\n"
        "\n"
        "[1] R. Arandjelovic, A. Zisserman\n"
        " \"每個人都應該知道改進對象檢索的三件事Three things everyone should know to improve object retrieval\"\n"
        " http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf\n"
        "\n"
        "Usage:\n"
        << argv[0] << endl;
}

// Splits `image` into tiles of `cell_size`, scanning row by row, and stores
// the tiles (views into `image`, not copies) in `cells`.
// Only complete tiles are produced: a partial strip at the right or bottom
// edge is skipped instead of requesting an out-of-range ROI.
static void split2d(const Mat& image, const Size cell_size, vector<Mat>& cells)
{
    const int height = image.rows;
    const int width = image.cols;
    const int sx = cell_size.width;
    const int sy = cell_size.height;

    cells.clear();
    if (sx <= 0 || sy <= 0)
    {
        return; // a non-positive step would never terminate
    }

    for (int i = 0; i + sy <= height; i += sy)
    {
        for (int j = 0; j + sx <= width; j += sx)
        {
            cells.push_back(image(Rect(j, i, sx, sy)));
        }
    }
}

// Loads the digit sheet `fn`, cuts it into SZ x SZ tiles (`digits`) and
// assigns one label per tile (`labels`): the tiles are divided, in scan
// order, into CLASS_N equally sized consecutive groups labelled 0..CLASS_N-1.
// On a read failure both outputs are left empty.
static void load_digits(const char* fn, vector<Mat>& digits, vector<int>& labels)
{
    digits.clear();
    labels.clear();

    String filename = samples::findFile(fn);
    cout << "Loading " << filename << " ..." << endl;

    Mat digits_img = imread(filename, IMREAD_GRAYSCALE);
    if (digits_img.empty())
    {
        cerr << "Could not read image: " << filename << endl;
        return;
    }

    split2d(digits_img, Size(SZ, SZ), digits);

    // The labelling below only covers every tile when the tile count is a
    // multiple of CLASS_N; otherwise labels and digits would differ in size.
    CV_Assert(digits.size() % CLASS_N == 0);

    const size_t per_class = digits.size() / CLASS_N;
    for (int i = 0; i < CLASS_N; i++)
    {
        for (size_t j = 0; j < per_class; j++)
        {
            labels.push_back(i);
        }
    }
}

// Moment-based deskew: shears `img` by -mu11/mu02 so slanted digits become
// upright; the result (SZ x SZ) is written to `deskewed_img`.
static void deskew(const Mat& img, Mat& deskewed_img)
{
    const Moments m = moments(img);

    // Near-zero vertical variance: the skew ratio would be unstable, so the
    // image is returned unchanged.
    if (std::abs(m.mu02) < 0.01)
    {
        deskewed_img = img.clone();
        return;
    }

    const float skew = (float)(m.mu11 / m.mu02);

    // 2x3 affine matrix: horizontal shear plus a shift of -0.5*SZ*skew that
    // compensates the displacement the shear introduces.
    const float M_vals[2][3] = {{1, skew, -0.5f * SZ * skew}, {0, 1, 0}};
    Mat M(Size(3, 2), CV_32F);
    for (int i = 0; i < M.rows; i++)
    {
        for (int j = 0; j < M.cols; j++)
        {
            M.at<float>(i, j) = M_vals[i][j];
        }
    }

    warpAffine(img, deskewed_img, M, Size(SZ, SZ), WARP_INVERSE_MAP | INTER_LINEAR);
}

// Tiles `images` into one image `grid`, `width` tiles per row, each tile
// occupying an SZ x SZ slot. Unused slots in the last row are black.
// `grid` is left untouched when `images` is empty or `width` is not positive.
static void mosaic(const int width, const vector<Mat>& images, Mat& grid)
{
    if (images.empty() || width <= 0)
    {
        return;
    }

    const int tile_rows = ((int)images.size() + width - 1) / width; // ceil(size / width)
    const int mat_width = SZ * width;
    const int mat_height = SZ * tile_rows;

    // Zero-initialised so that slots without an image are not uninitialised memory.
    grid = Mat::zeros(Size(mat_width, mat_height), images[0].type());

    for (size_t i = 0; i < images.size(); i++)
    {
        Mat location_on_grid = grid(Rect(SZ * ((int)i % width), SZ * ((int)i / width), SZ, SZ));
        images[i].copyTo(location_on_grid);
    }
}

// Prints the error rate and the confusion matrix (rows: true label, columns:
// predicted label) for `predictions` against `labels`, and builds `mos`, a
// mosaic of the test `digits` in which misclassified digits are tinted red.
// All three input vectors must have the same length.
static void evaluate_model(const vector<float>& predictions, const vector<Mat>& digits, const vector<int>& labels, Mat& mos)
{
    CV_Assert(predictions.size() == labels.size());
    CV_Assert(digits.size() == labels.size());

    const size_t total = labels.size();
    size_t wrong = 0;
    int confusion[CLASS_N][CLASS_N] = {};

    for (size_t i = 0; i < total; i++)
    {
        const int predicted = (int)predictions[i];
        const int actual = labels[i];

        if (predicted != actual)
        {
            wrong++;
        }

        // Values outside 0..CLASS_N-1 count as errors above but cannot be
        // placed in the matrix.
        if (actual >= 0 && actual < CLASS_N && predicted >= 0 && predicted < CLASS_N)
        {
            confusion[actual][predicted]++;
        }
    }

    // Avoid dividing by zero when there is nothing to evaluate.
    const double err = total > 0 ? (double)wrong / (double)total : 0.0;
    cout << format("error: %.2f %%", err * 100) << endl;

    cout << "confusion matrix:" << endl;
    for (int i = 0; i < CLASS_N; i++)
    {
        for (int j = 0; j < CLASS_N; j++)
        {
            cout << format("%2d ", confusion[i][j]);
        }
        cout << endl;
    }
    cout << endl;

    vector<Mat> vis;
    vis.reserve(total);
    for (size_t i = 0; i < total; i++)
    {
        Mat img;
        cvtColor(digits[i], img, COLOR_GRAY2BGR);

        if ((int)predictions[i] != labels[i])
        {
            // Clear blue and green so that only the red channel remains.
            for (int j = 0; j < img.rows; j++)
            {
                for (int k = 0; k < img.cols; k++)
                {
                    img.at<Vec3b>(j, k)[0] = 0;
                    img.at<Vec3b>(j, k)[1] = 0;
                }
            }
        }

        vis.push_back(img);
    }

    mosaic(25, vis, mos);
}

// Weighted histogram in the spirit of numpy.bincount: bins[v] receives the
// sum of `weights` at every position where `x` equals v.
//   x          - CV_32S matrix of bin indices
//   weights    - CV_32F matrix, same size as x
//   min_length - minimum number of bins in the result
//   bins       - output, max(max(x) + 1, min_length) entries
static void bincount(const Mat& x, const Mat& weights, const int min_length, vector<double>& bins)
{
    CV_Assert(x.type() == CV_32S && weights.type() == CV_32F && x.size() == weights.size());

    double max_x_val = 0;
    minMaxLoc(x, nullptr, &max_x_val);

    // "+ 1" so that the largest index itself is addressable.
    bins = vector<double>(max((int)max_x_val + 1, min_length));

    for (int i = 0; i < x.rows; i++)
    {
        for (int j = 0; j < x.cols; j++)
        {
            const int idx = x.at<int>(i, j);
            if (idx < 0)
            {
                continue; // negative indices have no bin
            }
            bins[idx] += weights.at<float>(i, j);
        }
    }
}

// Computes one HOG-style descriptor per digit and stores it as a row of `hog`
// (CV_32F, digits.size() rows, 4 * 16 columns).
// Each SZ x SZ digit is divided into four (SZ/2) x (SZ/2) cells; per cell a
// 16-bin histogram of gradient orientation, weighted by gradient magnitude,
// is built. The concatenated histogram is L1-normalised, square-rooted
// (Hellinger / RootSIFT transform) and finally L2-normalised.
static void preprocess_hog(const vector<Mat>& digits, Mat& hog)
{
    const int bin_n = 16;         // orientation bins per cell
    const int half_cell = SZ / 2; // side length of one cell
    const double eps = 1e-7;      // keeps the normalisations away from division by zero

    hog = Mat(Size(4 * bin_n, (int)digits.size()), CV_32F);

    for (size_t img_index = 0; img_index < digits.size(); img_index++)
    {
        Mat gx;
        Sobel(digits[img_index], gx, CV_32F, 1, 0);

        Mat gy;
        Sobel(digits[img_index], gy, CV_32F, 0, 1);

        Mat mag;
        Mat ang;
        cartToPolar(gx, gy, mag, ang);

        // Quantise the angle into 0..bin_n-1. The modulo wraps an angle that
        // rounds up to exactly 2*pi back into bin 0 instead of producing the
        // out-of-range index bin_n.
        Mat bin(ang.size(), CV_32S);
        for (int i = 0; i < ang.rows; i++)
        {
            for (int j = 0; j < ang.cols; j++)
            {
                bin.at<int>(i, j) = (int)(bin_n * ang.at<float>(i, j) / (2 * CV_PI)) % bin_n;
            }
        }

        // The four quadrants of the orientation map ...
        Mat bin_cells[] = {
            bin(Rect(0, 0, half_cell, half_cell)),                 // top-left
            bin(Rect(half_cell, 0, half_cell, half_cell)),         // top-right
            bin(Rect(0, half_cell, half_cell, half_cell)),         // bottom-left
            bin(Rect(half_cell, half_cell, half_cell, half_cell))  // bottom-right
        };
        // ... and the matching quadrants of the magnitude map.
        Mat mag_cells[] = {
            mag(Rect(0, 0, half_cell, half_cell)),
            mag(Rect(half_cell, 0, half_cell, half_cell)),
            mag(Rect(0, half_cell, half_cell, half_cell)),
            mag(Rect(half_cell, half_cell, half_cell, half_cell))
        };

        vector<double> hist;
        hist.reserve(4 * bin_n);

        for (int i = 0; i < 4; i++)
        {
            vector<double> partial_hist;
            bincount(bin_cells[i], mag_cells[i], bin_n, partial_hist);
            hist.insert(hist.end(), partial_hist.begin(), partial_hist.end());
        }

        // One descriptor must fill exactly one row of `hog`.
        CV_Assert((int)hist.size() == hog.cols);

        // Transform to Hellinger kernel: L1-normalise, then element-wise sqrt.
        double sum = 0;
        for (size_t i = 0; i < hist.size(); i++)
        {
            sum += hist[i];
        }
        for (size_t i = 0; i < hist.size(); i++)
        {
            hist[i] /= sum + eps;
            hist[i] = sqrt(hist[i]);
        }

        const double hist_norm = cv::norm(hist);
        for (size_t i = 0; i < hist.size(); i++)
        {
            hog.at<float>((int)img_index, (int)i) = (float)(hist[i] / (hist_norm + eps));
        }
    }
}

// Applies one random permutation to both `digits` and `labels`, so every
// image keeps its label. Both vectors must have the same length.
static void shuffle(vector<Mat>& digits, vector<int>& labels)
{
    CV_Assert(digits.size() == labels.size());

    vector<int> shuffled_indexes(digits.size());
    for (size_t i = 0; i < digits.size(); i++)
    {
        shuffled_indexes[i] = (int)i;
    }

    randShuffle(shuffled_indexes);

    vector<Mat> shuffled_digits(digits.size());
    vector<int> shuffled_labels(labels.size());
    for (size_t i = 0; i < shuffled_indexes.size(); i++)
    {
        // Element i moves to the slot chosen for it by the permutation.
        shuffled_digits[shuffled_indexes[i]] = digits[i];
        shuffled_labels[shuffled_indexes[i]] = labels[i];
    }

    digits.swap(shuffled_digits);
    labels.swap(shuffled_labels);
}

// Loads the digits, shuffles and deskews them, extracts HOG features, then
// trains and evaluates a KNearest and an SVM classifier on a 90 % / 10 %
// train / test split. The trained SVM is saved to "digits_svm.yml".
int main(int /* argc */, char* argv[])
{
    help(argv);

    vector<Mat> digits;
    vector<int> labels;

    load_digits(DIGITS_FN, digits, labels);
    if (digits.empty())
    {
        cerr << "No digit samples loaded from " << DIGITS_FN << endl;
        return 1;
    }

    cout << "preprocessing..." << endl;

    // Shuffle so that the train / test split below contains every class.
    shuffle(digits, labels);

    vector<Mat> digits2; // deskewed digits
    digits2.reserve(digits.size());
    for (size_t i = 0; i < digits.size(); i++)
    {
        Mat deskewed_digit;
        deskew(digits[i], deskewed_digit);
        digits2.push_back(deskewed_digit);
    }

    Mat samples; // one HOG descriptor per row
    preprocess_hog(digits2, samples);

    // First 90 % of the rows train the models, the rest is the test set.
    const int train_n = (int)(0.9 * samples.rows);

    Mat test_set;
    vector<Mat> digits_test(digits2.begin() + train_n, digits2.end());
    mosaic(25, digits_test, test_set);
    imshow("test set", test_set);

    Mat samples_train = samples(Rect(0, 0, samples.cols, train_n));
    Mat samples_test = samples(Rect(0, train_n, samples.cols, samples.rows - train_n));
    vector<int> labels_train(labels.begin(), labels.begin() + train_n);
    vector<int> labels_test(labels.begin() + train_n, labels.end());

    Ptr<ml::KNearest> k_nearest;
    Ptr<ml::SVM> svm;
    vector<float> predictions;
    Mat vis;

    cout << "training KNearest..." << endl;
    k_nearest = ml::KNearest::create();
    k_nearest->train(samples_train, ml::ROW_SAMPLE, labels_train);

    // predict digits with KNearest (4 nearest neighbours)
    k_nearest->findNearest(samples_test, 4, predictions);
    evaluate_model(predictions, digits_test, labels_test, vis);
    imshow("KNearest test", vis);
    k_nearest.release();

    cout << "training SVM..." << endl;
    svm = ml::SVM::create();
    svm->setGamma(5.383);          // RBF kernel parameter gamma
    svm->setC(2.67);               // C parameter of the C_SVC formulation
    svm->setKernel(ml::SVM::RBF);
    svm->setType(ml::SVM::C_SVC);
    svm->train(samples_train, ml::ROW_SAMPLE, labels_train);

    // predict digits with SVM
    svm->predict(samples_test, predictions);
    evaluate_model(predictions, digits_test, labels_test, vis);
    imshow("SVM test", vis);

    cout << "Saving SVM as \"digits_svm.yml\"..." << endl;
    svm->save("digits_svm.yml");
    svm.release();

    waitKey();
    return 0;
}

運行結果

preprocessing...
training KNearest...
error: 2.80 %
confusion matrix:
42  1  0  0  0  0  1  0  0  0
 0 45  1  0  0  0  0  0  0  0
 0  0 45  0  0  0  0  0  0  1
 0  0  0 34  0  0  0  0  0  0
 0  0  0  0 43  0  0  0  2  1
 0  0  0  0  0 49  1  0  1  0
 1  0  0  0  0  0 55  0  0  0
 0  0  0  0  0  0  0 55  1  0
 0  0  0  1  0  0  0  0 62  0
 0  0  0  1  0  0  0  0  1 56

training SVM...
error: 2.40 %
confusion matrix:
43  0  0  0  0  0  1  0  0  0
 0 44  1  0  0  0  0  1  0  0
 0  0 45  0  0  0  0  0  0  1
 0  0  0 32  0  0  0  1  0  1
 0  0  0  0 45  0  0  0  1  0
 0  0  0  0  0 50  1  0  0  0
 0  0  0  0  0  0 56  0  0  0
 0  0  0  0  0  0  0 55  1  0
 0  0  0  0  1  0  0  0 62  0
 0  0  1  0  1  0  0  0  0 56

Saving SVM as "digits_svm.yml"...

筆記：

/** @brief 隨機打亂數組元素。Shuffles the array elements randomly.函數 cv::randShuffle 通過隨機選擇元素對并交換它們來打亂指定的一維數組。此類交換操作的數量將為 dst.rows\*dst.cols\*iterFactor . @param dst 輸入/輸出數字一維數組input/output numerical 1D array. @param iterFactor scale factor that determines the number of random swap operations (see the details below). 決定隨機交換操作數量的比例因子（請參閱下面的詳細信息）。 @param rng optional random number generator used for shuffling; if it is zero, theRNG () is used instead. 用于洗牌的可選隨機數生成器；如果為零，則使用 theRNG() 代替。 @sa RNG, sort */ CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor = 1., RNG* rng = 0);

/** @brief 查找近鄰并預測輸入向量的響應。@param samples 按行存儲的輸入樣本。它是一個 `<number_of_samples> * k` 大小的單精度浮點矩陣。@param k 使用的最近鄰居數。應該大于 1。@param results 帶有每個輸入樣本的預測結果（回歸或分類）的向量。它是一個帶有 `<number_of_samples>` 元素的單精度浮點向量。@param neighborResponses 對應鄰居的可選輸出值。它是一個 `<number_of_samples> * k` 大小的單精度浮點矩陣。@param dist 從輸入向量到相應鄰居的可選輸出距離。它是一個 `<number_of_samples> * k` 大小的單精度浮點矩陣。對于每個輸入向量（矩陣樣本的一行），該方法會找到 k 個最近鄰。在回歸的情況下，預測結果是特定向量的鄰居響應的平均值。在分類的情況下，通過投票確定類別。對于每個輸入向量，鄰居按它們到向量的距離排序。在 C++ 接口的情況下，您可以使用指向空矩陣的輸出指針，函數將自行分配內存。如果只傳遞一個輸入向量，則所有輸出矩陣都是可選的，并且預測值由方法返回。該函數與 TBB 庫并行化。*/CV_WRAP virtual float findNearest( InputArray samples, int k,OutputArray results,OutputArray neighborResponses=noArray(),OutputArray dist=noArray() ) const = 0; /** @brief 計算多邊形或光柵化形狀的所有三階矩。Calculates all of the moments up to the third order of a polygon or rasterized shape.The function computes moments, up to the 3rd order, of a vector shape or a rasterized shape. The results are returned in the structure cv::Moments. 該函數計算向量形狀或光柵化形狀的最高 3 階矩。結果在結構 cv::Moments 中返回。@param array Raster image (single-channel, 8-bit or floating-point 2D array) or an array (\f$1 \times N\f$ or \f$N \times 1\f$ ) of 2D points (Point or Point2f ). 光柵圖像（單通道、8 位或浮點二維數組）或二維點（Point 或 Point2f）的數組（乘 N 或 N 乘 1）。@param binaryImage If it is true, all non-zero image pixels are treated as 1's. The parameter is used for images only. 如果為真，則所有非零圖像像素都被視為 1。該參數僅用于圖像。 @returns moments矩.@note Only applicable to contour moments calculations from Python bindings: Note that the numpy type for the input array should be either np.int32 or np.float32. 僅適用于 Python 綁定的輪廓矩計算：請注意，輸入數組的 numpy 類型應為 np.int32 或 np.float32。@sa 輪廓區域，弧長contourArea, arcLength*/ CV_EXPORTS_W Moments moments( InputArray array, bool binaryImage = false );

KNN定義

K最近鄰(kNN，k-NearestNeighbor)分類算法是數據挖掘分類技術中最簡單的方法之一。通俗地理解，就是“近朱者赤，近墨者黑”。

KNN原理

為了判斷未知樣本的類別，以所有已知類別的樣本作為參照，計算未知樣本與所有已知樣本的距離，從中選取與未知樣本距離最近的K個已知樣本，根據少數服從多數的投票法則（majority-voting），將未知樣本歸入這K個最鄰近樣本中占比最多的那個類別。

算法的描述

1）計算測試數據與各個訓練數據之間的距離；

2）按照距離的遞增關系進行排序；

3）選取距離最小的K個點；

4）確定前K個點所在類別的出現頻率；

5）返回前K個點中出現頻率最高的類別作為測試數據的預測分類。

優點

1.簡單，易于理解，易于實現，無需估計參數，無需訓練；

2.適合對稀有事件進行分類；

3.特別適合于多分類問題（對象具有多個類別標簽）；在這類問題上 kNN 的表現有時優于 SVM，但并非總是如此——例如上文的運行結果中，SVM 的錯誤率（2.40 %）就低于 KNearest（2.80 %）。

缺點

1> 當訓練數據集很大時，需要大量的存儲空間，而且需要計算待測樣本和訓練數據集中所有樣本的距離，所以非常耗時；

2> KNN對于樣本不均衡，以及隨機分布的數據效果不好。

算法的使用場景：

1、適合用于類別間差異較大，同類別間數據差異較小的場景；

2、對于類別間的界限不清晰的場景，效果好于基于線性分類的邏輯回歸；

3、單個測試樣本計算都需要計算與訓練集中所有訓練樣本的距離，在數據量較大時會占用非常多的計算力并增加計算時間；

4、對于各個類別中數據數量差異較大的場景效果較差，特別在K取值又較大時，占數量優勢的類別對于結果的影響非常明顯。

參考：

機器學習算法—KNN算法原理及阿里云PAI平臺算法模塊參數說明-阿里云開發者社區 (aliyun.com)https://developer.aliyun.com/article/722515?spm=a2c6h.13148508.0.0.45304f0eaHYhmN

OpenCV圖像處理-KNN&決策樹算法 - 知乎 (zhihu.com)https://zhuanlan.zhihu.com/p/85636009

opencv手寫數字識別：SVM和KNearest - 知乎 (zhihu.com)https://zhuanlan.zhihu.com/p/401039799

基于視覺的特征匹配算法（持續更新） - 知乎 (zhihu.com)https://zhuanlan.zhihu.com/p/147325381

（四十五）OpenCV中的機器學習-用SVM做圖像識別 - 知乎 (zhihu.com)https://zhuanlan.zhihu.com/p/93224022

總結

以上是生活随笔為你收集整理的【opencv450-samples】digits_svm 手写数字识别SVM vs KNearest （SVM and KNearest digit recognition）的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： day02-java关键字
下一篇：游侠联机显示无法链接服务器,我的世界用游