Java整合OCR识别验证码
生活随笔
收集整理的這篇文章主要介紹了
Java整合OCR识别验证码
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
首先需要下載OCR軟件,這里使用tesseract-ocr軟件好了
下載地址:點擊下載
下載好了,就安裝。
我們可以測試一下,在命令行輸入tesseract 會出現以下情況
如果沒有出現以下情況,需要手動的配置環境變量,方法如下:將安裝目錄添加到path中。
由于有的驗證碼有干擾點和顏色差,這里提供源碼,將圖片進行處理。
ClearImageHelper.java
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

/**
 * Pre-processes a CAPTCHA image before it is handed to an OCR engine.
 *
 * <p>The pipeline brightens the image, converts it to grayscale, binarizes it
 * with a threshold chosen by {@link #ostu(int[][], int, int)}, and finally
 * clears black pixels that have no black neighbour.
 */
public class ClearImageHelper {

    /** Number of distinct gray levels (0..255) tracked by the histogram. */
    private static final int GRAY_LEVELS = 256;

    /** A pixel whose red + green + blue sum is at most this value counts as black. */
    private static final int BLACK_SUM_LIMIT = 300;

    private static final int WHITE_RGB = 0xFFFFFF;
    private static final int BLACK_RGB = 0x000000;

    /** Cleans {@code 1.jpg} from the working directory and writes the result to {@code 2.jpg}. */
    public static void main(String[] args) throws IOException {
        cleanImage(new File("1.jpg"), "2.jpg");
    }

    /**
     * Reads an image, cleans it and writes the binarized result as a JPEG.
     *
     * @param sfile   the image to process
     * @param destDir path of the output <em>file</em> (despite the name); missing
     *                parent directories are created
     * @throws IOException if the input cannot be decoded, the parent directory
     *                     cannot be created, or the output cannot be written
     */
    public static void cleanImage(File sfile, String destDir) throws IOException {
        File destF = new File(destDir);
        // Create the parent directory, not a directory named after the output file.
        File parent = destF.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create output directory: " + parent);
        }

        BufferedImage bufferedImage = ImageIO.read(sfile);
        if (bufferedImage == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new IOException("Unsupported or unreadable image: " + sfile);
        }
        int h = bufferedImage.getHeight();
        int w = bufferedImage.getWidth();

        int[][] gray = toBrightenedGray(bufferedImage, w, h);
        int threshold = ostu(gray, w, h);
        BufferedImage binaryBufferedImage = binarize(gray, w, h, threshold);
        removeNoise(binaryBufferedImage, w, h);

        if (!ImageIO.write(binaryBufferedImage, "jpg", destF)) {
            throw new IOException("No image writer available for format jpg");
        }
    }

    /** Brightens every pixel and converts it to a gamma-weighted gray level. */
    private static int[][] toBrightenedGray(BufferedImage image, int w, int h) {
        int[][] gray = new int[w][h];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                int argb = image.getRGB(x, y);
                int r = brighten((argb >> 16) & 0xFF);
                int g = brighten((argb >> 8) & 0xFF);
                int b = brighten(argb & 0xFF);
                gray[x][y] = (int) Math.pow(
                        Math.pow(r, 2.2) * 0.2973
                                + Math.pow(g, 2.2) * 0.6274
                                + Math.pow(b, 2.2) * 0.0753,
                        1 / 2.2);
            }
        }
        return gray;
    }

    /** Scales a channel by 1.1, adds 30 and clamps the result to 255. */
    private static int brighten(int channel) {
        return Math.min(255, (int) (channel * 1.1 + 30));
    }

    /** Builds a black/white image: gray levels above the threshold become white. */
    private static BufferedImage binarize(int[][] gray, int w, int h, int threshold) {
        BufferedImage binary = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                binary.setRGB(x, y, gray[x][y] > threshold ? WHITE_RGB : BLACK_RGB);
            }
        }
        return binary;
    }

    /**
     * Turns black pixels white when their 3x3 neighbourhood holds at most one
     * black pixel (the pixel itself). The image is updated in place while it is
     * scanned row by row, so earlier removals affect later counts.
     */
    private static void removeNoise(BufferedImage binary, int w, int h) {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                if (isBlack(binary.getRGB(x, y)) && totalBlack(binary, x, y) <= 1) {
                    binary.setRGB(x, y, WHITE_RGB);
                }
            }
        }
    }

    /**
     * Counts the black pixels in the 3x3 neighbourhood centred on (x, y),
     * including the centre pixel; positions outside the image are skipped.
     *
     * @return the number of black pixels found
     */
    public static int totalBlack(BufferedImage binaryBufferedImage, int x, int y) {
        int width = binaryBufferedImage.getWidth();
        int height = binaryBufferedImage.getHeight();
        int sum = 0;
        for (int i = Math.max(0, x - 1); i <= x + 1 && i < width; i++) {
            for (int j = Math.max(0, y - 1); j <= y + 1 && j < height; j++) {
                if (isBlack(binaryBufferedImage.getRGB(i, j))) {
                    sum++;
                }
            }
        }
        return sum;
    }

    /** Returns {@code true} when red + green + blue of the colour is at most 300. */
    public static boolean isBlack(int colorInt) {
        return getColorBright(colorInt) <= BLACK_SUM_LIMIT;
    }

    /** Returns {@code true} when red + green + blue of the colour is above 300. */
    public static boolean isWhite(int colorInt) {
        return getColorBright(colorInt) > BLACK_SUM_LIMIT;
    }

    /** Returns 1 when the channel sum is below 30 or above 730, otherwise 0. */
    public static int isBlackOrWhite(int colorInt) {
        int bright = getColorBright(colorInt);
        return (bright < 30 || bright > 730) ? 1 : 0;
    }

    /** Returns the sum of the red, green and blue channels (0..765). */
    public static int getColorBright(int colorInt) {
        Color color = new Color(colorInt);
        return color.getRed() + color.getGreen() + color.getBlue();
    }

    /**
     * Picks a binarization threshold by maximising the between-class variance
     * of the gray-level histogram (Otsu's method).
     *
     * @param gray gray levels indexed as {@code gray[x][y]}; only the low 8 bits
     *             of each entry are used
     * @param w    number of columns to read
     * @param h    number of rows to read
     * @return the chosen threshold in 0..255; 0 when all pixels share one level
     */
    public static int ostu(int[][] gray, int w, int h) {
        // The histogram is indexed by gray level, so it needs 256 bins
        // regardless of how many pixels the image has.
        int[] histData = new int[GRAY_LEVELS];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                histData[0xFF & gray[x][y]]++;
            }
        }

        long total = (long) w * h;
        double sum = 0;
        for (int t = 0; t < GRAY_LEVELS; t++) {
            // Widen before multiplying so large images cannot overflow int.
            sum += (double) t * histData[t];
        }

        double sumB = 0;
        long wB = 0;
        double varMax = 0;
        int threshold = 0;
        for (int t = 0; t < GRAY_LEVELS; t++) {
            wB += histData[t]; // weight of the background class
            if (wB == 0) {
                continue;
            }
            long wF = total - wB; // weight of the foreground class
            if (wF == 0) {
                break;
            }
            sumB += (double) t * histData[t];
            double mB = sumB / wB;         // mean of the background
            double mF = (sum - sumB) / wF; // mean of the foreground
            double varBetween = (double) wB * (double) wF * (mB - mF) * (mB - mF);
            if (varBetween > varMax) {
                varMax = varBetween;
                threshold = t;
            }
        }
        return threshold;
    }
}
// Article note: this algorithm still needs improvement and only handles some
// kinds of CAPTCHA. Running the code above produces 2.jpg, the cleaned-up
// CAPTCHA image.
我們接下來對2.jpg進行識別。
這樣字符串validate就是驗證碼的文字了。
原理就是Java代理模擬cmd命令,執行了tesseract 2.jpg 1 命令。
tesseract識別2.jpg,并將圖片內容輸出到1.txt文件中。再將1.txt文件讀取到代碼中。
總結
以上是生活随笔為你收集整理的Java整合OCR识别验证码的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: gcd算法以及exgcd
- 下一篇: 创业管理:创业者的十八般武艺——1.2