挖金子---小爬虫
馬薩瑪索(http://www.masamaso.com/index.shtml)每天10點都會推出一折商品5件,就是秒購。男裝質量還不錯,所以就經常去搶,感覺手動太慢了,就寫了一個小爬蟲程序,讓自己去爬,如果是金子頁面(免費商品)就會自動打開,我就可以搶到了。和大家分享一下。這個應該不算廣告吧,之所以給鏈接和網站名是想著便于各位感興趣看官測試,如果管理員覺得不妥,請通知哈,我再修改,不要直接封我哈,謝了。
思路:
1. 把所有想要的商品的鏈接讀到程序中。
2. 分別打開每一個鏈接讀取源代碼
3. 驗證是否是金子商品(源代碼中含有free_msg字符串)
4. 如果是金子就把該鏈接用IE打開
源代碼:
讀鏈接文件:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

/**
 * Reads a text file (here: the list of product links) line by line.
 *
 * @author Administrator
 */
public class FileReader {
    private String fileName;

    public FileReader() {
    }

    public FileReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Reads all lines of the configured file.
     *
     * <p>Failures are reported on stderr and never propagate: if no file name
     * was set or the file cannot be opened, an empty list is returned; if
     * reading fails part-way, the lines read so far are returned.
     *
     * @return the lines of the file in order, never {@code null}
     */
    public List<String> getLines() {
        List<String> lines = new LinkedList<String>();
        if (this.fileName == null) {
            // No file configured (no-arg constructor): nothing to read.
            return lines;
        }

        BufferedReader reader;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            // Bail out here; continuing with a null reader would throw NullPointerException.
            return lines;
        }

        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return lines;
    }
}
URL類:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Wraps a URL string and offers a line-based substring search over the
 * content served at that URL.
 *
 * @author Administrator
 */
public class Url {
    private String url;

    public Url() {
    }

    public Url(String url) {
        this.url = url;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Opens a connection object for the wrapped URL.
     *
     * @return the connection, or {@code null} if the URL is malformed or the
     *         connection cannot be created (the error is printed to stderr)
     */
    public URLConnection getConnection() {
        URL httpUrl;
        try {
            httpUrl = new URL(url);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return null;
        }
        try {
            return httpUrl.openConnection();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Connects (9 second connect timeout) and wraps the response body in a
     * buffered reader.
     *
     * @return a reader over the content, or {@code null} if no connection
     *         could be obtained or connecting failed
     */
    public BufferedReader getReader() {
        URLConnection conn = getConnection();
        if (conn == null) {
            return null;
        }
        conn.setConnectTimeout(9000);
        try {
            conn.connect();
            return new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the content line by line and reports whether any single line
     * contains {@code str}. Reading stops at the first match.
     *
     * @param str the text to look for; must not be {@code null}
     * @return {@code true} if some line contains {@code str}; {@code false}
     *         if none does, if the content could not be opened, or if reading
     *         failed before a match was found
     */
    public boolean isExist(String str) {
        BufferedReader bis = getReader();
        if (bis == null) {
            // getReader() signals failure with null; treat an unreachable page as "not found"
            // instead of dereferencing null below.
            return false;
        }
        boolean exist = false;
        try {
            String line;
            while ((line = bis.readLine()) != null) {
                if (line.contains(str)) {
                    exist = true;
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return exist;
    }
}
Digger類:
import java.io.IOException; import java.util.List;/*** @author Administrator**/ public class Digger extends Thread{private Url url;public Digger() {super();}public Digger(Url url) {this.url = url;}/*** main方法,把配置信息(鏈接)讀入程序,并為每一個鏈接開啟一個線程* @param args* @throws IOException*/public static void main(String[] args) throws IOException {FileReader reader = new FileReader("F:/myworkspace/workspace/diggold/src/url.txt");List<String> urls = reader.getLines();for (String string : urls) {Url url = new Url(string);Digger digger = new Digger(url);digger.start();} // Runtime.getRuntime().exec("C:/Program Files/Internet Explorer/iexplore.exe http://www.masamaso.com/index.shtml");}/*** 查看該鏈接是否存在free_msg字段,存在即為金子 用IE打開該鏈接*/@Overridepublic void run() {if(url.isExist("free_msg")) {try {Runtime.getRuntime().exec("C:/Program Files/Internet Explorer/iexplore.exe " + url.getUrl());} catch (IOException e) {e.printStackTrace();}}System.out.println(url.getUrl() + "END!");} }
url.txt配置文件
http://www.masamaso.com/goods.php?id=3128
http://www.masamaso.com/goods.php?id=3132
http://www.masamaso.com/goods.php?id=3120
寫的比較簡單,但是挺實用,各位看官莫笑話哈。
思路:
1. 把所有想要的商品的鏈接讀到程序中。
2. 分別打開每一個鏈接讀取源代碼
3. 驗證是否是金子商品(源代碼中含有free_msg字符串)
4. 如果是金子就把該鏈接用IE打開
源代碼:
讀鏈接文件:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

/**
 * Reads a text file (here: the list of product links) line by line.
 *
 * @author Administrator
 */
public class FileReader {
    private String fileName;

    public FileReader() {
    }

    public FileReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Reads all lines of the configured file.
     *
     * <p>Failures are reported on stderr and never propagate: if no file name
     * was set or the file cannot be opened, an empty list is returned; if
     * reading fails part-way, the lines read so far are returned.
     *
     * @return the lines of the file in order, never {@code null}
     */
    public List<String> getLines() {
        List<String> lines = new LinkedList<String>();
        if (this.fileName == null) {
            // No file configured (no-arg constructor): nothing to read.
            return lines;
        }

        BufferedReader reader;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            // Bail out here; continuing with a null reader would throw NullPointerException.
            return lines;
        }

        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return lines;
    }
}
URL類:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Wraps a URL string and offers a line-based substring search over the
 * content served at that URL.
 *
 * @author Administrator
 */
public class Url {
    private String url;

    public Url() {
    }

    public Url(String url) {
        this.url = url;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Opens a connection object for the wrapped URL.
     *
     * @return the connection, or {@code null} if the URL is malformed or the
     *         connection cannot be created (the error is printed to stderr)
     */
    public URLConnection getConnection() {
        URL httpUrl;
        try {
            httpUrl = new URL(url);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return null;
        }
        try {
            return httpUrl.openConnection();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Connects (9 second connect timeout) and wraps the response body in a
     * buffered reader.
     *
     * @return a reader over the content, or {@code null} if no connection
     *         could be obtained or connecting failed
     */
    public BufferedReader getReader() {
        URLConnection conn = getConnection();
        if (conn == null) {
            return null;
        }
        conn.setConnectTimeout(9000);
        try {
            conn.connect();
            return new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the content line by line and reports whether any single line
     * contains {@code str}. Reading stops at the first match.
     *
     * @param str the text to look for; must not be {@code null}
     * @return {@code true} if some line contains {@code str}; {@code false}
     *         if none does, if the content could not be opened, or if reading
     *         failed before a match was found
     */
    public boolean isExist(String str) {
        BufferedReader bis = getReader();
        if (bis == null) {
            // getReader() signals failure with null; treat an unreachable page as "not found"
            // instead of dereferencing null below.
            return false;
        }
        boolean exist = false;
        try {
            String line;
            while ((line = bis.readLine()) != null) {
                if (line.contains(str)) {
                    exist = true;
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return exist;
    }
}
Digger類:
import java.io.IOException; import java.util.List;/*** @author Administrator**/ public class Digger extends Thread{private Url url;public Digger() {super();}public Digger(Url url) {this.url = url;}/*** main方法,把配置信息(鏈接)讀入程序,并為每一個鏈接開啟一個線程* @param args* @throws IOException*/public static void main(String[] args) throws IOException {FileReader reader = new FileReader("F:/myworkspace/workspace/diggold/src/url.txt");List<String> urls = reader.getLines();for (String string : urls) {Url url = new Url(string);Digger digger = new Digger(url);digger.start();} // Runtime.getRuntime().exec("C:/Program Files/Internet Explorer/iexplore.exe http://www.masamaso.com/index.shtml");}/*** 查看該鏈接是否存在free_msg字段,存在即為金子 用IE打開該鏈接*/@Overridepublic void run() {if(url.isExist("free_msg")) {try {Runtime.getRuntime().exec("C:/Program Files/Internet Explorer/iexplore.exe " + url.getUrl());} catch (IOException e) {e.printStackTrace();}}System.out.println(url.getUrl() + "END!");} }
url.txt配置文件
http://www.masamaso.com/goods.php?id=3128
http://www.masamaso.com/goods.php?id=3132
http://www.masamaso.com/goods.php?id=3120
寫的比較簡單,但是挺實用,各位看官莫笑話哈。
- diggold.rar (7.5 KB)
- 下載次數: 299
總結(jié)
- 上一篇: 【国内下载Android系统源码的方法】
- 下一篇: java如何生成jar包