Java读写大文本文件(2GB以上)
生活随笔
收集整理的這篇文章主要介紹了
Java读写大文本文件(2GB以上)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
如下的程序,將一個行數為fileLines的文本文件平均分為若干個小文本文件(splitNum為塊數減一)。注意:linux上的換行符是'\n'(單獨的'\r'是舊版Mac OS的換行符),windows上的換行符是'\r\n':
package kddcup2012.task2.FileSystem;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Splits a large text file into numbered part files by streaming it line by line,
 * so memory use is bounded by the read buffer rather than by the file size.
 *
 * <p>The input path, output prefix, part count and total line count are hard-coded
 * in {@link #main(String[])}.
 */
public class FileSplit {

    /** Size of the read buffer handed to the {@link BufferedReader}: 20 MB. */
    private static final int READ_BUFFER_SIZE = 20 * 1024 * 1024;

    /**
     * Splits the hard-coded input file into {@code splitNum + 1} part files and
     * prints the elapsed time in milliseconds.
     *
     * @param args ignored
     * @throws IOException if the input cannot be read or a part file cannot be written
     */
    public static void main(String[] args) throws IOException {
        long timer = System.currentTimeMillis();

        File file = new File("/media/Data/畢業(yè)設計/kdd cup/數(shù)據(jù)/userid_profile.txt");

        int splitNum = 112 - 1;   // number of parts minus one
        int fileLines = 23669283; // number of lines in the input file
        // Dividing by (parts - 1) makes the first splitNum parts equally sized and
        // leaves the remainder (always fewer than splitNum lines) for the last part.
        long perSplitLines = fileLines / splitNum;

        // try-with-resources closes the reader and every writer even when an
        // IOException is thrown part-way through the split.
        try (BufferedReader input = new BufferedReader(
                new InputStreamReader(new BufferedInputStream(new FileInputStream(file))),
                READ_BUFFER_SIZE)) {
            for (int i = 0; i <= splitNum; ++i) {
                try (BufferedWriter output =
                        new BufferedWriter(new FileWriter("/home/haoqiong/part" + i + ".txt"))) {
                    copyLines(input, output, perSplitLines);
                }
            }
        }

        timer = System.currentTimeMillis() - timer;
        System.out.println("處理時間:" + timer);
    }

    /**
     * Copies up to {@code maxLines} lines from {@code input} to {@code output}.
     *
     * <p>Each line is terminated with the platform line separator. The previous
     * code appended a bare {@code "\r"}, which is not a line terminator on Linux
     * or Windows.
     *
     * @param input    source of lines
     * @param output   destination for the copied lines
     * @param maxLines maximum number of lines to copy
     * @return the number of lines actually copied (smaller than {@code maxLines}
     *         only when the input is exhausted)
     * @throws IOException if reading or writing fails
     */
    private static long copyLines(BufferedReader input, BufferedWriter output, long maxLines)
            throws IOException {
        long copied = 0;
        String line;
        // The count is checked first so that no line is consumed and then dropped
        // once the quota for this part has been reached.
        while (copied < maxLines && (line = input.readLine()) != null) {
            output.write(line);
            output.newLine();
            copied++;
        }
        return copied;
    }
}
以上程序處理大文本文件只需要30MB左右的內存空間(這和所設的讀取緩沖大小有關),但是速度不是很快,在磁盤沒有其他程序占用的情況下,將200MB文件分割為112份需要20秒(機器配置:Centrino2 P7450 CPU,2GB DDR3內存,Ubuntu 11.10系統,硬盤最大讀寫速度大約60MB/S)。
另外,對於幾百兆到2GB大小的文件,使用內存映射文件的話,速度會快一些,但是單次內存映射的長度不能超過java中int類型的最大值,所以一次映射只能處理2GB以下的內容(更大的文件需要分段映射)。
?
// Reading a huge text file in Java without running out of memory while keeping good performance.
package helloword.helloword;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

/**
 * Three ways of reading a large file without loading it into memory at once:
 * a memory-mapped buffer ({@link #main1}), a {@link FileChannel} with a small
 * {@link ByteBuffer} ({@link #main2}, {@link #main}) and a {@link BufferedReader}
 * ({@link #main3}). Every variant prints the file content to standard output.
 */
public class ReadBig {

    /** Path of the file read by {@link #main1}, {@link #main2} and {@link #main3}. */
    public static String fff = "C:\\mq\\read\\from.xml";

    /**
     * Memory-maps the second half of {@link #fff} and prints it in chunks of up to 3 MB.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or mapped (a single mapping
     *                   cannot be larger than {@code Integer.MAX_VALUE} bytes)
     */
    public static void main1(String[] args) throws Exception {
        final int BUFFER_SIZE = 0x300000; // chunk size: 3 MB
        File f = new File(fff);
        byte[] dst = new byte[BUFFER_SIZE]; // holds one chunk at a time

        try (RandomAccessFile raf = new RandomAccessFile(f, "r");
             FileChannel channel = raf.getChannel()) {
            long half = f.length() / 2;
            MappedByteBuffer inputBuffer = channel.map(FileChannel.MapMode.READ_ONLY, half, half);

            long start = System.currentTimeMillis();
            while (inputBuffer.hasRemaining()) {
                // Only the final chunk may be shorter than BUFFER_SIZE. The previous
                // code used capacity % BUFFER_SIZE for every chunk, which truncated
                // all full chunks whenever the mapped size was not a multiple of it.
                int length = Math.min(inputBuffer.remaining(), BUFFER_SIZE);
                inputBuffer.get(dst, 0, length); // bulk copy instead of byte-by-byte get(int)
                // NOTE(review): decoding per chunk with the platform charset can split a
                // multi-byte character at a chunk boundary.
                System.out.println(new String(dst, 0, length));
            }
            long end = System.currentTimeMillis();
            System.out.println("讀取文件文件一半內(nèi)容花費:" + (end - start) + "毫秒");
        }
    }

    /**
     * Prints {@link #fff} using a {@link FileChannel} and a 1 KB buffer.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main2(String[] args) throws Exception {
        printFile(fff);
    }

    /**
     * Prints {@link #fff} line by line using a {@link BufferedReader}.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main3(String[] args) throws Exception {
        try (BufferedReader br = new BufferedReader(new FileReader(fff))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Prints the file {@code d:\filename} using a {@link FileChannel} and a 1 KB buffer.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        printFile("d:\\filename");
    }

    /**
     * Streams the file at {@code path} to standard output through a 1 KB heap buffer,
     * treating the bytes as text in the platform charset.
     */
    private static void printFile(String path) throws Exception {
        int bufSize = 1024;
        ByteBuffer byteBuf = ByteBuffer.allocate(bufSize);
        try (RandomAccessFile raf = new RandomAccessFile(path, "r");
             FileChannel channel = raf.getChannel()) {
            while (channel.read(byteBuf) != -1) {
                // After a read, position() is the number of bytes now in the buffer;
                // a heap buffer exposes them directly through array().
                System.out.print(new String(byteBuf.array(), 0, byteBuf.position()));
                byteBuf.clear();
            }
        }
    }
}
// Question: reading a large file in Java runs out of memory. How can it be read
// a few lines at a time, over several reads?
// Best answer:
package helloword.helloword;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

/**
 * Examples of processing a large text file in bounded memory: reading it in
 * batches of lines ({@link #main}) and copying it line by line
 * ({@link #largeFileIO}).
 */
public class TestPrint {

    /** Number of lines collected before a batch is handed to {@link #processBatch}. */
    private static final int BATCH_LINES = 100;

    /**
     * Reads the file in batches of {@value #BATCH_LINES} lines.
     *
     * <p>The file is opened read-only through a buffered reader. The previous code
     * opened it {@code "rw"} (which creates a missing file), used the unbuffered
     * {@code RandomAccessFile.readLine()}, and started each batch from {@code null},
     * so the concatenated text began with the literal {@code "null"}.
     *
     * @param args ignored
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = "你要讀的文件的路徑";
        StringBuilder batch = new StringBuilder();
        int linesInBatch = 0;

        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String str;
            while ((str = br.readLine()) != null) {
                batch.append(str); // lines are concatenated without a separator, as before
                linesInBatch++;
                if (linesInBatch >= BATCH_LINES) {
                    processBatch(batch);
                    batch.setLength(0);
                    linesInBatch = 0;
                }
            }
        }
        // The last batch is usually shorter than BATCH_LINES; do not drop it.
        if (linesInBatch > 0) {
            processBatch(batch);
        }
    }

    /** Placeholder: operate on one batch of concatenated lines here. */
    private static void processBatch(CharSequence batch) {
        // intentionally empty
    }

    /**
     * Copies {@code inputFile} to {@code outputFile} line by line through a 10 MB
     * read buffer; recommended for text files larger than 2 GB.
     *
     * <p>Both files are treated as UTF-8. An {@link IOException} is reported on
     * standard error and not rethrown.
     *
     * @param inputFile  path of the file to read
     * @param outputFile path of the file to write (overwritten)
     */
    void largeFileIO(String inputFile, String outputFile) {
        try (BufferedReader in = new BufferedReader(
                     new InputStreamReader(
                             new BufferedInputStream(new FileInputStream(new File(inputFile))),
                             StandardCharsets.UTF_8),
                     10 * 1024 * 1024); // 10 MB buffer
             BufferedWriter out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
            String line;
            // readLine() returning null is the only reliable end-of-file signal;
            // ready() merely reports whether a read would block.
            while ((line = in.readLine()) != null) {
                out.write(line);
                // NOTE(review): the separator is a single space, so the output is one
                // long line - confirm whether a newline was intended.
                out.write(" ");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

// The JDK itself supports reading and writing very large files.
//
// Articles online fall into two groups: one reads and writes very large files with
// BufferedReader, the other reads them with RandomAccessFile. After comparing the
// two, the first approach was chosen for reading very large files. The relevant
// code is below and is quite simple:
// -------------------------------------------------------------------
// File file = new File(filepath);
// BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));
// BufferedReader reader = new BufferedReader(new InputStreamReader(fis, "utf-8"), 5 * 1024 * 1024); // read the text file through a 5 MB buffer
// String line = "";
// while ((line = reader.readLine()) != null) {
//     // TODO: write your business
// }
// -------------------------------------------------------------------
// Note: when instantiating the BufferedReader, simply add the buffer-size argument.
//
// Summary
以上是生活随笔為你收集整理的Java读写大文本文件(2GB以上)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Adapter
- 下一篇: 《Agile Impressions》作