データの処理:テストセットとトレーニングセットに分ける

1516 ワード

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;


/**
 * Splits a space-separated ratings file into a training file and a test file.
 *
 * <p>Each input line is reduced to its first three space-separated fields and
 * is then written, at random, to either the training or the test output.
 */
public class DataHandler {

	/** Path of the ratings file that is read. */
	static final String inputFile = "E:\\DataSet\\HOSVD\\artistsratings.dat";
	/** Path of the training-set file that is (over)written. */
	static final String TrainingFile = "E:\\DataSet\\HOSVD\\training.dat";
	/** Path of the test-set file that is (over)written. */
	static final String TestFile = "E:\\DataSet\\HOSVD\\testing.dat";

	/** Upper bound on the number of input lines that are consumed. */
	private static final int MAX_INPUT_LINES = 100000;
	/** Number of leading fields copied from every input line. */
	private static final int FIELDS_TO_KEEP = 3;

	/**
	 * Reads {@link #inputFile} and distributes its first {@value #MAX_INPUT_LINES}
	 * lines between {@link #TrainingFile} and {@link #TestFile}.
	 *
	 * <p>All three files are opened with try-with-resources, so they are closed
	 * (and the writers flushed) even when reading or writing fails part-way.
	 *
	 * @throws FileNotFoundException if the input file cannot be opened
	 * @throws IOException if reading or writing fails
	 */
	private static void CreateCsvRatingsFile() throws FileNotFoundException, IOException {
		try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
				BufferedWriter trainingWriter = new BufferedWriter(new FileWriter(TrainingFile));
				BufferedWriter testWriter = new BufferedWriter(new FileWriter(TestFile))) {
			splitRatings(reader, trainingWriter, testWriter, MAX_INPUT_LINES);
		}
	}

	/**
	 * Copies the first three space-separated fields of each line from
	 * {@code reader} to either {@code trainingWriter} or {@code testWriter}.
	 *
	 * <p>For every record a digit 0-9 is drawn with {@link Math#random()};
	 * digits 3-9 send the record to the training output, digits 0-2 to the
	 * test output. Lines with fewer than three fields (including blank lines)
	 * are skipped but still count towards {@code maxLines}.
	 *
	 * <p>The streams are neither flushed nor closed here; that is left to the caller.
	 *
	 * @param reader source of input lines
	 * @param trainingWriter destination for training records
	 * @param testWriter destination for test records
	 * @param maxLines maximum number of input lines to consume
	 * @return the number of records written to both outputs combined
	 * @throws IOException if reading or writing fails
	 */
	static int splitRatings(BufferedReader reader, BufferedWriter trainingWriter,
			BufferedWriter testWriter, int maxLines) throws IOException {
		int linesRead = 0;
		int recordsWritten = 0;
		String line;
		// Check the limit first so that no line beyond the limit is consumed.
		while (linesRead < maxLines && (line = reader.readLine()) != null) {
			linesRead++;
			String[] fields = line.split(" ");
			if (fields.length < FIELDS_TO_KEEP) {
				// Malformed or blank line: skip it instead of failing on a missing field.
				continue;
			}
			BufferedWriter target = (int) (Math.random() * 10) > 2 ? trainingWriter : testWriter;
			target.write(fields[0] + " " + fields[1] + " " + fields[2]);
			target.newLine();
			recordsWritten++;
		}
		return recordsWritten;
	}

	/**
	 * Program entry point: performs the training/test split.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if the input cannot be read or an output cannot be written
	 */
	public static void main(String[] args) throws IOException {
		CreateCsvRatingsFile();
	}
}