62,615
社区成员
发帖
与我相关
我的任务
分享
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
/**
 * Generates small test files whose lines have the form
 * {@code key|digest|digest}, terminated by the platform line separator.
 *
 * <p>Each file holds {@link #FILE_LINE} lines with distinct, randomly chosen
 * non-negative keys. The part after the key is picked at random from a small
 * pool of pre-computed digest pairs.
 */
public class FileGenerator {
    /**
     * Digest used by {@link #encrypte(String)}. It starts as SHA-1 and is
     * switched to SHA-512 by every call to {@link #generateFile(String)}.
     */
    private static MessageDigest md;
    public final static String FIELD_SEPARATOR = "|";
    public final static String LINE_SEPARATOR = System.getProperty("line.separator");
    public final static int FILE_LINE = 10;
    /** Number of distinct record bodies that generated lines choose from. */
    private final static int CONTENT_VARIANTS = 4;

    /** Writes the two sample files and prints "ok" after each one. */
    public static void main(String[] args) {
        generateFile("f:/diff1.txt");
        System.out.println("ok");
        generateFile("f:/diff2.txt");
        System.out.println("ok");
    }

    static {
        init("sha-1");
    }

    /**
     * Replaces the shared digest with one for the given algorithm. If the
     * algorithm is unknown the stack trace is printed and the previous digest
     * stays in place.
     *
     * @param algorithm algorithm name understood by {@link MessageDigest#getInstance(String)}
     */
    private static void init(String algorithm) {
        try {
            md = MessageDigest.getInstance(algorithm);
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    }

    /**
     * Hashes the text with the currently selected digest.
     *
     * @param plainText text to hash; its bytes are taken in the platform default charset
     * @return the digest as lower-case hexadecimal, two characters per byte
     */
    public static String encrypte(String plainText) {
        md.update(plainText.getBytes());
        byte[] digest = md.digest();
        StringBuilder output = new StringBuilder(digest.length * 2);
        for (int i = 0; i < digest.length; i++) {
            int unsigned = digest[i] & 0xff;
            if (unsigned < 0x10) {
                // Integer.toHexString drops the leading zero of small values.
                output.append('0');
            }
            output.append(Integer.toHexString(unsigned));
        }
        return output.toString();
    }

    /**
     * Writes {@link #FILE_LINE} lines of the form {@code key|digest|digest} to
     * the given file, replacing any existing content. I/O failures are
     * reported on standard error and the writer is closed in every case.
     *
     * @param filename path of the file to create
     */
    public static void generateFile(String filename) {
        init("sha-512");
        String[] content = new String[CONTENT_VARIANTS];
        for (int i = 0; i < content.length; i++) {
            content[i] = encrypte((i + System.nanoTime()) + "") + FIELD_SEPARATOR + encrypte(i + "");
        }

        Random ran = new Random();
        Set<Integer> usedKeys = new HashSet<Integer>();
        // Keys are drawn from a range 20% larger than the line count so that
        // some keys are left out of every file.
        int keyBound = FILE_LINE + (FILE_LINE * 2) / 10;
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(new File(filename)));
            for (int i = 0; i < FILE_LINE; i++) {
                int key;
                do {
                    key = ran.nextInt(keyBound);
                } while (!usedKeys.add(key)); // add() returns false for a key already used
                // Pick the body with Random: System.nanoTime() may be negative,
                // so nanoTime() % 4 is not a safe array index.
                String body = content[ran.nextInt(content.length)];
                bw.write(key + FIELD_SEPARATOR + body + LINE_SEPARATOR);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(bw);
        }
    }

    /** Closes the resource if it was opened, reporting a failure on standard error. */
    private static void close(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/*************************************
 * Continued from the previous post. *
 *************************************/
/**
 * Reads the key of every line of a file and returns the keys in ascending order.
 *
 * <p>The returned array holds exactly one entry per line read. It is not padded
 * up to {@code size}, because padding zeros would make key 0 look present in a
 * later binary search. Files with more lines than {@code size} are read
 * completely; the buffer grows as needed. I/O failures are reported on standard
 * error and the keys read so far are returned.
 *
 * @param file file to read
 * @param size expected maximum number of lines, used as the initial capacity
 * @return the sorted keys, one per line read
 */
private int[] readKey(File file, int size) {
    int[] keys = new int[Math.max(size, 0)];
    int count = 0;
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(file));
        String line;
        while ((line = br.readLine()) != null) {
            if (count == keys.length) {
                keys = Arrays.copyOf(keys, Math.max(16, keys.length * 2));
            }
            keys[count++] = getKey(line);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    if (count < keys.length) {
        // Drop the unused tail so that only real keys take part in the sort.
        keys = Arrays.copyOf(keys, count);
    }
    Arrays.sort(keys);
    System.out.printf(" %s 的键读取完成,大小 %d%n", file.getName(), keys.length);
    System.gc();
    return keys;
}
/**
 * Maps a source file to parallel key/value arrays, skipping excluded records.
 *
 * <p>Every line whose key does not occur in {@code excludeFile} contributes its
 * key and the int produced by {@code getValue} for the whole line. Both rows are
 * trimmed to the number of records kept, so no padding zeros can be mistaken for
 * key 0. I/O failures are reported on standard error and the records read so far
 * are returned.
 *
 * @param file        source file to map
 * @param fileLine    expected maximum number of lines in the source file
 * @param excludeFile file whose keys are skipped (the deleted/added record set)
 * @param excludeSize number of lines in {@code excludeFile}
 * @return a two-row array: row 0 holds the keys in ascending order, row 1 holds
 *         the mapped value of the record at the same index
 */
private int[][] readFile(File file, int fileLine, File excludeFile, int excludeSize) {
    int[] excludeKeys = readKey(excludeFile, excludeSize);
    int capacity = Math.max(fileLine - excludeSize, 0);
    int[] keys = new int[capacity];
    int[] values = new int[capacity];
    int count = 0;
    BufferedReader br = null;
    try {
        // Read the file passed in; the original opened the oldFile field here.
        br = new BufferedReader(new FileReader(file));
        String line;
        while ((line = br.readLine()) != null) {
            int key = getKey(line);
            if (Arrays.binarySearch(excludeKeys, key) < 0) {
                if (count == keys.length) {
                    int grown = Math.max(16, keys.length * 2);
                    keys = Arrays.copyOf(keys, grown);
                    values = Arrays.copyOf(values, grown);
                }
                keys[count] = key;
                values[count] = getValue(line);
                count++;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    if (count < keys.length) {
        // Drop the unused tail before sorting.
        keys = Arrays.copyOf(keys, count);
        values = Arrays.copyOf(values, count);
    }
    int[][] map = new int[][] { keys, values };
    sort(map, 0, count);
    System.out.printf(" %s 文件映射完成,记录数 %d%n", file.getName(), count);
    System.gc();
    return map;
}
/******************************************************
 * Core of the comparison: its outcome depends on this
 * method. It currently uses the string's hash code,
 * which leaves room for improvement.
 *
 * To save memory every record is reduced to one int.
 * The goal is:
 *   equal strings map to equal ints;
 *   different strings map to different ints.
 *
 * @param str string to convert
 * @return the "int value" of the string
 ******************************************************/
private int getValue(String str) {
    final int mapped = str.hashCode();
    return mapped;
}
/**
 * Extracts the key from one line: the decimal digits in front of the first '|'.
 *
 * @param str line of the form {@code key|...}
 * @return the key as an int
 */
private int getKey(String str) {
    final char[] digits = str.toCharArray();
    int value = 0;
    // Accumulate digit by digit until the field separator is reached.
    for (int pos = 0; digits[pos] != '|'; pos++) {
        value = value * 10 + (digits[pos] - '0');
    }
    return value;
}
/**
 * Sorting algorithm copied from Arrays.sort() in the JDK sources and adapted:
 * the elements are ordered by x[0], and every swap of two keys also swaps the
 * entries of x[1] at the same positions, so each value stays with its key.
 *
 * Before sorting:
 * [0] [1]
 * 63 9
 * 51 12
 * 7 33
 * 48 15
 * 45 82
 * 55 76
 * After sorting ([0] is the key, [1] the value; values move with their keys):
 * 7 33
 * 45 82
 * 48 15
 * 51 12
 * 55 76
 * 63 9
 *
 * @param x   two-row array; row 0 holds the keys compared here, row 1 the values
 * @param off index of the first element of the range to sort
 * @param len number of elements in the range
 */
private void sort(int x[][], int off, int len) {
// Ranges shorter than 7 elements are sorted by insertion.
if (len < 7) {
for (int i = off; i < len + off; i++)
for (int j = i; j > off && x[0][j - 1] > x[0][j]; j--)
swap(x, j, j - 1);
return;
}
// Choose a partition element, v
int m = off + (len >> 1); // Small arrays, middle element
if (len > 7) {
int l = off;
int n = off + len - 1;
if (len > 40) { // Big arrays, pseudomedian of 9
int s = len / 8;
l = med3(x, l, l + s, l + 2 * s);
m = med3(x, m - s, m, m + s);
n = med3(x, n - 2 * s, n - s, n);
}
m = med3(x, l, m, n); // Mid-size, med of 3
}
int v = x[0][m];
// Partition the range: keys equal to v are moved to both ends
// ([off, a) and (d, end]), smaller keys follow on the left and larger
// keys precede on the right, while b and c scan towards each other.
int a = off, b = a, c = off + len - 1, d = c;
while (true) {
while (b <= c && x[0][b] <= v) {
if (x[0][b] == v)
swap(x, a++, b);
b++;
}
while (c >= b && x[0][c] >= v) {
if (x[0][c] == v)
swap(x, c, d--);
c--;
}
if (b > c)
break;
swap(x, b++, c--);
}
// Move the keys equal to v from both ends back to the middle.
int s, n = off + len;
s = Math.min(a - off, b - a);
vecswap(x, off, b - s, s);
s = Math.min(d - c, n - d - 1);
vecswap(x, b, n - s, s);
// Recursively sort the part with smaller keys and the part with larger keys.
if ((s = b - a) > 1)
sort(x, off, s);
if ((s = d - c) > 1)
sort(x, n - s, s);
}
/**
 * Exchanges positions a and b in both rows of x, first in row 0 and then in
 * row 1, so the entry in row 1 keeps following its key in row 0.
 */
private void swap(int x[][], int a, int b) {
    final int[] keys = x[0];
    final int heldKey = keys[a];
    keys[a] = keys[b];
    keys[b] = heldKey;

    final int[] values = x[1];
    final int heldValue = values[a];
    values[a] = values[b];
    values[b] = heldValue;
}
/**
 * Exchanges the n elements starting at a with the n elements starting at b,
 * pair by pair and in both rows of x.
 */
private void vecswap(int x[][], int a, int b, int n) {
    int left = a;
    int right = b;
    for (int remaining = n; remaining > 0; remaining--) {
        swap(x, left++, right++);
    }
}
/**
 * Returns whichever of the indices a, b and c holds the median of the three
 * keys x[0][a], x[0][b] and x[0][c].
 */
private int med3(int x[][], int a, int b, int c) {
    final int keyA = x[0][a];
    final int keyB = x[0][b];
    final int keyC = x[0][c];
    if (keyA < keyB) {
        if (keyB < keyC) {
            return b;
        }
        return keyA < keyC ? c : a;
    }
    if (keyB > keyC) {
        return b;
    }
    return keyA > keyC ? c : a;
}
import java.io.File;
/**
 * Driver that compares f:/diff3.txt (old) with f:/diff4.txt (new) and writes
 * the added, deleted and modified records to three result files.
 */
public class Test {
    public static void main(String[] args) {
        FileComparison comparison = new FileComparison(
                new File("f:/diff3.txt"),
                new File("f:/diff4.txt"),
                new File("f:/diff_add.txt"),
                new File("f:/diff_delete.txt"),
                new File("f:/diff_modify.txt"));
        // These two have to run before recordModified.
        comparison.recordAdded();
        comparison.recordDeleted();
        comparison.recordModified();
    }
}
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
/**
 * Compares an old and a new record file by key and writes three result files:
 * records added in the new file, records deleted from the old file, and
 * records present in both whose content differs.
 *
 * <p>Every input line starts with a key followed by {@code |}. Call
 * {@link #recordAdded()} and {@link #recordDeleted()} before
 * {@link #recordModified()}: the latter reads the files and counters the first
 * two produce.
 */
public class FileComparison {
    // Upper bound on the number of lines per input file. Raise it for larger
    // inputs (the original note suggests 5,000,000 for files of up to five
    // million lines).
    private static final int LINE = 1000000;
    // Field separator
    private static final String FIELD_SEPARATOR = "|";
    // Line terminator
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");
    // Suffix appended to added records
    private static final String ADD = FIELD_SEPARATOR + "1" + LINE_SEPARATOR;
    // Suffix appended to modified records
    private static final String MODIFY = FIELD_SEPARATOR + "2" + LINE_SEPARATOR;
    // Suffix appended to deleted records
    private static final String DELETE = FIELD_SEPARATOR + "3" + LINE_SEPARATOR;
    // Old file
    private File oldFile;
    // New file
    private File newFile;
    // File receiving the added records
    private File addedFile;
    // File receiving the deleted records
    private File deletedFile;
    // File receiving the modified records
    private File modifyFile;
    // Number of added records found by the last recordAdded() call
    private int addedNumber = 0;
    // Number of deleted records found by the last recordDeleted() call
    private int deletedNumber = 0;

    public FileComparison(File oldFile, File newFile, File addedFile, File deletedFile, File modifyFile) {
        this.oldFile = oldFile;
        this.newFile = newFile;
        this.addedFile = addedFile;
        this.deletedFile = deletedFile;
        this.modifyFile = modifyFile;
    }

    /**
     * Finds the modified records. The new file is authoritative: when a record
     * exists in both files with different content, the line from the new file
     * is written to the modify file with the marker "2" appended.
     */
    public void recordModified() {
        System.out.println("开始比较修改的记录...");
        long t0 = System.currentTimeMillis();
        int[][] oldFileMap = readFile(oldFile, LINE, deletedFile, deletedNumber);
        int[] addedKeys = readKey(addedFile, addedNumber);
        int count = 0;
        BufferedReader br = null;
        BufferedWriter bw = null;
        try {
            br = new BufferedReader(new FileReader(newFile));
            bw = new BufferedWriter(new FileWriter(modifyFile));
            String newFileLine;
            while ((newFileLine = br.readLine()) != null) {
                int key = getKey(newFileLine);
                if (Arrays.binarySearch(addedKeys, key) >= 0) {
                    // Added records have no counterpart in the old file.
                    continue;
                }
                int index = Arrays.binarySearch(oldFileMap[0], key);
                if (index >= 0 && oldFileMap[1][index] != getValue(newFileLine)) {
                    bw.write(newFileLine + MODIFY);
                    count++;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(bw);
            close(br);
        }
        long t1 = System.currentTimeMillis();
        System.out.printf("被修改的记录比较完成,有 %d 条被修,耗时 %.1f 秒,存放于:%s%n", count, (t1 - t0) / 1000F, modifyFile.getName());
    }

    /**
     * Finds the added records (present in the new file, absent from the old
     * one) and writes them to the added file with the marker "1" appended.
     * The counter is recomputed on every call.
     */
    public void recordAdded() {
        System.out.println("开始比较新增加的记录...");
        long t0 = System.currentTimeMillis();
        addedNumber = writeUnmatched(newFile, oldFile, addedFile, ADD);
        long t1 = System.currentTimeMillis();
        System.out.printf("新增加的记录比较完成,有 %d 条新增,耗时 %.1f 秒,存放于:%s%n", addedNumber, (t1 - t0) / 1000F, addedFile.getName());
        System.gc();
    }

    /**
     * Finds the deleted records (present in the old file, absent from the new
     * one) and writes them to the deleted file with the marker "3" appended.
     * The counter is recomputed on every call.
     */
    public void recordDeleted() {
        System.out.println("开始比较删除的记录...");
        long t0 = System.currentTimeMillis();
        deletedNumber = writeUnmatched(oldFile, newFile, deletedFile, DELETE);
        long t1 = System.currentTimeMillis();
        System.out.printf("被删除的记录比较完成,有 %d 条删除,耗时 %.1f 秒,存放于:%s%n", deletedNumber, (t1 - t0) / 1000F, deletedFile.getName());
        System.gc();
    }

    /**
     * Copies every line of {@code source} whose key does not occur in
     * {@code reference} to {@code target}, appending {@code suffix} to each.
     * I/O failures are reported on standard error; both streams are closed in
     * every case.
     *
     * @return the number of lines selected for {@code target}
     */
    private int writeUnmatched(File source, File reference, File target, String suffix) {
        int[] referenceKeys = readKey(reference, LINE);
        int count = 0;
        BufferedReader br = null;
        BufferedWriter bw = null;
        try {
            br = new BufferedReader(new FileReader(source));
            bw = new BufferedWriter(new FileWriter(target));
            String line;
            while ((line = br.readLine()) != null) {
                if (Arrays.binarySearch(referenceKeys, getKey(line)) < 0) {
                    count++;
                    bw.write(line + suffix);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(bw);
            close(br);
        }
        return count;
    }

    /** Closes the resource if it was opened, reporting a failure on standard error. */
    private static void close(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /*****************************************************************
     * Not finished here: the listing continues in the next post.    *
     * readKey, readFile, getKey and getValue, which are used above, *
     * are not defined in this part.                                 *
     *****************************************************************/
}