【问题标题】:Tracking Frequencies of Characters in a Text File(跟踪文本文件中字符的频率)
【发布时间】:2015-03-18 03:19:56
【问题描述】:
在我目前的数据结构课程项目中,我需要读取一个文本文件,最终对其中的字符进行 Huffman 编码,并输出编码后的新文件(不过我离这一步还很远)。
我要做的第一件事是扫描文件并确定每个字符的频率,然后创建一个包含所有字符及其频率的有序列表。但是,我很难想出一个好办法,在扫描过程中持续累计已出现的字符及其各自的出现次数。我认为 Hashtable 可能是个好主意:每个键是一个字符,它映射到的值就是该字符的频率。
有没有更有效的方法来做到这一点?
提前致谢!
【问题讨论】:
标签:
java
file-io
data-structures
hashtable
【解决方案1】:
这是我使用文本文件查找字符频率的代码。
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Locale;
/**
 * Counts how often each of the letters {@code a}-{@code z} occurs in a text
 * file (case-insensitively) and prints a table with the rounded percentage
 * and the absolute count for every letter.
 *
 * <p>Characters outside {@code a}-{@code z} (digits, punctuation, whitespace,
 * accented letters, ...) are ignored and do not contribute to the total.
 */
public class Frequency2 {

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "E:/look.txt";

    /** Number of letters tracked: 'a' through 'z'. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the input file, tallies the letters and prints the frequency table
     * to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the text file to
     *             analyse. When absent, {@value #DEFAULT_PATH} is used.
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // The original instantiated a non-existent class "Frequency"; readFile
        // is an instance method of this class.
        String text = new Frequency2().readFile(path);
        text = text.toLowerCase(Locale.ENGLISH); // fold upper case onto lower case

        // Tally into locals so repeated invocations never see stale totals.
        int[] count = new int[ALPHABET_SIZE];
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c >= 'a' && c <= 'z') {
                count[c - 'a']++; // 'a' -> slot 0, ..., 'z' -> slot 25
            }
        }

        // Kept as a double so the division below is floating-point and the
        // "tot" line prints in the same format as before (e.g. "tot 3.0").
        double totCount = 0;
        for (int letterCount : count) {
            totCount += letterCount;
        }
        System.out.println("tot " + totCount);

        System.out.println("Letter\tPrecentage\tFrequency");
        for (int i = 0; i < count.length; i++) {
            // With no letters at all, report 0% explicitly instead of relying
            // on Math.round(NaN) happening to be 0.
            double percentage = (totCount == 0)
                    ? 0.0
                    : Math.round((count[i] / totCount) * 100);
            String letter = Character.toString((char) ('A' + i));
            System.out.println(letter + "\t" + percentage + "%" + "\t\t" + count[i]);
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>The file is read line by line and every line, including the last
     * one, is terminated with a single {@code "\n"} in the result, whatever
     * line separator the file itself uses.
     *
     * @param fileName path of the file to read
     * @return the file content with normalised line endings; empty if the
     *         file has no lines
     * @throws IOException if the file cannot be opened or read
     */
    String readFile(String fileName) throws IOException {
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        }
    }
}
希望这会对你有所帮助。