急..用JAVA语言实现数据结构实验!

对一个超大文档统计单词出现的频率。注意:单词“The”和“the”算同一个单词(也就是不区分大小写)。
统计好后,输出出现频率最高的5个单词和它对应的频率
为了保存出现的单词和它的频率,可以采用两种方式保存:ArrayList和Hashtable。
要求用java语言实现。很急,希望达人帮忙,肯定有追加分
那个匿名的程序没有统计输出嘛。。谁还能帮帮忙?

花了我大半天时间专门帮你写的,注释没写仔细,有什么不明白的或者有什么小bug就给我留言。

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Vector;

/**
 * Counts how often each word occurs in a text file, ignoring case, and
 * reports the most frequent words together with their counts.
 *
 * <p>A "word" is a maximal run of characters matched by the regular
 * expression class {@code \w}; everything else is treated as a separator.
 */
public class Test {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "c:/test3.txt";

    /** Number of words kept by {@link #findMax5(Hashtable)}. */
    private static final int TOP_COUNT = 5;

    /**
     * Reads a whole text file and returns its content in lower case.
     *
     * <p>Every line is terminated with {@code '\n'} in the result. The file is
     * decoded with the platform default charset. If the file cannot be opened
     * or read, the stack trace is printed and whatever was read so far
     * (possibly the empty string) is returned.
     *
     * @param path path of the file to read
     * @return the lower-cased file content
     */
    public String getFile(String path) {
        StringBuilder context = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
            String line;
            while ((line = reader.readLine()) != null) {
                context.append(line).append('\n');
            }
        } catch (IOException e) {
            // Best effort: report the problem and continue with the partial content.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing a reader fails.
                }
            }
        }
        // Locale.ROOT keeps the case mapping independent of the user's locale.
        return context.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Splits text into words.
     *
     * @param context the text to split; {@code null} is treated as empty
     * @return the words in order of appearance, without empty tokens
     */
    public List<String> mySplit(String context) {
        List<String> all = new ArrayList<String>();
        if (context == null) {
            return all;
        }
        for (String token : context.split("\\W+")) {
            // A leading separator produces one empty token; drop it.
            if (token.length() > 0) {
                all.add(token);
            }
        }
        return all;
    }

    /**
     * Counts the occurrences of every word, ignoring case.
     *
     * @param all the words to count; each element is converted with
     *            {@code toString()}
     * @return a table mapping each lower-cased word to its number of occurrences
     */
    public Hashtable<String, Integer> contWords(List<?> all) {
        Hashtable<String, Integer> allTable = new Hashtable<String, Integer>();
        for (Object item : all) {
            // One lower-cased key per word, so "The" and "the" share an entry.
            String word = item.toString().toLowerCase(Locale.ROOT);
            Integer seen = allTable.get(word);
            allTable.put(word, seen == null ? 1 : seen + 1);
        }
        return allTable;
    }

    /**
     * Selects the five most frequent words.
     *
     * <p>When several words share the same count, the alphabetically smaller
     * word is preferred, so the selection is deterministic.
     *
     * @param allTable word counts as produced by {@link #contWords(List)}
     * @return a new table holding at most five entries with the highest counts
     */
    public Hashtable<String, Integer> findMax5(Hashtable<String, Integer> allTable) {
        Hashtable<String, Integer> result = new Hashtable<String, Integer>();
        List<Map.Entry<String, Integer>> ranked = rankByCount(allTable);
        int limit = Math.min(TOP_COUNT, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /**
     * Returns the smallest value of an array without modifying the array.
     *
     * @param v a non-empty array
     * @return the minimum element of {@code v}
     * @throws ArrayIndexOutOfBoundsException if {@code v} is empty
     */
    public int findMin(int[] v) {
        int min = v[0];
        for (int i = 1; i < v.length; i++) {
            if (v[i] < min) {
                min = v[i];
            }
        }
        return min;
    }

    /**
     * Prints the given word counts, most frequent word first.
     *
     * @param result the word counts to print
     */
    public static void printResult(Hashtable<String, Integer> result) {
        System.out.println("排前五的单词情况如下:");
        // A Hashtable has no defined order, so rank the entries before printing.
        for (Map.Entry<String, Integer> entry : rankByCount(result)) {
            System.out.println(entry.getKey() + " 的个数为: " + entry.getValue());
        }
    }

    /**
     * Runs the word count.
     *
     * @param args optional: the path of the file to analyse as the first
     *             argument; a built-in default path is used otherwise
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        Test test = new Test();
        String context = test.getFile(path);
        List<String> all = test.mySplit(context);
        Hashtable<String, Integer> allTable = test.contWords(all);
        Hashtable<String, Integer> result = test.findMax5(allTable);
        Test.printResult(result);
    }

    /** Returns the table's entries sorted by descending count, then by ascending word. */
    private static List<Map.Entry<String, Integer>> rankByCount(Hashtable<String, Integer> table) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(table.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return entries;
    }
}
温馨提示:答案为网友推荐,仅供参考
第1个回答  2009-01-07
楼上LUOJUNSONG20的代码140行,我的代码不到50行。你是不是应该多给我加点分呢?

import java.io.*;
import java.util.regex.*;
import java.util.*;

/**
 * Prints the five most frequent words of a text file, ignoring case.
 *
 * <p>A "word" is a maximal run of characters matched by the regular
 * expression class {@code \w}.
 */
public class WordCount {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "d:\\test3.txt";

    /** Number of words printed by {@link #count(String)}. */
    private static final int TOP_COUNT = 5;

    /**
     * Runs the word count.
     *
     * @param args optional: the path of the file to analyse as the first
     *             argument; a built-in default path is used otherwise
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        count(args.length > 0 ? args[0] : DEFAULT_PATH);
    }

    /**
     * Counts the words of a file and prints the most frequent ones to standard
     * output, highest count first. Words with equal counts are printed in
     * alphabetical order.
     *
     * <p>The file is processed line by line, so it is never held in memory as
     * a whole.
     *
     * @param filePath path of the file to analyse
     * @throws Exception if the file cannot be opened or read
     */
    public static void count(String filePath) throws Exception {
        Hashtable<String, Integer> map = new Hashtable<String, Integer>();
        BufferedReader in = new BufferedReader(new FileReader(new File(filePath)));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                for (String token : line.split("\\W+")) {
                    // A leading separator produces one empty token; skip it.
                    if (token.length() == 0) {
                        continue;
                    }
                    // Locale.ROOT keeps the case mapping locale-independent.
                    String word = token.toLowerCase(Locale.ROOT);
                    Integer seen = map.get(word);
                    map.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            in.close();
        }

        List<Map.Entry<String, Integer>> entrys =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(entrys, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2) {
                int byCount = e2.getValue().compareTo(e1.getValue());
                return byCount != 0 ? byCount : e1.getKey().compareTo(e2.getKey());
            }
        });
        for (int i = 0; i < entrys.size() && i < TOP_COUNT; i++) {
            System.out.println(entrys.get(i).getKey() + "的个数为"
                    + entrys.get(i).getValue());
        }
    }

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@code '\n'}. The file is decoded with the platform default charset.
     *
     * @param file the file to read
     * @return the file content
     * @throws Exception if the file cannot be opened or read
     */
    public static String file2String(File file) throws Exception {
        BufferedReader in = new BufferedReader(new FileReader(file));
        try {
            StringBuilder builder = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                builder.append(line).append('\n');
            }
            return builder.toString();
        } finally {
            // Close the reader even when reading fails part-way through.
            in.close();
        }
    }
}
第2个回答  2009-01-06
//Requires JDK 1.6 or later (the code calls String.isEmpty()).
//Save as 'Statistics.java', then compile and run.

import java.io.*;
import java.util.*;

/**
 * Counts the words of a text file on a background thread, prints a progress
 * mark while the file is being read, and finally prints the most frequent
 * words.
 *
 * <p>The counters live in static state shared by all calls and are not
 * synchronized: run one worker at a time. Counts accumulate across files
 * processed in the same JVM.
 */
public class Statistics {

    /** Interval, in milliseconds, between two progress marks. */
    private static final long PROGRESS_INTERVAL_MS = 500L;

    /** Orders wrappers by descending amount; ties keep their insertion order. */
    private static final Comparator<WordWrapper> BY_AMOUNT_DESC = new Comparator<WordWrapper>() {
        public int compare(WordWrapper left, WordWrapper right) {
            int l = left.getAmount();
            int r = right.getAmount();
            return l > r ? -1 : (l < r ? 1 : 0);
        }
    };

    /** All distinct words seen so far, in first-seen order until sorted. */
    private static final List<WordWrapper> words = new ArrayList<WordWrapper>();

    /** Lower-cased word to its counter, so a lookup does not scan {@link #words}. */
    private static final Map<String, WordWrapper> index = new HashMap<String, WordWrapper>();

    /**
     * Entry point.
     *
     * @param args optional: the path of the file to analyse as the first
     *             argument; without it the path is requested in a dialog
     */
    public static void main(String[] args) {

        int top = 5;

        String filePath = args.length > 0
                ? args[0]
                : javax.swing.JOptionPane
                        .showInputDialog("Please input full path of file:\nExample: 'C:\\words.txt'");

        if (filePath != null)
            start(filePath, top);
    }

    /**
     * Starts a monitor thread that runs the counting worker, prints a
     * {@code '*'} for every elapsed progress interval, and prints the result
     * once the worker has finished.
     */
    private static void start(final String filePath, final int top) {
        new Thread() {

            public void run() {

                Thread worker = process(filePath);

                if (worker == null)
                    return;

                worker.start();

                try {
                    while (worker.isAlive()) {
                        // join(timeout) returns as soon as the worker ends, so the
                        // result is not delayed by a full polling interval.
                        worker.join(PROGRESS_INTERVAL_MS);
                        if (worker.isAlive())
                            System.out.print('*');
                    }
                } catch (InterruptedException e) {
                    // The worker may still be writing to the shared list; restore the
                    // interrupt flag and give up instead of printing partial data.
                    Thread.currentThread().interrupt();
                    return;
                }

                printTop(filePath, top);
            }
        }.start();
    }

    /** Sorts the collected words by frequency and prints the first {@code top} of them. */
    private static void printTop(String filePath, int top) {
        Collections.sort(words, BY_AMOUNT_DESC);

        int size = Math.min(words.size(), top);

        System.out.println("\nFile: " + filePath);
        System.out.println("==========Top:" + top + "==========");
        for (int i = 0; i < size; i++) {
            System.out.println(words.get(i));
        }
    }

    /**
     * Creates, but does not start, a thread that counts the words of a file.
     *
     * @param f path of the file to analyse
     * @return the worker thread, or {@code null} (after printing a message)
     *         when the file does not exist or cannot be read
     */
    public static Thread process(final String f) {
        File file = new File(f);
        if (!file.exists() || !file.canRead()) {
            System.out.println("File '"+f+"' is not exists or can not be read.");
            return null;
        }
        return process(file);
    }

    /**
     * Creates, but does not start, a thread that counts the words of a file.
     *
     * <p>Words are compared case-insensitively; the spelling of the first
     * occurrence is the one that is kept. I/O errors are reported with a
     * stack trace and end the worker.
     *
     * @param f the file to analyse
     * @return the worker thread
     */
    public static Thread process(final File f) {
        Thread t = new Thread() {
            public void run() {
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new FileReader(f));
                    String line;
                    while ((line = br.readLine()) != null) {
                        String[] tokens = line.split("\\W+");
                        for (int i = 0; i < tokens.length; i++) {
                            String word = tokens[i];
                            // A leading separator produces one empty token; skip it.
                            if (word.isEmpty())
                                continue;
                            String key = word.toLowerCase(Locale.ROOT);
                            WordWrapper known = index.get(key);
                            if (known != null) {
                                known.plus();
                            } else {
                                WordWrapper wrapper = new WordWrapper(word);
                                index.put(key, wrapper);
                                words.add(wrapper);
                            }
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    if (br != null) {
                        try {
                            br.close();
                        } catch (IOException ignored) {
                            // Nothing useful can be done when closing a reader fails.
                        }
                    }
                }
            }
        };
        return t;
    }
}

/**
 * A word together with the number of times it has been seen.
 *
 * <p>Two wrappers are equal when their words are equal ignoring case. The
 * natural ordering sorts by descending amount and is therefore <em>not</em>
 * consistent with {@code equals}.
 */
class WordWrapper implements Comparable<WordWrapper> {

    private final String word;

    private int amount;

    /**
     * Creates a wrapper for a word that has been seen once.
     *
     * @param w the word; must not be {@code null}
     * @throws NullPointerException if {@code w} is {@code null}
     */
    public WordWrapper(String w) {
        if (w == null) {
            throw new NullPointerException("w");
        }
        word = w;
        // Set directly instead of calling the overridable plus() from the constructor.
        amount = 1;
    }

    /**
     * Records one more occurrence of the word.
     *
     * @return this wrapper, to allow chaining
     */
    public WordWrapper plus() {
        ++amount;
        return this;
    }

    /** Returns the number of occurrences recorded so far. */
    public int getAmount() {
        return amount;
    }

    /** Returns the word exactly as it was passed to the constructor. */
    public String getWord() {
        return word;
    }

    /**
     * Returns the word. Misspelled legacy name kept for existing callers;
     * {@link #getWord()} returns the same value.
     */
    public String getWorld() {
        return word;
    }

    /**
     * Compares words ignoring case.
     *
     * @return {@code true} if {@code o} is a {@code WordWrapper} whose word
     *         equals this word ignoring case; {@code false} otherwise,
     *         including for {@code null}
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof WordWrapper)) {
            return false;
        }
        return word.equalsIgnoreCase(((WordWrapper) o).word);
    }

    /**
     * Returns a hash code that is equal for words that differ only in case,
     * as required by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int hash = 0;
        for (int i = 0; i < word.length(); ) {
            int codePoint = word.codePointAt(i);
            // Fold upper then lower, the same per-character rule equalsIgnoreCase applies.
            hash = 31 * hash + Character.toLowerCase(Character.toUpperCase(codePoint));
            i += Character.charCount(codePoint);
        }
        return hash;
    }

    /**
     * Orders wrappers by descending amount.
     *
     * @return a negative value if this word occurs more often than {@code o},
     *         a positive value if it occurs less often, zero otherwise
     */
    public int compareTo(WordWrapper o) {
        return o.amount < amount ? -1 : (o.amount > amount ? 1 : 0);
    }

    /** Returns the word and its amount in the form {@code "word = amount"}. */
    @Override
    public String toString() {
        return word + " = " + amount;
    }
}
第3个回答  2009-01-05
就一个文档?
还是一堆文件?
第4个回答  2009-01-04
我只会c……
可以吗?
相似回答