首页 | 编程语言 | 网站建设 | 游戏天堂 | 冲浪宝典 | 网络安全 | 操作系统 | 软件时空 | 硬件指南 | 病毒相关 | IT 认证
软讯网络 > 编程语言 > Java > 英文单词频率统计工具
【标  题】:英文单词频率统计工具
【关键字】:
【来  源】:http://blog.csdn.net/greenkugua/archive/2006/11/09/1374838.aspx

英文单词频率统计工具

记得有一段时间因为想过要考研,当时要记单词,当然我想要记一些常用的单词,但是哪些单词是常用的呢?现在外面有很多的分频词汇的册子,我也买过,但是总是不放心.于是决定自己写一个程序来统计一下单词的出现频率.这个程序也是那天晚上写的,还比较管用,我们只要把要分析的英文文章放到一个指定的目录下面,它就可以自动的去统计这个目录下面的所有英文资料中各个单词出现的频率并排序后输出.你不妨试试看哦,哈哈.编程是一种乐趣.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of (word, frequency) pairs that can be ordered by
 * descending frequency.
 *
 * <p>Despite its name this class does not implement {@code java.util.Set}
 * and does not reject duplicate words: every pair passed to
 * {@link #addElement(String, int)} is stored, in two parallel arrays.
 *
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set {
    /** Number of pairs stored so far; meaningful indices are 0..num-1. */
    private int num;
    /** times[i] is the frequency that belongs to word[i]. */
    private int[] times;
    /** word[i] is the i-th stored word. */
    private String[] word;

    /**
     * Creates an empty container.
     *
     * @param size initial capacity; the arrays grow on demand, so this is
     *             only a sizing hint
     */
    public Set(int size) {
        this.num = 0;
        this.times = new int[size];
        this.word = new String[size];
    }

    /**
     * Appends a word together with its frequency, enlarging the backing
     * arrays first when they are full.
     *
     * @param word  the word to add
     * @param times how many times the word appeared
     */
    public void addElement(String word, int times) {
        if (this.num == this.word.length) {
            grow();
        }
        this.times[this.num] = times;
        this.word[this.num] = word;
        this.num++;
    }

    /** Doubles the capacity of both arrays (at least 4 slots), keeping the stored pairs. */
    private void grow() {
        int capacity = Math.max(4, this.word.length * 2);
        int[] biggerTimes = new int[capacity];
        String[] biggerWord = new String[capacity];
        System.arraycopy(this.times, 0, biggerTimes, 0, this.num);
        System.arraycopy(this.word, 0, biggerWord, 0, this.num);
        this.times = biggerTimes;
        this.word = biggerWord;
    }

    /**
     * Orders the stored pairs by descending frequency.
     *
     * <p>This is a simple exchange sort: for every position i, any later
     * element with a larger frequency is swapped into position i, so after
     * the inner loop position i holds the largest remaining frequency.
     */
    public void sort() {
        for (int i = 0; i < this.num - 1; i++) {
            for (int j = i + 1; j < this.num; j++) {
                if (this.times[j] > this.times[i]) {
                    swap(i, j);
                }
            }
        }
    }

    /** Exchanges the (word, frequency) pairs stored at positions a and b. */
    private void swap(int a, int b) {
        int heldTimes = this.times[a];
        String heldWord = this.word[a];
        this.times[a] = this.times[b];
        this.word[a] = this.word[b];
        this.times[b] = heldTimes;
        this.word[b] = heldWord;
    }

    /**
     * Prints a header line with the number of words, then one
     * "word  :  frequency" line per stored pair, in current order.
     */
    public void showResult() {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for (int i = 0; i < this.num; i++) {
            System.out.println(this.word[i]+"  :  "+this.times[i]);
        }
    }

    /**
     * @return the number of pairs added so far
     */
    public int getCount() {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added words
     * @return the word at index i
     */
    public String getWordAt(int i) {
        return this.word[i];
    }

    /**
     * Returns the frequency stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added values
     * @return the frequency of the word at index i
     */
    public int getFrequency(int i) {
        return this.times[i];
    }
}
/**
 * This class is designed to annalyse
 * word's frequency of English articles.
 * @author ZhuTao HUST.
 * 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version:1.0
 */
public class EnglishWord {
private Set resultSet;
 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  EnglishWord analyser = new EnglishWord("G:\\test","G:\\tao.txt");  
 }
public EnglishWord(String filepath,String savepath)
{
 String content = this.readFile(filepath);
 Hashtable table = this.getWordList(content);
 
 this.initSet(table);
 this.resultSet.sort();
 this.resultSet.showResult();
 this.saveResult(savepath);
}
/**
 * This method is used to analyse the times of each word appears in
 * the file.
 * @param content is the content of file you.
 * @return the word map.Which contains words and times it appeared.
 */

public Hashtable getWordList(String content)
{
 Hashtable wordList = new Hashtable();
 int i = 0;
 for(;i<content.length();i++)
 {
  char ch = content.charAt(i);
  if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
  else break;
 }
 
 boolean flag = true;
 String word = new String();
 char ch ;
 for(;i<content.length();i++)
 {
  ch =  content.charAt(i);
  if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '\''){
   word+=ch;
   flag = true;
  }
  else{
   //如果已经包含该单词,就计数加一;
   if(ch == '-'){i+=2;continue;}
   if(flag)
   {
    if(wordList.containsKey(word))
      {
     Integer num = (Integer)wordList.get(word);
     int t = num.intValue()+1;
     wordList.put(word,new Integer(t));
       }
      else
      {
    wordList.put(word,new Integer(1));
      }
   }
   flag = false;
   word = "";
  }
 }
 if(!word.equals(""))
 {
  if(wordList.containsKey(word))
     {
    Integer num = (Integer)wordList.get(word);
    int t = num.intValue()+1;
    wordList.put(word,new Integer(t));
      }
     else
     {
   wordList.put(word,new Integer(1));
     }
 }
 return wordList;
}
/**
 * @see This method is used to read content from file.
 * @param String filepath is the path and name of the file
 * which you want to analyse.
 * @return return the content of file in String form.
 */
public String readFile(String filepath)
{
     try{
      File file= new File(filepath);
      if(file.isDirectory())
      {
       String[] list = file.list();
             String str = new String();
             System.out.println("文件的个数:"+list.length+" 文件列表如下:");
                for(int i =0;i<list.length;i++)
            {
         System.out.println(list[i]);
                  FileInputStream read = new FileInputStream(filepath+'\\'+list[i]);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            str +=content;
            }
          return str;
      }
      else{
         FileInputStream read = new FileInputStream(filepath);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            return content;
      }
 }catch(IOException e){
  System.out.println(e);
  return null;
 }
}
/**
 * This method is designed to init a set of word and the times it appeared.
 * @param wordList
 */
public void initSet(Hashtable wordList)
{
  Enumeration e = wordList.keys();
  this.resultSet = new Set(wordList.size());
  while(e.hasMoreElements())
  {
   Object key = e.nextElement();
   String word = key.toString();
   Integer num = (Integer)wordList.get(key);
   int times = num.intValue();
   this.resultSet.addElement(word,times);
  }
}
/**
 * This method is designed to show the word list.
 * @param wordList
 */
public void showTable(Hashtable wordList)
{
 Enumeration e = wordList.keys();
  while(e.hasMoreElements())
   {
    Object key = e.nextElement();
    String word = key.toString();
    Integer num = (Integer)wordList.get(key);
    int times = num.intValue();
    System.out.println(word+" : "+times);
   }
}
/**
 * This method is designed to write the result into file.
 * @param filepath
 */
public void saveResult(String filepath)
{
 try{
  FileOutputStream write = new FileOutputStream(filepath);
  for(int i = 0;i<this.resultSet.getCount();i++)
  {
   String word = this.resultSet.getWordAt(i);
   int times = this.resultSet.getFrequency(i);
   write.write(this.format(word,times));
  }
  write.close();
  
 }catch(IOException e)
 {
  System.out.println(e);
 }
 
}
/**@author ZhuTao
 * This method is designed to format information
 * to byte stream.
 * @param word
 * @param times
 * @return a byte stream.
 */
public byte[] format(String word,int times)
{
 String str =  new String (word+" : "+times+"\r\n");
 byte []data = new byte[str.length()];
 for(int i = 0;i<str.length();i++)
  data[i] = (byte)str.charAt(i);
 return data;
}
}

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of (word, frequency) pairs that can be ordered by
 * descending frequency.
 *
 * <p>Despite its name this class does not implement {@code java.util.Set}
 * and does not reject duplicate words: every pair passed to
 * {@link #addElement(String, int)} is stored, in two parallel arrays.
 *
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set {
    /** Number of pairs stored so far; meaningful indices are 0..num-1. */
    private int num;
    /** times[i] is the frequency that belongs to word[i]. */
    private int[] times;
    /** word[i] is the i-th stored word. */
    private String[] word;

    /**
     * Creates an empty container.
     *
     * @param size initial capacity; the arrays grow on demand, so this is
     *             only a sizing hint
     */
    public Set(int size) {
        this.num = 0;
        this.times = new int[size];
        this.word = new String[size];
    }

    /**
     * Appends a word together with its frequency, enlarging the backing
     * arrays first when they are full.
     *
     * @param word  the word to add
     * @param times how many times the word appeared
     */
    public void addElement(String word, int times) {
        if (this.num == this.word.length) {
            grow();
        }
        this.times[this.num] = times;
        this.word[this.num] = word;
        this.num++;
    }

    /** Doubles the capacity of both arrays (at least 4 slots), keeping the stored pairs. */
    private void grow() {
        int capacity = Math.max(4, this.word.length * 2);
        int[] biggerTimes = new int[capacity];
        String[] biggerWord = new String[capacity];
        System.arraycopy(this.times, 0, biggerTimes, 0, this.num);
        System.arraycopy(this.word, 0, biggerWord, 0, this.num);
        this.times = biggerTimes;
        this.word = biggerWord;
    }

    /**
     * Orders the stored pairs by descending frequency.
     *
     * <p>This is a simple exchange sort: for every position i, any later
     * element with a larger frequency is swapped into position i, so after
     * the inner loop position i holds the largest remaining frequency.
     */
    public void sort() {
        for (int i = 0; i < this.num - 1; i++) {
            for (int j = i + 1; j < this.num; j++) {
                if (this.times[j] > this.times[i]) {
                    swap(i, j);
                }
            }
        }
    }

    /** Exchanges the (word, frequency) pairs stored at positions a and b. */
    private void swap(int a, int b) {
        int heldTimes = this.times[a];
        String heldWord = this.word[a];
        this.times[a] = this.times[b];
        this.word[a] = this.word[b];
        this.times[b] = heldTimes;
        this.word[b] = heldWord;
    }

    /**
     * Prints a header line with the number of words, then one
     * "word  :  frequency" line per stored pair, in current order.
     */
    public void showResult() {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for (int i = 0; i < this.num; i++) {
            System.out.println(this.word[i]+"  :  "+this.times[i]);
        }
    }

    /**
     * @return the number of pairs added so far
     */
    public int getCount() {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added words
     * @return the word at index i
     */
    public String getWordAt(int i) {
        return this.word[i];
    }

    /**
     * Returns the frequency stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added values
     * @return the frequency of the word at index i
     */
    public int getFrequency(int i) {
        return this.times[i];
    }
}
/**
 * This class is designed to annalyse
 * word's frequency of English articles.
 * @author ZhuTao HUST.
 * 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version:1.0
 */
public class EnglishWord {
private Set resultSet;
 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  EnglishWord analyser = new EnglishWord("G:\\test","G:\\tao.txt");  
 }
public EnglishWord(String filepath,String savepath)
{
 String content = this.readFile(filepath);
 Hashtable table = this.getWordList(content);
 
 this.initSet(table);
 this.resultSet.sort();
 this.resultSet.showResult();
 this.saveResult(savepath);
}
/**
 * This method is used to analyse the times of each word appears in
 * the file.
 * @param content is the content of file you.
 * @return the word map.Which contains words and times it appeared.
 */

public Hashtable getWordList(String content)
{
 Hashtable wordList = new Hashtable();
 int i = 0;
 for(;i<content.length();i++)
 {
  char ch = content.charAt(i);
  if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
  else break;
 }
 
 boolean flag = true;
 String word = new String();
 char ch ;
 for(;i<content.length();i++)
 {
  ch =  content.charAt(i);
  if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '\''){
   word+=ch;
   flag = true;
  }
  else{
   //如果已经包含该单词,就计数加一;
   if(ch == '-'){i+=2;continue;}
   if(flag)
   {
    if(wordList.containsKey(word))
      {
     Integer num = (Integer)wordList.get(word);
     int t = num.intValue()+1;
     wordList.put(word,new Integer(t));
       }
      else
      {
    wordList.put(word,new Integer(1));
      }
   }
   flag = false;
   word = "";
  }
 }
 if(!word.equals(""))
 {
  if(wordList.containsKey(word))
     {
    Integer num = (Integer)wordList.get(word);
    int t = num.intValue()+1;
    wordList.put(word,new Integer(t));
      }
     else
     {
   wordList.put(word,new Integer(1));
     }
 }
 return wordList;
}
/**
 * @see This method is used to read content from file.
 * @param String filepath is the path and name of the file
 * which you want to analyse.
 * @return return the content of file in String form.
 */
public String readFile(String filepath)
{
     try{
      File file= new File(filepath);
      if(file.isDirectory())
      {
       String[] list = file.list();
             String str = new String();
             System.out.println("文件的个数:"+list.length+" 文件列表如下:");
                for(int i =0;i<list.length;i++)
            {
         System.out.println(list[i]);
                  FileInputStream read = new FileInputStream(filepath+'\\'+list[i]);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            str +=content;
            }
          return str;
      }
      else{
         FileInputStream read = new FileInputStream(filepath);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            return content;
      }
 }catch(IOException e){
  System.out.println(e);
  return null;
 }
}
/**
 * This method is designed to init a set of word and the times it appeared.
 * @param wordList
 */
public void initSet(Hashtable wordList)
{
  Enumeration e = wordList.keys();
  this.resultSet = new Set(wordList.size());
  while(e.hasMoreElements())
  {
   Object key = e.nextElement();
   String word = key.toString();
   Integer num = (Integer)wordList.get(key);
   int times = num.intValue();
   this.resultSet.addElement(word,times);
  }
}
/**
 * This method is designed to show the word list.
 * @param wordList
 */
public void showTable(Hashtable wordList)
{
 Enumeration e = wordList.keys();
  while(e.hasMoreElements())
   {
    Object key = e.nextElement();
    String word = key.toString();
    Integer num = (Integer)wordList.get(key);
    int times = num.intValue();
    System.out.println(word+" : "+times);
   }
}
/**
 * This method is designed to write the result into file.
 * @param filepath
 */
public void saveResult(String filepath)
{
 try{
  FileOutputStream write = new FileOutputStream(filepath);
  for(int i = 0;i<this.resultSet.getCount();i++)
  {
   String word = this.resultSet.getWordAt(i);
   int times = this.resultSet.getFrequency(i);
   write.write(this.format(word,times));
  }
  write.close();
  
 }catch(IOException e)
 {
  System.out.println(e);
 }
 
}
/**@author ZhuTao
 * This method is designed to format information
 * to byte stream.
 * @param word
 * @param times
 * @return a byte stream.
 */
public byte[] format(String word,int times)
{
 String str =  new String (word+" : "+times+"\r\n");
 byte []data = new byte[str.length()];
 for(int i = 0;i<str.length();i++)
  data[i] = (byte)str.charAt(i);
 return data;
}
}

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of (word, frequency) pairs that can be ordered by
 * descending frequency.
 *
 * <p>Despite its name this class does not implement {@code java.util.Set}
 * and does not reject duplicate words: every pair passed to
 * {@link #addElement(String, int)} is stored, in two parallel arrays.
 *
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set {
    /** Number of pairs stored so far; meaningful indices are 0..num-1. */
    private int num;
    /** times[i] is the frequency that belongs to word[i]. */
    private int[] times;
    /** word[i] is the i-th stored word. */
    private String[] word;

    /**
     * Creates an empty container.
     *
     * @param size initial capacity; the arrays grow on demand, so this is
     *             only a sizing hint
     */
    public Set(int size) {
        this.num = 0;
        this.times = new int[size];
        this.word = new String[size];
    }

    /**
     * Appends a word together with its frequency, enlarging the backing
     * arrays first when they are full.
     *
     * @param word  the word to add
     * @param times how many times the word appeared
     */
    public void addElement(String word, int times) {
        if (this.num == this.word.length) {
            grow();
        }
        this.times[this.num] = times;
        this.word[this.num] = word;
        this.num++;
    }

    /** Doubles the capacity of both arrays (at least 4 slots), keeping the stored pairs. */
    private void grow() {
        int capacity = Math.max(4, this.word.length * 2);
        int[] biggerTimes = new int[capacity];
        String[] biggerWord = new String[capacity];
        System.arraycopy(this.times, 0, biggerTimes, 0, this.num);
        System.arraycopy(this.word, 0, biggerWord, 0, this.num);
        this.times = biggerTimes;
        this.word = biggerWord;
    }

    /**
     * Orders the stored pairs by descending frequency.
     *
     * <p>This is a simple exchange sort: for every position i, any later
     * element with a larger frequency is swapped into position i, so after
     * the inner loop position i holds the largest remaining frequency.
     */
    public void sort() {
        for (int i = 0; i < this.num - 1; i++) {
            for (int j = i + 1; j < this.num; j++) {
                if (this.times[j] > this.times[i]) {
                    swap(i, j);
                }
            }
        }
    }

    /** Exchanges the (word, frequency) pairs stored at positions a and b. */
    private void swap(int a, int b) {
        int heldTimes = this.times[a];
        String heldWord = this.word[a];
        this.times[a] = this.times[b];
        this.word[a] = this.word[b];
        this.times[b] = heldTimes;
        this.word[b] = heldWord;
    }

    /**
     * Prints a header line with the number of words, then one
     * "word  :  frequency" line per stored pair, in current order.
     */
    public void showResult() {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for (int i = 0; i < this.num; i++) {
            System.out.println(this.word[i]+"  :  "+this.times[i]);
        }
    }

    /**
     * @return the number of pairs added so far
     */
    public int getCount() {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added words
     * @return the word at index i
     */
    public String getWordAt(int i) {
        return this.word[i];
    }

    /**
     * Returns the frequency stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added values
     * @return the frequency of the word at index i
     */
    public int getFrequency(int i) {
        return this.times[i];
    }
}
/**
 * This class is designed to annalyse
 * word's frequency of English articles.
 * @author ZhuTao HUST.
 * 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version:1.0
 */
public class EnglishWord {
private Set resultSet;
 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  EnglishWord analyser = new EnglishWord("G:\\test","G:\\tao.txt");  
 }
public EnglishWord(String filepath,String savepath)
{
 String content = this.readFile(filepath);
 Hashtable table = this.getWordList(content);
 
 this.initSet(table);
 this.resultSet.sort();
 this.resultSet.showResult();
 this.saveResult(savepath);
}
/**
 * This method is used to analyse the times of each word appears in
 * the file.
 * @param content is the content of file you.
 * @return the word map.Which contains words and times it appeared.
 */

public Hashtable getWordList(String content)
{
 Hashtable wordList = new Hashtable();
 int i = 0;
 for(;i<content.length();i++)
 {
  char ch = content.charAt(i);
  if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
  else break;
 }
 
 boolean flag = true;
 String word = new String();
 char ch ;
 for(;i<content.length();i++)
 {
  ch =  content.charAt(i);
  if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '\''){
   word+=ch;
   flag = true;
  }
  else{
   //如果已经包含该单词,就计数加一;
   if(ch == '-'){i+=2;continue;}
   if(flag)
   {
    if(wordList.containsKey(word))
      {
     Integer num = (Integer)wordList.get(word);
     int t = num.intValue()+1;
     wordList.put(word,new Integer(t));
       }
      else
      {
    wordList.put(word,new Integer(1));
      }
   }
   flag = false;
   word = "";
  }
 }
 if(!word.equals(""))
 {
  if(wordList.containsKey(word))
     {
    Integer num = (Integer)wordList.get(word);
    int t = num.intValue()+1;
    wordList.put(word,new Integer(t));
      }
     else
     {
   wordList.put(word,new Integer(1));
     }
 }
 return wordList;
}
/**
 * @see This method is used to read content from file.
 * @param String filepath is the path and name of the file
 * which you want to analyse.
 * @return return the content of file in String form.
 */
public String readFile(String filepath)
{
     try{
      File file= new File(filepath);
      if(file.isDirectory())
      {
       String[] list = file.list();
             String str = new String();
             System.out.println("文件的个数:"+list.length+" 文件列表如下:");
                for(int i =0;i<list.length;i++)
            {
         System.out.println(list[i]);
                  FileInputStream read = new FileInputStream(filepath+'\\'+list[i]);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            str +=content;
            }
          return str;
      }
      else{
         FileInputStream read = new FileInputStream(filepath);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            return content;
      }
 }catch(IOException e){
  System.out.println(e);
  return null;
 }
}
/**
 * This method is designed to init a set of word and the times it appeared.
 * @param wordList
 */
public void initSet(Hashtable wordList)
{
  Enumeration e = wordList.keys();
  this.resultSet = new Set(wordList.size());
  while(e.hasMoreElements())
  {
   Object key = e.nextElement();
   String word = key.toString();
   Integer num = (Integer)wordList.get(key);
   int times = num.intValue();
   this.resultSet.addElement(word,times);
  }
}
/**
 * This method is designed to show the word list.
 * @param wordList
 */
public void showTable(Hashtable wordList)
{
 Enumeration e = wordList.keys();
  while(e.hasMoreElements())
   {
    Object key = e.nextElement();
    String word = key.toString();
    Integer num = (Integer)wordList.get(key);
    int times = num.intValue();
    System.out.println(word+" : "+times);
   }
}
/**
 * This method is designed to write the result into file.
 * @param filepath
 */
public void saveResult(String filepath)
{
 try{
  FileOutputStream write = new FileOutputStream(filepath);
  for(int i = 0;i<this.resultSet.getCount();i++)
  {
   String word = this.resultSet.getWordAt(i);
   int times = this.resultSet.getFrequency(i);
   write.write(this.format(word,times));
  }
  write.close();
  
 }catch(IOException e)
 {
  System.out.println(e);
 }
 
}
/**@author ZhuTao
 * This method is designed to format information
 * to byte stream.
 * @param word
 * @param times
 * @return a byte stream.
 */
public byte[] format(String word,int times)
{
 String str =  new String (word+" : "+times+"\r\n");
 byte []data = new byte[str.length()];
 for(int i = 0;i<str.length();i++)
  data[i] = (byte)str.charAt(i);
 return data;
}
}

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of (word, frequency) pairs that can be ordered by
 * descending frequency.
 *
 * <p>Despite its name this class does not implement {@code java.util.Set}
 * and does not reject duplicate words: every pair passed to
 * {@link #addElement(String, int)} is stored, in two parallel arrays.
 *
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set {
    /** Number of pairs stored so far; meaningful indices are 0..num-1. */
    private int num;
    /** times[i] is the frequency that belongs to word[i]. */
    private int[] times;
    /** word[i] is the i-th stored word. */
    private String[] word;

    /**
     * Creates an empty container.
     *
     * @param size initial capacity; the arrays grow on demand, so this is
     *             only a sizing hint
     */
    public Set(int size) {
        this.num = 0;
        this.times = new int[size];
        this.word = new String[size];
    }

    /**
     * Appends a word together with its frequency, enlarging the backing
     * arrays first when they are full.
     *
     * @param word  the word to add
     * @param times how many times the word appeared
     */
    public void addElement(String word, int times) {
        if (this.num == this.word.length) {
            grow();
        }
        this.times[this.num] = times;
        this.word[this.num] = word;
        this.num++;
    }

    /** Doubles the capacity of both arrays (at least 4 slots), keeping the stored pairs. */
    private void grow() {
        int capacity = Math.max(4, this.word.length * 2);
        int[] biggerTimes = new int[capacity];
        String[] biggerWord = new String[capacity];
        System.arraycopy(this.times, 0, biggerTimes, 0, this.num);
        System.arraycopy(this.word, 0, biggerWord, 0, this.num);
        this.times = biggerTimes;
        this.word = biggerWord;
    }

    /**
     * Orders the stored pairs by descending frequency.
     *
     * <p>This is a simple exchange sort: for every position i, any later
     * element with a larger frequency is swapped into position i, so after
     * the inner loop position i holds the largest remaining frequency.
     */
    public void sort() {
        for (int i = 0; i < this.num - 1; i++) {
            for (int j = i + 1; j < this.num; j++) {
                if (this.times[j] > this.times[i]) {
                    swap(i, j);
                }
            }
        }
    }

    /** Exchanges the (word, frequency) pairs stored at positions a and b. */
    private void swap(int a, int b) {
        int heldTimes = this.times[a];
        String heldWord = this.word[a];
        this.times[a] = this.times[b];
        this.word[a] = this.word[b];
        this.times[b] = heldTimes;
        this.word[b] = heldWord;
    }

    /**
     * Prints a header line with the number of words, then one
     * "word  :  frequency" line per stored pair, in current order.
     */
    public void showResult() {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for (int i = 0; i < this.num; i++) {
            System.out.println(this.word[i]+"  :  "+this.times[i]);
        }
    }

    /**
     * @return the number of pairs added so far
     */
    public int getCount() {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added words
     * @return the word at index i
     */
    public String getWordAt(int i) {
        return this.word[i];
    }

    /**
     * Returns the frequency stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added values
     * @return the frequency of the word at index i
     */
    public int getFrequency(int i) {
        return this.times[i];
    }
}
/**
 * This class is designed to annalyse
 * word's frequency of English articles.
 * @author ZhuTao HUST.
 * 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version:1.0
 */
public class EnglishWord {
private Set resultSet;
 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  EnglishWord analyser = new EnglishWord("G:\\test","G:\\tao.txt");  
 }
public EnglishWord(String filepath,String savepath)
{
 String content = this.readFile(filepath);
 Hashtable table = this.getWordList(content);
 
 this.initSet(table);
 this.resultSet.sort();
 this.resultSet.showResult();
 this.saveResult(savepath);
}
/**
 * This method is used to analyse the times of each word appears in
 * the file.
 * @param content is the content of file you.
 * @return the word map.Which contains words and times it appeared.
 */

public Hashtable getWordList(String content)
{
 Hashtable wordList = new Hashtable();
 int i = 0;
 for(;i<content.length();i++)
 {
  char ch = content.charAt(i);
  if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
  else break;
 }
 
 boolean flag = true;
 String word = new String();
 char ch ;
 for(;i<content.length();i++)
 {
  ch =  content.charAt(i);
  if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '\''){
   word+=ch;
   flag = true;
  }
  else{
   //如果已经包含该单词,就计数加一;
   if(ch == '-'){i+=2;continue;}
   if(flag)
   {
    if(wordList.containsKey(word))
      {
     Integer num = (Integer)wordList.get(word);
     int t = num.intValue()+1;
     wordList.put(word,new Integer(t));
       }
      else
      {
    wordList.put(word,new Integer(1));
      }
   }
   flag = false;
   word = "";
  }
 }
 if(!word.equals(""))
 {
  if(wordList.containsKey(word))
     {
    Integer num = (Integer)wordList.get(word);
    int t = num.intValue()+1;
    wordList.put(word,new Integer(t));
      }
     else
     {
   wordList.put(word,new Integer(1));
     }
 }
 return wordList;
}
/**
 * @see This method is used to read content from file.
 * @param String filepath is the path and name of the file
 * which you want to analyse.
 * @return return the content of file in String form.
 */
public String readFile(String filepath)
{
     try{
      File file= new File(filepath);
      if(file.isDirectory())
      {
       String[] list = file.list();
             String str = new String();
             System.out.println("文件的个数:"+list.length+" 文件列表如下:");
                for(int i =0;i<list.length;i++)
            {
         System.out.println(list[i]);
                  FileInputStream read = new FileInputStream(filepath+'\\'+list[i]);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            str +=content;
            }
          return str;
      }
      else{
         FileInputStream read = new FileInputStream(filepath);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            return content;
      }
 }catch(IOException e){
  System.out.println(e);
  return null;
 }
}
/**
 * This method is designed to init a set of word and the times it appeared.
 * @param wordList
 */
public void initSet(Hashtable wordList)
{
  Enumeration e = wordList.keys();
  this.resultSet = new Set(wordList.size());
  while(e.hasMoreElements())
  {
   Object key = e.nextElement();
   String word = key.toString();
   Integer num = (Integer)wordList.get(key);
   int times = num.intValue();
   this.resultSet.addElement(word,times);
  }
}
/**
 * This method is designed to show the word list.
 * @param wordList
 */
public void showTable(Hashtable wordList)
{
 Enumeration e = wordList.keys();
  while(e.hasMoreElements())
   {
    Object key = e.nextElement();
    String word = key.toString();
    Integer num = (Integer)wordList.get(key);
    int times = num.intValue();
    System.out.println(word+" : "+times);
   }
}
/**
 * This method is designed to write the result into file.
 * @param filepath
 */
public void saveResult(String filepath)
{
 try{
  FileOutputStream write = new FileOutputStream(filepath);
  for(int i = 0;i<this.resultSet.getCount();i++)
  {
   String word = this.resultSet.getWordAt(i);
   int times = this.resultSet.getFrequency(i);
   write.write(this.format(word,times));
  }
  write.close();
  
 }catch(IOException e)
 {
  System.out.println(e);
 }
 
}
/**@author ZhuTao
 * This method is designed to format information
 * to byte stream.
 * @param word
 * @param times
 * @return a byte stream.
 */
public byte[] format(String word,int times)
{
 String str =  new String (word+" : "+times+"\r\n");
 byte []data = new byte[str.length()];
 for(int i = 0;i<str.length();i++)
  data[i] = (byte)str.charAt(i);
 return data;
}
}

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of (word, frequency) pairs that can be ordered by
 * descending frequency.
 *
 * <p>Despite its name this class does not implement {@code java.util.Set}
 * and does not reject duplicate words: every pair passed to
 * {@link #addElement(String, int)} is stored, in two parallel arrays.
 *
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set {
    /** Number of pairs stored so far; meaningful indices are 0..num-1. */
    private int num;
    /** times[i] is the frequency that belongs to word[i]. */
    private int[] times;
    /** word[i] is the i-th stored word. */
    private String[] word;

    /**
     * Creates an empty container.
     *
     * @param size initial capacity; the arrays grow on demand, so this is
     *             only a sizing hint
     */
    public Set(int size) {
        this.num = 0;
        this.times = new int[size];
        this.word = new String[size];
    }

    /**
     * Appends a word together with its frequency, enlarging the backing
     * arrays first when they are full.
     *
     * @param word  the word to add
     * @param times how many times the word appeared
     */
    public void addElement(String word, int times) {
        if (this.num == this.word.length) {
            grow();
        }
        this.times[this.num] = times;
        this.word[this.num] = word;
        this.num++;
    }

    /** Doubles the capacity of both arrays (at least 4 slots), keeping the stored pairs. */
    private void grow() {
        int capacity = Math.max(4, this.word.length * 2);
        int[] biggerTimes = new int[capacity];
        String[] biggerWord = new String[capacity];
        System.arraycopy(this.times, 0, biggerTimes, 0, this.num);
        System.arraycopy(this.word, 0, biggerWord, 0, this.num);
        this.times = biggerTimes;
        this.word = biggerWord;
    }

    /**
     * Orders the stored pairs by descending frequency.
     *
     * <p>This is a simple exchange sort: for every position i, any later
     * element with a larger frequency is swapped into position i, so after
     * the inner loop position i holds the largest remaining frequency.
     */
    public void sort() {
        for (int i = 0; i < this.num - 1; i++) {
            for (int j = i + 1; j < this.num; j++) {
                if (this.times[j] > this.times[i]) {
                    swap(i, j);
                }
            }
        }
    }

    /** Exchanges the (word, frequency) pairs stored at positions a and b. */
    private void swap(int a, int b) {
        int heldTimes = this.times[a];
        String heldWord = this.word[a];
        this.times[a] = this.times[b];
        this.word[a] = this.word[b];
        this.times[b] = heldTimes;
        this.word[b] = heldWord;
    }

    /**
     * Prints a header line with the number of words, then one
     * "word  :  frequency" line per stored pair, in current order.
     */
    public void showResult() {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for (int i = 0; i < this.num; i++) {
            System.out.println(this.word[i]+"  :  "+this.times[i]);
        }
    }

    /**
     * @return the number of pairs added so far
     */
    public int getCount() {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added words
     * @return the word at index i
     */
    public String getWordAt(int i) {
        return this.word[i];
    }

    /**
     * Returns the frequency stored at position i.
     *
     * @param i index of the pair; only 0..getCount()-1 hold added values
     * @return the frequency of the word at index i
     */
    public int getFrequency(int i) {
        return this.times[i];
    }
}
/**
 * This class is designed to annalyse
 * word's frequency of English articles.
 * @author ZhuTao HUST.
 * 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version:1.0
 */
public class EnglishWord {
private Set resultSet;
 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  EnglishWord analyser = new EnglishWord("G:\\test","G:\\tao.txt");  
 }
public EnglishWord(String filepath,String savepath)
{
 String content = this.readFile(filepath);
 Hashtable table = this.getWordList(content);
 
 this.initSet(table);
 this.resultSet.sort();
 this.resultSet.showResult();
 this.saveResult(savepath);
}
/**
 * This method is used to analyse the times of each word appears in
 * the file.
 * @param content is the content of file you.
 * @return the word map.Which contains words and times it appeared.
 */

public Hashtable getWordList(String content)
{
 Hashtable wordList = new Hashtable();
 int i = 0;
 for(;i<content.length();i++)
 {
  char ch = content.charAt(i);
  if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
  else break;
 }
 
 boolean flag = true;
 String word = new String();
 char ch ;
 for(;i<content.length();i++)
 {
  ch =  content.charAt(i);
  if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '\''){
   word+=ch;
   flag = true;
  }
  else{
   //如果已经包含该单词,就计数加一;
   if(ch == '-'){i+=2;continue;}
   if(flag)
   {
    if(wordList.containsKey(word))
      {
     Integer num = (Integer)wordList.get(word);
     int t = num.intValue()+1;
     wordList.put(word,new Integer(t));
       }
      else
      {
    wordList.put(word,new Integer(1));
      }
   }
   flag = false;
   word = "";
  }
 }
 if(!word.equals(""))
 {
  if(wordList.containsKey(word))
     {
    Integer num = (Integer)wordList.get(word);
    int t = num.intValue()+1;
    wordList.put(word,new Integer(t));
      }
     else
     {
   wordList.put(word,new Integer(1));
     }
 }
 return wordList;
}
/**
 * @see This method is used to read content from file.
 * @param String filepath is the path and name of the file
 * which you want to analyse.
 * @return return the content of file in String form.
 */
public String readFile(String filepath)
{
     try{
      File file= new File(filepath);
      if(file.isDirectory())
      {
       String[] list = file.list();
             String str = new String();
             System.out.println("文件的个数:"+list.length+" 文件列表如下:");
                for(int i =0;i<list.length;i++)
            {
         System.out.println(list[i]);
                  FileInputStream read = new FileInputStream(filepath+'\\'+list[i]);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            str +=content;
            }
          return str;
      }
      else{
         FileInputStream read = new FileInputStream(filepath);
            byte[]data = new byte[read.available()];
            read.read(data);
            read.close();
            String content = new String(data);
            return content;
      }
 }catch(IOException e){
  System.out.println(e);
  return null;
 }
}
/**
 * This method is designed to init a set of word and the times it appeared.
 * @param wordList
 */
public void initSet(Hashtable wordList)
{
  Enumeration e = wordList.keys();
  this.resultSet = new Set(wordList.size());
  while(e.hasMoreElements())
  {
   Object key = e.nextElement();
   String word = key.toString();
   Integer num = (Integer)wordList.get(key);
   int times = num.intValue();
   this.resultSet.addElement(word,times);
  }
}
/**
 * This method is designed to show the word list.
 * @param wordList
 */
public void showTable(Hashtable wordList)
{
 Enumeration e = wordList.keys();
  while(e.hasMoreElements())
   {
    Object key = e.nextElement();
    String word = key.toString();
    Integer num = (Integer)wordList.get(key);
    int times = num.intValue();
    System.out.println(word+" : "+times);
   }
}
/**
 * This method is designed to write the result into file.
 * @param filepath
 */
public void saveResult(String filepath)
{
 try{
  FileOutputStream write = new FileOutputStream(filepath);
  for(int i = 0;i<this.resultSet.getCount();i++)
  {
   String word = this.resultSet.getWordAt(i);
   int times = this.resultSet.getFrequency(i);
   write.write(this.format(word,times));
  }
  write.close();
  
 }catch(IOException e)
 {
  System.out.println(e);
 }
 
}
/**@author ZhuTao
 * This method is designed to format information
 * to byte stream.
 * @param word
 * @param times
 * @return a byte stream.
 */
public byte[] format(String word,int times)
{
 String str =  new String (word+" : "+times+"\r\n");
 byte []data = new byte[str.length()];
 for(int i = 0;i<str.length();i++)
  data[i] = (byte)str.charAt(i);
 return data;
}
}

Java的多进程运行模式分析:【上一篇】
AES算法的JAVA实现:【下一篇】
【相关文章】
没有相关文章
【随机文章】
  • debug与release区别和使用的常见错误(ZT)
  • XP实用技巧:映射网络驱动器
  • 业务流程:Business Request的虚实之道与Business Action的设计模式
  • 内存映射(2)More ways to map memory
  • Windows XP 中如何为用户和计算机指定策略结果集(RSoP)
  • What is an ORA-600 Internal Error?
  • 19宽屏在FC5 linux下的设置
  • glTranslate*, glScale*, glRotate*的使用练习
  • NetBIOS介绍
  • 关于SNMPV3协议开发
  • 【相关评论】
    没有相关评论
    【发表评论】
    姓名:
    邮件:
    随机码*
    评论*
          
    |  首 页  |  版权声明  |  联系我们   |  网站地图  |
    CopyRight © 2004-2007 软讯网络 All Rights Reserved.