| @@ -0,0 +1,7 @@ | |||||
| .project | |||||
| .classpath | |||||
| target/ | |||||
| Result/ | |||||
| Model/ | |||||
| .settings/ | |||||
| @@ -0,0 +1,15 @@ | |||||
| 基于自然语言处理的情感分析工具 | |||||
| 本程序依赖data目录下面的data.zip,先解压缩 data 目录下面的 data.zip到当前目录。 | |||||
| 1、基于词典和贝叶斯模型的情感分析 | |||||
| 主程序:eshore.cn.it.sentiment.Sentiment 此类通过 | |||||
| data/Sentiment_Dictionary中的正负面词语建立模型。 | |||||
| 测试: eshore.cn.it.sentiment.SentimentTest | |||||
| 通过这个类就可以测试 data/500trainblogxml中的某个文件夹下面的博客的情感。 | |||||
| 2、直接利用lingpipe的情感分析模块测试情感分析 | |||||
| 直接运行程序: eshore.cn.it.sentiment.ChinesePolarityBasic | |||||
| 程序就会通过: data/polarity_corpus/hotel_reviews/train2训练 | |||||
| 然后自动测试: data/polarity_corpus/hotel_reviews/test2 | |||||
| 最后给出程序测试结果。 | |||||
| @@ -0,0 +1,5 @@ | |||||
| 500trainblogxml/ | |||||
| nerws_corpus/ | |||||
| output/ | |||||
| polarity_corpus/ | |||||
| Sentiment_Dictionary/ | |||||
| @@ -0,0 +1,58 @@ | |||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>eshore.cn.it</groupId>
	<artifactId>nlp-sentiment</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>
	<name>nlp-sentiment</name>
	<url>http://maven.apache.org</url>
	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<commons.io.version>2.4</commons.io.version>
		<dom4j.version>1.6.1</dom4j.version>
		<lingpipe.version>4.1.0</lingpipe.version>
		<jieba.version>1.0.0</jieba.version>
		<!-- Jars that cannot be fetched from Maven Central are kept together in this local directory -->
		<maven.libs.home>F:/java_git_projects/nlp-sentiment/libs</maven.libs.home>
	</properties>
	<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>${commons.io.version}</version>
		</dependency>
		<dependency>
			<groupId>dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>${dom4j.version}</version>
		</dependency>
		<!-- The LingPipe core jar must be downloaded manually from the LingPipe website
		     and placed under ${maven.libs.home} (system-scope dependency below) -->
		<dependency>
			<groupId>com.aliasi</groupId>
			<artifactId>lingpipe</artifactId>
			<version>${lingpipe.version}</version>
			<scope>system</scope>
			<systemPath>${maven.libs.home}/lingpipe-4.1.0.jar</systemPath>
		</dependency>
		<!-- jieba Chinese word segmenter -->
		<dependency>
			<groupId>com.huaban</groupId>
			<artifactId>jieba-analysis</artifactId>
			<version>${jieba.version}</version>
		</dependency>
	</dependencies>
</project>
| @@ -0,0 +1,146 @@ | |||||
| package eshore.cn.it.sentiment; | |||||
| import java.io.File; | |||||
| import java.io.FileReader; | |||||
| import java.io.IOException; | |||||
| import java.util.List; | |||||
| import org.apache.commons.io.IOUtils; | |||||
| import com.aliasi.classify.Classification; | |||||
| import com.aliasi.classify.Classified; | |||||
| import com.aliasi.classify.DynamicLMClassifier; | |||||
| import com.aliasi.lm.NGramProcessLM; | |||||
| import com.aliasi.util.Files; | |||||
| import com.huaban.analysis.jieba.JiebaSegmenter; | |||||
| import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; | |||||
| import com.huaban.analysis.jieba.SegToken; | |||||
| /** | |||||
| * ChinesePolarityBasic 此类是利用lingpipe作中文情感预测的示例类 | |||||
| * lingpipe适合做增量分析 | |||||
| * @clebeg 2015-03-13 | |||||
| * @version 0.0.1 | |||||
| * */ | |||||
| public class ChinesePolarityBasic { | |||||
| private String[] mCategories = new String[]{"+1", "-1"}; | |||||
| //这就是分类模型 | |||||
| private DynamicLMClassifier<NGramProcessLM> mClassifier; | |||||
| private int numTests = 0; | |||||
| private int numCorrect = 0; | |||||
| private static final String TRAINFILES_INFO = | |||||
| "data/polarity_corpus/hotel_reviews/train2.rlabelclass"; | |||||
| private static final String TRAINFILES_DIR = | |||||
| "data/polarity_corpus/hotel_reviews/train2"; | |||||
| private static final String TESTFILES_DIR = | |||||
| "data/polarity_corpus/hotel_reviews/test2"; | |||||
| private static final String TESTFILES_INFO = | |||||
| "data/polarity_corpus/hotel_reviews/test2.rlabelclass"; | |||||
| private static final String ENCODING = "GBK"; | |||||
| private final JiebaSegmenter jiebaSegmenter = new JiebaSegmenter(); | |||||
| private final SegMode segMode = SegMode.INDEX; | |||||
| public static void main(String[] args) { | |||||
| try { | |||||
| new ChinesePolarityBasic().run(); | |||||
| } catch (Throwable t) { | |||||
| System.out.println("Thrown: " + t); | |||||
| t.printStackTrace(System.out); | |||||
| } | |||||
| } | |||||
| public ChinesePolarityBasic() { | |||||
| super(); | |||||
| int nGram = 8; | |||||
| mClassifier | |||||
| = DynamicLMClassifier | |||||
| .createNGramProcess(mCategories,nGram); | |||||
| } | |||||
| private void run() throws ClassNotFoundException, | |||||
| IOException { | |||||
| train(); | |||||
| evaluate(); | |||||
| } | |||||
| private void train() throws IOException { | |||||
| FileReader input = new FileReader(new File(TRAINFILES_INFO)); | |||||
| List<String> trainInfos = IOUtils.readLines(input); | |||||
| for (String str : trainInfos){ | |||||
| String[] train = str.split(" "); | |||||
| train(train[1], new File(TRAINFILES_DIR, train[0]), ENCODING); | |||||
| } | |||||
| } | |||||
| private void evaluate() throws IOException { | |||||
| FileReader input = new FileReader(new File(TESTFILES_INFO)); | |||||
| List<String> trainInfos = IOUtils.readLines(input); | |||||
| for (String str : trainInfos){ | |||||
| String[] train = str.split(" "); | |||||
| evaluate(train[1], new File(TESTFILES_DIR, train[0]), ENCODING); | |||||
| } | |||||
| System.out.println(" # Test Cases=" | |||||
| + numTests); | |||||
| System.out.println(" # Correct=" | |||||
| + numCorrect); | |||||
| System.out.println(" % Correct=" | |||||
| + ((double)numCorrect) | |||||
| /(double)numTests); | |||||
| } | |||||
| /** | |||||
| * 给定分类标识,给定训练文本,给定文本的编码,即可作分类训练 | |||||
| * 分类完成之后就会加入到分类模型中 | |||||
| * @throws IOException | |||||
| * */ | |||||
| private void train(String category, File trainFile, String fileEncoding) | |||||
| throws IOException { | |||||
| Classification classification = new Classification(category); | |||||
| String review = Files.readFromFile(trainFile, fileEncoding); | |||||
| //此处加入中文分词器,得到分词之后的字符串 | |||||
| List<SegToken> segTokens = jiebaSegmenter.process(review, segMode); | |||||
| review = ""; | |||||
| for (SegToken seg : segTokens) { | |||||
| review += seg.word.getToken() + " "; | |||||
| } | |||||
| Classified<CharSequence> classified | |||||
| = new Classified<CharSequence>(review,classification); | |||||
| mClassifier.handle(classified); | |||||
| } | |||||
| /** | |||||
| * 给定分类标识,给定测试文本,给定文本的编码,即可作测试模型 | |||||
| * @throws IOException | |||||
| * */ | |||||
| private void evaluate(String category, File testFile, String fileEncoding) | |||||
| throws IOException { | |||||
| String review | |||||
| = Files.readFromFile(testFile, fileEncoding); | |||||
| //同理,这里可以加入分词器,这样可以试试效果如何。 | |||||
| List<SegToken> segTokens = jiebaSegmenter.process(review, segMode); | |||||
| review = ""; | |||||
| for (SegToken seg : segTokens) { | |||||
| review += seg.word.getToken() + " "; | |||||
| } | |||||
| ++numTests; | |||||
| Classification classification | |||||
| = mClassifier.classify(review); | |||||
| //得到训练结果 | |||||
| String resultCategory | |||||
| = classification.bestCategory(); | |||||
| if (resultCategory.equals(category)) | |||||
| ++numCorrect; | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,94 @@ | |||||
| package eshore.cn.it.sentiment; | |||||
| import java.io.File; | |||||
| import java.io.IOException; | |||||
| import com.aliasi.classify.Classification; | |||||
| import com.aliasi.classify.Classified; | |||||
| import com.aliasi.classify.DynamicLMClassifier; | |||||
| import com.aliasi.lm.NGramProcessLM; | |||||
| import com.aliasi.util.Files; | |||||
| public class PolarityBasic { | |||||
| File mPolarityDir; | |||||
| String[] mCategories; | |||||
| DynamicLMClassifier<NGramProcessLM> mClassifier; | |||||
| public PolarityBasic(String[] args) { | |||||
| mPolarityDir = new File("data/polarity_corpus","txt_sentoken"); | |||||
| mCategories = mPolarityDir.list(); | |||||
| int nGram = 8; | |||||
| mClassifier | |||||
| = DynamicLMClassifier | |||||
| .createNGramProcess(mCategories,nGram); | |||||
| } | |||||
| public static void main(String[] args) { | |||||
| try { | |||||
| new PolarityBasic(args).run(); | |||||
| } catch (Throwable t) { | |||||
| System.out.println("Thrown: " + t); | |||||
| t.printStackTrace(System.out); | |||||
| } | |||||
| } | |||||
| private void run() throws ClassNotFoundException, | |||||
| IOException { | |||||
| train(); | |||||
| evaluate(); | |||||
| } | |||||
| private void train() throws IOException { | |||||
| for (int i = 0; i < mCategories.length; ++i) { | |||||
| String category = mCategories[i]; | |||||
| Classification classification | |||||
| = new Classification(category); | |||||
| File dir = new File(mPolarityDir, mCategories[i]); | |||||
| File[] trainFiles = dir.listFiles(); | |||||
| for (int j = 0; j < trainFiles.length; ++j) { | |||||
| File trainFile = trainFiles[j]; | |||||
| if (isTrainingFile(trainFile)) { | |||||
| String review | |||||
| = Files.readFromFile(trainFile,"ISO-8859-1"); | |||||
| Classified<CharSequence> classified | |||||
| = new Classified<CharSequence>(review,classification); | |||||
| mClassifier.handle(classified); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| boolean isTrainingFile(File file) { | |||||
| return file.getName().charAt(2) != '9'; // test on fold 9 | |||||
| } | |||||
| void evaluate() throws IOException { | |||||
| int numTests = 0; | |||||
| int numCorrect = 0; | |||||
| for (int i = 0; i < mCategories.length; ++i) { | |||||
| String category = mCategories[i]; | |||||
| File file = new File(mPolarityDir,mCategories[i]); | |||||
| File[] testFiles = file.listFiles(); | |||||
| for (int j = 0; j < testFiles.length; ++j) { | |||||
| File testFile = testFiles[j]; | |||||
| if (!isTrainingFile(testFile)) { | |||||
| String review | |||||
| = Files.readFromFile(testFile,"ISO-8859-1"); | |||||
| ++numTests; | |||||
| Classification classification | |||||
| = mClassifier.classify(review); | |||||
| String resultCategory | |||||
| = classification.bestCategory(); | |||||
| if (resultCategory.equals(category)) | |||||
| ++numCorrect; | |||||
| } | |||||
| } | |||||
| } | |||||
| System.out.println(" # Test Cases=" | |||||
| + numTests); | |||||
| System.out.println(" # Correct=" | |||||
| + numCorrect); | |||||
| System.out.println(" % Correct=" | |||||
| + ((double)numCorrect) | |||||
| /(double)numTests); | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,211 @@ | |||||
| package eshore.cn.it.sentiment; | |||||
| import java.io.*; | |||||
| import java.util.*; | |||||
| import org.apache.commons.io.FileUtils; | |||||
| import org.dom4j.Document; | |||||
| import org.dom4j.Element; | |||||
| import org.dom4j.io.SAXReader; | |||||
| public class Sentiment { | |||||
| static private HashSet<String> Negative, Positive; //两种情感词典 | |||||
| static private Integer NegativeDoc, PositiveDoc, UnsureDoc; //属于两种情感的文本数 - 所构建模型需要保存下的值 | |||||
| static private Hashtable<String, Integer> NegativeWeight, PositiveWeight, UnsureWeight; //两种情感中所有词与他的权值 - 所构建模型需要保存下的值 | |||||
| static final String SENTIMENT_DOC_WEIGHT_PATH = "data/500trainblogxml/"; | |||||
| static final String POSITIVE_DIC_PATH = "data/Sentiment_Dictionary/positive_submit.txt"; | |||||
| static final String NEGATIVE_DIC_PATH = "data/Sentiment_Dictionary/negative_submit.txt"; | |||||
| static final String FILE_ENCODING = "UTF-8"; | |||||
| public static void main(String[] args) throws Exception { | |||||
| // TODO 自动生成的方法存根 | |||||
| Sentiment Sentiment = new Sentiment(); | |||||
| Sentiment.Model( ); | |||||
| Sentiment.Save_Model(); | |||||
| } | |||||
| public void Model( ) throws Exception { | |||||
| this.Read_Sentiment_Dictionary(); | |||||
| this.Sentiment_Doc_Weight(SENTIMENT_DOC_WEIGHT_PATH); | |||||
| } | |||||
| @SuppressWarnings("resource") | |||||
| public void Read_Sentiment_Dictionary() throws Exception { | |||||
| BufferedReader buf; | |||||
| String str; | |||||
| //集合,里面元素不允许重复 | |||||
| Negative = new HashSet<String>(); | |||||
| buf = new BufferedReader( new InputStreamReader(new FileInputStream(NEGATIVE_DIC_PATH), FILE_ENCODING) ); | |||||
| while( (str = buf.readLine()) != null ) { | |||||
| Negative.add(str); | |||||
| } | |||||
| Positive = new HashSet<String>(); | |||||
| buf = new BufferedReader( new InputStreamReader(new FileInputStream(POSITIVE_DIC_PATH), FILE_ENCODING) ); | |||||
| while( (str = buf.readLine()) != null ) { | |||||
| Positive.add(str); | |||||
| } | |||||
| } | |||||
| public void Sentiment_Doc_Weight( String DirPath ) throws Exception { | |||||
| File NegativeDir = new File( DirPath + "negativeout" ); | |||||
| String[] NegativeFiles = NegativeDir.list(); | |||||
| NegativeDoc = NegativeFiles.length; | |||||
| ArrayList<String> NegativeCurrentList = new ArrayList<String>(); | |||||
| for ( int i = 0; i < NegativeFiles.length; i ++ ) { | |||||
| System.out.println("NegativeFiles No."+(i+1)+" "+DirPath+"negativeout/"+NegativeFiles[i]); | |||||
| this.ReadXML(DirPath+"negativeout/"+NegativeFiles[i], NegativeCurrentList); | |||||
| } | |||||
| NegativeWeight = HashTable( NegativeCurrentList ); | |||||
| /**********************************************************************************************************/ | |||||
| File PositiveDir = new File( DirPath + "positiveout" ); | |||||
| String[] PositiveFiles = PositiveDir.list(); | |||||
| PositiveDoc = PositiveFiles.length; | |||||
| ArrayList<String> PositiveCurrentList = new ArrayList<String>(); | |||||
| for ( int i = 0; i < PositiveFiles.length; i ++ ) { | |||||
| System.out.println("PositiveFiles No."+(i+1)+" "+DirPath+"positiveout/"+PositiveFiles[i]); | |||||
| this.ReadXML(DirPath+"positiveout/"+PositiveFiles[i], PositiveCurrentList); | |||||
| } | |||||
| PositiveWeight = HashTable( PositiveCurrentList ); | |||||
| /*********************************************************************************************************/ | |||||
| File UnsureDir = new File( DirPath + "unsureout" ); | |||||
| String[] UnsureFiles = UnsureDir.list(); | |||||
| UnsureDoc = UnsureFiles.length; | |||||
| ArrayList<String> UnsureCurrentList = new ArrayList<String>(); | |||||
| for ( int i = 0; i < UnsureFiles.length; i ++ ) { | |||||
| System.out.println("UnsureFiles No."+(i+1)+" "+DirPath+"unsureout/"+UnsureFiles[i]); | |||||
| this.ReadXML(DirPath+"unsureout/"+UnsureFiles[i], UnsureCurrentList); | |||||
| } | |||||
| UnsureWeight = HashTable( UnsureCurrentList ); | |||||
| /********************************************************************************************************/ | |||||
| System.out.println("UnsureCurrent = " + UnsureCurrentList.size() + " UnsureHashTable = " + UnsureWeight.size()); | |||||
| System.out.println("PositiveCurrent = " + PositiveCurrentList.size() + " PositiveHashTable = " + PositiveWeight.size()); | |||||
| System.out.println("NegativeCurrent = " + NegativeCurrentList.size() + " NegativeHashTable = " + NegativeWeight.size()); | |||||
| System.out.println("NegativeDoc = " + NegativeDoc + " PositiveDoc = " + PositiveDoc + " UnsureDoc = " + UnsureDoc); | |||||
| } | |||||
| public void ReadXML( String FilePath, ArrayList<String> currentList ) throws Exception { //从指定路径读取XML文件并提取出其情感词返回 | |||||
| SAXReader SaxReader = new SAXReader(); | |||||
| Document Doc = SaxReader.read(new File(FilePath)); | |||||
| Element root = Doc.getRootElement(); | |||||
| Element content = root.element("content"); | |||||
| List<?> sentenses = content.elements("sentence"); //每一句话作为一项 | |||||
| for ( Iterator<?> iter = sentenses.iterator(); iter.hasNext(); ) { | |||||
| Element sentense = (Element)iter.next(); | |||||
| List<?> toks = sentense.elements(); | |||||
| for ( Iterator<?> iter1 = toks.iterator(); iter1.hasNext(); ) { | |||||
| Element tok = (Element)iter1.next(); | |||||
| String Type = tok.attributeValue("type"); | |||||
| if ( Type.equals("group") ) { //如果是"atom"一定不存在于情感词中 | |||||
| GetWord( tok, currentList ); //从"group"中获取词 | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| public void GetWord( Element root, ArrayList<String> currentList ) { //获取XML中的情感词 | |||||
| String Word = ""; | |||||
| List<?> elements = root.elements("tok"); | |||||
| for ( Iterator<?> iter = elements.iterator(); iter.hasNext(); ) { | |||||
| Element tok = (Element)iter.next(); | |||||
| String Type = tok.attributeValue("type"); | |||||
| if ( Type.compareTo("atom") == 0 ) { | |||||
| Word += tok.getText().trim(); | |||||
| } | |||||
| else { | |||||
| GetWord( tok, currentList ); | |||||
| } | |||||
| } | |||||
| if ( Word.length() > 1 && (Positive.contains(Word) || Negative.contains(Word)) ) { //筛选出情感词 | |||||
| currentList.add(Word); | |||||
| } | |||||
| } | |||||
| public Hashtable<String, Integer> HashTable( ArrayList<String> currentList ) { //根据文本中的情感词构建哈希表 | |||||
| Hashtable<String, Integer> HashTable = new Hashtable<String, Integer>(); | |||||
| for ( Iterator<String> iter = currentList.iterator(); iter.hasNext(); ) { | |||||
| String Word = (String)iter.next(); | |||||
| if ( HashTable.containsKey(Word) ) { | |||||
| Integer Weight = HashTable.get(Word); | |||||
| HashTable.put(Word, Weight+1); | |||||
| } | |||||
| else { | |||||
| HashTable.put(Word, 1); | |||||
| } | |||||
| } | |||||
| return HashTable; | |||||
| } | |||||
| @SuppressWarnings("resource") | |||||
| public void Save_Model( ) throws Exception { | |||||
| ObjectOutputStream OOS; | |||||
| File ModelPath = new File("Model"); | |||||
| File NegativeModel = new File(ModelPath, "NegativeModel.txt"); | |||||
| File PositiveModel = new File(ModelPath, "PositiveModel.txt"); | |||||
| File UnsureModel = new File(ModelPath, "UnsureModel.txt"); | |||||
| if ( !ModelPath.exists() ) { ModelPath.mkdir(); } | |||||
| System.out.println("Saving NegativeModel..."); | |||||
| OOS = new ObjectOutputStream( new FileOutputStream( NegativeModel ) ); //对象流直接写入 | |||||
| OOS.writeObject(NegativeDoc); | |||||
| OOS.writeObject(NegativeWeight); | |||||
| System.out.println("Saving PositiveModel..."); | |||||
| OOS = new ObjectOutputStream( new FileOutputStream( PositiveModel ) ); | |||||
| OOS.writeObject(PositiveDoc); | |||||
| OOS.writeObject(PositiveWeight); | |||||
| System.out.println("Saving UnsureModel..."); | |||||
| OOS = new ObjectOutputStream( new FileOutputStream( UnsureModel ) ); | |||||
| OOS.writeObject(UnsureDoc); | |||||
| OOS.writeObject(UnsureWeight); | |||||
| Enumeration<String> Keys; | |||||
| System.out.println("Saving NegativeWeight..."); | |||||
| Keys = NegativeWeight.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Key = Keys.nextElement(); | |||||
| FileUtils.writeStringToFile(new File("Model", "NegativeWeight.txt"), Key+"\t\t\t"+NegativeWeight.get(Key)+"\r\n", "UTF-8", true); | |||||
| } | |||||
| System.out.println("Saving PositiveWeight..."); | |||||
| Keys = PositiveWeight.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Key = Keys.nextElement(); | |||||
| FileUtils.writeStringToFile(new File("Model", "PositiveWeight.txt"), Key+"\t\t\t"+PositiveWeight.get(Key)+"\r\n", "UTF-8", true); | |||||
| } | |||||
| System.out.println("Saving UnsureWeight..."); | |||||
| Keys = UnsureWeight.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Key = Keys.nextElement(); | |||||
| FileUtils.writeStringToFile(new File("Model", "UnsureWeight.txt"), Key+"\t\t\t"+UnsureWeight.get(Key)+"\r\n", "UTF-8", true); | |||||
| } | |||||
| System.out.println("Save Success!"); | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,244 @@ | |||||
| package eshore.cn.it.sentiment; | |||||
| import java.io.*; | |||||
| import java.util.*; | |||||
| import org.apache.commons.io.FileUtils; | |||||
| import org.dom4j.Document; | |||||
| import org.dom4j.Element; | |||||
| import org.dom4j.io.SAXReader; | |||||
| public class SentimentTest { | |||||
| static private HashSet<String> Negative, Positive; //两种情感词典 | |||||
| static private Integer NegativeDoc, PositiveDoc, UnsureDoc; //属于两种情感的文本数 - 所构建模型需要保存下的值 | |||||
| static private Hashtable<String, Integer> NegativeWeight, PositiveWeight, UnsureWeight; //两种情感中所有词与他的权值 - 所构建模型需要保存下的值 | |||||
| static final String SENTIMENT_DOC_WEIGHT_PATH = "data/500trainblogxml/negativeout/"; | |||||
| static final String POSITIVE_DIC_PATH = "data/Sentiment_Dictionary/positive_submit.txt"; | |||||
| static final String NEGATIVE_DIC_PATH = "data/Sentiment_Dictionary/negative_submit.txt"; | |||||
| static final String FILE_ENCODING = "UTF-8"; | |||||
| public static void main(String[] args) throws Exception { | |||||
| // TODO 自动生成的方法存根 | |||||
| SentimentTest Sentiment_Test = new SentimentTest(); | |||||
| Sentiment_Test.Read_Model(); //读取模型 | |||||
| Sentiment_Test.Classify_Directory(SENTIMENT_DOC_WEIGHT_PATH); | |||||
| } | |||||
| @SuppressWarnings({ "resource", "unchecked" }) | |||||
| public void Read_Model() throws Exception { | |||||
| this.Read_Sentiment_Dictionary(); | |||||
| ObjectInputStream OIS; //对象流直接读入 | |||||
| File ModelPath = new File("Model"); | |||||
| File NegativeModel = new File(ModelPath, "NegativeModel.txt"); | |||||
| File PositiveModel = new File(ModelPath, "PositiveModel.txt"); | |||||
| File UnsureModel = new File(ModelPath, "UnsureModel.txt"); | |||||
| System.out.println("Reading NegativeModel..."); | |||||
| OIS = new ObjectInputStream( new FileInputStream( NegativeModel ) ); | |||||
| NegativeDoc = (Integer) OIS.readObject(); | |||||
| NegativeWeight = (Hashtable<String, Integer>) OIS.readObject(); | |||||
| System.out.println("Reading PositiveModel..."); | |||||
| OIS = new ObjectInputStream( new FileInputStream( PositiveModel ) ); | |||||
| PositiveDoc = (Integer) OIS.readObject(); | |||||
| PositiveWeight = (Hashtable<String, Integer>) OIS.readObject(); | |||||
| System.out.println("Reading UnsureModel..."); | |||||
| OIS = new ObjectInputStream( new FileInputStream( UnsureModel ) ); | |||||
| UnsureDoc = (Integer) OIS.readObject(); | |||||
| UnsureWeight = (Hashtable<String, Integer>) OIS.readObject(); | |||||
| System.out.println("Read Success."); | |||||
| } | |||||
| @SuppressWarnings("resource") | |||||
| public void Read_Sentiment_Dictionary( ) throws Exception { //读入情感词典 | |||||
| BufferedReader buf; | |||||
| String str; | |||||
| Negative = new HashSet<String>(); | |||||
| buf = new BufferedReader( new InputStreamReader(new FileInputStream(NEGATIVE_DIC_PATH), FILE_ENCODING) ); | |||||
| while( (str = buf.readLine()) != null ) { | |||||
| Negative.add(str); | |||||
| } | |||||
| Positive = new HashSet<String>(); | |||||
| buf = new BufferedReader( new InputStreamReader(new FileInputStream(POSITIVE_DIC_PATH), FILE_ENCODING) ); | |||||
| while( (str = buf.readLine()) != null ) { | |||||
| Positive.add(str); | |||||
| } | |||||
| } | |||||
| public void Classify_Directory( String DirectoryPath ) throws Exception { | |||||
| int PositiveNum = 0, NegativeNum = 0, UnsureNum = 0; | |||||
| String[] Text_Path = new File( DirectoryPath ).list(); | |||||
| for ( int i = 0; i < Text_Path.length; i ++ ) { | |||||
| Classify( DirectoryPath+Text_Path[i] ); | |||||
| double Ans = Classify( DirectoryPath+Text_Path[i] ); //对当前目录下的每一个文件进行测试 | |||||
| if ( Ans < 0 ) { //根据测试结果将测试文本进行分类 | |||||
| FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Positive"), Text_Path[i])); | |||||
| PositiveNum ++; | |||||
| } | |||||
| else if ( Ans > 0 ) { | |||||
| FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Negative"), Text_Path[i])); | |||||
| NegativeNum ++; | |||||
| } | |||||
| else { | |||||
| FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Unsure"), Text_Path[i])); | |||||
| UnsureNum ++; | |||||
| } | |||||
| System.out.print( "No." + (i+1) + " " + Text_Path[i] + ": " ); | |||||
| if ( Ans < 0 ) { System.out.println("Positive"); } | |||||
| else if ( Ans > 0 ) { System.out.println("Negative"); } | |||||
| else { System.out.println("Unsure"); } | |||||
| } | |||||
| System.out.println("End."); | |||||
| System.out.println("NegativeNum = " + NegativeNum + " PositiveNum = " + PositiveNum + " UnsureNum = " + UnsureNum); | |||||
| } | |||||
| public double Classify( String FilePath ) throws Exception { | |||||
| Hashtable<String, Integer> FileHashTable = Read_TestFile( FilePath ); | |||||
| Enumeration<String> Keys; | |||||
| double NegativeAns = 1, PositiveAns = 1; | |||||
| Keys = FileHashTable.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Word = Keys.nextElement(); | |||||
| NegativeAns *= ( Math.pow(this.PostProbability(Word, NegativeWeight), FileHashTable.get(Word)) ); | |||||
| } | |||||
| NegativeAns *= this.PriorProbability(NegativeDoc); | |||||
| Keys = FileHashTable.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Word = Keys.nextElement(); | |||||
| PositiveAns *= ( Math.pow(this.PostProbability(Word, PositiveWeight), FileHashTable.get(Word)) ); | |||||
| } | |||||
| PositiveAns *= this.PriorProbability(PositiveDoc); | |||||
| return ( NegativeAns-PositiveAns ); | |||||
| } | |||||
| public Hashtable<String, Integer> Read_TestFile( String FilePath ) throws Exception { | |||||
| ArrayList<String> FileCurrentList = new ArrayList<String>(); | |||||
| ReadXML( FilePath, FileCurrentList ); | |||||
| Hashtable<String, Integer> FileHashTable = HashTable( FileCurrentList ); | |||||
| return FileHashTable; | |||||
| } | |||||
| public void ReadXML( String FilePath, ArrayList<String> currentList ) throws Exception { //从指定路径读取XML文件并提取出其情感词返回 | |||||
| SAXReader SaxReader = new SAXReader(); | |||||
| Document Doc = SaxReader.read(new File(FilePath)); | |||||
| Element root = Doc.getRootElement(); | |||||
| Element content = root.element("content"); | |||||
| List<?> sentenses = content.elements("sentence"); //每一句话作为一项 | |||||
| for ( Iterator<?> iter = sentenses.iterator(); iter.hasNext(); ) { | |||||
| Element sentense = (Element)iter.next(); | |||||
| List<?> toks = sentense.elements(); | |||||
| for ( Iterator<?> iter1 = toks.iterator(); iter1.hasNext(); ) { | |||||
| Element tok = (Element)iter1.next(); | |||||
| String Type = tok.attributeValue("type"); | |||||
| if ( Type.equals("group") ) { //如果是"atom"一定不存在于情感词中 | |||||
| GetWord( tok, currentList ); //从"group"中获取词 | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| public void GetWord( Element root, ArrayList<String> currentList ) { //获取XML中的情感词 | |||||
| String Word = ""; | |||||
| List<?> elements = root.elements("tok"); | |||||
| for ( Iterator<?> iter = elements.iterator(); iter.hasNext(); ) { | |||||
| Element tok = (Element)iter.next(); | |||||
| String Type = tok.attributeValue("type"); | |||||
| if ( Type.compareTo("atom") == 0 ) { | |||||
| Word += tok.getText().trim(); | |||||
| } | |||||
| else { | |||||
| GetWord( tok, currentList ); | |||||
| } | |||||
| } | |||||
| if ( Word.length() > 1 && (Positive.contains(Word) || Negative.contains(Word)) ) { //筛选出情感词 | |||||
| currentList.add(Word); | |||||
| } | |||||
| } | |||||
| public Hashtable<String, Integer> HashTable( ArrayList<String> currentList ) { //根据文本中的情感词构建哈希表 | |||||
| Hashtable<String, Integer> HashTable = new Hashtable<String, Integer>(); | |||||
| for ( Iterator<String> iter = currentList.iterator(); iter.hasNext(); ) { | |||||
| String Word = (String)iter.next(); | |||||
| if ( HashTable.containsKey(Word) ) { | |||||
| Integer Weight = HashTable.get(Word); | |||||
| HashTable.put(Word, Weight+1); | |||||
| } | |||||
| else { | |||||
| HashTable.put(Word, 1); | |||||
| } | |||||
| } | |||||
| return HashTable; | |||||
| } | |||||
| public double PriorProbability( Integer SentimentDoc ) { | |||||
| double Ans = 1; | |||||
| Ans = ( (double)SentimentDoc/( (double)NegativeDoc+(double)PositiveDoc+(double)UnsureDoc ) ); | |||||
| return Ans; | |||||
| } | |||||
| public double PostProbability( String Word, Hashtable<String, Integer> SentimentWeight ) { | |||||
| double Ans, V, E; | |||||
| double Weight = 0, Weights = 0; | |||||
| if ( SentimentWeight.containsKey(Word) ) | |||||
| Weight = (double)SentimentWeight.get(Word); | |||||
| Weights = PostWeights( SentimentWeight ); | |||||
| V = PostWeights( NegativeWeight ) + PostWeights( PositiveWeight ) + PostWeights( UnsureWeight ); | |||||
| E = 1/Math.abs(V); | |||||
| Ans = ( Weight + E )/( Weights + E*Math.abs(V) ); | |||||
| return Ans; | |||||
| } | |||||
| public double PostWeights( Hashtable<String, Integer> SentimentWeight ) { | |||||
| double Weights = 0; | |||||
| Enumeration<String> Keys; | |||||
| Keys = SentimentWeight.keys(); | |||||
| while( Keys.hasMoreElements() ) { | |||||
| String Key = Keys.nextElement(); | |||||
| Weights += (double)SentimentWeight.get(Key); | |||||
| } | |||||
| return Weights; | |||||
| } | |||||
| } | |||||