commit 941597f3ddb382c877bda4426ff56473d5eb41d9 Author: gitclebeg Date: Thu Apr 9 08:45:05 2015 +0800 initial the project nlp-sentiment diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..806dab5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.project +.classpath +target/ +Result/ +Model/ + +.settings/ \ No newline at end of file diff --git a/README b/README new file mode 100644 index 0000000..6340f9a --- /dev/null +++ b/README @@ -0,0 +1,15 @@ +基于自然语言处理的情感分析工具 +本程序依赖data目录下面的data.zip,先解压缩 data 目录下面的 data.zip到当前目录。 + +1、基于词典和贝叶斯模型的情感分析 +主程序:eshore.cn.it.sentiment.Sentiment 此类通过 +data/Sentiment_Dictionary中的正负面词语建立模型。 + +测试: eshore.cn.it.sentiment.SentimentTest +通过这个类就可以测试 data/500trainblogxml中的某个文件夹下面的博客的情感。 + +2、直接利用lingpipe的情感分析模块测试情感分析 +直接运行程序: eshore.cn.it.sentiment.ChinesePolarityBasic +程序就会通过: data/polarity_corpus/hotel_reviews/train2训练 +然后自动测试: data/polarity_corpus/hotel_reviews/test2 +最后给出程序测试结果。 \ No newline at end of file diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..31367df --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,5 @@ +500trainblogxml/ +nerws_corpus/ +output/ +polarity_corpus/ +Sentiment_Dictionary/ \ No newline at end of file diff --git a/data/data.zip b/data/data.zip new file mode 100644 index 0000000..3e4c5d7 Binary files /dev/null and b/data/data.zip differ diff --git a/libs/lingpipe-4.1.0.jar b/libs/lingpipe-4.1.0.jar new file mode 100644 index 0000000..654e653 Binary files /dev/null and b/libs/lingpipe-4.1.0.jar differ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..66850e2 --- /dev/null +++ b/pom.xml @@ -0,0 +1,58 @@ + + 4.0.0 + + eshore.cn.it + nlp-sentiment + 0.0.1-SNAPSHOT + jar + + nlp-sentiment + http://maven.apache.org + + + UTF-8 + 2.4 + 1.6.1 + 4.1.0 + 1.0.0 + + F:/java_git_projects/nlp-sentiment/libs + + + + + junit + junit + 3.8.1 + test + + + commons-io + commons-io + ${commons.io.version} + + + dom4j + dom4j + ${dom4j.version} + + + + + + com.aliasi + 
lingpipe + ${lingpipe.version} + system + ${maven.libs.home}/lingpipe-4.1.0.jar + + + + + com.huaban + jieba-analysis + ${jieba.version} + + + diff --git a/src/main/java/eshore/cn/it/sentiment/ChinesePolarityBasic.java b/src/main/java/eshore/cn/it/sentiment/ChinesePolarityBasic.java new file mode 100644 index 0000000..d1100a9 --- /dev/null +++ b/src/main/java/eshore/cn/it/sentiment/ChinesePolarityBasic.java @@ -0,0 +1,146 @@ +package eshore.cn.it.sentiment; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.List; + + + +import org.apache.commons.io.IOUtils; + +import com.aliasi.classify.Classification; +import com.aliasi.classify.Classified; +import com.aliasi.classify.DynamicLMClassifier; +import com.aliasi.lm.NGramProcessLM; +import com.aliasi.util.Files; +import com.huaban.analysis.jieba.JiebaSegmenter; +import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; +import com.huaban.analysis.jieba.SegToken; + +/** + * ChinesePolarityBasic 此类是利用lingpipe作中文情感预测的示例类 + * lingpipe适合做增量分析 + * @clebeg 2015-03-13 + * @version 0.0.1 + * */ +public class ChinesePolarityBasic { + private String[] mCategories = new String[]{"+1", "-1"}; + //这就是分类模型 + private DynamicLMClassifier mClassifier; + + private int numTests = 0; + private int numCorrect = 0; + private static final String TRAINFILES_INFO = + "data/polarity_corpus/hotel_reviews/train2.rlabelclass"; + private static final String TRAINFILES_DIR = + "data/polarity_corpus/hotel_reviews/train2"; + private static final String TESTFILES_DIR = + "data/polarity_corpus/hotel_reviews/test2"; + private static final String TESTFILES_INFO = + "data/polarity_corpus/hotel_reviews/test2.rlabelclass"; + private static final String ENCODING = "GBK"; + + private final JiebaSegmenter jiebaSegmenter = new JiebaSegmenter(); + private final SegMode segMode = SegMode.INDEX; + + public static void main(String[] args) { + try { + new ChinesePolarityBasic().run(); + } catch (Throwable t) { + 
System.out.println("Thrown: " + t); + t.printStackTrace(System.out); + } + + } + + + public ChinesePolarityBasic() { + super(); + int nGram = 8; + mClassifier + = DynamicLMClassifier + .createNGramProcess(mCategories,nGram); + } + + private void run() throws ClassNotFoundException, + IOException { + train(); + evaluate(); + } + + private void train() throws IOException { + FileReader input = new FileReader(new File(TRAINFILES_INFO)); + List trainInfos = IOUtils.readLines(input); + for (String str : trainInfos){ + String[] train = str.split(" "); + train(train[1], new File(TRAINFILES_DIR, train[0]), ENCODING); + } + } + + private void evaluate() throws IOException { + FileReader input = new FileReader(new File(TESTFILES_INFO)); + List trainInfos = IOUtils.readLines(input); + for (String str : trainInfos){ + String[] train = str.split(" "); + evaluate(train[1], new File(TESTFILES_DIR, train[0]), ENCODING); + } + System.out.println(" # Test Cases=" + + numTests); + System.out.println(" # Correct=" + + numCorrect); + System.out.println(" % Correct=" + + ((double)numCorrect) + /(double)numTests); + } + + /** + * 给定分类标识,给定训练文本,给定文本的编码,即可作分类训练 + * 分类完成之后就会加入到分类模型中 + * @throws IOException + * */ + private void train(String category, File trainFile, String fileEncoding) + throws IOException { + Classification classification = new Classification(category); + String review = Files.readFromFile(trainFile, fileEncoding); + + //此处加入中文分词器,得到分词之后的字符串 + List segTokens = jiebaSegmenter.process(review, segMode); + review = ""; + for (SegToken seg : segTokens) { + review += seg.word.getToken() + " "; + } + + Classified classified + = new Classified(review,classification); + mClassifier.handle(classified); + } + + /** + * 给定分类标识,给定测试文本,给定文本的编码,即可作测试模型 + * @throws IOException + * */ + private void evaluate(String category, File testFile, String fileEncoding) + throws IOException { + + String review + = Files.readFromFile(testFile, fileEncoding); + //同理,这里可以加入分词器,这样可以试试效果如何。 + List 
segTokens = jiebaSegmenter.process(review, segMode); + review = ""; + for (SegToken seg : segTokens) { + review += seg.word.getToken() + " "; + } + + ++numTests; + Classification classification + = mClassifier.classify(review); + //得到训练结果 + String resultCategory + = classification.bestCategory(); + if (resultCategory.equals(category)) + ++numCorrect; + + } + +} diff --git a/src/main/java/eshore/cn/it/sentiment/PolarityBasic.java b/src/main/java/eshore/cn/it/sentiment/PolarityBasic.java new file mode 100644 index 0000000..36fad6b --- /dev/null +++ b/src/main/java/eshore/cn/it/sentiment/PolarityBasic.java @@ -0,0 +1,94 @@ +package eshore.cn.it.sentiment; + +import java.io.File; +import java.io.IOException; + +import com.aliasi.classify.Classification; +import com.aliasi.classify.Classified; +import com.aliasi.classify.DynamicLMClassifier; +import com.aliasi.lm.NGramProcessLM; +import com.aliasi.util.Files; + +public class PolarityBasic { + File mPolarityDir; + String[] mCategories; + DynamicLMClassifier mClassifier; + + public PolarityBasic(String[] args) { + mPolarityDir = new File("data/polarity_corpus","txt_sentoken"); + mCategories = mPolarityDir.list(); + int nGram = 8; + mClassifier + = DynamicLMClassifier + .createNGramProcess(mCategories,nGram); + } + + + public static void main(String[] args) { + try { + new PolarityBasic(args).run(); + } catch (Throwable t) { + System.out.println("Thrown: " + t); + t.printStackTrace(System.out); + } + } + + private void run() throws ClassNotFoundException, + IOException { + train(); + evaluate(); + } + private void train() throws IOException { + for (int i = 0; i < mCategories.length; ++i) { + String category = mCategories[i]; + Classification classification + = new Classification(category); + File dir = new File(mPolarityDir, mCategories[i]); + File[] trainFiles = dir.listFiles(); + for (int j = 0; j < trainFiles.length; ++j) { + File trainFile = trainFiles[j]; + if (isTrainingFile(trainFile)) { + String review + = 
package eshore.cn.it.sentiment;

import java.io.File;
import java.io.IOException;

import com.aliasi.classify.Classification;
import com.aliasi.classify.Classified;
import com.aliasi.classify.DynamicLMClassifier;
import com.aliasi.lm.NGramProcessLM;
import com.aliasi.util.Files;

/**
 * PolarityBasic: LingPipe's classic movie-review polarity demo. It trains an
 * 8-gram character language-model classifier on folds 0-8 of
 * data/polarity_corpus/txt_sentoken (one category per sub-directory) and
 * evaluates on fold 9.
 */
public class PolarityBasic {
    File mPolarityDir;
    String[] mCategories;                              // one category per sub-directory
    DynamicLMClassifier<NGramProcessLM> mClassifier;

    public PolarityBasic(String[] args) {
        mPolarityDir = new File("data/polarity_corpus", "txt_sentoken");
        mCategories = mPolarityDir.list();
        int nGram = 8;
        mClassifier = DynamicLMClassifier.createNGramProcess(mCategories, nGram);
    }

    public static void main(String[] args) {
        try {
            new PolarityBasic(args).run();
        } catch (Throwable t) {
            System.out.println("Thrown: " + t);
            t.printStackTrace(System.out);
        }
    }

    private void run() throws ClassNotFoundException, IOException {
        train();
        evaluate();
    }

    /** Trains on every file of every category except the held-out fold. */
    private void train() throws IOException {
        for (String category : mCategories) {
            Classification classification = new Classification(category);
            File dir = new File(mPolarityDir, category);
            for (File trainFile : dir.listFiles()) {
                if (isTrainingFile(trainFile)) {
                    String review = Files.readFromFile(trainFile, "ISO-8859-1");
                    Classified<CharSequence> classified =
                            new Classified<CharSequence>(review, classification);
                    mClassifier.handle(classified);
                }
            }
        }
    }

    /** Fold is encoded in the third character of the file name; fold 9 is held out. */
    boolean isTrainingFile(File file) {
        return file.getName().charAt(2) != '9'; // test on fold 9
    }

    /** Classifies every held-out file and prints simple accuracy. */
    void evaluate() throws IOException {
        int numTests = 0;
        int numCorrect = 0;
        for (String category : mCategories) {
            File dir = new File(mPolarityDir, category);
            for (File testFile : dir.listFiles()) {
                if (!isTrainingFile(testFile)) {
                    String review = Files.readFromFile(testFile, "ISO-8859-1");
                    ++numTests;
                    Classification classification = mClassifier.classify(review);
                    String resultCategory = classification.bestCategory();
                    if (resultCategory.equals(category))
                        ++numCorrect;
                }
            }
        }
        System.out.println(" # Test Cases=" + numTests);
        System.out.println(" # Correct=" + numCorrect);
        System.out.println(" % Correct=" + ((double) numCorrect) / (double) numTests);
    }
}

package eshore.cn.it.sentiment;

import java.io.*;
import java.util.*;

import org.apache.commons.io.FileUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Sentiment builds a naive-Bayes style polarity model from dictionary-matched
 * sentiment words found in segmented blog XML (data/500trainblogxml). Per
 * class (negative/positive/unsure) it records the document count and a
 * word-frequency table, then serializes both plus human-readable weight files
 * under Model/. SentimentTest later reads this model back.
 */
public class Sentiment {

    // Sentiment lexica loaded from disk (positive / negative word lists).
    static private HashSet<String> Negative, Positive;
    // Document count per sentiment class - part of the persisted model.
    static private Integer NegativeDoc, PositiveDoc, UnsureDoc;
    // Word -> frequency table per sentiment class - part of the persisted model.
    static private Hashtable<String, Integer> NegativeWeight, PositiveWeight, UnsureWeight;

    static final String SENTIMENT_DOC_WEIGHT_PATH = "data/500trainblogxml/";
    static final String POSITIVE_DIC_PATH = "data/Sentiment_Dictionary/positive_submit.txt";
    static final String NEGATIVE_DIC_PATH = "data/Sentiment_Dictionary/negative_submit.txt";

    static final String FILE_ENCODING = "UTF-8";

    public static void main(String[] args) throws Exception {
        Sentiment sentiment = new Sentiment();
        sentiment.Model();
        sentiment.Save_Model();
    }

    /** Loads the dictionaries, then scans the corpus to build the model tables. */
    public void Model() throws Exception {
        this.Read_Sentiment_Dictionary();
        this.Sentiment_Doc_Weight(SENTIMENT_DOC_WEIGHT_PATH);
    }

    /** Loads the positive and negative word lists (one word per line, UTF-8). */
    public void Read_Sentiment_Dictionary() throws Exception {
        Negative = readDictionary(NEGATIVE_DIC_PATH);
        Positive = readDictionary(POSITIVE_DIC_PATH);
    }

    /** Reads one dictionary file into a set; the reader is always closed. */
    private static HashSet<String> readDictionary(String path) throws IOException {
        HashSet<String> words = new HashSet<String>();
        try (BufferedReader buf = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), FILE_ENCODING))) {
            String str;
            while ((str = buf.readLine()) != null) {
                words.add(str);
            }
        }
        return words;
    }

    /**
     * Walks the three category sub-directories (negativeout / positiveout /
     * unsureout) under DirPath, extracting dictionary-matched words from every
     * XML file, and builds the per-class frequency tables and document counts.
     */
    public void Sentiment_Doc_Weight(String DirPath) throws Exception {

        ArrayList<String> NegativeCurrentList = new ArrayList<String>();
        NegativeDoc = collectCategory(DirPath, "negativeout", "NegativeFiles", NegativeCurrentList);
        NegativeWeight = HashTable(NegativeCurrentList);

        ArrayList<String> PositiveCurrentList = new ArrayList<String>();
        PositiveDoc = collectCategory(DirPath, "positiveout", "PositiveFiles", PositiveCurrentList);
        PositiveWeight = HashTable(PositiveCurrentList);

        ArrayList<String> UnsureCurrentList = new ArrayList<String>();
        UnsureDoc = collectCategory(DirPath, "unsureout", "UnsureFiles", UnsureCurrentList);
        UnsureWeight = HashTable(UnsureCurrentList);

        System.out.println("UnsureCurrent = " + UnsureCurrentList.size() + "  UnsureHashTable = " + UnsureWeight.size());
        System.out.println("PositiveCurrent = " + PositiveCurrentList.size() + "  PositiveHashTable = " + PositiveWeight.size());
        System.out.println("NegativeCurrent = " + NegativeCurrentList.size() + "  NegativeHashTable = " + NegativeWeight.size());
        System.out.println("NegativeDoc = " + NegativeDoc + "  PositiveDoc = " + PositiveDoc + "  UnsureDoc = " + UnsureDoc);
    }

    /**
     * Parses every XML file of one category sub-directory into currentList and
     * returns the number of documents seen (shared by all three categories).
     */
    private int collectCategory(String dirPath, String subDir, String label,
                                ArrayList<String> currentList) throws Exception {
        File dir = new File(dirPath + subDir);
        String[] files = dir.list();
        for (int i = 0; i < files.length; i++) {
            System.out.println(label + " No." + (i + 1) + " " + dirPath + subDir + "/" + files[i]);
            this.ReadXML(dirPath + subDir + "/" + files[i], currentList);
        }
        return files.length;
    }

    /**
     * Reads one segmented-blog XML file and appends its sentiment words to
     * currentList. Only "group" tokens can contain dictionary words; a lone
     * "atom" is never a sentiment word in this corpus.
     */
    public void ReadXML(String FilePath, ArrayList<String> currentList) throws Exception {

        SAXReader SaxReader = new SAXReader();
        Document Doc = SaxReader.read(new File(FilePath));
        Element root = Doc.getRootElement();

        Element content = root.element("content");
        List sentenses = content.elements("sentence"); // one entry per sentence

        for (Iterator iter = sentenses.iterator(); iter.hasNext(); ) {
            Element sentense = (Element) iter.next();

            List toks = sentense.elements();
            for (Iterator iter1 = toks.iterator(); iter1.hasNext(); ) {
                Element tok = (Element) iter1.next();
                String Type = tok.attributeValue("type");

                if (Type.equals("group")) {
                    GetWord(tok, currentList); // recurse into the "group" node
                }
            }
        }
    }

    /**
     * Recursively concatenates the "atom" children of a "group" token and keeps
     * the resulting word if it appears in either sentiment dictionary.
     */
    public void GetWord(Element root, ArrayList<String> currentList) {

        String Word = "";
        List elements = root.elements("tok");
        for (Iterator iter = elements.iterator(); iter.hasNext(); ) {
            Element tok = (Element) iter.next();
            String Type = tok.attributeValue("type");

            if (Type.compareTo("atom") == 0) {
                Word += tok.getText().trim();
            } else {
                GetWord(tok, currentList); // nested group: recurse
            }
        }
        // Keep only multi-character words that the dictionaries recognize.
        if (Word.length() > 1 && (Positive.contains(Word) || Negative.contains(Word))) {
            currentList.add(Word);
        }
    }

    /** Builds a word -> occurrence-count table from the extracted word list. */
    public Hashtable<String, Integer> HashTable(ArrayList<String> currentList) {

        Hashtable<String, Integer> table = new Hashtable<String, Integer>();
        for (Iterator<String> iter = currentList.iterator(); iter.hasNext(); ) {
            String word = iter.next();
            Integer weight = table.get(word);
            table.put(word, weight == null ? 1 : weight + 1);
        }
        return table;
    }

    /**
     * Serializes the three (docCount, weightTable) pairs under Model/ and also
     * writes human-readable weight listings. All streams are closed via
     * try-with-resources; the original leaked every ObjectOutputStream, which
     * risks truncated model files (no flush/close).
     */
    public void Save_Model() throws Exception {

        File ModelPath = new File("Model");
        if (!ModelPath.exists()) { ModelPath.mkdir(); }

        System.out.println("Saving NegativeModel...");
        saveModel(new File(ModelPath, "NegativeModel.txt"), NegativeDoc, NegativeWeight);

        System.out.println("Saving PositiveModel...");
        saveModel(new File(ModelPath, "PositiveModel.txt"), PositiveDoc, PositiveWeight);

        System.out.println("Saving UnsureModel...");
        saveModel(new File(ModelPath, "UnsureModel.txt"), UnsureDoc, UnsureWeight);

        System.out.println("Saving NegativeWeight...");
        saveWeights("NegativeWeight.txt", NegativeWeight);
        System.out.println("Saving PositiveWeight...");
        saveWeights("PositiveWeight.txt", PositiveWeight);
        System.out.println("Saving UnsureWeight...");
        saveWeights("UnsureWeight.txt", UnsureWeight);

        System.out.println("Save Success!");
    }

    /** Writes one (docCount, weightTable) pair with plain Java serialization. */
    private static void saveModel(File target, Integer docCount,
                                  Hashtable<String, Integer> weights) throws IOException {
        try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(target))) {
            oos.writeObject(docCount);
            oos.writeObject(weights);
        }
    }

    /**
     * Appends a "word\t\t\tcount\r\n" listing to Model/fileName. Builds the
     * whole text first and appends once, instead of reopening the file for
     * every key as the original did.
     */
    private static void saveWeights(String fileName,
                                    Hashtable<String, Integer> weights) throws IOException {
        StringBuilder sb = new StringBuilder();
        Enumeration<String> keys = weights.keys();
        while (keys.hasMoreElements()) {
            String key = keys.nextElement();
            sb.append(key).append("\t\t\t").append(weights.get(key)).append("\r\n");
        }
        FileUtils.writeStringToFile(new File("Model", fileName), sb.toString(), "UTF-8", true);
    }
}
OOS.writeObject(UnsureDoc); + OOS.writeObject(UnsureWeight); + + Enumeration Keys; + System.out.println("Saving NegativeWeight..."); + Keys = NegativeWeight.keys(); + while( Keys.hasMoreElements() ) { + String Key = Keys.nextElement(); + FileUtils.writeStringToFile(new File("Model", "NegativeWeight.txt"), Key+"\t\t\t"+NegativeWeight.get(Key)+"\r\n", "UTF-8", true); + } + System.out.println("Saving PositiveWeight..."); + Keys = PositiveWeight.keys(); + while( Keys.hasMoreElements() ) { + String Key = Keys.nextElement(); + FileUtils.writeStringToFile(new File("Model", "PositiveWeight.txt"), Key+"\t\t\t"+PositiveWeight.get(Key)+"\r\n", "UTF-8", true); + } + System.out.println("Saving UnsureWeight..."); + Keys = UnsureWeight.keys(); + while( Keys.hasMoreElements() ) { + String Key = Keys.nextElement(); + FileUtils.writeStringToFile(new File("Model", "UnsureWeight.txt"), Key+"\t\t\t"+UnsureWeight.get(Key)+"\r\n", "UTF-8", true); + } + + System.out.println("Save Success!"); + } +} \ No newline at end of file diff --git a/src/test/java/eshore/cn/it/sentiment/SentimentTest.java b/src/test/java/eshore/cn/it/sentiment/SentimentTest.java new file mode 100644 index 0000000..283d91f --- /dev/null +++ b/src/test/java/eshore/cn/it/sentiment/SentimentTest.java @@ -0,0 +1,244 @@ +package eshore.cn.it.sentiment; + + +import java.io.*; +import java.util.*; + +import org.apache.commons.io.FileUtils; +import org.dom4j.Document; +import org.dom4j.Element; +import org.dom4j.io.SAXReader; + + +public class SentimentTest { + + static private HashSet Negative, Positive; //两种情感词典 + static private Integer NegativeDoc, PositiveDoc, UnsureDoc; //属于两种情感的文本数 - 所构建模型需要保存下的值 + static private Hashtable NegativeWeight, PositiveWeight, UnsureWeight; //两种情感中所有词与他的权值 - 所构建模型需要保存下的值 + + static final String SENTIMENT_DOC_WEIGHT_PATH = "data/500trainblogxml/negativeout/"; + static final String POSITIVE_DIC_PATH = "data/Sentiment_Dictionary/positive_submit.txt"; + static final String NEGATIVE_DIC_PATH = 
"data/Sentiment_Dictionary/negative_submit.txt"; + + static final String FILE_ENCODING = "UTF-8"; + + public static void main(String[] args) throws Exception { + // TODO 自动生成的方法存根 + SentimentTest Sentiment_Test = new SentimentTest(); + + Sentiment_Test.Read_Model(); //读取模型 + Sentiment_Test.Classify_Directory(SENTIMENT_DOC_WEIGHT_PATH); + } + + @SuppressWarnings({ "resource", "unchecked" }) + public void Read_Model() throws Exception { + + this.Read_Sentiment_Dictionary(); + + ObjectInputStream OIS; //对象流直接读入 + File ModelPath = new File("Model"); + File NegativeModel = new File(ModelPath, "NegativeModel.txt"); + File PositiveModel = new File(ModelPath, "PositiveModel.txt"); + File UnsureModel = new File(ModelPath, "UnsureModel.txt"); + + System.out.println("Reading NegativeModel..."); + OIS = new ObjectInputStream( new FileInputStream( NegativeModel ) ); + NegativeDoc = (Integer) OIS.readObject(); + NegativeWeight = (Hashtable) OIS.readObject(); + + System.out.println("Reading PositiveModel..."); + OIS = new ObjectInputStream( new FileInputStream( PositiveModel ) ); + PositiveDoc = (Integer) OIS.readObject(); + PositiveWeight = (Hashtable) OIS.readObject(); + + System.out.println("Reading UnsureModel..."); + OIS = new ObjectInputStream( new FileInputStream( UnsureModel ) ); + UnsureDoc = (Integer) OIS.readObject(); + UnsureWeight = (Hashtable) OIS.readObject(); + + System.out.println("Read Success."); + } + + @SuppressWarnings("resource") + public void Read_Sentiment_Dictionary( ) throws Exception { //读入情感词典 + BufferedReader buf; + String str; + + Negative = new HashSet(); + buf = new BufferedReader( new InputStreamReader(new FileInputStream(NEGATIVE_DIC_PATH), FILE_ENCODING) ); + while( (str = buf.readLine()) != null ) { + Negative.add(str); + } + + Positive = new HashSet(); + buf = new BufferedReader( new InputStreamReader(new FileInputStream(POSITIVE_DIC_PATH), FILE_ENCODING) ); + while( (str = buf.readLine()) != null ) { + Positive.add(str); + } + } + + public 
void Classify_Directory( String DirectoryPath ) throws Exception { + + int PositiveNum = 0, NegativeNum = 0, UnsureNum = 0; + String[] Text_Path = new File( DirectoryPath ).list(); + + for ( int i = 0; i < Text_Path.length; i ++ ) { + + Classify( DirectoryPath+Text_Path[i] ); + double Ans = Classify( DirectoryPath+Text_Path[i] ); //对当前目录下的每一个文件进行测试 + if ( Ans < 0 ) { //根据测试结果将测试文本进行分类 + FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Positive"), Text_Path[i])); + PositiveNum ++; + } + else if ( Ans > 0 ) { + FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Negative"), Text_Path[i])); + NegativeNum ++; + } + else { + FileUtils.copyFile(new File(DirectoryPath+Text_Path[i]), new File( new File("Result", "Unsure"), Text_Path[i])); + UnsureNum ++; + } + System.out.print( "No." + (i+1) + " " + Text_Path[i] + ": " ); + if ( Ans < 0 ) { System.out.println("Positive"); } + else if ( Ans > 0 ) { System.out.println("Negative"); } + else { System.out.println("Unsure"); } + } + System.out.println("End."); + System.out.println("NegativeNum = " + NegativeNum + " PositiveNum = " + PositiveNum + " UnsureNum = " + UnsureNum); + } + + public double Classify( String FilePath ) throws Exception { + + Hashtable FileHashTable = Read_TestFile( FilePath ); + + Enumeration Keys; + double NegativeAns = 1, PositiveAns = 1; + + Keys = FileHashTable.keys(); + while( Keys.hasMoreElements() ) { + String Word = Keys.nextElement(); + NegativeAns *= ( Math.pow(this.PostProbability(Word, NegativeWeight), FileHashTable.get(Word)) ); + } + NegativeAns *= this.PriorProbability(NegativeDoc); + + Keys = FileHashTable.keys(); + while( Keys.hasMoreElements() ) { + String Word = Keys.nextElement(); + PositiveAns *= ( Math.pow(this.PostProbability(Word, PositiveWeight), FileHashTable.get(Word)) ); + } + PositiveAns *= this.PriorProbability(PositiveDoc); + + return ( NegativeAns-PositiveAns ); + } + + public Hashtable Read_TestFile( 
String FilePath ) throws Exception { + + ArrayList FileCurrentList = new ArrayList(); + ReadXML( FilePath, FileCurrentList ); + Hashtable FileHashTable = HashTable( FileCurrentList ); + + return FileHashTable; + } + + public void ReadXML( String FilePath, ArrayList currentList ) throws Exception { //从指定路径读取XML文件并提取出其情感词返回 + + SAXReader SaxReader = new SAXReader(); + Document Doc = SaxReader.read(new File(FilePath)); + Element root = Doc.getRootElement(); + + Element content = root.element("content"); + List sentenses = content.elements("sentence"); //每一句话作为一项 + + for ( Iterator iter = sentenses.iterator(); iter.hasNext(); ) { + Element sentense = (Element)iter.next(); + + List toks = sentense.elements(); + for ( Iterator iter1 = toks.iterator(); iter1.hasNext(); ) { + Element tok = (Element)iter1.next(); + String Type = tok.attributeValue("type"); + + if ( Type.equals("group") ) { //如果是"atom"一定不存在于情感词中 + GetWord( tok, currentList ); //从"group"中获取词 + } + } + } + } + + public void GetWord( Element root, ArrayList currentList ) { //获取XML中的情感词 + + String Word = ""; + List elements = root.elements("tok"); + for ( Iterator iter = elements.iterator(); iter.hasNext(); ) { + Element tok = (Element)iter.next(); + String Type = tok.attributeValue("type"); + + if ( Type.compareTo("atom") == 0 ) { + Word += tok.getText().trim(); + } + else { + GetWord( tok, currentList ); + } + } + if ( Word.length() > 1 && (Positive.contains(Word) || Negative.contains(Word)) ) { //筛选出情感词 + currentList.add(Word); + } + } + + public Hashtable HashTable( ArrayList currentList ) { //根据文本中的情感词构建哈希表 + + Hashtable HashTable = new Hashtable(); + + for ( Iterator iter = currentList.iterator(); iter.hasNext(); ) { + String Word = (String)iter.next(); + if ( HashTable.containsKey(Word) ) { + Integer Weight = HashTable.get(Word); + HashTable.put(Word, Weight+1); + } + else { + HashTable.put(Word, 1); + } + } + return HashTable; + } + + public double PriorProbability( Integer SentimentDoc ) { + + double 
Ans = 1; + + Ans = ( (double)SentimentDoc/( (double)NegativeDoc+(double)PositiveDoc+(double)UnsureDoc ) ); + + return Ans; + } + + public double PostProbability( String Word, Hashtable SentimentWeight ) { + + double Ans, V, E; + double Weight = 0, Weights = 0; + + if ( SentimentWeight.containsKey(Word) ) + Weight = (double)SentimentWeight.get(Word); + + Weights = PostWeights( SentimentWeight ); + + V = PostWeights( NegativeWeight ) + PostWeights( PositiveWeight ) + PostWeights( UnsureWeight ); + E = 1/Math.abs(V); + + Ans = ( Weight + E )/( Weights + E*Math.abs(V) ); + + return Ans; + } + + public double PostWeights( Hashtable SentimentWeight ) { + + double Weights = 0; + + Enumeration Keys; + Keys = SentimentWeight.keys(); + while( Keys.hasMoreElements() ) { + String Key = Keys.nextElement(); + Weights += (double)SentimentWeight.get(Key); + } + + return Weights; + } +} \ No newline at end of file