You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cppjieba.patch001 4.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. diff -Npur cppjieba/include/cppjieba/Jieba.hpp cppjiebap/include/cppjieba/Jieba.hpp
  2. --- cppjieba/include/cppjieba/Jieba.hpp 2020-05-07 15:27:16.490147073 +0800
  3. +++ cppjiebap/include/cppjieba/Jieba.hpp 2020-05-07 15:51:15.315931163 +0800
  4. @@ -10,17 +10,14 @@ class Jieba {
  5. public:
  6. Jieba(const string& dict_path,
  7. const string& model_path,
  8. - const string& user_dict_path,
  9. - const string& idfPath,
  10. - const string& stopWordPath)
  11. + const string& user_dict_path)
  12. : dict_trie_(dict_path, user_dict_path),
  13. model_(model_path),
  14. mp_seg_(&dict_trie_),
  15. hmm_seg_(&model_),
  16. mix_seg_(&dict_trie_, &model_),
  17. full_seg_(&dict_trie_),
  18. - query_seg_(&dict_trie_, &model_),
  19. - extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
  20. + query_seg_(&dict_trie_, &model_) {
  21. }
  22. ~Jieba() {
  23. }
  24. @@ -121,8 +118,6 @@ class Jieba {
  25. FullSegment full_seg_;
  26. QuerySegment query_seg_;
  27. - public:
  28. - KeywordExtractor extractor;
  29. }; // class Jieba
  30. } // namespace cppjieba
  31. diff -Npur cppjieba/test/demo.cpp cppjiebap/test/demo.cpp
  32. --- cppjieba/test/demo.cpp 2020-05-07 15:27:16.490147073 +0800
  33. +++ cppjiebap/test/demo.cpp 2020-05-07 15:53:21.630248552 +0800
  34. @@ -11,9 +11,7 @@ const char* const STOP_WORD_PATH = "../d
  35. int main(int argc, char** argv) {
  36. cppjieba::Jieba jieba(DICT_PATH,
  37. HMM_PATH,
  38. - USER_DICT_PATH,
  39. - IDF_PATH,
  40. - STOP_WORD_PATH);
  41. + USER_DICT_PATH);
  42. vector<string> words;
  43. vector<cppjieba::Word> jiebawords;
  44. string s;
  45. @@ -71,10 +69,5 @@ int main(int argc, char** argv) {
  46. cout << tagres << endl;
  47. cout << "[demo] Keyword Extraction" << endl;
  48. - const size_t topk = 5;
  49. - vector<cppjieba::KeywordExtractor::Word> keywordres;
  50. - jieba.extractor.Extract(s, keywordres, topk);
  51. - cout << s << endl;
  52. - cout << keywordres << endl;
  53. return EXIT_SUCCESS;
  54. }
  55. diff -Npur cppjieba/test/unittest/jieba_test.cpp cppjiebap/test/unittest/jieba_test.cpp
  56. --- cppjieba/test/unittest/jieba_test.cpp 2020-05-07 15:27:16.522146752 +0800
  57. +++ cppjiebap/test/unittest/jieba_test.cpp 2020-05-07 15:59:11.630860061 +0800
  58. @@ -6,9 +6,7 @@ using namespace cppjieba;
  59. TEST(JiebaTest, Test1) {
  60. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  61. "../dict/hmm_model.utf8",
  62. - "../dict/user.dict.utf8",
  63. - "../dict/idf.utf8",
  64. - "../dict/stop_words.utf8");
  65. + "../dict/user.dict.utf8");
  66. vector<string> words;
  67. string result;
  68. @@ -43,9 +41,7 @@ TEST(JiebaTest, Test1) {
  69. TEST(JiebaTest, WordTest) {
  70. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  71. "../dict/hmm_model.utf8",
  72. - "../dict/user.dict.utf8",
  73. - "../dict/idf.utf8",
  74. - "../dict/stop_words.utf8");
  75. + "../dict/user.dict.utf8");
  76. vector<Word> words;
  77. string result;
  78. @@ -85,9 +81,7 @@ TEST(JiebaTest, WordTest) {
  79. TEST(JiebaTest, InsertUserWord) {
  80. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  81. "../dict/hmm_model.utf8",
  82. - "../dict/user.dict.utf8",
  83. - "../dict/idf.utf8",
  84. - "../dict/stop_words.utf8");
  85. + "../dict/user.dict.utf8");
  86. vector<string> words;
  87. string result;
  88. @@ -120,14 +114,4 @@ TEST(JiebaTest, InsertUserWord) {
  89. jieba.Cut("同一个世界,同一个梦想", words);
  90. result = Join(words.begin(), words.end(), "/");
  91. ASSERT_EQ(result, "同一个世界,同一个梦想");
  92. -
  93. - {
  94. - string s("一部iPhone6");
  95. - string res;
  96. - vector<KeywordExtractor::Word> wordweights;
  97. - size_t topN = 5;
  98. - jieba.extractor.Extract(s, wordweights, topN);
  99. - res << wordweights;
  100. - ASSERT_EQ(res, "[{\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 11.7392}, {\"word\": \"\xE4\xB8\x80\xE9\x83\xA8\", \"offset\": [0], \"weight\": 6.47592}]");
  101. - }
  102. }