You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

cppjieba.patch001 5.5 kB

5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. diff -Npur cppjieba/deps/limonp/StringUtil.hpp cppjiebap/deps/limonp/StringUtil.hpp
  2. --- cppjieba/deps/limonp/StringUtil.hpp 2020-03-11 09:30:52.000000000 +0800
  3. +++ cppjiebap/deps/limonp/StringUtil.hpp 2020-12-15 16:02:38.000000000 +0800
  4. @@ -84,12 +84,12 @@ inline bool IsSpace(unsigned c) {
  5. }
  6. inline std::string& LTrim(std::string &s) {
  7. - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
  8. + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::function<bool(unsigned)>(IsSpace))));
  9. return s;
  10. }
  11. inline std::string& RTrim(std::string &s) {
  12. - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
  13. + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::function<bool(unsigned)>(IsSpace))).base(), s.end());
  14. return s;
  15. }
  16. @@ -98,12 +98,12 @@ inline std::string& Trim(std::string &s)
  17. }
  18. inline std::string& LTrim(std::string & s, char x) {
  19. - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
  20. + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [x](char c) -> bool { return c != x; }));
  21. return s;
  22. }
  23. inline std::string& RTrim(std::string & s, char x) {
  24. - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
  25. + s.erase(std::find_if(s.rbegin(), s.rend(), [x](char c) -> bool { return c != x; } ).base(), s.end());
  26. return s;
  27. }
  28. diff -Npur cppjieba/include/cppjieba/Jieba.hpp cppjiebap/include/cppjieba/Jieba.hpp
  29. --- cppjieba/include/cppjieba/Jieba.hpp 2020-03-11 09:30:52.000000000 +0800
  30. +++ cppjiebap/include/cppjieba/Jieba.hpp 2020-12-15 16:01:46.000000000 +0800
  31. @@ -10,17 +10,14 @@ class Jieba {
  32. public:
  33. Jieba(const string& dict_path,
  34. const string& model_path,
  35. - const string& user_dict_path,
  36. - const string& idfPath,
  37. - const string& stopWordPath)
  38. + const string& user_dict_path)
  39. : dict_trie_(dict_path, user_dict_path),
  40. model_(model_path),
  41. mp_seg_(&dict_trie_),
  42. hmm_seg_(&model_),
  43. mix_seg_(&dict_trie_, &model_),
  44. full_seg_(&dict_trie_),
  45. - query_seg_(&dict_trie_, &model_),
  46. - extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
  47. + query_seg_(&dict_trie_, &model_) {
  48. }
  49. ~Jieba() {
  50. }
  51. @@ -121,8 +118,6 @@ class Jieba {
  52. FullSegment full_seg_;
  53. QuerySegment query_seg_;
  54. - public:
  55. - KeywordExtractor extractor;
  56. }; // class Jieba
  57. } // namespace cppjieba
  58. diff -Npur cppjieba/test/demo.cpp cppjiebap/test/demo.cpp
  59. --- cppjieba/test/demo.cpp 2020-03-11 09:30:52.000000000 +0800
  60. +++ cppjiebap/test/demo.cpp 2020-12-15 16:01:46.000000000 +0800
  61. @@ -11,9 +11,7 @@ const char* const STOP_WORD_PATH = "../d
  62. int main(int argc, char** argv) {
  63. cppjieba::Jieba jieba(DICT_PATH,
  64. HMM_PATH,
  65. - USER_DICT_PATH,
  66. - IDF_PATH,
  67. - STOP_WORD_PATH);
  68. + USER_DICT_PATH);
  69. vector<string> words;
  70. vector<cppjieba::Word> jiebawords;
  71. string s;
  72. @@ -71,10 +69,5 @@ int main(int argc, char** argv) {
  73. cout << tagres << endl;
  74. cout << "[demo] Keyword Extraction" << endl;
  75. - const size_t topk = 5;
  76. - vector<cppjieba::KeywordExtractor::Word> keywordres;
  77. - jieba.extractor.Extract(s, keywordres, topk);
  78. - cout << s << endl;
  79. - cout << keywordres << endl;
  80. return EXIT_SUCCESS;
  81. }
  82. diff -Npur cppjieba/test/unittest/jieba_test.cpp cppjiebap/test/unittest/jieba_test.cpp
  83. --- cppjieba/test/unittest/jieba_test.cpp 2020-03-11 09:30:52.000000000 +0800
  84. +++ cppjiebap/test/unittest/jieba_test.cpp 2020-12-15 16:01:46.000000000 +0800
  85. @@ -6,9 +6,7 @@ using namespace cppjieba;
  86. TEST(JiebaTest, Test1) {
  87. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  88. "../dict/hmm_model.utf8",
  89. - "../dict/user.dict.utf8",
  90. - "../dict/idf.utf8",
  91. - "../dict/stop_words.utf8");
  92. + "../dict/user.dict.utf8");
  93. vector<string> words;
  94. string result;
  95. @@ -43,9 +41,7 @@ TEST(JiebaTest, Test1) {
  96. TEST(JiebaTest, WordTest) {
  97. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  98. "../dict/hmm_model.utf8",
  99. - "../dict/user.dict.utf8",
  100. - "../dict/idf.utf8",
  101. - "../dict/stop_words.utf8");
  102. + "../dict/user.dict.utf8");
  103. vector<Word> words;
  104. string result;
  105. @@ -85,9 +81,7 @@ TEST(JiebaTest, WordTest) {
  106. TEST(JiebaTest, InsertUserWord) {
  107. cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
  108. "../dict/hmm_model.utf8",
  109. - "../dict/user.dict.utf8",
  110. - "../dict/idf.utf8",
  111. - "../dict/stop_words.utf8");
  112. + "../dict/user.dict.utf8");
  113. vector<string> words;
  114. string result;
  115. @@ -120,14 +114,4 @@ TEST(JiebaTest, InsertUserWord) {
  116. jieba.Cut("同一个世界,同一个梦想", words);
  117. result = Join(words.begin(), words.end(), "/");
  118. ASSERT_EQ(result, "同一个世界,同一个梦想");
  119. -
  120. - {
  121. - string s("一部iPhone6");
  122. - string res;
  123. - vector<KeywordExtractor::Word> wordweights;
  124. - size_t topN = 5;
  125. - jieba.extractor.Extract(s, wordweights, topN);
  126. - res << wordweights;
  127. - ASSERT_EQ(res, "[{\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 11.7392}, {\"word\": \"\xE4\xB8\x80\xE9\x83\xA8\", \"offset\": [0], \"weight\": 6.47592}]");
  128. - }
  129. }