```python
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


def remove_stop_words_from_sentence(sentence):
    # Tokenize the sentence and drop tokens found in NLTK's English stop word list.
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(sentence)
    filtered_output = ' '.join([w for w in word_tokens if w not in stop_words])
    return filtered_output


def remove_stop_words(train_texts, test_texts=None):
    # Remove stop words from every sentence in the train (and optional test) split.
    filtered_texts_train = []
    filtered_texts_test = []
    for text in train_texts:
        filtered_texts_train.append(remove_stop_words_from_sentence(text))
    if test_texts is not None:
        for text in test_texts:
            filtered_texts_test.append(remove_stop_words_from_sentence(text))

    return filtered_texts_train, filtered_texts_test
```
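For context, here is a minimal usage sketch. It assumes the two functions above are in scope and that the relevant NLTK resources have been downloaded; the example sentences are made up for illustration.

```python
# Minimal usage sketch (assumes the functions above are importable or defined in this file;
# the sample sentences below are hypothetical).
import nltk

nltk.download('stopwords')  # English stop word list used by stopwords.words('english')
nltk.download('punkt')      # tokenizer models used by word_tokenize
# On newer NLTK releases you may also need: nltk.download('punkt_tab')

train = ["This is a simple example sentence.", "Stop words add little meaning."]
test = ["Another sentence for the test split."]

filtered_train, filtered_test = remove_stop_words(train, test)
print(filtered_train)  # tokens that appear in NLTK's English stop word list are removed
print(filtered_test)
```

Note that the comparison is case-sensitive, since NLTK's stop word list is lowercase; if that matters for your data, lowercase each token before checking membership in `stop_words`.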