from wordcloud import STOPWORDS, WordCloud

# Render a word cloud from precomputed word frequencies and display it.
# NOTE(review): assumes df2.to_dict() yields a flat {word: frequency} mapping
# (e.g. df2 is a pandas Series) -- confirm where df2 is built.
# NOTE(review): plt is presumably matplotlib.pyplot imported earlier -- confirm.
wordcloud = WordCloud(
    font_path='font/NanumGothic.ttf',  # presumably chosen for Hangul glyph support
    background_color='white',
).generate_from_frequencies(df2.to_dict())
plt.imshow(wordcloud, interpolation='bilinear')

# For non-Korean (e.g. English) text, pass stopwords=STOPWORDS to WordCloud.
# Per the wordcloud docs that option is ignored by generate_from_frequencies(),
# so with precomputed frequencies filter the stopwords beforehand -- this can
# also be done in CountVectorizer via its stop_words option.
# TF-IDF
1
2
3
4
5
6
7
8
9
from sklearn.feature_extraction.text import TfidfVectorizer

# Tiny demo corpus: one string per document.
corpus = [
    'you know I want your love',
    'I like you',
    'what should I do',
]

# Learn the vocabulary and IDF weights from the corpus.
# Note: the default token_pattern only keeps tokens of two or more
# characters, so the single-letter word "I" is not part of the vocabulary.
tfidfv = TfidfVectorizer().fit(corpus)

# Dense document-term matrix: one row per document, one column per term.
print(tfidfv.transform(corpus).toarray())
# Mapping of each term to its column index in the matrix above.
print(tfidfv.vocabulary_)
from collections import Counter

# Tokenize the cleaned text and tag every token with its part of speech.
# NOTE(review): cleaned_content, unique_NN_words, final_NN_words and
# stopwords_list are not defined in this snippet -- assumed to be set up
# elsewhere in the file.
word_tokens = nltk.word_tokenize(cleaned_content)
tokens_pos = nltk.pos_tag(word_tokens)

# Keep nouns only: every Penn Treebank noun tag (NN, NNS, NNP, NNPS)
# contains 'NN'. To select other word classes, test for 'JJ' (adjective),
# 'RB' (adverb) or 'VB' (verb) instead.
NN_words = [word for word, pos in tokens_pos if 'NN' in pos]

# Remove every occurrence of each stopword from final_NN_words.
# The list is filtered in place (slice assignment) in a single pass so any
# other reference to the same list sees the result, without the quadratic
# cost of calling list.remove() repeatedly.
stopwords_to_drop = {word for word in unique_NN_words if word in stopwords_list}
final_NN_words[:] = [
    word for word in final_NN_words if word not in stopwords_to_drop
]

# Word frequencies.
c = Counter(final_NN_words)  # input must be a list of words (tokens)
print(c)

k = 20
print(c.most_common(k))  # the k most frequent words, highest count first