《Python数据分析实践Python数据分析实践 (49).pdf》由会员分享,可在线阅读,更多相关《Python数据分析实践Python数据分析实践 (49).pdf(2页珍藏版)》请在taowenge.com淘文阁网|工程机械CAD图纸|机械工程制图|CAD装配图下载|SolidWorks_CaTia_CAD_UG_PROE_设计图分享下载上搜索。
# Review-comment word frequency and keyword word cloud.
#
# Pipeline:
#   1. load stop words from stoplists.txt
#   2. read the `review` column of Comment.csv, whitespace-split each review
#      and drop stop-word tokens
#   3. persist the surviving text to file.txt
#   4. segment that text with jieba, count words longer than one character
#      and print the 20 most frequent
#   5. extract the top 25 weighted keywords with jieba.analyse and render
#      them as a word cloud (shown on screen and saved as ciyun.jpg)
#
# NOTE(review): this block was reconstructed from a text extraction that had
# lost quotes, brackets and indentation; string literals and subscripts were
# restored from context and should be confirmed against the original script.
from collections import Counter


def load_stopwords(path="stoplists.txt"):
    """Return the set of whitespace-separated stop words found in *path*.

    A set is used so that the per-token membership test is O(1).
    """
    with open(path, "r", encoding="utf-8") as f:
        return {word for line in f for word in line.split()}


def filter_tokens(reviews, stopwords):
    """Split each review on whitespace and drop tokens that are stop words.

    Non-string entries (e.g. NaN produced by pandas for empty cells) are
    skipped instead of raising AttributeError on ``.split()``.
    """
    return [
        token
        for review in reviews
        if isinstance(review, str)
        for token in review.split()
        if token not in stopwords
    ]


def count_words(words):
    """Return ``(text, counts)`` for the words that are not one character long.

    ``text`` is the kept words concatenated without a separator (this is the
    input later handed to keyword extraction); ``counts`` is a Counter of the
    kept words.
    """
    kept = [word for word in words if len(word) != 1]
    return "".join(kept), Counter(kept)


def main():
    """Run the full pipeline using the fixed file names of the original script."""
    # Third-party imports are kept local: the import section of this file is
    # not visible from here, and the pure helpers above do not need them.
    import jieba
    import jieba.analyse  # the submodule is not loaded by `import jieba` alone
    import matplotlib.pyplot as plt
    import pandas as pd
    import wordcloud

    stopwords = load_stopwords("stoplists.txt")
    text = pd.read_csv("Comment.csv")
    after_text = filter_tokens(text.review, stopwords)

    # Tokens are written back-to-back with no separator, as in the original;
    # jieba re-segments the text below.
    txt = "".join(after_text)
    with open("file.txt", "w", encoding="utf-8") as f:
        f.write(txt)

    results, counts = count_words(jieba.lcut(txt))

    # most_common(20) returns fewer rows when there are fewer than 20 distinct
    # words, where indexing items[0..19] would raise IndexError.
    # NOTE(review): the original format string was lost in extraction;
    # word and count are printed separated by a space.
    for word, count in counts.most_common(20):
        print(word, count)

    keywords = jieba.analyse.extract_tags(
        results, topK=25, withWeight=True, allowPOS=()
    )
    for keyword, weight in keywords:
        print(keyword, weight)

    # keyword -> weight mapping drives the font sizes in the cloud.
    word_dict = dict(keywords)

    wc = wordcloud.WordCloud(
        font_path="./font/simhei.ttf",  # a CJK-capable font is required for Chinese text
        background_color="white",
        max_words=1000,
        max_font_size=150,
        margin=5,
        width=2000,
        height=1000,
    )
    wc.generate_from_frequencies(word_dict)
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.show()
    wc.to_file("ciyun.jpg")


if __name__ == "__main__":
    main()