# Gonewind word cloud ----
# Reads the text file line by line, cleans it with tm, and draws a word
# cloud of every term that occurs at least 5 times.
library(tm)
library(wordcloud)

# warn = FALSE silences readLines()' warning about a missing final newline.
article <- readLines(
  "D:/RWorkPlace/第3章_类别比较型图表/gonewind.txt",
  warn = FALSE
)
article <- Corpus(VectorSource(article))

# Lower-case once, inside the corpus, so stop-word matching is
# case-insensitive.
article <- tm_map(article, content_transformer(tolower))
# Remove English stop words BEFORE stripping punctuation: the stop-word list
# contains contractions such as "don't"; once the apostrophe is removed the
# text reads "dont" and would no longer match.
article <- tm_map(article, removeWords, stopwords("en"))
# Remove punctuation
article <- tm_map(article, removePunctuation)
# Remove digits
article <- tm_map(article, removeNumbers)
# Collapse the runs of blanks left behind by the removals above
article <- tm_map(article, stripWhitespace)

# `freq` is omitted, so wordcloud() derives the term frequencies from the
# corpus itself.
wordcloud(
  words = article,
  min.freq = 5,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)
# Sanguo1 word cloud ----
# Segments a Chinese text with jiebaR, cleans the tokens with tm, counts
# them, and draws a word cloud of the tokens occurring at least 50 times.
library(tm)
library(wordcloud)
library(jiebaR)

# Read the Chinese text file
text <- readLines(
  "D:/RWorkPlace/第3章_类别比较型图表/sanguo1.txt",
  encoding = "UTF-8"
)
# Merge all lines into a single string
text <- paste(text, collapse = " ")

# Segment the text into words with jiebaR
engine <- worker()
segmented_text <- segment(text, engine)

# Build a corpus with one document per token
corpus <- Corpus(VectorSource(segmented_text))
# Pre-process: drop punctuation and digits, collapse whitespace
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, stripWhitespace)

# Convert the corpus back into a data frame of cleaned tokens
corpus_df <- data.frame(text = sapply(corpus, as.character))

# Count the tokens explicitly instead of letting wordcloud() do it.
# When `freq` is missing, wordcloud() counts through
# tm::TermDocumentMatrix(), whose default `wordLengths = c(3, Inf)` silently
# discards every term shorter than three characters -- i.e. most Chinese
# words.
min_chars <- 2L # set to 1L to keep single-character tokens as well
tokens <- trimws(corpus_df$text)
tokens <- tokens[nchar(tokens) >= min_chars] # also drops emptied tokens
word_freq <- sort(table(tokens), decreasing = TRUE)

# Draw the word cloud
wordcloud(
  words = names(word_freq),
  freq = as.vector(word_freq),
  min.freq = 50,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)
# NEWS word cloud ----
# Reads the text file line by line, cleans it with tm, and draws a word
# cloud of every term that occurs at least 10 times.
library(tm)
library(wordcloud)

# warn = FALSE silences readLines()' warning about a missing final newline.
article <- readLines(
  "D:/RWorkPlace/第3章_类别比较型图表/NEWS.txt",
  warn = FALSE
)
article <- Corpus(VectorSource(article))

# Lower-case once, inside the corpus, so stop-word matching is
# case-insensitive.
article <- tm_map(article, content_transformer(tolower))
# Remove English stop words BEFORE stripping punctuation: the stop-word list
# contains contractions such as "don't"; once the apostrophe is removed the
# text reads "dont" and would no longer match.
article <- tm_map(article, removeWords, stopwords("en"))
# Remove punctuation
article <- tm_map(article, removePunctuation)
# Remove digits
article <- tm_map(article, removeNumbers)
# Collapse the runs of blanks left behind by the removals above
article <- tm_map(article, stripWhitespace)

# `freq` is omitted, so wordcloud() derives the term frequencies from the
# corpus itself.
wordcloud(
  words = article,
  min.freq = 10,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)
# Quan Tang Shi (Complete Tang Poems) word cloud ----
# Loads the poems from an xlsx workbook, segments poet/title/poem text with
# jiebaR, cleans the tokens with tm, counts them, and draws a word cloud of
# the tokens occurring at least 70 times.
library(tm)
library(wordcloud)
library(RColorBrewer)
library(openxlsx)
library(jiebaR)

# Import the xlsx file
file_path <- "D:/RWorkPlace/第3章_类别比较型图表/全唐诗.xlsx"
data <- read.xlsx(file_path, detectDates = TRUE)

# paste() renders a missing cell as the literal string "NA", which would then
# be segmented and counted as a word; blank such cells out first.
blank_if_na <- function(x) {
  x[is.na(x)] <- ""
  x
}

# Merge the text columns into one character vector (one element per row)
text <- paste(
  blank_if_na(data$poet),
  blank_if_na(data$title),
  blank_if_na(data$poem),
  sep = " "
)

# Segment the text into words with jiebaR
engine <- worker()
segmented_text <- segment(text, engine)

# Build a corpus with one document per token
corpus <- Corpus(VectorSource(segmented_text))
# Pre-process: drop punctuation and digits, collapse whitespace
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, stripWhitespace)

# Convert the corpus back into a data frame of cleaned tokens
corpus_df <- data.frame(text = sapply(corpus, as.character))

# Count the tokens explicitly instead of letting wordcloud() do it.
# When `freq` is missing, wordcloud() counts through
# tm::TermDocumentMatrix(), whose default `wordLengths = c(3, Inf)` silently
# discards every term shorter than three characters -- i.e. most Chinese
# words.
min_chars <- 2L # set to 1L to keep single-character tokens as well
tokens <- trimws(corpus_df$text)
tokens <- tokens[nchar(tokens) >= min_chars] # also drops emptied tokens
word_freq <- sort(table(tokens), decreasing = TRUE)

# Draw the word cloud
wordcloud(
  words = names(word_freq),
  freq = as.vector(word_freq),
  min.freq = 70,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)