您可以创建一个新的数据框,让其中每个主题标签都带有各自对应的年份信息。
之后,就可以使用 geom_bar 对这些数据进行绘图。
因为我是新账号,所以无法上传绘图结果(图片)。
library(tidyverse)
library(data.table)
# Your data:
#    terms     year
# 1  #A;#B;#C  2017
# 2  #B;#C;#D  2016
# 3  #C;#D;#E  2021
# 4  #D;#E;#F  2020
# 5  #E;#F;#G  2020
# 6  #F;#G;#H  2020
# 7  #G;#H;#I  2019
# 8  #H;#I;#J  2018
# 9  #I;#J;#K  2020
# 10 #J;#K;#L  2020

# Make a data frame that looks like your data. Each element of `terms` is a
# semicolon-separated list of hashtags; `year` holds the matching year.
# Row 6 ("#F;#G;#H", 2020) is included so that the data frame matches all ten
# rows of the table above.
terms <- c(
  "#A;#B;#C",
  "#B;#C;#D",
  "#C;#D;#E",
  "#D;#E;#F",
  "#E;#F;#G",
  "#F;#G;#H",
  "#G;#H;#I",
  "#H;#I;#J",
  "#I;#J;#K",
  "#J;#K;#L"
)
year <- c(2017, 2016, 2021, 2020, 2020, 2020, 2019, 2018, 2020, 2020)

# One row per record, with the columns `terms` and `year`.
df <- data.frame(terms = terms, year = year)
# Pull the two columns out of the data frame `df` as plain vectors.
terms <- df$terms
year.list <- df$year
loopcount <- length(terms)

# Reset the scratch variables and create an empty data frame that the
# following step appends to.
year <- NULL
hashtags <- NULL
all.years <- data.frame()

# Split every element of `terms` at ";" -- the result is a list holding one
# character vector of hashtags per element of `terms`.
hashtag.list <- str_split(terms, ";")
通过这个循环,你可以创建一个新的 DF
# Make a new data frame in which every single hashtag gets the year of the
# record it came from: one row per hashtag occurrence, with the columns
# `hashtags` and `year`.
#
# `hashtag.list` holds one character vector of hashtags per record and
# `year.list` the year of each record, so each year is repeated once for
# every hashtag of its record. Doing this in one vectorised step avoids
# calling rbind() inside a loop and avoids iterating over 1:0 when there are
# no records.
hashtags.per.record <- lengths(hashtag.list)
one.row.per.hashtag <- data.frame(
  hashtags = as.character(unlist(hashtag.list, use.names = FALSE)),
  year = rep(year.list, times = hashtags.per.record)
)

# Append to the (empty) accumulator, as the loop-based version did.
all.years <- rbind(all.years, one.row.per.hashtag)
hashtagDF <- all.years
head(hashtagDF)
然后可以使用新的 DF 来绘制您想要的内容
或者
如果我没有理解错,
您想显示每年各主题标签的出现频率,那么可以创建一个新的 df,
并且只保留每年出现次数最多的前 3 个主题标签
# Only include the three most used hashtags per year.
#
# Reads `hashtagDF` (one row per hashtag occurrence, columns `hashtags` and
# `year`) and builds `top.3` with the columns
#   hashtags - the hashtag
#   year     - the year, as character
#   Freq     - how often the hashtag occurs in that year
#   yearF    - the year as a factor (used for the x axis of the plot)
years.in.study <- unique(hashtagDF$year)

# One data frame per year, each holding at most three rows.
top.3.per.year <- lapply(years.in.study, function(what.year) {
  # subset per year; which() leaves out rows whose comparison yields NA
  one.subset <- hashtagDF[which(hashtagDF$year == what.year), , drop = FALSE]
  # calculate how often a hashtag is present in this year
  frequency.per.year <- as.data.frame(table(one.subset))
  # most frequent hashtag first
  frequency.per.year <- frequency.per.year[order(-frequency.per.year$Freq), ]
  # Only keep the 3 most occurring terms. head() also copes with fewer than
  # three rows, whereas dropping rows with -c(4:n) counts downwards for
  # n < 3 and removes the wrong rows.
  head(frequency.per.year, 3)
})

# Make one data frame with all years. The leading empty data frame keeps the
# result a data frame even if there is no year at all.
top.3 <- do.call(rbind, c(list(data.frame()), top.3.per.year))
top.3

# Order by year. The sorted result is stored back into top.3 (previously it
# was computed but discarded), and the row names are renumbered afterwards.
top.3$year <- as.character(top.3$year)
top.3 <- top.3[order(top.3$year), ]
rownames(top.3) <- NULL
top.3

# year should be a factor
top.3$yearF <- as.factor(top.3$year)
然后你可以绘制它
# Plot as a bar plot showing
#   - the frequencies of the hashtags in the different years
#   - only the top 3 most frequent hashtag terms per year
# One bar per year (x = yearF), bar height taken from Freq, filled by hashtag.
# geom_col() uses the y values as given, like geom_bar(stat = "identity").
barplot <- ggplot(data = top.3, aes(x = yearF, y = Freq, fill = hashtags)) +
  geom_col() +
  # all titles and axis labels are deliberately left empty
  labs(
    title = "",
    subtitle = "",
    caption = "",
    x = "",
    y = ""
  )
barplot

# Save the plot as hashtag.png, 210 mm x 297 mm. The arguments are spelled
# out in full (`filename`, `plot`) rather than relying on partial matching
# of `file`.
ggsave(
  filename = "hashtag.png",
  plot = barplot,
  width = 210,
  height = 297,
  units = "mm"
)