# R 的 arules 程序包中自带 Groceries 数据集:该数据集是某杂货店一个月内的真实交易记录,共 9835 条交易记录,涉及 169 种商品。
# Market-basket analysis of the groceries transactions with arules:
# load the data, explore item frequencies, then mine and filter
# association rules with apriori().

# install.packages("arules")
library(arules)

# NOTE(review): hard-coded working directory. It is kept because both the
# relative input path below and the relative CSV output written later in the
# script resolve against it.
setwd("D:\\data")

# ---- Read the data (the Groceries data set) ----
# One basket per line, items separated by commas.
groceries <- read.transactions("groceries.txt", format = "basket", sep = ",")

# ---- Inspect the transactions object ----
summary(groceries)
class(groceries)
groceries
dim(groceries)
colnames(groceries)[1:5]
# rownames(groceries)[1:5]

# size() and itemFrequency() both come from arules: the former counts the
# number of items in each basket, the latter computes the support of each item.
basketSize <- size(groceries)
summary(basketSize)
sum(basketSize)

itemFreq <- itemFrequency(groceries)
itemFreq[1:5]
sum(itemFreq)

# Rescale the relative frequencies so that they add up to the total number
# of items bought across all baskets.
itemCount <- (itemFreq / sum(itemFreq)) * sum(basketSize)
summary(itemCount)

# ---- Sort by support and look at the most frequent items ----
orderedItem <- sort(itemCount, decreasing = TRUE)
orderedItem[1:10]
orderedItemFreq <- sort(itemFrequency(groceries), decreasing = TRUE)
orderedItemFreq[1:10]

# Slice transactions 100 to 800 and compute the support of items 1 to 3.
itemFrequency(groceries[100:800, 1:3])

# ---- itemFrequencyPlot(): bar charts of frequent items ----
# Only items that reach a minimum support.
itemFrequencyPlot(groceries, support = 0.1)
# The ten most frequent items, drawn as horizontal bars.
itemFrequencyPlot(groceries, topN = 10, horiz = TRUE)

# Keep only the transactions that contain more than one item.
groceries_use <- groceries[basketSize > 1]
dim(groceries_use)

inspect(groceries[1:5])

# Each dot marks an item that was bought in a given transaction.
image(groceries[1:10])
# For a large data set the sparse-matrix picture becomes unreadable, so
# display a random sample of transactions instead.
image(sample(groceries, 100))

# ---- Mine association rules ----
groceryrules <- apriori(
  groceries,
  parameter = list(support = 0.03, confidence = 0.25, minlen = 2)
)
summary(groceryrules)

# inspect() prints the rules themselves. head() is used instead of [1:5] so
# that a rule set with fewer than five rules does not raise a subscript error.
inspect(head(groceryrules, n = 5))
inspect(groceryrules)

# Sort the rules by a quality measure (here: lift).
ordered_groceryrules <- sort(groceryrules, by = "lift")
inspect(head(ordered_groceryrules, n = 5))

# ---- Filter rules by the items they contain ----
# %in%: rules containing the item "yogurt".
yogurtrules <- subset(groceryrules, items %in% c("yogurt"))
inspect(yogurtrules)

# %pin%: partial match on the item name.
fruitrules <- subset(groceryrules, items %pin% c("fruit"))
inspect(fruitrules)

# %ain%: rules containing all of the listed items.
byrules <- subset(groceryrules, items %ain% c("berries", "yogurt"))
inspect(byrules)

# Item filter combined with a condition on a quality measure.
fruitrules <- subset(groceryrules, items %pin% c("fruit") & lift > 2)
inspect(fruitrules)

# ---- Rules constrained to have "berries" on the left-hand side ----
berriesInLHS <- apriori(
  groceries,
  parameter = list(support = 0.001, confidence = 0.1),
  appearance = list(lhs = c("berries"), default = "rhs")
)
summary(berriesInLHS)
inspect(berriesInLHS)
inspect(head(rhs(berriesInLHS), n = 5))
# Continuation of the groceries analysis: filter the berries rules, export
# the mined rules, repeat the mining on the aggregated built-in Groceries
# data, visualise the result and mine frequent itemsets with eclat().
# Uses `berriesInLHS` and `groceryrules`, which are created earlier in the
# script.

# ---- Drop berries rules whose right-hand side is a listed item ----
berrySub <- subset(
  berriesInLHS,
  subset = !(rhs %in% c("root vegetables", "whole milk"))
)
inspect(head(rhs(sort(berrySub, by = "confidence")), n = 5))

# ---- Export the rules ----
write(
  groceryrules,
  file = "groceryrules.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
groceryrules_df <- as(groceryrules, "data.frame")
str(groceryrules_df)

# ---- Built-in Groceries data set and its item hierarchy ----
data(Groceries)
summary(Groceries)
print(levels(itemInfo(Groceries)[["level1"]]))
print(levels(itemInfo(Groceries)[["level2"]]))
inspect(Groceries[1:3])

# Aggregate the items to their "level2" category.
# NOTE: this overwrites the `groceries` object read from file earlier.
groceries <- aggregate(Groceries, itemInfo(Groceries)[["level2"]])
inspect(groceries[1:3])

itemFrequencyPlot(
  Groceries,
  support = 0.025,
  cex.names = 0.8,
  xlim = c(0, 0.3),
  type = "relative",
  horiz = TRUE,
  col = "darkred",
  las = 1,
  xlab = paste(
    "ProportionofMarketBasketsContainingItem",
    "\n(ItemRelativeFrequencyorSupport)"
  )
)

# ---- Rules on the aggregated transactions ----
second.rules <- apriori(
  groceries,
  parameter = list(support = 0.025, confidence = 0.05)
)
print(summary(second.rules))

# ---- Visualisation ----
# Install the plotting packages only when they are missing, then attach
# them: brewer.pal() comes from RColorBrewer, and the plot() calls on rules
# below rely on arulesViz being loaded.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz")
}
library(RColorBrewer)
library(arulesViz)

inspect(second.rules)

# Default plot of the rules, shaded by lift.
plot(
  second.rules,
  control = list(jitter = 2, col = rev(brewer.pal(9, "Greens")[4:9])),
  shading = "lift"
)

# Graph-based view of the rules.
plot(
  second.rules,
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Grouped view of the rules.
plot(
  second.rules,
  method = "grouped",
  control = list(col = rev(brewer.pal(9, "Greens")[4:9]))
)

# ---- Frequent itemsets with eclat ----
groceryrules.eclat <- eclat(
  groceries,
  parameter = list(support = 0.05, minlen = 2)
)
summary(groceryrules.eclat)
inspect(groceryrules.eclat)