我把完整的解决方案贴在这里：
第 1 步——创建初始身份验证对象
# Initial objects ----
# One-time setup script: creates the authenticated Google Analytics object,
# the SQL connection string and the Google Analytics ID, and saves all three
# into the config folder. The extraction script later load()s them by the
# exact names "ga.rga", "connection.string" and "google.analytics.id" from
# that folder, so every file is written there under exactly those names.

# devtools is needed because we need to use its function install_github
# install.packages("devtools")
library(devtools)
# install_github("rga", "skardhamar")
library(rga)
library(RODBC)

config.folder.location <- "C:\\Users\\sc\\Documents\\R Sources\\SocialMedia\\Config"
working.directory <- config.folder.location

# Make sure the config folder exists before anything is saved into it
# (no-op when the folder is already there).
dir.create(config.folder.location, recursive = TRUE, showWarnings = FALSE)

# rga.open() creates the object named by `instance` ("ga"); it is saved right
# below so that it can be reused without authenticating again.
rga.open(instance = "ga")
save(ga, file = file.path(config.folder.location, "ga.rga"))

# SQL Server connection string used by the extraction script.
# The file name must not carry trailing whitespace: it is loaded later as
# "connection.string".
connection.string <- 'driver={SQL Server};server=localhost;database=SocialMedia;trusted_connection=true'
save(connection.string, file = file.path(config.folder.location, "connection.string"))

# Google Analytics ID for your site
google.analytics.id <- "XXXXXX"
# Saved into the config folder rather than the current working directory,
# which may be anywhere when this script is run.
save(google.analytics.id, file = file.path(config.folder.location, "google.analytics.id"))
第 2 步 -- 查询 Google Analytics
# Extracts blog statistics from Google Analytics for a date range and stores
# them in the SQL table GoogleAnalyticsBlogStats (existing rows for the same
# date range are deleted first, so the script can be re-run for a period).
#
# Command line arguments:
#   1. (required) folder that contains the saved config objects
#      "google.analytics.id", "connection.string" and "ga.rga"
#   2. (optional) start date of the GA query; defaults to yesterday
#   3. (optional) end date of the GA query; defaults to the start date

# devtools is needed because we need to use its function install_github
# install.packages("devtools")
library(devtools)
# install_github("rga", "skardhamar")
library(rga)
# install.packages("RODBC")
library(RODBC)
#*********************************************************************************************************************
# Read arguments and store them in variables -- START
#*********************************************************************************************************************
args <- commandArgs(trailingOnly = TRUE)
# Without the config folder nothing below can work; fail with a clear message
# instead of letting setwd(NA) raise an obscure error.
if (length(args) < 1) {
  stop("Usage: Rscript <script> <config folder> [start date] [end date]",
       call. = FALSE)
}
# First argument is read as working directory (location where config folder resides)
working.directory <- as.character(args[1])
if (length(args) >= 2) {
  # Second argument is used as start date in GA Query
  start <- as.Date(args[2])
} else {
  start <- Sys.Date() - 1
}
# `>= 3` (not `== 3`) so that the end date is still honoured when additional
# trailing arguments are supplied.
if (length(args) >= 3) {
  # Third argument is used as end date in GA Query
  end <- as.Date(args[3])
} else {
  end <- start
}
if (is.na(start) || is.na(end)) {
  stop("Start and end date must be valid dates (e.g. 2014-06-30)", call. = FALSE)
}
if (end < start) {
  stop("End date (", end, ") must not be before start date (", start, ")",
       call. = FALSE)
}
#*********************************************************************************************************************
# Read arguments and store them in variables -- END
#*********************************************************************************************************************
#*********************************************************************************************************************
# Use arguments values to setup the environment and load initial objects -- START
#*********************************************************************************************************************
# Set working directory to the passed value
setwd(working.directory)
# Load Google Analytics ID from Config folder
load("google.analytics.id")
# Load SQL Connection String
load("connection.string")
options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")))
# Load pre-authenticated Google Analytics object "ga"
load("ga.rga")
# Check if the ga token has expired. If it has then refresh token.
if (ga$isTokenExpired()) {
  ga$refreshToken()
}
#*********************************************************************************************************************
# Use arguments values to setup the environment and load initial objects -- END
#*********************************************************************************************************************
# Runs Google Analytics query for the provided id and limits data by date (i.e., start.date = date and end.date = date)
# Wrapped in tryCatch to handle scenarios when GA does not return any rows.
# A warning or error still yields NULL (nothing is written to the database),
# but the condition message is reported so failures are not silent.
blog.stats <- tryCatch({
  ga$getData(google.analytics.id,
             start.date = start,
             end.date = end,
             metrics = "ga:sessions, ga:users, ga:newUsers, ga:sessionDuration, ga:timeOnPage, ga:pageviews",
             dimensions = "ga:date, ga:pageTitle, ga:medium, ga:hasSocialSourceReferral, ga:source, ga:referralPath",
             sort = "",
             filters = "",
             segment = "",
             start = 1,
             max = 10000)
}, warning = function(w) {
  message("Google Analytics query raised a warning: ", conditionMessage(w))
  NULL
}, error = function(e) {
  message("Google Analytics query failed: ", conditionMessage(e))
  NULL
})
if (length(blog.stats) > 0) {
  # Convert Numeric To Integer
  metric.columns <- c("sessions", "users", "newUsers", "sessionDuration", "timeOnPage", "pageviews")
  for (metric in metric.columns) {
    blog.stats[, metric] <- as.integer(blog.stats[, metric])
  }
  # Assign column names that match the table's column names in the database
  colnames(blog.stats) <- c("Date", "PageTitle", "Medium", "HasSocialSourceReferral", "TrafficSource", "ReferralPath", "Sessions", "Users", "NewUsers", "SessionDurationSeconds", "TimeOnPageSeconds", "PageViews")
  # Open a Connection
  myconn <- odbcDriverConnect(connection.string)
  # odbcDriverConnect() returns -1 instead of an RODBC handle on failure
  if (!inherits(myconn, "RODBC")) {
    stop("Could not open the database connection", call. = FALSE)
  }
  tryCatch({
    # Prepare Delete Query (start/end are Date objects, so they are pasted
    # in ISO yyyy-mm-dd form)
    delete.query <- paste0("DELETE FROM GoogleAnalyticsBlogStats WHERE Date >='", start, "' AND Date <='", end, "'")
    # Execute Delete Query
    sqlQuery(myconn, delete.query, errors = FALSE)
    # Insert rows in the table
    sqlSave(myconn, blog.stats, "GoogleAnalyticsBlogStats", safer = FALSE, append = TRUE, rownames = FALSE)
  }, finally = {
    # Close the connection to the database even when the delete/insert fails
    odbcClose(myconn)
  })
}
第 3 步 -- 从批处理文件执行 Rscript
批处理文件如下所示：
"C:\Program Files\R\R-3.1.0\bin\Rscript.exe" "D:\Social Media Analytics\R Scripts\ExtractGoogleAnalyticsBlogStats.r" "D:\\Social Media Analytics\\R Scripts\\Config"