这可能有帮助
library(dplyr)
library(tidyr)
# Read the raw lines of the file with `readLines`.
lines <- readLines("file.csv")
# Drop the empty lines.
lines1 <- lines[lines != ""]
# Flag the name (header) lines: those that start with a letter. A logical
# mask is used instead of negative positions because `x[-integer(0)]` would
# silently return an empty vector when no header line is present.
is_name <- grepl("^[A-Za-z]", lines1)
# Running count of headers seen so far; for a record line this is the
# position of the header it belongs to (0 if it precedes every header).
indx <- cumsum(is_name)
# Pair every record line with its header. The lookup vector is padded with a
# leading NA so that records appearing before the first header get an NA
# name instead of shifting the remaining names, and headers without any
# records simply contribute no rows.
records <- data.frame(
  Name = c(NA_character_, lines1[is_name])[indx[!is_name] + 1L],
  x = lines1[!is_name],
  stringsAsFactors = FALSE
)
# Split the `x` column into `Date` and `val` with tidyr's `extract`;
# `convert = TRUE` turns `val` into a number.
records %>%
  extract(x, c("Date", "val"), "(.*),(.*)", convert = TRUE)
# Name Date val
#1 Alice 2015-01-01 8
#2 Alice 2015-01-02 7.5
#3 Alice 2015-01-03 6
#4 Bob 2015-01-02 6
#5 Bob 2015-01-03 8
或者你可以使用base R。
# Read the data using `read.csv` (or `read.xlsx2` for a spreadsheet). Here
# `,` is the delimiter.
d1 <- read.csv("file.csv", header = FALSE, stringsAsFactors = FALSE)
# The second column `V2` is NA on the rows where `V1` holds a name, so the
# NA rows are the header rows.
indx <- is.na(d1$V2)
# Keep only the record rows (drop the NA/header rows).
d2 <- d1[!indx, ]
# `cumsum(indx)` numbers every row by the most recent header above it, so it
# can index directly into the vector of header names. The lookup is padded
# with a leading NA so that record rows appearing before the first header
# get an NA name instead of causing a length-mismatch error.
d2$names <- c(NA_character_, d1$V1[indx])[cumsum(indx)[!indx] + 1L]
d2
# V1 V2 names
#2 2015-01-01 8.0 Alice
#3 2015-01-02 7.5 Alice
#4 2015-01-03 6.0 Alice
#6 2015-01-02 6.0 Bob
#7 2015-01-03 8.0 Bob