给定一个字符向量
# Example input: five short strings used by every approach in this walkthrough.
chars <- c(
  "cannon",
  "jackson",
  "benford",
  "paws",
  "moxie"
)
任务是求出 chars 中每个元素的字符数,然后用这个结果作为下标,相应地对 letters 取子集。
第一种方法:复制和粘贴(不要那样做)
# Approach 1 (shown only for contrast): one hand-copied statement per element
# of `chars`. Every step below is repeated five times with a different index.

# Step 1: count the characters of each element of `chars`.
nchar_cannon <- nchar(chars[1])
nchar_jackson <- nchar(chars[2])
nchar_benford <- nchar(chars[3])
nchar_paws <- nchar(chars[4])
nchar_moxie <- nchar(chars[5])
# Step 2: use each count as an index into `letters`.
letter_cannon <- letters[nchar_cannon]
letter_jackson <- letters[nchar_jackson]
letter_benford <- letters[nchar_benford]
letter_paws <- letters[nchar_paws]
letter_moxie <- letters[nchar_moxie]
# Step 3: combine the five letters into one vector ...
out <- c(letter_cannon,
letter_jackson,
letter_benford,
letter_paws,
letter_moxie)
# ... and label each letter with the string it was derived from.
setNames(out, chars)
我想这正是你想要避免的做法……
第二种方法:for 循环(可行,尽管它在 R 社区中的名声并不算好)
# Approach 2: an explicit for loop over the words.
words <- c("cannon", "jackson", "benford", "paws", "moxie")
# Preallocate the named result once instead of growing it from an empty
# vector on every iteration.
out <- setNames(character(length(words)), words)
for (i in seq_along(words)) {
  # The character count of the i-th word selects the letter at that position.
  out[[i]] <- letters[nchar(words[[i]])]
}
out
第三种方法:使用泛函(functional)
# Approach 3: apply a function to every element of `chars`.
# `vapply()` declares that each call yields exactly one character value, so the
# result is always a character vector; `sapply()` can change its return type
# depending on the input.
vapply(chars, function(x) letters[nchar(x)], character(1))
第四种方法:vectorization(推荐)
# Approach 4: no iteration at all. `nchar()` is applied to the whole vector at
# once and the resulting counts index `letters` in a single step; the input
# strings become the names of the result.
setNames(
  object = letters[nchar(chars)],
  nm = chars
)
基准测试
# Benchmark the three implementations on a long input (5e6 strings).
# `loop_fun`, `functional_fun` and `vectorize_fun` must already be defined when
# this block runs; in this file they are listed after the benchmark.
library(microbenchmark)
# `rep()` builds the repeated vector directly. `c(replicate(1e6, chars))` gave
# the same values but first materialised a 5 x 1e6 matrix.
chars_long <- rep(chars, times = 1e6)
benchmark <- microbenchmark(
  loop_fun = loop_fun(chars_long),
  functional_fun = functional_fun(chars_long),
  vectorize_fun = vectorize_fun(chars_long),
  times = 100L
)
# `autoplot()` is a ggplot2 generic; qualifying it lets the call work without
# ggplot2 being attached.
ggplot2::autoplot(benchmark)
#Unit: seconds
# expr min lq mean median uq max neval
# loop_fun 7.217346 8.142811 9.481697 9.431999 10.472183 13.54128 100
# functional_fun 10.540376 12.064269 13.062617 12.873895 13.738929 17.90349 100
# vectorize_fun 1.227648 1.310427 1.450161 1.370944 1.552207 2.00134 100
基准测试中使用的函数
# Map each string in `x` to the letter whose alphabet position equals the
# string's character count, using an explicit for loop.
#
# x: a character vector.
# Returns a character vector of the same length as `x`, named by `x`. Elements
# whose character count is 0, greater than 26, or NA yield NA.
loop_fun <- function(x) {
  # Preallocate and fill by position. Assigning by name (`out[position]`)
  # overwrote earlier entries whenever `x` contained duplicates, so the result
  # was shorter than the input.
  out <- character(length(x))
  for (i in seq_along(x)) {
    # `[1L]` pads a zero-length lookup (character count 0) to a single NA so
    # the assignment always receives exactly one value.
    out[i] <- letters[nchar(x[i])][1L]
  }
  names(out) <- x
  out
}
# Map each string in `x` to the letter whose alphabet position equals the
# string's character count, by applying a function to every element.
#
# x: a character vector.
# Returns a character vector of the same length as `x`, named by `x`. Elements
# whose character count is 0, greater than 26, or NA yield NA.
functional_fun <- function(x) {
  # `vapply()` guarantees a character vector; `sapply()` returned a list for
  # empty input or when any element produced a zero-length lookup.
  # `[1L]` pads a zero-length lookup (character count 0) to a single NA.
  vapply(x, function(y) letters[nchar(y)][1L], character(1))
}
# Map each string in `x` to the letter whose alphabet position equals the
# string's character count, using vectorised indexing only.
#
# x: a character vector.
# Returns a character vector of the same length as `x`, named by `x`. Elements
# whose character count is 0, greater than 26, or NA yield NA.
vectorize_fun <- function(x) {
  n <- nchar(x)
  # `[` silently drops zero indices, which would shorten the result and make
  # `setNames()` fail on the length mismatch; map them to NA so every input
  # keeps its own slot.
  n[!is.na(n) & n == 0L] <- NA_integer_
  setNames(letters[n], x)
}