下面是一种做法。我们使用 stringr 包中的 str_extract_all 提取所有订单号——订单号由字符串 "ORD-" 后跟 5 位数字组成。请注意，如果有效订单号还可能采用其他格式，则需要相应修改 str_extract_all 的第二个参数（正则表达式）。tidyr 包中的 separate_rows 用于把同一条记录中的多个订单号拆分成各自独立的行。最后，我们按国家和城市统计订单总数与已交付订单数。
# Sample input: one free-text order description per (Country, City) record.
# A record may mention zero, one, or several order numbers.
df1 <- local({
  countries <- c("France", "England", "India", "America", "England")
  cities <- c("Paris", "London", "Mumbai", "Los Angeles", "London")
  # The fourth description contains a literal line break inside the string;
  # it is part of the data and is kept exactly as written.
  descriptions <- c(
    "No order was placed",
    "ORD-34212 was the order placed",
    "ORD-12252 and ORD-78564 was the order placed",
    "The orders placed before 2017 was ORD-56438, ORD-13198
and ORD-12258",
    "The order was ORD-34567"
  )
  data.frame(
    Country = countries,
    City = cities,
    Order_Desc = descriptions,
    stringsAsFactors = FALSE
  )
})
# Lookup table: delivery status for each known order number.
df2 <- local({
  # Named vector keeps every order number next to its status.
  status_by_order <- c(
    "ORD-34212" = "Delivered",
    "ORD-12252" = "Not delivered",
    "ORD-78564" = "Not delivered",
    "ORD-56438" = "Delivered",
    "ORD-13198" = "Not delivered",
    "ORD-12258" = "Delivered",
    "ORD-34567" = "Delivered"
  )
  data.frame(
    OrderNo = names(status_by_order),
    # Drop the names so the data frame gets default integer row names.
    Status = unname(status_by_order),
    stringsAsFactors = FALSE
  )
})
library(stringr)
library(dplyr)
library(tidyr)
# Build one row per (Country, City, order number) and attach the delivery
# status of each order from df2.
#
# Steps:
#   1. Per (Country, City) group, pull every "ORD-" + 5 digit token out of
#      the descriptions and glue them into a single "|"-separated string.
#   2. Collapse each group to one row, then split that string back into one
#      row per order number.
#   3. Left-join df2 so every order number gets its Status. A group without
#      any match keeps a single row with an empty order string, which has no
#      counterpart in df2 and therefore ends up with an NA Status.
df1g <- df1 %>%
  group_by(Country, City) %>%
  mutate(
    # unlist() flattens the per-row match vectors in row order without
    # padding. With simplify = TRUE the matches came back as a matrix that is
    # padded with "" whenever rows of the same group contain different
    # numbers of order numbers, which produced spurious blank order rows (and
    # interleaved the orders column-wise) after separate_rows().
    orders = paste(unlist(str_extract_all(Order_Desc, "ORD-\\d{5}")),
                   collapse = "|")
  ) %>%
  distinct(Country, City, orders) %>%
  # "[|]" is a regex character class so the pipe is matched literally.
  separate_rows(orders, sep = "[|]") %>%
  left_join(df2, by = c("orders" = "OrderNo")) %>%
  # Drop the grouping so later steps start from an ungrouped table.
  ungroup()
# Summarise orders per (Country, City).
#   total_orders     - number of rows whose Status is not NA.
#   delivered_orders - number of rows whose Status equals "Delivered".
df1s <- df1g %>%
  group_by(Country, City) %>%
  summarise(
    total_orders = sum(!is.na(Status)),
    # na.rm = TRUE: comparing an NA Status yields NA, and without it a group
    # containing any NA Status would report NA instead of a count, which is
    # inconsistent with total_orders reporting 0 for the same group.
    delivered_orders = sum(Status == "Delivered", na.rm = TRUE)
  ) %>%
  # summarise() only peels off the last grouping level; remove the rest.
  ungroup()