# Sample data
df <- data.frame(
  id = c(1, 2, 3, 4),
  text = c("one three", "one four five", "one two five", "one three five six"),
  stringsAsFactors = FALSE  # keep text as character on R < 4.0 as well
)

# Remove whole words from space-separated strings.
#
# x:     character vector of space-separated words.
# words: character vector of words to drop.
#
# Returns a character vector the same length as x in which every token equal
# to one of `words` has been removed and the remaining tokens are re-joined
# with single spaces. NA elements of x stay NA.
#
# Matching is done per token rather than with a regex alternation, so a word
# is never removed from inside a longer word ("one" in "someone"), regex
# metacharacters in `words` are taken literally, and dropping a word from the
# middle of a string does not leave a double space behind.
remove_words <- function(x, words) {
  x <- as.character(x)
  tokens <- strsplit(x, " ", fixed = TRUE)
  cleaned <- vapply(
    tokens,
    function(tok) paste(tok[nzchar(tok) & !(tok %in% words)], collapse = " "),
    character(1)
  )
  cleaned[is.na(x)] <- NA_character_
  cleaned
}

# Find the words that occur in every row
common.words <- Reduce(intersect, strsplit(df$text, " ", fixed = TRUE))

# Remove the common words from each row
df$text2 <- remove_words(df$text, common.words)
#   id               text          text2
# 1  1          one three          three
# 2  2      one four five      four five
# 3  3       one two five       two five
# 4  4 one three five six three five six
# Update multiple columns
library(data.table)

# Sample data
df <- data.frame(
  id = c(1, 2, 3, 4),
  text = c("one three", "one four three five", "one two five",
           "one three five six"),
  text2 = c("five one three", "one four three five", "one two five",
            "one three five six"),
  stringsAsFactors = FALSE  # keep text as character on R < 4.0 as well
)

# Make the data a data.table
setDT(df)

# Columns to analyse
textcols <- c("text", "text2")

# Check: find common words by column
# df[, lapply(.SD, function(x) Reduce(intersect, strsplit(x, " "))), .SDcols = textcols]

# For each text column, drop the words that occur in every row of that column
# and overwrite the column with the result.
# Words are removed per token rather than via a regex alternation, so a common
# word is never cut out of a longer word, regex metacharacters are taken
# literally, and removing a word from the middle of a string leaves no double
# space (the regex version turned "one three five six" into "three  six").
df[, (textcols) := lapply(.SD, function(x) {
  tokens <- strsplit(x, " ", fixed = TRUE)
  common.words <- Reduce(intersect, tokens)
  cleaned <- vapply(
    tokens,
    function(tok) {
      paste(tok[nzchar(tok) & !(tok %in% common.words)], collapse = " ")
    },
    character(1)
  )
  cleaned[is.na(x)] <- NA_character_  # keep missing values missing
  cleaned
}),
.SDcols = textcols]