【发布时间】:2020-05-12 14:08:28
【问题描述】:
在一项临床研究分析中，我想在 dataframe1 中创建一个“事件”变量：对 dataframe1 的每一行，找出 dataframe2 中第一个落在两个日期之间（两端各放宽 n 天，例如 7 天）的日期。问题是 dataframe1 中的 ID 并不唯一。换句话说，如果 dataframe2 的日期（Datum）介于 Datum_Implant - 7 天和 Datum_Explant + 7 天之间，我想把这些记录合并到 dataframe1 中（并据此创建一个“事件”）。我该怎么做？
这是我的数据框 1
> dput(y2)
structure(list(PIZ = c("15597283", "15597283", "15597283", "15597283", "20116404", "25562151", "25562151", "25936299", "27172679"), Datum = c("27.12.2004", "27.12.2004", "07.04.2005", "12.05.2006", NA, "29.12.2015", "22.01.2016", "13.12.2013", NA), Datum_Mibi = c(2004.98633879781, 2004.98633879781, 2005.26301369863, 2006.35890410959, NA, 2015.99178082192, 2016.05737704918, 2013.94794520548, NA), Keim = c("链球菌缓解", "链球菌缓解", "Koagulase neg.Staphylokokken", "Koagulase neg.Staphylokokken", NA, "革兰氏阳性 Haufenkokken", "表皮葡萄球菌(nach Anreicherung)", "革兰氏阳性 Kettenkokken", NA), Event = c(1, 1, 1, 1, NA, 1, 1, 1, NA), Alter = c(NA, 14, 14, 15, 17, 10, 10, 10, 17), Geschlecht = c(NA, "M", "M", "M", "W", "M", "M", "M", "M"), Datum_Implant = c(NA, 2004.96721311475, 2005.43561643836, 2006.13698630137, 2015.09863013699, 2015.90410958904, 2016.0956284153, 2013.28493150685, 2015.29863013699), Datum_Explant = c(NA, 2005.26301369863, 2005.81095890411, NA, 2015.75068493151, 2016.05737704918, 2016.77322404372, 2014.31232876712, 2015.96164383562), KatheterNummer = c(NA, 2, 3, 4, 1, 1, 2, 1, 1)), class = c("data.table", "data.frame"), row.names = c(NA, -9L), .internal.selfref = )
这个数据框 2
> dput(Labor_Neutrophile_alle2)
structure(list(PIZ = c("20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "20116404",
"20116404", "20116404", "20116404", "20116404", "20116404", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "25936299", "25936299", "25936299", "25936299", "25936299",
"25936299", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679", "27172679", "27172679", "27172679", "27172679",
"27172679", "27172679"), Datum = c(2015.09863013699, 2015.10684931507,
2015.1095890411, 2015.17534246575, 2015.18630136986, 2015.19178082192,
2015.19452054795, 2015.20821917808, 2015.21095890411, 2015.21369863014,
2015.22191780822, 2015.23287671233, 2015.24383561644, 2015.24931506849,
2015.25479452055, 2015.26301369863, 2015.27945205479, 2015.28493150685,
2015.29863013699, 2015.32054794521, 2015.33698630137, 2015.36164383562,
2015.37534246575, 2015.41369863014, 2015.44109589041, 2015.45205479452,
2015.47945205479, 2015.50136986301, 2015.52054794521, 2015.5397260274,
2015.5397260274, 2015.55068493151, 2015.55890410959, 2015.57260273973,
2015.58630136986, 2015.61643835616, 2015.63835616438, 2015.66301369863,
2015.66575342466, 2015.67397260274, 2015.68493150685, 2015.69315068493,
2015.70684931507, 2015.74794520548, 2015.78904109589, 2015.86301369863,
2016.00819672131, 2016.03825136612, 2016.11475409836, 2016.22404371585,
2016.30601092896, 2016.36338797814, 2016.3825136612, 2016.41530054645,
2016.60655737705, 2016.6693989071, 2016.72677595628, 2016.80327868852,
2016.84153005464, 2016.86338797814, 2016.86885245902, 2016.87158469945,
2016.87431693989, 2016.87978142077, 2016.88797814208, 2016.89617486339,
2016.90710382514, 2016.91256830601, 2016.93169398907, 2016.93169398907,
2016.93169398907, 2016.93715846995, 2016.94808743169, 2016.98633879781,
2017.02739726027, 2017.06575342466, 2017.12054794521, 2017.23561643836,
2017.50410958904, 2017.75890410959, 2017.96438356164, 2013.75890410959,
2013.77808219178, 2013.79452054795, 2013.80273972603, 2013.81095890411,
2013.82191780822, 2013.82739726027, 2013.8301369863, 2013.83561643836,
2013.84109589041, 2013.84657534247, 2013.84931506849, 2013.90684931507,
2013.92602739726, 2013.99452054795, 2014.00273972603, 2014.00821917808,
2014.05205479452, 2014.07123287671, 2014.07945205479, 2014.09863013699,
2014.11780821918, 2014.13698630137, 2014.15616438356, 2014.16712328767,
2014.18630136986, 2014.2301369863, 2014.25205479452, 2014.26575342466,
2014.28493150685, 2014.30410958904, 2014.32328767123, 2014.34246575342,
2014.36438356164, 2014.4, 2014.42739726027, 2014.47397260274,
2014.49589041096, 2014.61917808219, 2014.71506849315, 2014.73424657534,
2014.78082191781, 2014.81095890411, 2014.84109589041, 2014.91506849315,
2014.95616438356, 2014.99452054795, 2015.04109589041, 2015.0602739726,
2015.06849315068, 2015.10684931507, 2015.14520547945, 2015.16438356164,
2015.18356164384, 2015.22191780822, 2015.26301369863, 2015.31780821918,
2015.43835616438, 2015.55890410959, 2015.64657534247, 2015.87671232877,
2016.09016393443, 2016.35245901639, 2017.0904109589, 2018.16438356164,
2018.4904109589, 2019.1397260274, 2019.44931506849, 2015.28493150685,
2015.29863013699, 2015.30684931507, 2015.32328767123, 2015.34794520548,
2015.35890410959, 2015.37534246575, 2015.38356164384, 2015.39726027397,
2015.40821917808, 2015.43561643836, 2015.44109589041, 2015.45205479452,
2015.4602739726, 2015.4602739726, 2015.51232876712, 2015.51232876712,
2015.52876712329, 2015.56712328767, 2015.60547945205, 2015.64383561644,
2015.70410958904, 2015.74246575342, 2015.76164383562, 2015.78082191781,
2015.78630136986, 2015.8, 2015.81095890411, 2015.81917808219,
2015.83835616438, 2015.84383561644, 2015.88219178082, 2015.91232876712,
2015.92328767123, 2015.9397260274, 2015.99452054795, 2016.08743169399,
2016.12568306011, 2016.24863387978, 2016.32513661202, 2016.41256830601,
2016.4781420765, 2016.55191256831, 2016.62021857923, 2016.70491803279,
2016.78142076503, 2016.87704918033, 2016.95628415301, 2017.03287671233,
2017.1095890411, 2017.18630136986, 2017.27945205479, 2017.43561643836,
2017.43561643836, 2017.47123287671, 2017.70410958904, 2017.92328767123,
2018.04931506849, 2018.16438356164, 2018.29863013699, 2019.58356164384
), Wert = c(150, 110, 120, 2190, 5600, 3600, 4100, 1700, 1550,
750, 750, 250, 400, 100, 250, 500, 750, 300, 600, 2400, 2500,
1050, 2200, 1900, 1750, 3050, 2300, 2150, 9400, 5800, 6600, 1210,
430, 40, 250, 200, 2500, 800, 1200, 1550, 300, 50, 1300, 1900,
4800, 1450, 1750, 2000, 3050, 2050, 2500, 4600, 2600, 2500, 2050,
1900, 1650, 3100, 1900, 1550, 1250, 820, 600, 930, 480, 320,
380, 500, 570, 650, 550, 800, 1400, 2900, 2100, 1950, 1950, 2900,
3000, 3050, 4800, 4300, 600, 2500, 300, 250, 20, 100, 10, 30,
100, 440, 500, 1950, 1350, 300, 250, 800, 2400, 600, 200, 500,
1150, 1100, 850, 1550, 8160, 200, 800, 250, 450, 1050, 2600,
2500, 2100, 3400, 1850, 1750, 3100, 1700, 2200, 2000, 1050, 1300,
2600, 2200, 1850, 1900, 1400, 1500, 1500, 900, 5500, 2200, 3400,
1650, 2200, 2050, 2010, 2800, 2500, 3700, 2900, 2500, 3700, 5100,
6300, 3050, 4050, 750, 560, 290, 970, 200, 450, 1240, 1200, 1500,
300, 200, 300, 300, 390, 450, 770, 800, 1850, 3050, 1000, 1050,
1300, 9050, 1930, 660, 390, 150, 100, 550, 2900, 2300, 700, 200,
550, 1100, 1950, 750, 1250, 1550, 1150, 1700, 600, 1350, 2200,
1350, 2050, 1300, 1800, 1500, 1900, 1200, 1800, 2100, 1700, 2300,
1900, 2400, 1740, 2500, 2300, 2600)), row.names = c(NA, -210L
), class = c("tbl_df", "tbl", "data.frame"))
所以我的想法是通过“KatheterNummer”创建一个唯一标识符，但我找不到任何合理的解决方案。说来有点惭愧，下面是我目前的做法：
# Harmonise the lab table with y2: rename the patient identifier column from
# "Pat-ID" to "PIZ" so that both tables can be joined on the same key name.
names(Labor_Neutrophile_alle2)[names(Labor_Neutrophile_alle2) == "Pat-ID"] <- "PIZ"

# Convert the lab date from "dd.mm.yyyy" text to a decimal year.
# The guard skips the conversion when the column is already numeric, so that
# re-running this step on already converted data does not fail in as.Date().
if (!is.numeric(Labor_Neutrophile_alle2$Datum)) {
  Labor_Neutrophile_alle2$Datum <- as.Date(
    Labor_Neutrophile_alle2$Datum,
    format = "%d.%m.%Y"
  )
  # decimal_date() is provided by lubridate; it is namespaced here so the step
  # does not depend on a library(lubridate) call elsewhere in the script.
  Labor_Neutrophile_alle2$Datum <- lubridate::decimal_date(
    Labor_Neutrophile_alle2$Datum
  )
}
#a_all_000$Neutrophil <- ifelse(a_all_000$PIZ == Labor_Neutrophile_alle2$PIZ & Labor_Neutrophile_alle2$Datum > a_all_000$Datum_Implant & Labor_Neutrophile_alle2$Datum < a_all_000$Datum_Explant & Labor_Neutrophile_alle2$Wert > 500,1,0)
library(dplyr)
# Sort the lab values chronologically within each patient.
Labor_Neutrophile_alle2 <- arrange(Labor_Neutrophile_alle2, PIZ, Datum)

# Tolerance applied on both sides of the implant/explant window: 7 days
# expressed as a fraction of a 365-day year, because all dates in this script
# are decimal years.
window_tol <- 7 / 365

# Return the rows of `catheters` that belong to catheter number `nummer`,
# reduced to the columns used in the analysis.
select_catheter <- function(catheters, nummer) {
  catheters %>%
    filter(KatheterNummer == nummer) %>%
    select(
      PIZ, Datum_Mibi, Datum_Implant, Datum_Explant, Keim,
      Event, Alter, Geschlecht, KatheterNummer
    )
}

# For every patient (PIZ) in `catheter`, return the earliest row of `labs`
# with Wert > 500 whose Datum lies between Datum_Implant - tol and
# Datum_Explant + tol. Matching rows are flagged with Neutrophilie = 1;
# patients without such a lab value produce no row. Datum_Implant and
# Datum_Explant are returned unchanged.
first_neutrophilia <- function(catheter, labs, tol = window_tol) {
  catheter %>%
    inner_join(labs, by = "PIZ") %>%
    # filter() drops rows whose condition is NA, so catheters with a missing
    # implant or explant date never match.
    filter(
      Wert > 500,
      Datum >= Datum_Implant - tol,
      Datum <= Datum_Explant + tol
    ) %>%
    # Apply the threshold before picking the first row, so the result is the
    # first value above 500 rather than the first value in the window.
    arrange(PIZ, Datum) %>%
    group_by(PIZ) %>%
    slice(1) %>%
    ungroup() %>%
    mutate(Neutrophilie = 1)
}

# Reduce the matches of one catheter number to the neutrophil result columns.
select_neutro <- function(matches, nummer) {
  matches %>%
    filter(KatheterNummer == nummer) %>%
    select(PIZ, Datum, Neutro_Wert = Wert, Neutrophilie, KatheterNummer)
}

# Catheter rows of y2, one table per catheter number.
a11 <- select_catheter(y2, 1)
a12 <- select_catheter(y2, 2)
a13 <- select_catheter(y2, 3)
a14 <- select_catheter(y2, 4)
a15 <- select_catheter(y2, 5)

# Lab values above 500 (still grouped by PIZ and Datum). Not used in the
# visible part of this script; kept in case later code relies on it.
labor <- Labor_Neutrophile_alle2 %>%
  group_by(PIZ, Datum) %>%
  filter(Wert > 500) %>%
  select(PIZ, Datum, Wert)

# First lab value > 500 inside the widened window, per catheter number.
a111 <- first_neutrophilia(a11, Labor_Neutrophile_alle2)
a112 <- first_neutrophilia(a12, Labor_Neutrophile_alle2)
a113 <- first_neutrophilia(a13, Labor_Neutrophile_alle2)
a114 <- first_neutrophilia(a14, Labor_Neutrophile_alle2)
a115 <- first_neutrophilia(a15, Labor_Neutrophile_alle2)

# All catheter rows.
a_all_000 <- rbind(a11, a12, a13, a14, a15)

# All labs: first time > 500 between implant and explant.
a_all_111 <- bind_rows(a111, a112, a113, a114, a115) # only 50 are neutrophilic?

a_all_1111 <- select_neutro(a_all_111, 1)
a_all_1112 <- select_neutro(a_all_111, 2)
a_all_1113 <- select_neutro(a_all_111, 3)
a_all_1114 <- select_neutro(a_all_111, 4)
a_all_1115 <- select_neutro(a_all_111, 5)
a_all_11111 <- bind_rows(
  a_all_1111, a_all_1112, a_all_1113, a_all_1114, a_all_1115
)
...
...
我想要的结果是这样的:
> dput(Neutro)
structure(list(...1 = c(1, 2, 3, 3), PIZ = c(11364500, 11364500,
11364500, 11364500), Datum_Mibi = c(2001.132, 2002.132, 2003.142,
2003.152), Datum_Implant = c(2001.025, 2002.025, 2003.055, 2003.055
), Datum_Explant = c(2001.518, 2002.618, 2003.518, 2003.518),
Keim = c("Koagulase neg.Staphylokokken", "Koagulase ", NA,
"Koagulase "), Event = c(1, 1, 0, 1), Alter = c(13, 13, 14,
14), Katheter = c(1, 2, 3, 3), Neutrophilie = c(1, 0, 1,
1), Wert = c(1200, 3050, 3000, 3000)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
【问题讨论】:
-
嗨 Bengalepunktcom,您能提供一个您想要的输出示例吗?
-
当然，Ian，我已经补充上了。这是我第一次做临床研究...
-
您提供的 Labor_Neutrophile_alle2 子集中，没有任何值与 y2 样本的年份相同。我当然可以帮您解答这个问题，但为此需要您提供能够重现预期输出的数据。
-
好的，Ian，我选了 3 个肯定同时出现在两个数据框中的 ID。Ian，谢谢你的耐心。如果您对我如何自学这些内容还有其他建议，请不吝赐教。有没有关于临床研究的书籍或 YouTube 视频？或者您是否开设在线课程？我很想报名！
标签: r