二次/抛物线插值答案

【问题标题】：Quadratic/parabolic interpolation二次/抛物线插值
【发布时间】：2021-08-06 13:14:45
【问题描述】：

我有这条曲线（2020 年 9 月 1 日至 2021 年 3 月 1 日期间，加利福尼亚州每 100,000 名居民中的 COVID-19 病例）：

很明显，2020 年 12 月底的下降是寒假期间检测下降的产物（最低点恰好发生在圣诞节），而不是病例的真正下降。

我想做的是通过某种二次或抛物线插值来估算值，以得出 2020 年 12 月 12 日至 2021 年 1 月 13 日之间真实病例率（每 100k）的合理值。我该怎么做？

这是我用来生成绘图的代码：

x <- seq.Date(as.Date("2020-09-01"), as.Date("2021-03-01"), by=1)
y <- c(9.36,9.16,9.05,8.88,8.76,8.65,7.94,7.81,7.65,7.5,7.47,7.5,7.52,8.19,
       8.03,8.1,8.12,8.14,8.19,8.24,8.21,8.19,8.19,8.22,8.24,8.2,8.16,8.14,
       8.16,8.14,8.25,8.19,8.3,8.36,8.45,8.43,8.42,8.44,8.51,8.63,8.62,8.63,
       8.66,8.69,8.73,8.81,8.79,8.9,9.15,9.46,9.67,9.78,10.07,10.19,10.32,10.48,
       10.52,10.69,10.93,11.27,11.68,12.4,13.45,14.66,15.92,17.09,18.15,18.85,
       19.04,19.98,20.93,21.69,22.89,24.28,25.52,26.78,29.08,31.29,33.62,35.34,
       37.11,37.95,38.35,39.59,40.82,42.06,39.44,39.63,41.69,43.73,47.78,52.64,
       57.16,65.24,70.15,72.29,73.01,76.01,78.53,81.46,84.64,87.58,89.86,90.79,
       93.81,96.47,98.05,99.48,100.07,99.73,99.65,99.36,99.52,99.32,92.84,82.53,
       84.34,86.33,89.6,92.99,96.15,99.42,101.56,102.45,102.72,103.63,102.26,101,
       104.48,112.58,109.57,106.79,100.29,94.47,88.73,83.79,79.33,76.19,74.25,
       67.69,63.86,60.59,57.27,54.07,51.8,50.69,49.49,46.01,42.54,39.79,37.28,
       36.11,35.29,33.53,31.66,30.16,28.58,27.37,26.13,25.13,23.06,21.33,19.92,
       18.65,17.51,16.71,16.13,14.63,13.89,13.03,12.27,11.56,11.1,10.79,10.63,
       10.07,9.63,9.28,8.98,8.77,8.61,8.25)

df <- data.frame(x,y)

p <- ggplot(data=df) +
  geom_line(aes(x=as.Date(x, origin=as.Date("1970-01-01")),y=y))
p

我不确定从哪里开始，所以如果有人在这里向我扔了一根骨头，我将不胜感激。谢谢！ :)

【问题讨论】：

我会说这是重复的：stackoverflow.com/questions/3822535/…
这不是重复的，除非我遗漏了什么。给出的链接 (stackoverflow.com/questions/3822535/…) 处理将多项式曲线拟合到现有数据。我问的是对丢失的曲线部分进行插值（即插补）数据。
好的，我的意思是您可能正在寻找的是拟合多项式，然后对该多项式进行插值。您总是需要拟合某种模型才能进行插值。

标签： r interpolation curve-fitting quadratic

【解决方案1】：

一位同事提供了此代码：

    #Quadratic Interpolation 

library(magrittr)
library(dplyr)
library(ggplot2)
library(deSolve)
ca_pop <- 40129160L

x <- seq.Date(as.Date("2020-09-01"), as.Date("2021-03-01"), by=1)
y <- c(9.36,9.16,9.05,8.88,8.76,8.65,7.94,7.81,7.65,7.5,7.47,7.5,7.52,8.19,
       8.03,8.1,8.12,8.14,8.19,8.24,8.21,8.19,8.19,8.22,8.24,8.2,8.16,8.14,
       8.16,8.14,8.25,8.19,8.3,8.36,8.45,8.43,8.42,8.44,8.51,8.63,8.62,8.63,
       8.66,8.69,8.73,8.81,8.79,8.9,9.15,9.46,9.67,9.78,10.07,10.19,10.32,10.48,
       10.52,10.69,10.93,11.27,11.68,12.4,13.45,14.66,15.92,17.09,18.15,18.85,
       19.04,19.98,20.93,21.69,22.89,24.28,25.52,26.78,29.08,31.29,33.62,35.34,
       37.11,37.95,38.35,39.59,40.82,42.06,39.44,39.63,41.69,43.73,47.78,52.64,
       57.16,65.24,70.15,72.29,73.01,76.01,78.53,81.46,84.64,87.58,89.86,90.79,
       93.81,96.47,98.05,99.48,100.07,99.73,99.65,99.36,99.52,99.32,92.84,82.53,
       84.34,86.33,89.6,92.99,96.15,99.42,101.56,102.45,102.72,103.63,102.26,101,
       104.48,112.58,109.57,106.79,100.29,94.47,88.73,83.79,79.33,76.19,74.25,
       67.69,63.86,60.59,57.27,54.07,51.8,50.69,49.49,46.01,42.54,39.79,37.28,
       36.11,35.29,33.53,31.66,30.16,28.58,27.37,26.13,25.13,23.06,21.33,19.92,
       18.65,17.51,16.71,16.13,14.63,13.89,13.03,12.27,11.56,11.1,10.79,10.63,
       10.07,9.63,9.28,8.98,8.77,8.61,8.25)

df <- data.frame(x,y, day_num = 1:length(y))
leave_out <- which(df$x > "2020-12-18" & df$x < "2021-01-08")

#Plot curve with missing points
df[-leave_out,] %>% filter(x > "2020-10-15") %>%
  ggplot() +
  geom_point(aes(x=x,y=y), shape=1, alpha=.7, size=.6) + 
  theme_bw()

df_fit <- df %>%#[-leave_out,] %>%
  filter(x > "2020-10-15") %>%
  mutate(transform_day = 1 / day_num) 

#Plot df_fit
#ggplot(data=df_fit, aes(x=x, y=transform_day)) + geom_line()
  
#quad_model <- lm(y ~ (poly(transform_day, 2)), data = df_fit)
#y_fit <- predict(quad_model)
#model_fit <- data.frame(x = df_fit$x,y_fit)
#model_fit %>% filter(x > "2020-10-15") %>%
#  ggplot() +
#  geom_line(aes(x = x, y = y_fit)) +
#  geom_point(data = df_fit, aes(x = x, y =y)) + theme_bw()
halfway_ind <- round(mean(order(abs(y - 30))[1:2]))
halfway_ind #116
halfway_ind60 <- round(mean(order(abs(y - 60))[c(1,3)]))
halfway_ind60 #118
##Let's say 117 for the peak
df_fit$day_adj <- df_fit$day_num - 117
df_fit$model <- 150*375/ (df_fit$day_adj^2 + 375)
df_fit$cases <- df_fit$y * ca_pop / 1e5
df_fit %>% 
  ggplot() +
  geom_line(aes(x = day_adj, y = model)) +
  geom_point(aes(x = day_adj, y =y)) + theme_bw()

【讨论】：

此方法不需要包 deSolve。