横截面研究

文献实例

  • Cai, M., Zhang, S., Lin, X., Qian, Z., McMillin, S.E., Yang, Y., Zhang, Z., Pan, J., Lin, H.. (2022) Association of ambient particulate matter pollution of different sizes with in-hospital case fatality among stroke patients in China. Neurology. (IF=12.3) 98(4), e2474-e2486. PDF

  • Cai, M., Lin, X., Wang, X., Zhang, S., Wang, C., Zhang, Z., Pan, J., Lin, H. (2023). Long-term exposure to ambient fine particulate matter chemical composition and in-hospital case fatality among patients with stroke in China. The Lancet Regional Health-Western Pacific (IF=8.6), 100679. PDF

R语言代码实战

数据读取

pacman::p_load(
  arrow, dplyr, ggplot2, data.table, broom, splines, purrr, tableone, patchwork, latex2exp, glue)

cs_pm = arrow::read_parquet(
  'Data/cs_synthetic_pm_components.parquet')

glimpse(cs_pm)
Rows: 30,393
Columns: 17
$ inhosp_death      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ age               <int> 61, 68, 69, 74, 71, 52, 74, 72, 77, 66, 60, 73, 60, …
$ sex               <fct> Female, Female, Male, Male, Female, Male, Male, Male…
$ ethnicity         <fct> Han, Han, Han, Han, Han, Han, Han, Han, Han, Han, Ha…
$ occupation        <fct> Farmer, Other, Other, Farmer, Farmer, Other, Farmer,…
$ marriage          <fct> Married, Married, Married, Married, Married, Other, …
$ stroke_subtype    <chr> "Ischemic", "Hemorrhagic", "Hemorrhagic", "Ischemic"…
$ hypertension      <dbl> 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1…
$ diabetes          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0…
$ Temperature_MA_07 <dbl> 15.405740, 23.771006, -2.423174, 15.489920, 19.48610…
$ RH_MA_07          <dbl> 84.76136, 69.86074, 58.37018, 77.01996, 75.38288, 67…
$ PM2.5_MA_365      <dbl> 25.56762, 53.06090, 40.63659, 49.56892, 29.22137, 52…
$ SO4_MA_365        <dbl> 5.579043, 8.839439, 6.897126, 10.198819, 6.676281, 9…
$ NO3_MA_365        <dbl> 4.769624, 12.414940, 8.555672, 11.542473, 4.700461, …
$ NH4_MA_365        <dbl> 4.067301, 7.929543, 5.920617, 8.340625, 4.216512, 8.…
$ OM_MA_365         <dbl> 7.401939, 11.583662, 9.617350, 12.414788, 8.576074, …
$ BC_MA_365         <dbl> 1.634150, 2.142325, 2.070287, 2.311701, 2.003598, 2.…

表1制作

var_vec = c("age", "sex", "ethnicity", "occupation", "marriage", 
"stroke_subtype", "hypertension", "diabetes", "Temperature_MA_07", 
"RH_MA_07", "PM2.5_MA_365", "SO4_MA_365", "NO3_MA_365", "NH4_MA_365", 
"OM_MA_365", "BC_MA_365")

var_fct = c("sex", "ethnicity", "occupation", "marriage", 
"stroke_subtype", "hypertension", "diabetes")

tab1 = cs_pm %>% 
  CreateTableOne(
    vars = var_vec, 
    factorVars = var_fct,
    strata = 'inhosp_death', 
    addOverall = TRUE) %>% 
  print(formatOptions = list(big.mark = ","),
        missing = TRUE,
        catDigits = 2,
        contDigits = 2,
        showAllLevels = TRUE,
        printToggle = FALSE) %>% 
    as.data.frame() %>% 
    setDT(x = ., keep.rownames = 'variables') %>% 
    dplyr::select(-test)

tab1
                        variables               level         Overall
 1:                             n                              30,393
 2:               age..mean..SD..                        66.39 (7.88)
 3:                       sex....              Female  13316 (43.81) 
 4:                             X                Male  17077 (56.19) 
 5:                 ethnicity....                 Han  30067 (98.93) 
 6:                           X.1             non-Han    326 ( 1.07) 
 7:                occupation....  Public institution    649 ( 2.14) 
 8:                           X.2 Private institution   1604 ( 5.28) 
 9:                           X.3              Farmer  16603 (54.63) 
10:                           X.4             Jobless    727 ( 2.39) 
11:                           X.5             Retired   3749 (12.34) 
12:                           X.6               Other   7061 (23.23) 
13:                  marriage....             Married  25730 (84.66) 
14:                           X.7           Unmarried    848 ( 2.79) 
15:                           X.8             Widowed   1104 ( 3.63) 
16:                           X.9            Divorced    282 ( 0.93) 
17:                          X.10               Other   2429 ( 7.99) 
18:            stroke_subtype....         Hemorrhagic   7250 (23.85) 
19:                          X.11            Ischemic  21834 (71.84) 
20:                          X.12         Unspecified   1309 ( 4.31) 
21:              hypertension....                   0  14343 (47.19) 
22:                          X.13                   1  16050 (52.81) 
23:                  diabetes....                   0  26129 (85.97) 
24:                          X.14                   1   4264 (14.03) 
25: Temperature_MA_07..mean..SD..                        14.86 (8.99)
26:          RH_MA_07..mean..SD..                       65.70 (16.23)
27:      PM2.5_MA_365..mean..SD..                       49.03 (13.13)
28:        SO4_MA_365..mean..SD..                         8.78 (2.22)
29:        NO3_MA_365..mean..SD..                        10.68 (2.84)
30:        NH4_MA_365..mean..SD..                         7.27 (1.74)
31:         OM_MA_365..mean..SD..                        12.06 (3.12)
32:         BC_MA_365..mean..SD..                         2.31 (0.63)
                        variables               level         Overall
                  0              1      p Missing
 1:          30,121            272               
 2:    66.38 (7.87)   67.40 (8.53)  0.034     0.0
 3:  13213 (43.87)    103 (37.87)   0.054     0.0
 4:  16908 (56.13)    169 (62.13)                
 5:  29800 (98.93)    267 (98.16)   0.349     0.0
 6:    321 ( 1.07)      5 ( 1.84)                
 7:    647 ( 2.15)      2 ( 0.74)  <0.001     0.0
 8:   1586 ( 5.27)     18 ( 6.62)                
 9:  16497 (54.77)    106 (38.97)                
10:    721 ( 2.39)      6 ( 2.21)                
11:   3696 (12.27)     53 (19.49)                
12:   6974 (23.15)     87 (31.99)                
13:  25514 (84.71)    216 (79.41)  <0.001     0.0
14:    824 ( 2.74)     24 ( 8.82)                
15:   1090 ( 3.62)     14 ( 5.15)                
16:    277 ( 0.92)      5 ( 1.84)                
17:   2416 ( 8.02)     13 ( 4.78)                
18:   7086 (23.53)    164 (60.29)  <0.001     0.0
19:  21746 (72.20)     88 (32.35)                
20:   1289 ( 4.28)     20 ( 7.35)                
21:  14206 (47.16)    137 (50.37)   0.321     0.0
22:  15915 (52.84)    135 (49.63)                
23:  25910 (86.02)    219 (80.51)   0.012     0.0
24:   4211 (13.98)     53 (19.49)                
25:    14.84 (8.99)   16.65 (8.47)  0.001     0.0
26:   65.66 (16.25)  70.19 (13.59) <0.001     0.0
27:   49.02 (13.14)  50.28 (12.28)  0.114     0.0
28:     8.78 (2.22)    9.11 (2.09)  0.014     0.0
29:    10.68 (2.84)   10.95 (3.00)  0.121     0.0
30:     7.27 (1.74)    7.45 (1.75)  0.091     0.0
31:    12.06 (3.13)   12.70 (2.82)  0.001     0.0
32:     2.31 (0.63)    2.43 (0.55)  0.002     0.0
                  0              1      p Missing

清理结果

tab1_v2 = tab1 %>% 
  mutate(variables = gsub('\\.\\.\\.\\.', '', variables, perl = TRUE),
         variables = fifelse(grepl('X\\.', variables, perl = TRUE)|
                                  variables == 'X', '', variables),
         variables = gsub('\\.\\.mean\\.\\.SD\\.\\.', ', mean (SD)', 
                             variables, perl = TRUE)) 
tab1_v2
                       variables               level         Overall
 1:                            n                              30,393
 2:               age, mean (SD)                        66.39 (7.88)
 3:                          sex              Female  13316 (43.81) 
 4:                                             Male  17077 (56.19) 
 5:                    ethnicity                 Han  30067 (98.93) 
 6:                                          non-Han    326 ( 1.07) 
 7:                   occupation  Public institution    649 ( 2.14) 
 8:                              Private institution   1604 ( 5.28) 
 9:                                           Farmer  16603 (54.63) 
10:                                          Jobless    727 ( 2.39) 
11:                                          Retired   3749 (12.34) 
12:                                            Other   7061 (23.23) 
13:                     marriage             Married  25730 (84.66) 
14:                                        Unmarried    848 ( 2.79) 
15:                                          Widowed   1104 ( 3.63) 
16:                                         Divorced    282 ( 0.93) 
17:                                            Other   2429 ( 7.99) 
18:               stroke_subtype         Hemorrhagic   7250 (23.85) 
19:                                         Ischemic  21834 (71.84) 
20:                                      Unspecified   1309 ( 4.31) 
21:                 hypertension                   0  14343 (47.19) 
22:                                                1  16050 (52.81) 
23:                     diabetes                   0  26129 (85.97) 
24:                                                1   4264 (14.03) 
25: Temperature_MA_07, mean (SD)                        14.86 (8.99)
26:          RH_MA_07, mean (SD)                       65.70 (16.23)
27:      PM2.5_MA_365, mean (SD)                       49.03 (13.13)
28:        SO4_MA_365, mean (SD)                         8.78 (2.22)
29:        NO3_MA_365, mean (SD)                        10.68 (2.84)
30:        NH4_MA_365, mean (SD)                         7.27 (1.74)
31:         OM_MA_365, mean (SD)                        12.06 (3.12)
32:         BC_MA_365, mean (SD)                         2.31 (0.63)
                       variables               level         Overall
                  0              1      p Missing
 1:          30,121            272               
 2:    66.38 (7.87)   67.40 (8.53)  0.034     0.0
 3:  13213 (43.87)    103 (37.87)   0.054     0.0
 4:  16908 (56.13)    169 (62.13)                
 5:  29800 (98.93)    267 (98.16)   0.349     0.0
 6:    321 ( 1.07)      5 ( 1.84)                
 7:    647 ( 2.15)      2 ( 0.74)  <0.001     0.0
 8:   1586 ( 5.27)     18 ( 6.62)                
 9:  16497 (54.77)    106 (38.97)                
10:    721 ( 2.39)      6 ( 2.21)                
11:   3696 (12.27)     53 (19.49)                
12:   6974 (23.15)     87 (31.99)                
13:  25514 (84.71)    216 (79.41)  <0.001     0.0
14:    824 ( 2.74)     24 ( 8.82)                
15:   1090 ( 3.62)     14 ( 5.15)                
16:    277 ( 0.92)      5 ( 1.84)                
17:   2416 ( 8.02)     13 ( 4.78)                
18:   7086 (23.53)    164 (60.29)  <0.001     0.0
19:  21746 (72.20)     88 (32.35)                
20:   1289 ( 4.28)     20 ( 7.35)                
21:  14206 (47.16)    137 (50.37)   0.321     0.0
22:  15915 (52.84)    135 (49.63)                
23:  25910 (86.02)    219 (80.51)   0.012     0.0
24:   4211 (13.98)     53 (19.49)                
25:    14.84 (8.99)   16.65 (8.47)  0.001     0.0
26:   65.66 (16.25)  70.19 (13.59) <0.001     0.0
27:   49.02 (13.14)  50.28 (12.28)  0.114     0.0
28:     8.78 (2.22)    9.11 (2.09)  0.014     0.0
29:    10.68 (2.84)   10.95 (3.00)  0.121     0.0
30:     7.27 (1.74)    7.45 (1.75)  0.091     0.0
31:    12.06 (3.13)   12.70 (2.82)  0.001     0.0
32:     2.31 (0.63)    2.43 (0.55)  0.002     0.0
                  0              1      p Missing

每3位数字加上逗号

add_comma = function(x, separator = ','){
  str_pattern = '(\\d+|\\d+\\.\\d+)(\\s+\\((\\s+|)(\\d+| \\d+)\\.\\d+\\))'
  d_numb = gsub('^\\s+|^\\s+$', '', x) %>% 
    gsub(str_pattern, '\\1', .) %>% 
    as.numeric() %>% 
    suppressWarnings() %>% 
    scales::label_comma(
      accuracy = 0.01, 
      decimal.mark = ".", 
      big.mark = separator)(.)
  d_perc = gsub(str_pattern, '\\2', x)
  
  return_vec = ifelse(
    grepl('(\\d+)( \\()((\\d+| \\d+)\\.\\d+)(\\))', x),
    paste0(d_numb, d_perc) %>% 
      gsub('\\.00', '', .) %>% 
      gsub('\\s+\\(', ' \\(', .),
    x)
  
  return(return_vec)
}

tab1_v3 = tab1_v2 %>% 
  mutate(across(Overall:`1`, add_comma))

tab1_v3
                       variables               level         Overall
 1:                            n                              30,393
 2:               age, mean (SD)                        66.39 (7.88)
 3:                          sex              Female 13,316 (43.81) 
 4:                                             Male 17,077 (56.19) 
 5:                    ethnicity                 Han 30,067 (98.93) 
 6:                                          non-Han    326 ( 1.07) 
 7:                   occupation  Public institution    649 ( 2.14) 
 8:                              Private institution  1,604 ( 5.28) 
 9:                                           Farmer 16,603 (54.63) 
10:                                          Jobless    727 ( 2.39) 
11:                                          Retired  3,749 (12.34) 
12:                                            Other  7,061 (23.23) 
13:                     marriage             Married 25,730 (84.66) 
14:                                        Unmarried    848 ( 2.79) 
15:                                          Widowed  1,104 ( 3.63) 
16:                                         Divorced    282 ( 0.93) 
17:                                            Other  2,429 ( 7.99) 
18:               stroke_subtype         Hemorrhagic  7,250 (23.85) 
19:                                         Ischemic 21,834 (71.84) 
20:                                      Unspecified  1,309 ( 4.31) 
21:                 hypertension                   0 14,343 (47.19) 
22:                                                1 16,050 (52.81) 
23:                     diabetes                   0 26,129 (85.97) 
24:                                                1  4,264 (14.03) 
25: Temperature_MA_07, mean (SD)                        14.86 (8.99)
26:          RH_MA_07, mean (SD)                       65.70 (16.23)
27:      PM2.5_MA_365, mean (SD)                       49.03 (13.13)
28:        SO4_MA_365, mean (SD)                         8.78 (2.22)
29:        NO3_MA_365, mean (SD)                        10.68 (2.84)
30:        NH4_MA_365, mean (SD)                         7.27 (1.74)
31:         OM_MA_365, mean (SD)                        12.06 (3.12)
32:         BC_MA_365, mean (SD)                         2.31 (0.63)
                       variables               level         Overall
                  0             1      p Missing
 1:          30,121           272               
 2:    66.38 (7.87)  67.40 (8.53)  0.034     0.0
 3: 13,213 (43.87)   103 (37.87)   0.054     0.0
 4: 16,908 (56.13)   169 (62.13)                
 5: 29,800 (98.93)   267 (98.16)   0.349     0.0
 6:    321 ( 1.07)     5 ( 1.84)                
 7:    647 ( 2.15)     2 ( 0.74)  <0.001     0.0
 8:  1,586 ( 5.27)    18 ( 6.62)                
 9: 16,497 (54.77)   106 (38.97)                
10:    721 ( 2.39)     6 ( 2.21)                
11:  3,696 (12.27)    53 (19.49)                
12:  6,974 (23.15)    87 (31.99)                
13: 25,514 (84.71)   216 (79.41)  <0.001     0.0
14:    824 ( 2.74)    24 ( 8.82)                
15:  1,090 ( 3.62)    14 ( 5.15)                
16:    277 ( 0.92)     5 ( 1.84)                
17:  2,416 ( 8.02)    13 ( 4.78)                
18:  7,086 (23.53)   164 (60.29)  <0.001     0.0
19: 21,746 (72.20)    88 (32.35)                
20:  1,289 ( 4.28)    20 ( 7.35)                
21: 14,206 (47.16)   137 (50.37)   0.321     0.0
22: 15,915 (52.84)   135 (49.63)                
23: 25,910 (86.02)   219 (80.51)   0.012     0.0
24:  4,211 (13.98)    53 (19.49)                
25:    14.84 (8.99)  16.65 (8.47)  0.001     0.0
26:   65.66 (16.25) 70.19 (13.59) <0.001     0.0
27:   49.02 (13.14) 50.28 (12.28)  0.114     0.0
28:     8.78 (2.22)   9.11 (2.09)  0.014     0.0
29:    10.68 (2.84)     10.95 (3)  0.121     0.0
30:     7.27 (1.74)   7.45 (1.75)  0.091     0.0
31:    12.06 (3.13)  12.70 (2.82)  0.001     0.0
32:     2.31 (0.63)   2.43 (0.55)  0.002     0.0
                  0             1      p Missing

写出成excel进行整理

fwrite(tab1_v3, 'tab1.csv')
getwd()

逻辑回归

fit_logistic = function(x, data = cs_pm, outcome_var = 'inhosp_death'){
  cat(paste0(x, '\n'))
  x_iqr = IQR(data[[x]], na.rm = TRUE)
  
  clean_fit = glm(
    formula = as.formula(paste0(outcome_var, " ~ ", 
                    x, 
                    ' + age + sex + ethnicity + occupation + marriage + stroke_subtype + 
                    hypertension + diabetes + 
                    ns(Temperature_MA_07 , df = 5) + ns(RH_MA_07, df = 5)')),
    family = binomial, #Logistic regression设置连接函数
    data = data) %>% 
    tidy() %>% 
    #filter(grepl('PM|O3|CO|NO2|SO2', term)) %>% 
    mutate(OR = exp(estimate*x_iqr), 3,
           OR_round = round(exp(estimate*x_iqr), 3),
           CI_low = exp(estimate*x_iqr - 1.96*std.error*x_iqr),
           CI_high = exp(estimate*x_iqr + 1.96*std.error*x_iqr),
           CI_low_round = round(exp(estimate*x_iqr - 1.96*std.error*x_iqr), 3),
           CI_high_round = round(exp(estimate*x_iqr + 1.96*std.error*x_iqr), 3),
           predictor = x) %>% 
    mutate(result = paste0(format(OR_round, nsmall = 3), 
                           ' (', 
                           format(CI_low_round, nsmall = 3), 
                           '-', 
                           format(CI_high_round, nsmall = 3),
                           ')') %>% 
             gsub(' ', '', .) %>% 
             gsub('\\(', ' (', .),
           pollutant_IQR = x_iqr) %>% 
    mutate(result = gsub('000e\\+00', '', result)) %>% 
    select(predictor, term, pollutant_IQR, result, OR, CI_low, CI_high, p.value)
  
  return(clean_fit)
}

fit_longterm_365 = purrr::map_df(
  c("PM2.5_MA_365", "SO4_MA_365", "NO3_MA_365", 
    "NH4_MA_365", "OM_MA_365", "BC_MA_365"), 
  fit_logistic,
  data = cs_pm)
PM2.5_MA_365
SO4_MA_365
NO3_MA_365
NH4_MA_365
OM_MA_365
BC_MA_365
fit_longterm_365 %>% 
  filter(predictor == term)
# A tibble: 6 × 8
  predictor    term         pollutant_IQR result       OR CI_low CI_high p.value
  <chr>        <chr>                <dbl> <chr>     <dbl>  <dbl>   <dbl>   <dbl>
1 PM2.5_MA_365 PM2.5_MA_365        15.1   1.142 (0…  1.14  0.978    1.33 0.0939 
2 SO4_MA_365   SO4_MA_365           2.83  1.172 (0…  1.17  0.997    1.38 0.0551 
3 NO3_MA_365   NO3_MA_365           3.28  1.133 (0…  1.13  0.972    1.32 0.111  
4 NH4_MA_365   NH4_MA_365           2.00  1.116 (0…  1.12  0.961    1.30 0.150  
5 OM_MA_365    OM_MA_365            3.47  1.218 (1…  1.22  1.05     1.41 0.00732
6 BC_MA_365    BC_MA_365            0.712 1.197 (1…  1.20  1.04     1.38 0.0143 

暴露反应关系

quantile(cs_pm$PM2.5_MA_365, c(0.25, 0.5, 0.75))
     25%      50%      75% 
40.48217 47.54928 55.58546 
quantile(cs_pm$PM2.5_MA_365, c(0.01, 0.99))
      1%      99% 
21.38067 85.20396 
fit_pm2.5 = glm(
  inhosp_death ~ ns(PM2.5_MA_365, knots = c(40.48, 65.54, 75.58)) + 
    age + sex + ethnicity + occupation + marriage + stroke_subtype,
  family = binomial,
  data = cs_pm %>% 
    filter(PM2.5_MA_365 > 21.38 & PM2.5_MA_365 < 80)) 

sp_pm2.5 = fit_pm2.5 %>%
  termplot(se = T, plot = F) %>%
  .[['PM2.5_MA_365']] %>%
  as.data.table() %>% 
  .[,`:=`(HR = exp(y),
          HRmin = exp(y - 1.96*se),
          HRmax = exp(y + 1.96*se))] %>% 
  .[, `:=`(pollutant = 'PM2.5')] %>% 
  .[]

sp_pm2.5
              x          y        se        HR     HRmin    HRmax pollutant
    1: 21.38206 -0.1801450 0.4066939 0.8351491 0.3763390 1.853313     PM2.5
    2: 21.38321 -0.1801495 0.4066593 0.8351453 0.3763627 1.853179     PM2.5
    3: 21.38823 -0.1801692 0.4065088 0.8351289 0.3764664 1.852596     PM2.5
    4: 21.39237 -0.1801854 0.4063849 0.8351154 0.3765517 1.852116     PM2.5
    5: 21.39479 -0.1801949 0.4063122 0.8351074 0.3766018 1.851835     PM2.5
   ---                                                                     
29033: 79.90769 -0.4279915 0.7258555 0.6518169 0.1571321 2.703873     PM2.5
29034: 79.91672 -0.4278512 0.7271430 0.6519084 0.1567581 2.711085     PM2.5
29035: 79.94468 -0.4274167 0.7311304 0.6521917 0.1556054 2.733544     PM2.5
29036: 79.97850 -0.4268909 0.7359584 0.6525347 0.1542209 2.760985     PM2.5
29037: 79.99940 -0.4265660 0.7389445 0.6527468 0.1533707 2.778095     PM2.5
arr = list('PM1' = TeX("PM$_{1}$"),  
           "PM2.5" = TeX("PM$_{2.5}$"), 
           "PM10" = TeX("PM$_{10}$"))
mylabel = function(val) { return(lapply(val, function(x) arr[x])) }

color7 = c('#7570b3', '#d95f02', '#1b9e77')

p_pm2.5 = sp_pm2.5 %>% 
  ggplot(aes(x = x, y = HR)) +
  geom_hline(yintercept = 1, linewidth = 0.8, alpha = 0.6, linetype = 'dashed') +
  geom_ribbon(aes(ymin = HRmin, ymax = HRmax), alpha = 0.3, color = NA, fill = color7[1]) +
  geom_line(linewidth = 1.5, color = color7[1]) +
  facet_wrap(~ pollutant, scales = 'free', labeller = mylabel) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0.05)) +
  labs(x = NULL, y = 'Odds of death') +
  theme_test(base_size = 20, base_family = 'serif') + # , base_family = 'serif'
  theme(plot.margin = unit(c(0, 0.5, 0, 0.2), 'cm'),
        plot.title = element_text(size = 20, face = 'bold'),
        strip.background = element_rect(fill = ggplot2::alpha(color7[1], 0.3), color = NA),
        strip.text = element_text(size = 18, face = 'bold'))

p_pm2.5

PAF

PAF点估计值

af_point = function(quantile_threshold = 0.05, dat = cs_pm){

  PM2.5_threshold = dat[,quantile(PM2.5_MA_365, quantile_threshold, na.rm = TRUE)]
  
  dshort_new = dat %>% 
    copy() %>% 
    .[,`:=`(PM2.5_MA_365 = fifelse(PM2.5_MA_365 >= PM2.5_threshold, 
                                   PM2.5_threshold, 
                                   PM2.5_MA_365))]
  
  fit_PM2.5 = glm(formula = as.formula(glue::glue(
      "inhosp_death ~ PM2.5_MA_365  + age + sex + ethnicity + occupation + marriage + 
      stroke_subtype + hypertension + diabetes + 
      ns(Temperature_MA_07 , df = 5) + ns(RH_MA_07, df = 5)")), 
      family = binomial, data = dat)
  
  
  dshort_pred = dat %>% 
    .[,.(inhosp_death,
         death_PM2.5_pred = predict(fit_PM2.5, newdata = dshort_new, type = 'response'))]
  
  return(dshort_pred[,.(obs_outcome = mean(inhosp_death),
                        PM2.5_pred = mean(death_PM2.5_pred, na.rm = T))] %>% 
           mutate(PAF = (obs_outcome - PM2.5_pred)/obs_outcome))
}

af_result_0.05 = af_point(0.05)

af_result_0.05
   obs_outcome  PM2.5_pred       PAF
1: 0.008949429 0.007508829 0.1609711

PAF区间估计

利用Bootstrap 20次估计PAF的95%置信区间。这里作为示例只展示Bootstrap 20次,实际研究过程中建议bootstrap至少200次以上,一般都需要bootstrap 1000次。

set.seed(123)
n_interations = 100
paf_container = vector("list", length = n_interations)

for (i in 1:n_interations) {
  print(glue('***********{i}/{n_interations}, {round(i/n_interations*100, 1)}%***********'))
  cs_pm_resample = cs_pm[sample(.N, .N, replace = TRUE), ]
  
  paf_container[[i]] = af_point(dat = cs_pm_resample)
}
***********1/100, 1%***********
***********2/100, 2%***********
***********3/100, 3%***********
***********4/100, 4%***********
***********5/100, 5%***********
***********6/100, 6%***********
***********7/100, 7%***********
***********8/100, 8%***********
***********9/100, 9%***********
***********10/100, 10%***********
***********11/100, 11%***********
***********12/100, 12%***********
***********13/100, 13%***********
***********14/100, 14%***********
***********15/100, 15%***********
***********16/100, 16%***********
***********17/100, 17%***********
***********18/100, 18%***********
***********19/100, 19%***********
***********20/100, 20%***********
***********21/100, 21%***********
***********22/100, 22%***********
***********23/100, 23%***********
***********24/100, 24%***********
***********25/100, 25%***********
***********26/100, 26%***********
***********27/100, 27%***********
***********28/100, 28%***********
***********29/100, 29%***********
***********30/100, 30%***********
***********31/100, 31%***********
***********32/100, 32%***********
***********33/100, 33%***********
***********34/100, 34%***********
***********35/100, 35%***********
***********36/100, 36%***********
***********37/100, 37%***********
***********38/100, 38%***********
***********39/100, 39%***********
***********40/100, 40%***********
***********41/100, 41%***********
***********42/100, 42%***********
***********43/100, 43%***********
***********44/100, 44%***********
***********45/100, 45%***********
***********46/100, 46%***********
***********47/100, 47%***********
***********48/100, 48%***********
***********49/100, 49%***********
***********50/100, 50%***********
***********51/100, 51%***********
***********52/100, 52%***********
***********53/100, 53%***********
***********54/100, 54%***********
***********55/100, 55%***********
***********56/100, 56%***********
***********57/100, 57%***********
***********58/100, 58%***********
***********59/100, 59%***********
***********60/100, 60%***********
***********61/100, 61%***********
***********62/100, 62%***********
***********63/100, 63%***********
***********64/100, 64%***********
***********65/100, 65%***********
***********66/100, 66%***********
***********67/100, 67%***********
***********68/100, 68%***********
***********69/100, 69%***********
***********70/100, 70%***********
***********71/100, 71%***********
***********72/100, 72%***********
***********73/100, 73%***********
***********74/100, 74%***********
***********75/100, 75%***********
***********76/100, 76%***********
***********77/100, 77%***********
***********78/100, 78%***********
***********79/100, 79%***********
***********80/100, 80%***********
***********81/100, 81%***********
***********82/100, 82%***********
***********83/100, 83%***********
***********84/100, 84%***********
***********85/100, 85%***********
***********86/100, 86%***********
***********87/100, 87%***********
***********88/100, 88%***********
***********89/100, 89%***********
***********90/100, 90%***********
***********91/100, 91%***********
***********92/100, 92%***********
***********93/100, 93%***********
***********94/100, 94%***********
***********95/100, 95%***********
***********96/100, 96%***********
***********97/100, 97%***********
***********98/100, 98%***********
***********99/100, 99%***********
***********100/100, 100%***********
paf_ci = rbindlist(paf_container)
paf_ci
     obs_outcome  PM2.5_pred           PAF
  1: 0.008620406 0.007681057  0.1089680657
  2: 0.009179745 0.008659459  0.0566776718
  3: 0.008752015 0.006253478  0.2854814080
  4: 0.009837792 0.006377192  0.3517658983
  5: 0.009739085 0.007615361  0.2180619775
  6: 0.009574573 0.007910489  0.1738023853
  7: 0.009936499 0.008899455  0.1043670505
  8: 0.009212648 0.006777885  0.2642848170
  9: 0.007830751 0.006827075  0.1281710132
 10: 0.008620406 0.007781756  0.0972865861
 11: 0.009146843 0.007734354  0.1544236551
 12: 0.009179745 0.008443804  0.0801701249
 13: 0.008686211 0.007134025  0.1786953767
 14: 0.009344257 0.008165864  0.1261088339
 15: 0.009442964 0.007166227  0.2411040393
 16: 0.008357188 0.006683295  0.2002937134
 17: 0.009344257 0.008217721  0.1205591614
 18: 0.008916527 0.006886432  0.2276777558
 19: 0.010035205 0.008648296  0.1382043644
 20: 0.008784918 0.005455455  0.3789975504
 21: 0.007337216 0.005090031  0.3062721336
 22: 0.009113941 0.009101538  0.0013608323
 23: 0.009278452 0.007436352  0.1985353426
 24: 0.008719113 0.007863549  0.0981250576
 25: 0.008455894 0.006710232  0.2064433148
 26: 0.009377159 0.007469204  0.2034683349
 27: 0.008159774 0.008824539 -0.0814686645
 28: 0.008357188 0.006498610  0.2223926509
 29: 0.010101010 0.008610808  0.1475300196
 30: 0.008949429 0.006611611  0.2612254203
 31: 0.009640378 0.008126957  0.1569877047
 32: 0.008554601 0.007683337  0.1018474280
 33: 0.009771987 0.008388531  0.1415737064
 34: 0.009870694 0.008891180  0.0992345595
 35: 0.009081038 0.007691470  0.1530186919
 36: 0.008126871 0.006469609  0.2039237834
 37: 0.009771987 0.008752927  0.1042838305
 38: 0.008554601 0.006142376  0.2819798891
 39: 0.008455894 0.006618378  0.2173059278
 40: 0.008784918 0.006405792  0.2708192900
 41: 0.008159774 0.007544559  0.0753960324
 42: 0.007896555 0.007108500  0.0997973453
 43: 0.009344257 0.007038899  0.2467138592
 44: 0.008455894 0.007447039  0.1193079011
 45: 0.008817820 0.008030363  0.0893028858
 46: 0.009344257 0.007250965  0.2240191203
 47: 0.008784918 0.007685911  0.1251015271
 48: 0.008554601 0.005838311  0.3175238467
 49: 0.008883625 0.007912897  0.1092715815
 50: 0.009245550 0.007243881  0.2165008287
 51: 0.009574573 0.009036782  0.0561687166
 52: 0.009410062 0.007154873  0.2396571524
 53: 0.010002303 0.008537752  0.1464214248
 54: 0.008719113 0.007580557  0.1305816801
 55: 0.009541671 0.007707775  0.1921985957
 56: 0.008883625 0.007255302  0.1832947972
 57: 0.009804889 0.008590274  0.1238785335
 58: 0.009706182 0.008019480  0.1737760371
 59: 0.008488797 0.008486176  0.0003087035
 60: 0.009081038 0.008144506  0.1031305619
 61: 0.009048136 0.008198673  0.0938826819
 62: 0.009607475 0.008311688  0.1348728758
 63: 0.009048136 0.007776896  0.1404974944
 64: 0.008455894 0.007575844  0.1040753604
 65: 0.009377159 0.007608957  0.1885648219
 66: 0.008357188 0.007318631  0.1242710357
 67: 0.009015234 0.007737264  0.1417567305
 68: 0.008093969 0.006485356  0.1987421185
 69: 0.008817820 0.006924460  0.2147197696
 70: 0.009344257 0.008036570  0.1399455578
 71: 0.009377159 0.009474904 -0.0104237503
 72: 0.008521699 0.006982849  0.1805802273
 73: 0.009410062 0.009012932  0.0422026435
 74: 0.009212648 0.007879372  0.1447223365
 75: 0.008784918 0.008637802  0.0167464061
 76: 0.009179745 0.008190551  0.1077584053
 77: 0.009212648 0.007134454  0.2255805287
 78: 0.007929457 0.006998025  0.1174648460
 79: 0.010364229 0.007860364  0.2415872084
 80: 0.009969401 0.009802350  0.0167563156
 81: 0.008488797 0.007865742  0.0733973121
 82: 0.008258481 0.006478380  0.2155481958
 83: 0.008883625 0.006996482  0.2124293387
 84: 0.008521699 0.006974390  0.1815728790
 85: 0.008028164 0.005682696  0.2921549703
 86: 0.008949429 0.007296405  0.1847072211
 87: 0.009377159 0.007287803  0.2228133652
 88: 0.007995262 0.006496246  0.1874880446
 89: 0.009146843 0.008397647  0.0819076001
 90: 0.009015234 0.007131814  0.2089152432
 91: 0.009311355 0.008389162  0.0990395410
 92: 0.008291383 0.005797898  0.3007321204
 93: 0.009245550 0.008421172  0.0891647998
 94: 0.008093969 0.006994094  0.1358882429
 95: 0.008752015 0.008070753  0.0778406294
 96: 0.009146843 0.006956689  0.2394436688
 97: 0.008916527 0.006976922  0.2175292227
 98: 0.008653308 0.006339787  0.2673568962
 99: 0.008686211 0.005955926  0.3143240527
100: 0.008620406 0.006505312  0.2453589781
     obs_outcome  PM2.5_pred           PAF

报告PAF的95%置信区间

paf_ci[,quantile(PAF, c(0.025, 0.975))]
        2.5%        97.5% 
0.0008084647 0.3160039446