---
title: "RR"
output: html_document
date: '2022-03-01'
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Code design

The chunks below exercise the full LRR estimation pipeline on simulated case-control data: generate data and inverse-probability-of-sampling weights, estimate the initial likelihood components (outcome regressions `EY1`, `EY0` and propensity score `pA1`), fit candidate log-relative-risk (LRR) learners with and without sieve augmentation, and select among the candidates by cross-validated double-robust (DR) risk. The estimation helpers (`estimate_initial_likelihood()`, `train_LRR_learners()`, `DR_risk_function_LRR()`, and friends) are assumed to be provided by the accompanying package and are not defined in this file.
```{r}
library(sl3)
library(data.table)

# Mock data: covariates W, binary treatment A, binary outcome Y
n <- 500
W1 <- runif(n, -1, 1)
W2 <- rbinom(n, 1, plogis(W1))
W <- cbind(W1, W2)
A <- rbinom(n, 1, plogis(W1 + W2 - 0.5))
Y <- rbinom(n, 1, plogis(-2 + W1 + W2 + A * (1 + W1 + W2)))

# Case-control sampling: keep every case (Y = 1) and a 50% subsample of
# controls (Y = 0); R is the sampling indicator.
R <- as.numeric(1:n %in% c(which(Y == 1),
                           which(Y == 0)[rbinom(sum(Y == 0), size = 1, prob = 0.5) == 1]))
# Inverse-probability-of-sampling weights (known sampling probabilities)
weights <- R / ifelse(Y == 1, 1, 0.5)
keep <- R != 0
W <- W[keep, ]
A <- A[keep]
Y <- Y[keep]
weights <- weights[keep]

# Same glmnet learner (all two-way interactions) for the outcome regression
# and the propensity score
lrnr <- Lrnr_glmnet$new(formula = ~.^2)
likelihood <- estimate_initial_likelihood(W, A, Y, weights, lrnr, lrnr)

# Throwaway sl3 task with a random placeholder outcome Z, used only to
# exercise task construction
data <- data.table(W)
covariates <- colnames(W)
data$Z <- runif(nrow(data))
task <- sl3_Task$new(data, covariates = covariates, outcome = "Z", outcome_type = "binomial")
```
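
The helper `estimate_initial_likelihood()` is not defined in this document; judging from how its result is used below, it returns the outcome regressions under both treatment levels and the propensity score. A minimal sketch of such a helper using the sl3 API, with the argument order and return names assumed from the calls in this file; the real implementation may differ (for instance, by cross-fitting):

```{r, eval = FALSE}
# Hypothetical stand-in for estimate_initial_likelihood(); not the package
# implementation. Returns EY1 = E[Y | A = 1, W], EY0 = E[Y | A = 0, W],
# and pA1 = P(A = 1 | W), all fit with the sampling weights.
estimate_initial_likelihood_sketch <- function(W, A, Y, weights, lrnr_A, lrnr_Y) {
  dat <- data.table(W, A = A, Y = Y, wts = weights)
  covs <- colnames(W)

  # Weighted outcome regression E[Y | A, W]
  task_Y <- sl3_Task$new(dat, covariates = c(covs, "A"), outcome = "Y",
                         outcome_type = "binomial", weights = "wts")
  fit_Y <- lrnr_Y$train(task_Y)

  # Counterfactual predictions with A set to 1 and to 0 for everyone
  dat1 <- copy(dat)[, A := 1]
  dat0 <- copy(dat)[, A := 0]
  EY1 <- fit_Y$predict(sl3_Task$new(dat1, covariates = c(covs, "A"),
                                    outcome = "Y", outcome_type = "binomial"))
  EY0 <- fit_Y$predict(sl3_Task$new(dat0, covariates = c(covs, "A"),
                                    outcome = "Y", outcome_type = "binomial"))

  # Weighted propensity score P(A = 1 | W)
  task_A <- sl3_Task$new(dat, covariates = covs, outcome = "A",
                         outcome_type = "binomial", weights = "wts")
  pA1 <- lrnr_A$train(task_A)$predict(task_A)

  list(EY1 = EY1, EY0 = EY0, pA1 = pA1)
}
```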
```{r, include = T}
library(sl3)
library(data.table)
library(delayed)
library(stringr)
library(origami)

# Larger mock data set with a known true log relative risk (LRR)
n <- 2500
W1 <- runif(n, -1, 1)
W2 <- runif(n, -1, 1)  # alternative: rbinom(n, 1, plogis(W1))
W <- cbind(W1, W2)
A <- rbinom(n, 1, plogis(0.5 * (W1 + W2)))
Y <- rbinom(n, 1, plogis(-1 + W1 + W2 + A * (1 + W1 + W2)))
# True LRR: log ratio of the counterfactual outcome probabilities
LRR <- log(plogis(-1 + W1 + W2 + 1 * (1 + W1 + W2)) /
           plogis(-1 + W1 + W2 + 0 * (1 + W1 + W2)))

# Case-control sampling as before, but with the sampling probabilities
# estimated empirically from the sampling indicator R
R <- as.numeric(1:n %in% c(which(Y == 1),
                           which(Y == 0)[rbinom(sum(Y == 0), size = 1, prob = 0.5) == 1]))
pR0 <- mean(R[Y == 0])
pR1 <- mean(R[Y == 1])
weights <- R / ifelse(Y == 1, pR1, pR0)
keep <- R != 0
W <- W[keep, ]
A <- A[keep]
Y <- Y[keep]
weights <- weights[keep]
LRR <- LRR[keep]

# Initial likelihood learner: HAL. (Alternative: a cross-validated xgboost
# stack with a log-likelihood selector, commented out below.)
# lrnr <- make_learner(Pipeline,
#   Lrnr_cv$new(Stack$new(Lrnr_xgboost$new(max_depth = 5),
#                         Lrnr_xgboost$new(max_depth = 4),
#                         Lrnr_xgboost$new(max_depth = 3))),
#   Lrnr_cv_selector$new(loss_loglik_binomial))
lrnr <- Lrnr_hal9001$new(max_degree = 2, smoothness_orders = 1, num_knots = c(2, 1))
likelihood <- estimate_initial_likelihood(W, A, Y, weights, lrnr, lrnr)
# lrnr <- Lrnr_glm$new(formula = ~.^2)  # (unused leftover)
EY1 <- likelihood$EY1
EY0 <- likelihood$EY0
pA1 <- likelihood$pA1

# Sanity checks: estimated outcome regressions and relative risks vs. truth
data.table(EY1, plogis(-1 + W[, 1] + W[, 2] + 1 * (1 + W[, 1] + W[, 2])),
           EY0, plogis(-1 + W[, 1] + W[, 2] + 0 * (1 + W[, 1] + W[, 2])))
data.table(EY1 / EY0, exp(LRR))  # LRR was already subset by `keep` above

# LRR working-model fits via empirical risk minimization (ERM):
# plugin vs. IPW risk, each with a logistic working model
pred <- estimate_LRR_using_ERM(W, A, Y, EY1, EY0, pA1, weights,
                               Lrnr_glm$new(family = binomial()),
                               learning_method = c("plugin"))
quantile(exp(pred$LRR_train))
pred <- estimate_LRR_using_ERM(W, A, Y, EY1, EY0, pA1, weights,
                               Lrnr_glm$new(family = binomial()),
                               learning_method = c("IPW"))
quantile(exp(pred$LRR_train))
```
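
Some intuition for the two `learning_method` options: a weighted binomial log-likelihood with "success" weight `EY1` and "failure" weight `EY0` is minimized at `plogis(f) = EY1 / (EY1 + EY0)`, i.e. at `f = log(EY1 / EY0)`, which is exactly the LRR. The plugin risk uses the estimated outcome regressions as those weights; the IPW risk replaces them with inverse-probability-weighted observed outcomes, which has the same population minimizer. A stand-alone sketch of the IPW variant with plain `glm()` (hypothetical helper; `estimate_LRR_using_ERM()` may differ in details):

```{r, eval = FALSE}
# Sketch: IPW empirical-risk minimization for the LRR with a logistic
# working model. Regressing A on W among cases, with weights
# Y / (A * pA1 + (1 - A) * (1 - pA1)) times the sampling weights, has
# population log-odds equal to log(EY1 / EY0).
ipw_LRR_glm <- function(W, A, Y, pA1, weights) {
  ipw <- weights * Y / ifelse(A == 1, pA1, 1 - pA1)
  dat <- data.frame(W, A = A)
  # glm() warns about non-integer "successes" with fractional weights;
  # the coefficient estimates are still the weighted-risk minimizers.
  fit <- suppressWarnings(
    glm(A ~ ., data = dat, family = binomial(), weights = ipw)
  )
  predict(fit, newdata = dat, type = "link")  # fitted LRR on the log scale
}

quantile(exp(ipw_LRR_glm(W, A, Y, pA1, weights)))
```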
```{r, include = F}
# Candidate sieves: no sieve (NULL) plus four Fourier bases of increasing
# complexity
list_of_sieves <- list(
  NULL,
  fourier_basis$new(orders = c(1, 0)),
  fourier_basis$new(orders = c(2, 0)),
  fourier_basis$new(orders = c(1, 1)),
  fourier_basis$new(orders = c(2, 1))
)

# Candidate base learners for the LRR
list_of_learners <- list(
  Lrnr_gam$new(family = binomial()),
  # Lrnr_xgboost$new(max_depth = 5, objective = "reg:logistic"),
  Lrnr_hal9001$new(max_degree = 2, smoothness_orders = 1, num_knots = c(8, 5),
                   lambda = c(0.0005, 0.01), fit_control = list(cv_select = FALSE),
                   family = binomial())
)
learner_names <- sapply(list_of_learners, `[[`, "name")

# Full-data fits: every learner crossed with every sieve, then keep the
# best sieve per learner
full_fit_LRR <- train_LRR_learners(V = W, A, Y, EY1, EY0, pA1, weights,
                                   list_of_learners, list_of_sieves, Vpred = W)
all_LRR_full_best <- subset_best_sieve(full_fit_LRR, learner_names, A, Y,
                                       EY1, EY0, pA1, weights)
# all_LRR_full <- do.call(cbind, lapply(full_fit_LRR, `[[`, "LRR_train"))

# Cross-validated fits: repeat the training within each origami fold; the
# per-fold fits are delayed objects, bundled and computed together
folds <- origami::make_folds(n = length(A))
LRR_learners_by_fold <- lapply(folds, train_LRR_learners_using_fold, W, A, Y,
                               EY1, EY0, pA1, weights, list_of_learners, list_of_sieves)
names(LRR_learners_by_fold) <- seq_along(folds)
LRR_learners_by_fold <- unlist(LRR_learners_by_fold)
learner_sieve_names <- names(LRR_learners_by_fold)
LRR_learners_by_fold_delayed <- bundle_delayed(LRR_learners_by_fold)
LRR_learners_by_fold <- LRR_learners_by_fold_delayed$compute()
# names(LRR_learners_by_fold) <- learner_sieve_names
```
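
`subset_best_sieve()` presumably compares, for each base learner, its sieve variants by double-robust risk and keeps the lowest-risk one. A rough sketch of that selection logic, assuming each fit exposes `$LRR_train` and that fit names contain the base learner's name (hypothetical details; the packaged function may organize this differently):

```{r, eval = FALSE}
# Hypothetical sketch of per-learner sieve selection by DR risk; not the
# package implementation.
select_best_sieve_sketch <- function(fits, learner_names,
                                     A, Y, EY1, EY0, pA1, weights) {
  best <- lapply(learner_names, function(nm) {
    # All sieve variants of this base learner (matched by name)
    variants <- fits[grepl(nm, names(fits), fixed = TRUE)]
    risks <- sapply(variants, function(fit)
      DR_risk_function_LRR(matrix(fit$LRR_train, ncol = 1),
                           A, Y, EY1, EY0, pA1, weights))
    variants[[which.min(risks)]]  # keep the lowest-risk variant
  })
  names(best) <- learner_names
  best
}
```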
```{r, include = F}
# For each fold, keep the best sieve per learner (as above, but on the
# fold-specific fits)
output <- subset_best_sieve_all_folds(folds, LRR_learners_by_fold, learner_names,
                                      A, Y, EY1, EY0, pA1, weights)

# Collect validation-set LRR predictions from every fold
cv_fun <- function(fold) {
  v <- fold_index()
  index <- validation()
  list(index = index,
       fold_index = rep(v, length(index)),
       predictions = as.data.table(do.call(cbind, lapply(output[[v]], `[[`, "LRR_pred"))))
}
comb_ctrl <- list(combiners = list(
  index = combiner_c, fold_index = combiner_c,
  predictions = function(x) rbindlist(x, fill = TRUE)
))
cv_predictions <- origami::cross_validate(cv_fun, folds, .combine_control = comb_ctrl)
# Restore the original row order, then pick the candidate with the smallest
# cross-validated DR risk
cv_predictions <- as.matrix(cv_predictions$predictions[order(cv_predictions$index), ])
best_learner_index_cv <- which.min(DR_risk_function_LRR(cv_predictions, A, Y, EY1, EY0, pA1, weights))
```
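
The CV selector needs one risk estimate per column of candidate predictions. A plausible, assumed form for `DR_risk_function_LRR()` is the plugin loss augmented with AIPW-style residual corrections; the sketch below shows that form (the packaged function may differ):

```{r, eval = FALSE}
# Assumed-form sketch of a double-robust risk for candidate LRR fits
# (one candidate per column of `preds`); not the package implementation.
DR_risk_sketch <- function(preds, A, Y, EY1, EY0, pA1, weights) {
  preds <- as.matrix(preds)
  w1 <- EY1 + (A / pA1) * (Y - EY1)              # AIPW "success" weight
  w0 <- EY0 + ((1 - A) / (1 - pA1)) * (Y - EY0)  # AIPW "failure" weight
  apply(preds, 2, function(f)
    weighted.mean(w1 * log1p(exp(-f)) + w0 * log1p(exp(f)), weights))
}
```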
```{r}
# Winning candidate index and the full vector of cross-validated DR risks
best_learner_index_cv
DR_risk_function_LRR(cv_predictions, A, Y, EY1, EY0, pA1, weights)

# Assemble the full-data predictions of the per-learner best-sieve fits
tmp <- lapply(all_LRR_full_best, `[[`, "LRR_pred")
tmp_names <- names(all_LRR_full_best)
all_LRR_full_predictions <- do.call(cbind, tmp)
colnames(all_LRR_full_predictions) <- tmp_names
dim(all_LRR_full_predictions)
dim(cv_predictions)
```