Skip to content

Commit dcda713

Browse files
committed
Nima approved version
1 parent 323e3b8 commit dcda713

35 files changed

+702
-473
lines changed

.Rbuildignore

+32-8
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,34 @@
1+
^\.git
2+
^\.git/*
3+
^\.gitignore$
14
^.*\.Rproj$
25
^\.Rproj\.user$
3-
^README-refs.bib
4-
^README.Rmd
5-
^README.knit.md
6-
^README.utf8.md
7-
^Simulations/
8-
^sandbox/
9-
^vignette/
10-
^Makefile
6+
^README\.md
7+
8+
# Automated testing files.
9+
^travis\.yml$
10+
^appveyor\.yml$
11+
^codecov\.yml$
12+
# Files from building the Guide-to-SuperLearner.rmd vignette.
13+
^SuperLearner.png$
14+
^vignettes/[^/]*_cache
15+
# other miscellaneous non-CRAN files
16+
xgboost.model
17+
deploy.sh
18+
^sandbox$
19+
^Simulations$
20+
^vignette$
21+
^docs$
22+
^README\.Rmd$
23+
^README\.utf8.md$
24+
^README\.knit.md$
25+
^README-.*\.Rmd$
26+
^README\.html$
27+
^README-.*\.png$
28+
^README-refs.bib$
29+
^cran-comments.md$
30+
^CONTRIBUTING\.md$
31+
^Makefile$
32+
^LICENSE$
33+
man-roxygen
34+
^_pkgdown\.yml$

.gitignore

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
.Rhistory
33
.RData
44
.Ruserdata
5+
.future
6+
README.html
7+
vignettes/*.html
8+
9+
# macOS files
510
.DS_Store
6-
tmle3mopttx.Rproj
7-
.DS_Store
11+
inst/doc

DESCRIPTION

+18-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
Package: tmle3mopttx
2-
Title: Targeted Maximum Likelihood Estimation of the Mean under Optimal Individualized Treatment
2+
Title: Targeted Maximum Likelihood Estimation of the Mean under Optimal
3+
Individualized Treatment
34
Version: 0.1.0
45
Authors@R: c(
56
person("Ivana", "Malenica", email = "[email protected]",
@@ -10,32 +11,36 @@ Authors@R: c(
1011
comment = c(ORCID = "0000-0002-9874-6649")),
1112
person("Mark", "van der Laan", email = "[email protected]",
1213
role = c("aut", "ths")))
13-
Description: This package estimates the optimal individualized treatment rule for the categorical treatment
14-
using Super Learner (sl3). In order to avoid nesting cross-validation, it uses split-specific estimates
15-
of Q and g to estimate the rule as described by Coyle et al. In addition it provides the Targeted Maximum
16-
Likelihood estimates of the mean performance using CV-TMLE under such estimated rules. This is an
17-
adapter package for use with the tmle3 framework and the tlverse software ecosystem for Targeted Learning.
18-
Depends: R (>= 3.5.0)
14+
Description: This package estimates the optimal individualized treatment rule
15+
for the categorical treatment using Super Learner (sl3). In order to avoid
16+
nested cross-validation, it uses split-specific estimates of Q and g to
17+
estimate the rule as described by Coyle et al. In addition it provides the
18+
Targeted Maximum Likelihood estimates of the mean performance using CV-TMLE
19+
under such estimated rules. This is an adapter package for use with the
20+
tmle3 framework and the tlverse software ecosystem for Targeted Learning.
21+
Depends: R (>= 3.4.0)
1922
License: GPL-3
2023
Encoding: UTF-8
2124
LazyData: true
2225
LazyLoad: yes
2326
Imports:
24-
data.table,
25-
assertthat,
2627
R6,
2728
uuid,
29+
stats,
2830
methods,
29-
stats
31+
data.table,
32+
assertthat,
33+
sl3,
34+
tmle3
3035
Suggests:
3136
testthat,
3237
knitr,
3338
rmarkdown
3439
Remotes:
35-
github::tlverse/tmle3,
36-
github::tlverse/sl3,
40+
github::tlverse/delayed,
3741
github::tlverse/origami,
38-
github::tlverse/delayed
42+
github::tlverse/sl3,
43+
github::tlverse/tmle3
3944
BugReports: https://github.com/tlverse/tmle3mopttx/issues
4045
VignetteBuilder: knitr
4146
RoxygenNote: 6.1.1

R/LF_rule.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
#' \code{define_lf(LF_static, name, type, value, ...)}
1515
#'
1616
#' \describe{
17-
#' \item{\code{name}}{character, the name of the factor. Should match a node name
17+
#' \item{\code{name}}{character, the name of the factor. Should match a node name
1818
#' in the nodes specified by tmle3_Task.}
19-
#'
19+
#'
2020
#' \item{\code{type}}{character, either 'density', for conditional density or, 'mean' for conditional mean
2121
#' }
2222
#' \item{\code{value}}{the static value

R/Optimal_Rule_Q_learning.R

+6-6
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ Optimal_Rule_Q_learning <- R6Class(
2424
# todo: function
2525
A_vals <- tmle_task$npsem$A$variable_type$levels
2626
A_vals <- factor(A_vals, A_vals)
27-
27+
2828
# Generate counterfactual tasks for each value of A:
2929
cf_tasks <- lapply(A_vals, function(A_val) {
30-
#if(is.character(A_val)){
30+
# if(is.character(A_val)){
3131
# A_val<-as.numeric(A_val)
32-
#A_val<-as.factor(A_val)
33-
#}
34-
A_val<-as.numeric(A_val)
32+
# A_val<-as.factor(A_val)
33+
# }
34+
A_val <- as.numeric(A_val)
3535
newdata <- data.table(A = A_val)
3636
cf_task <- tmle_task$generate_counterfactual_task(UUIDgenerate(), new_data = newdata)
3737
return(cf_task)
@@ -40,7 +40,7 @@ Optimal_Rule_Q_learning <- R6Class(
4040
private$.cf_tasks <- cf_tasks
4141
},
4242

43-
rule = function(tmle_task, fold_number="full") {
43+
rule = function(tmle_task, fold_number = "full") {
4444
# Get Q(a,W) for each level of A, all folds
4545
blip_fin <- sapply(private$.cf_tasks, private$.likelihood$get_likelihood, "Y", fold_number)
4646

R/Optimal_Rule_Revere.R

+70-73
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Learns the Optimal Rule given a tmle_task and likelihood, using the Revere framework.
22
#' Complements 'tmle3_Spec_mopttx_blip_revere'.
3-
#'
3+
#'
44
#'
55
#' @importFrom R6 R6Class
66
#' @importFrom data.table data.table
@@ -14,16 +14,16 @@ Optimal_Rule_Revere <- R6Class(
1414
inherit = tmle3_Spec,
1515
lock_objects = FALSE,
1616
public = list(
17-
initialize = function(tmle_task, likelihood, fold_number = "split-specific", V = NULL,
18-
blip_type = "blip2", learners, maximize = TRUE, realistic=FALSE) {
17+
initialize = function(tmle_task, likelihood, fold_number = "split-specific", V = NULL,
18+
blip_type = "blip2", learners, maximize = TRUE, realistic = FALSE) {
1919
private$.tmle_task <- tmle_task
2020
private$.likelihood <- likelihood
2121
private$.fold_number <- fold_number
2222
private$.blip_type <- blip_type
2323
private$.learners <- learners
2424
private$.maximize <- maximize
2525
private$.realistic <- realistic
26-
26+
2727
if (missing(V)) {
2828
V <- tmle_task$npsem$W$variables
2929
}
@@ -47,64 +47,63 @@ Optimal_Rule_Revere <- R6Class(
4747
DR <- data.frame(private$.DR_full[[v]])
4848
return(data.frame(DR[indx, ]))
4949
},
50-
51-
blip_revere_function = function(tmle_task, fold_number){
52-
50+
51+
blip_revere_function = function(tmle_task, fold_number) {
5352
likelihood <- self$likelihood
5453
A_vals <- tmle_task$npsem$A$variable_type$levels
5554
V <- self$V
56-
55+
5756
# Generate counterfactual tasks for each value of A:
5857
cf_tasks <- lapply(A_vals, function(A_val) {
59-
if(is.character(A_val)){
60-
A_val<-as.numeric(A_val)
61-
#A_val<-as.factor(A_val)
58+
if (is.character(A_val)) {
59+
A_val <- as.numeric(A_val)
60+
# A_val<-as.factor(A_val)
6261
}
6362
newdata <- data.table(A = A_val)
6463
cf_task <- tmle_task$generate_counterfactual_task(UUIDgenerate(), new_data = newdata)
6564
return(cf_task)
6665
})
67-
66+
6867
# DR A-IPW mapping of blip
6968
A <- tmle_task$get_tmle_node("A")
7069
Y <- tmle_task$get_tmle_node("Y")
7170
A_vals <- tmle_task$npsem$A$variable_type$levels
72-
A_ind <- self$factor_to_indicators(A,A_vals)
71+
A_ind <- self$factor_to_indicators(A, A_vals)
7372
Y_mat <- replicate(length(A_vals), Y)
74-
75-
#Use fold_number fits for Q and g:
73+
74+
# Use fold_number fits for Q and g:
7675
Q_vals <- sapply(cf_tasks, likelihood$get_likelihood, "Y", fold_number)
7776
g_vals <- sapply(cf_tasks, likelihood$get_likelihood, "A", fold_number)
7877
DR <- (A_ind / g_vals) * (Y_mat - Q_vals) + Q_vals
7978

8079
# Type of pseudo-blip:
8180
blip_type <- self$blip_type
82-
83-
if(blip_type=="blip1"){
84-
blip <- DR[,2] - DR[,1]
85-
}else if(blip_type=="blip2"){
81+
82+
if (blip_type == "blip1") {
83+
blip <- DR[, 2] - DR[, 1]
84+
} else if (blip_type == "blip2") {
8685
blip <- DR - rowMeans(DR)
87-
}else if(blip_type=="blip3"){
86+
} else if (blip_type == "blip3") {
8887
blip <- DR - (rowMeans(DR) * g_vals)
8988
}
90-
91-
#TO DO: Nicer solutions. Do it one by one, for now
92-
if(is.null(V)){
93-
data <- data.table(V=blip,blip=blip)
89+
90+
# TO DO: Nicer solutions. Do it one by one, for now
91+
if (is.null(V)) {
92+
data <- data.table(V = blip, blip = blip)
9493
outcomes <- grep("blip", names(data), value = TRUE)
9594
V <- grep("V", names(data), value = TRUE)
96-
revere_task <- make_sl3_Task(data, outcome=outcomes, covariates=V, folds=tmle_task$folds)
97-
}else{
98-
V <- tmle_task$data[,self$V,with=FALSE]
99-
data <- data.table(V,blip=blip)
95+
revere_task <- make_sl3_Task(data, outcome = outcomes, covariates = V, folds = tmle_task$folds)
96+
} else {
97+
V <- tmle_task$data[, self$V, with = FALSE]
98+
data <- data.table(V, blip = blip)
10099
outcomes <- grep("blip", names(data), value = TRUE)
101-
revere_task <- make_sl3_Task(data, outcome=outcomes, covariates=self$V, folds=tmle_task$folds)
100+
revere_task <- make_sl3_Task(data, outcome = outcomes, covariates = self$V, folds = tmle_task$folds)
102101
}
103-
102+
104103

105104
return(revere_task)
106105
},
107-
106+
108107
bound = function(cv_g) {
109108
cv_g[cv_g < 0.01] <- 0.01
110109
cv_g[cv_g > 0.99] <- 0.99
@@ -123,81 +122,79 @@ Optimal_Rule_Revere <- R6Class(
123122
private$.blip_fit <- blip_fit
124123
},
125124

126-
rule = function(tmle_task, fold_number="full") {
127-
125+
rule = function(tmle_task, fold_number = "full") {
128126
realistic <- private$.realistic
129127
likelihood <- self$likelihood
130-
128+
131129
# TODO: when applying the rule, we actually only need the covariates
132130
blip_task <- self$blip_revere_function(tmle_task, fold_number)
133131
blip_preds <- self$blip_fit$predict_fold(blip_task, fold_number)
134-
132+
135133
# Type of pseudo-blip:
136134
blip_type <- self$blip_type
137-
138-
if(is.list(blip_preds)){
135+
136+
if (is.list(blip_preds)) {
139137
blip_preds <- unpack_predictions(blip_preds)
140138
}
141-
139+
142140
rule_preds <- NULL
143-
144-
if(realistic){
145-
146-
#Need to grab the propensity score:
141+
142+
if (realistic) {
143+
144+
# Need to grab the propensity score:
147145
g_learner <- likelihood$factor_list[["A"]]$learner
148146
g_task <- tmle_task$get_regression_task("A")
149147
g_fits <- unpack_predictions(g_learner$predict(g_task))
150-
148+
151149
if (!private$.maximize) {
152150
blip_preds <- blip_preds * -1
153151
}
154-
155-
if(blip_type == "blip1"){
152+
153+
if (blip_type == "blip1") {
156154
rule_preds <- as.numeric(blip_preds > 0)
157-
158-
for(i in 1:length(rule_preds)){
159-
rule_preds_prob<-g_fits[i,]
160-
#TO DO: What is a realistic cutoff here?
161-
if(rule_preds_prob<0.05){
162-
#Switch- assumes options are 0 and 1.
155+
156+
for (i in 1:length(rule_preds)) {
157+
rule_preds_prob <- g_fits[i, ]
158+
# TO DO: What is a realistic cutoff here?
159+
if (rule_preds_prob < 0.05) {
160+
# Switch- assumes options are 0 and 1.
163161
rule_preds[i] <- abs(rule_preds[i] - 1)
164162
}
165163
}
166-
167-
}else{
168-
if(dim(blip_preds)[2]<3){
164+
} else {
165+
if (dim(blip_preds)[2] < 3) {
169166
rule_preds <- max.col(blip_preds) - 1
170-
for(i in 1:length(rule_preds)){
171-
rule_preds_prob<-g_fits[i,rule_preds[i]]
172-
#TO DO: What is a realistic cutoff here?
173-
if(rule_preds_prob<0.05){
174-
#Pick the next largest blip
175-
rule_preds[i] <- max.col(blip_preds[i,order(blip_preds[i,], decreasing = TRUE)[2]])
167+
for (i in 1:length(rule_preds)) {
168+
rule_preds_prob <- g_fits[i, rule_preds[i]]
169+
# TO DO: What is a realistic cutoff here?
170+
if (rule_preds_prob < 0.05) {
171+
# Pick the next largest blip
172+
rule_preds[i] <- max.col(blip_preds[i, order(blip_preds[i, ], decreasing = TRUE)[2]])
176173
}
177174
}
178-
}else{
175+
} else {
179176
rule_preds <- max.col(blip_preds)
180-
for(i in 1:length(rule_preds)){
181-
rule_preds_prob<-g_fits[i,rule_preds[i]]
182-
#TO DO: What is a realistic cutoff here?
183-
if(rule_preds_prob<0.07){
184-
#Pick the next largest blip
185-
rule_preds[i] <- order(blip_preds[i,], decreasing = TRUE)[2]
177+
for (i in 1:length(rule_preds)) {
178+
rule_preds_prob <- g_fits[i, rule_preds[i]]
179+
# TO DO: What is a realistic cutoff here?
180+
if (rule_preds_prob < 0.07) {
181+
# Pick the next largest blip
182+
rule_preds[i] <- order(blip_preds[i, ], decreasing = TRUE)[2]
186183
}
187184
}
188185
}
189186
}
190-
}else{
187+
} else {
191188
if (!private$.maximize) {
192189
blip_preds <- blip_preds * -1
193190
}
194-
195-
if(blip_type == "blip1"){
191+
192+
if (blip_type == "blip1") {
196193
rule_preds <- as.numeric(blip_preds > 0)
197-
}else{
198-
if(dim(blip_preds)[2]<3){
194+
} else {
195+
if (dim(blip_preds)[2] < 3) {
199196
rule_preds <- max.col(blip_preds) - 1
200-
}else{
197+
} else {
201198
rule_preds <- max.col(blip_preds)
202199
}
203200
}

0 commit comments

Comments
 (0)