ldamatch demos

Kyle Gorman & Géza Kiss

2024-04-14

Univariate case…

library(ldamatch)
# Fix the RNG seed so the simulated covariates below are reproducible.
set.seed(257)

# Search method passed explicitly to match_groups() in several examples below.
default_method <- "heuristic4"

# Simulated data: 2 * SIZE control subjects followed by SIZE treatment subjects.
SIZE <- 15
condition <- as.factor(c(rep("control", 2 * SIZE), rep("treatment", SIZE)))
# Controls are drawn from N(mean 0, sd 1); treatment from N(mean 1, sd 2).
covariate1 <- c(rnorm(2 * SIZE), rnorm(SIZE, 1, 2))

Univariate case (with default search method)…

# Match the groups on the single covariate, using the t-test halting test.
is.in <- match_groups(
  condition,
  covariate1,
  t_halt,
  method = default_method
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
## Warning: executing %dopar% sequentially: no parallel backend registered
## 
Lookahead 1, number of best sets: 1                    
## 

Lookahead 2, number of best sets: 4                    
## 
## Found 4 solution(s) in 2 steps
## 
## Finished heuristic4 search in 1.98 seconds (wall click time passed:2.04).
## Eventual group sizes: control: 30    treatment: 13 
## Removed subjects:     control: 0 treatment: 2 
## The p-value before matching: 0.06059131 
## The p-values after matching: 0.202160707808612, 0.213496988230277, 0.220897103271512, 0.22165738589329
# Cross-tabulate group membership against the kept (TRUE) / removed (FALSE) flag.
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       0   30
##   treatment     2   13

Multivariate case…

# Draw a second covariate with the same per-group distributions as covariate1,
# then bind both covariates column-wise into one matrix.
covariate2 <- c(
  rnorm(n = 2 * SIZE, mean = 0, sd = 1),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
covariates <- cbind(covariate1 = covariate1, covariate2 = covariate2)

Multivariate case (with default search method)…

# Match the groups on both covariates, using the t-test halting test.
is.in <- match_groups(
  condition,
  covariates,
  t_halt,
  method = default_method
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
## 
Lookahead 1, number of best sets: 1                    
## 

Lookahead 2, number of best sets: 3                    
## 
## Found 3 solution(s) in 2 steps
## 
## Finished heuristic4 search in 2.61 seconds (wall click time passed:2.61).
## Eventual group sizes: control: 30    treatment: 13 
## Removed subjects:     control: 0 treatment: 2 
## The p-value before matching: 0.06059131 
## The p-values after matching: 0.202160707808612, 0.220897103271512, 0.22165738589329
# Show, per group, how many subjects were kept (TRUE) or removed (FALSE).
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       0   30
##   treatment     2   13

Multivariate case (with special proportions and the Wilcoxon rank-sum / Mann–Whitney U test)…

# Requested group proportions: control to treatment in a 4:3 ratio, normalised
# by prop.table() so that they sum to 1.
my.props <- prop.table(c(control = 4, treatment = 3))
# Match using the U_halt halting test and the proportions defined above.
is.in <- match_groups(
  condition,
  covariates,
  U_halt,
  props = my.props
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.803495
## 

Lookahead 1, number of best sets: 1                    
## 
## Found 1 solution(s) in 1 steps
## 
## Finished heuristic4 search in 0.38 seconds (wall click time passed:0.37).
## Eventual group sizes: control: 29    treatment: 15 
## Removed subjects:     control: 1 treatment: 0 
## The p-value before matching: 0.160699 
## The p-values after matching: 0.200379547096912
# Per-group counts of kept (TRUE) and removed (FALSE) subjects.
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       1   29
##   treatment     0   15

Multivariate case (with Wilks test)…

# Match on both covariates using the Wilks halting test; no search method is
# passed here, so match_groups() falls back to its own default.
is.in <- match_groups(
  condition,
  covariates,
  wilks_halt
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.159753
## 
Lookahead 1, number of best sets: 1                    
## 

Lookahead 2, number of best sets: 1                    
## 
Number of subjects: control:30, treatment:14; p/thresh ratio: 0.424942
## 

Lookahead 2, number of best sets: 30                    
## 
## Found 30 solution(s) in 3 steps
## 
## Finished heuristic4 search in 4.42 seconds (wall click time passed:4.42).
## Eventual group sizes: control: 29    treatment: 13 
## Removed subjects:     control: 1 treatment: 2 
## The p-value before matching: 0.03195062 
## The p-values after matching: 0.200388136169973, 0.20070104804625, 0.200978434271031, 0.202090574876842, 0.20316718370627, 0.204122628649838, 0.204259312376656, 0.204438975596233, 0.204479730822287, 0.204650735112843, 0.207695854258158, 0.209850624409276, 0.212329866531795, 0.213601103416089, 0.214793042459186, 0.217507416358202, 0.218271311214737, 0.219931870593934, 0.220656240469989, 0.22145394327565, 0.221501756165972, 0.223937954614188, 0.225841957702222, 0.226093391195097, 0.229019777753524, 0.229817811663357, 0.234037144986885, 0.23566831774506, 0.239863520338925, 0.248655369465288
# Tabulate kept (TRUE) versus removed (FALSE) subjects for each group.
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       1   29
##   treatment     2   13

Multivariate case (with Anderson-Darling test and default search method)…

# Match on both covariates using the Anderson-Darling halting test.
# NOTE(review): prefer_test = TRUE presumably selects among tied solutions by
# the test statistic -- confirm against the match_groups() documentation.
is.in <- match_groups(
  condition,
  covariates,
  ad_halt,
  method = default_method,
  prefer_test = TRUE
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.288285
## 

Lookahead 1, number of best sets: 4                    
## 

Lookahead 2, number of best sets: 3                    
## 
## Found 3 solution(s) in 2 steps
## 
## Finished heuristic4 search in 62.01 seconds (wall click time passed:62.28).
## Eventual group sizes: control: 30    treatment: 13 
## Removed subjects:     control: 0 treatment: 2 
## The p-value before matching: 0.057657 
## The p-values after matching: 0.21597
# Group-by-flag table: TRUE means the subject was kept, FALSE means removed.
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       0   30
##   treatment     2   13

Multivariate case (with t-test and Anderson-Darling test simultaneously)…

# Combine the t-test and Anderson-Darling halting tests into a single test.
t_ad_halt <- create_halting_test(c(t_halt, ad_halt))
# Presumably one threshold per component test, in the same order (t-test, then
# Anderson-Darling) -- confirm against the match_groups() documentation.
threshes <- c(0.2, 0.02)
is.in <- match_groups(
  condition,
  covariates,
  t_ad_halt,
  threshes
)
## Search method:  heuristic4 
## Initial group sizes:  control: 30    treatment: 15 
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
## 
Lookahead 1, number of best sets: 1                    
## 

Lookahead 2, number of best sets: 3                    
## 
## Found 3 solution(s) in 2 steps
## 
## Finished heuristic4 search in 66.83 seconds (wall click time passed:67.3).
## Eventual group sizes: control: 30    treatment: 13 
## Removed subjects:     control: 0 treatment: 2 
## The p-value before matching: 0.057657 
## The p-values after matching: 0.19925, 0.21597
# Summarise the result: kept (TRUE) and removed (FALSE) subjects per group.
print(table(condition = condition, is.in = is.in))
##            is.in
## condition   FALSE TRUE
##   control       0   30
##   treatment     2   13

Multivariate case for more than two groups…

# Reset the seed so this example's simulated data are reproducible on their own.
set.seed(257)

# Three groups of SIZE subjects each.
SIZE <- 15
condition <- factor(rep(c("group1", "group2", "group3"), each = SIZE))

# Both covariates use the same per-group (mean, sd): (0, 1), (0, 2), (1, 2).
covariate1 <- c(
  rnorm(n = SIZE, mean = 0, sd = 1),
  rnorm(n = SIZE, mean = 0, sd = 2),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
covariate2 <- c(
  rnorm(n = SIZE, mean = 0, sd = 1),
  rnorm(n = SIZE, mean = 0, sd = 2),
  rnorm(n = SIZE, mean = 1, sd = 2)
)
covariates <- cbind(covariate1 = covariate1, covariate2 = covariate2)

Multivariate case for more than two groups (with default search method)…

# Match the three groups on both covariates, using the combined t-test and
# Anderson-Darling halting test created earlier as t_ad_halt.
is.in <- match_groups(
  condition,
  covariates,
  t_ad_halt,
  method = default_method
)
## Search method:  heuristic4 
## Initial group sizes:  group1: 15 group2: 15  group3: 15 
## Starting heuristic4 search.
## Number of subjects: group1:15, group2:15, group3:15; p/thresh ratio: 0.063320
## 

Lookahead 1, number of best sets: 6                    
## 

Lookahead 2, number of best sets: 18                    
## 

Number of subjects: group1:15, group2:14, group3:15; p/thresh ratio: 0.126925
## 

Lookahead 2, number of best sets: 3                    
## Number of subjects: group1:15, group2:14, group3:14; p/thresh ratio: 0.190600
## 

Lookahead 2, number of best sets: 5                    
## Number of subjects: group1:15, group2:14, group3:13; p/thresh ratio: 0.269390
## 

Lookahead 2, number of best sets: 1                    
## 
Number of subjects: group1:15, group2:13, group3:13; p/thresh ratio: 0.469900
## 

Lookahead 2, number of best sets: 1                    
## 
Number of subjects: group1:15, group2:13, group3:12; p/thresh ratio: 0.507750
## 

Lookahead 2, number of best sets: 11                    
## 
## Found 11 solution(s) in 7 steps
## 
## Random choices: step: 2, num_choices: 2 
## Finished heuristic4 search in 357.95 seconds (wall click time passed:360.92).
## Eventual group sizes: group1: 15 group2: 12  group3: 11 
## Removed subjects:     group1: 0  group2: 3   group3: 4 
## The p-value before matching: 0.012664 
## The p-values after matching: 0.222261928685954, 0.225391352956842, 0.21875, 0.201370779344551, 0.223663369960827
# For each of the three groups, count kept (TRUE) and removed (FALSE) subjects.
print(table(condition = condition, is.in = is.in))
##          is.in
## condition FALSE TRUE
##    group1     0   15
##    group2     3   12
##    group3     4   11