Dependencies:
In this vignette, varietal mixtures will be assembled from a panel of 200 genotypes. Only 1000 mixtures will be observed, among all 1.99^{4} possible ones, i.e., approximately 5%. This incomplete design will be optimized so that it is balanced: all genotype will be observed in the same number of mixtures. Phenotypes will then be simulated organized into a field trial in a randomized complete block design, according to all six GMA-SMA models that can be conceived: 1, 2, 2’, 2’‘, 3 and 3’. See the article by Forst et al (2019), especially for the first three models, as well as the introductory vignette.
nb_pops <- 10
weak_div_pops <- diag(nb_pops)
weak_div_pops[upper.tri(weak_div_pops)] <- 0.9
weak_div_pops[lower.tri(weak_div_pops)] <- weak_div_pops[upper.tri(weak_div_pops)]
tmp <- rep(nbGenos / nb_pops, nb_pops - 1)
tmp <- c(tmp, nbGenos - sum(tmp))
snpGenos <- simulGenosDoseStruct(
nb_genos = tmp,
nb_snps = nbSnps,
div_pops = weak_div_pops,
geno_IDs = levGenos,
snp_IDs = levSnps
)
dim(snpGenos)
## [1] 200 1000
snpGenos[1:3, 1:4]
## s0001 s0002 s0003 s0004
## g001 1 1 1 1
## g002 1 0 1 1
## g003 1 1 2 1For simplicity, the first estimator of VanRaden (2008) is used, that assumes linkage equilibrium and Hardy-Weinberg equilibrium.
nbMixes <- 1000
design <- getDesignBinaryVarMix(levGenos, nbMixes = nbMixes)
tmp <- getMixturesPerGeno(getMixtureList(design$combs))
table(sapply(tmp, length)) # each genotype is in the same nb of mixtures -> balanced design
##
## 10
## 200monoStands <- paste(levGenos, levGenos, sep = "_")
pairs <- design$combs
mixStands <- paste(pairs[, 1], pairs[, 2], sep = "_")
IDs <- c(monoStands, mixStands)
nbBlocks <- 3
blocks <- LETTERS[1:nbBlocks]
dat <- do.call(rbind, lapply(blocks, function(block) {
data.frame(
ID = IDs,
block = block,
stringsAsFactors = TRUE
)
}))listContr <- list(block = "contr.sum")
X <- model.matrix(~ 1 + block, data = dat, contrasts = listContr)
allZ <- list()
allZ$GMA <- mkZGMA(dat, col = "ID", sep = "_")
system.time(
allZ <- append(
allZ,
mkAllZSMA(dat, col = "ID", sep = "_", verbose = 1)
)
)
## [1] "model 2'"
## [1] "model 2"
## [1] "model 2''"
## [1] "model 3"
## [1] "model 3'"
## user system elapsed
## 9.983 0.686 10.670
sapply(allZ, dim)
## GMA SMA_mod2p SMA_mod2 SMA_mod2pp_ij SMA_mod2pp_ii SMA_mod3 SMA_mod3p_ij
## [1,] 3600 3600 3600 3600 3600 3600 3600
## [2,] 200 1000 1200 1000 200 1200 1000
## SMA_mod3p_ii
## [1,] 3600
## [2,] 200Several packages will be tested below, most notably lme4
and TMB. However, lme4 does not natively take
a GRM as input. Hence the GRM will be replaced by the identity matrix to
ensure a fair comparison between packages.
truth <- list(
"intercept" = 100,
"var_GMA" = 10,
"var_SMA" = 2,
"var_SMA_ij" = 1.5,
"var_SMA_ii" = 0.8,
"var_error" = 1
)
set.seed(1234)
truth[["blockEffs"]] <- sample(x = c(-1, 1), size = nbBlocks - 1, replace = TRUE) *
rnorm(n = nbBlocks - 1, mean = 3, sd = 2)
GRM <- diag(nbGenos)
truth[["GMAs"]] <- MASS::mvrnorm(
n = 1, mu = rep(0, nbGenos),
Sigma = truth$var_GMA * GRM
)
truth[["SMAs"]] <- rnorm(n = length(IDs), mean = 0, sd = sqrt(truth$var_SMA))
truth[["SMAs_ij"]] <- rnorm(n = length(mixStands), mean = 0, sd = sqrt(truth$var_SMA_ij))
truth[["SMAs_ii"]] <- rnorm(n = length(monoStands), mean = 0, sd = sqrt(truth$var_SMA_ii))
truth[["errors"]] <- rnorm(n = nrow(dat), mean = 0, sd = sqrt(truth$var_error))y1 <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
truth$errors
dat$pheno1 <- y1[, 1]
y2 <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
allZ$SMA_mod2 %*% truth$SMAs +
truth$errors
dat$pheno2 <- y2[, 1]
y3 <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
allZ$SMA_mod3 %*% truth$SMAs +
truth$errors
dat$pheno3 <- y3[, 1]
y2p <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
allZ$SMA_mod2p %*% truth$SMAs_ij +
truth$errors
dat$pheno2p <- y2p[, 1]
y2pp <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
allZ$SMA_mod2pp_ij %*% truth$SMAs_ij +
allZ$SMA_mod2pp_ii %*% truth$SMAs_ii +
truth$errors
dat$pheno2pp <- y2pp[, 1]
y3p <- X %*% c(truth$intercept, truth$blockEffs) +
allZ$GMA %*% truth$GMAs +
allZ$SMA_mod3p_ij %*% truth$SMAs_ij +
allZ$SMA_mod3p_ii %*% truth$SMAs_ii +
truth$errors
dat$pheno3p <- y3p[, 1]Function fitGMASMA allows to fit the models with various
packages. In this vignette, only lme4 and TMB
will be used and, as you can see below, both return the same estimates.
(To save time, MM4LMM is commented.)
pkgs <- c("lme4", "TMB") # , "MM4LMM")
if ("MM4LMM" %in% pkgs) {
suppressPackageStartupMessages(library(MM4LMM))
}
runAllPkgs <- function(pkgs, form, dat, listZ, listVCov, listContr, ...) {
## mcMap(function(pkg) {
Map(function(pkg) {
print(paste0("fit with ", pkg, "..."))
fitGMASMA(form, dat, listZ, pkg, listVCov, listContr)
st <- system.time(
try(fit <- fitGMASMA(form, dat, listZ, pkg, listVCov, listContr, REML = TRUE, ...))
)
print(st)
fit
}, pkgs) # , mc.cores = nbCores)
}form <- pheno1 ~ 1 + block
listZ <- list("GMA" = allZ$GMA)
listVCov <- list("GMA" = GRM)
listContr <- list(block = "contr.sum")
fits1 <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## user system elapsed
## 0.081 0.125 0.065
## [1] "fit with TMB..."
## user system elapsed
## 0.350 0.382 0.301print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits1$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_error")]),
estim = tmp[c("GMA", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.5474529 5.474529
## var_error 1 0.9599666 -4.003335
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_error")]),
estim = do.call(c, fits1$TMB$report[c("var_GMA", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.5474516 5.474516
## var_error 1 0.9599667 -4.003335
if ("MM4LMM" %in% names(fits1)) {
print(fits1$MM4LMM$Sigma2)
}form <- pheno2 ~ 1 + block
listZ <- list(
"GMA" = allZ$GMA,
"SMA" = allZ$SMA_mod2
)
listVCov <- list(
"GMA" = GRM,
"SMA" = diag(ncol(listZ$SMA))
)
listContr <- list(block = "contr.sum")
fits2 <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## user system elapsed
## 0.399 0.734 0.315
## [1] "fit with TMB..."
## user system elapsed
## 1.339 1.405 1.237print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits2$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = tmp[c("GMA", "SMA", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.4978544 4.9785441
## var_SMA 2 2.0197144 0.9857179
## var_error 1 0.9453828 -5.4617170
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = do.call(c, fits2$TMB$report[c("var_GMA", "var_SMA1", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.4979307 4.9793071
## var_SMA 2 2.0197059 0.9852969
## var_error 1 0.9453837 -5.4616269
if ("MM4LMM" %in% names(fits2)) {
print(fits2$MM4LMM$Sigma2)
}form <- pheno3 ~ 1 + block
listZ <- list(
"GMA" = allZ$GMA,
"SMA" = allZ$SMA_mod3
)
listVCov <- list(
"GMA" = GRM,
"SMA" = diag(ncol(listZ$SMA))
)
listContr <- list(block = "contr.sum")
fits3 <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## user system elapsed
## 0.538 0.897 0.392
## [1] "fit with TMB..."
## user system elapsed
## 1.665 1.985 1.477print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits3$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = tmp[c("GMA", "SMA", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.2611009 2.611009
## var_SMA 2 2.1054392 5.271960
## var_error 1 0.9469001 -5.309988
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = do.call(c, fits3$TMB$report[c("var_GMA", "var_SMA1", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.2611011 2.611011
## var_SMA 2 2.1054394 5.271968
## var_error 1 0.9469001 -5.309992
if ("MM4LMM" %in% names(fits3)) {
print(fits3$MM4LMM$Sigma2)
}form <- pheno2p ~ 1 + block
listZ <- list(
"GMA" = allZ$GMA,
"SMA" = allZ$SMA_mod2p
)
listVCov <- list(
"GMA" = GRM,
"SMA" = diag(ncol(listZ$SMA))
)
listContr <- list(block = "contr.sum")
fits2p <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## user system elapsed
## 0.332 0.575 0.254
## [1] "fit with TMB..."
## user system elapsed
## 1.173 1.154 1.055print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits2p$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = tmp[c("GMA", "SMA", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.3596086 3.596086
## var_SMA 2 1.6026602 -19.866989
## var_error 1 0.9456708 -5.432918
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA", "var_error")]),
estim = do.call(c, fits2p$TMB$report[c("var_GMA", "var_SMA1", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10 10.3596074 3.596074
## var_SMA 2 1.6026609 -19.866954
## var_error 1 0.9456708 -5.432919
if ("MM4LMM" %in% names(fits2p)) {
print(fits2p$MM4LMM$Sigma2)
}form <- pheno2pp ~ 1 + block
listZ <- list(
"GMA" = allZ$GMA,
"SMA_ij" = allZ$SMA_mod2pp_ij,
"SMA_ii" = allZ$SMA_mod2pp_ii
)
listVCov <- list(
"GMA" = GRM,
"SMA_ij" = diag(ncol(listZ$SMA_ij)),
"SMA_ii" = diag(ncol(listZ$SMA_ii))
)
listContr <- list(block = "contr.sum")
fits2pp <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## user system elapsed
## 0.594 1.076 0.456
## [1] "fit with TMB..."
## user system elapsed
## 1.334 1.403 1.232print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits2pp$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA_ij", "var_SMA_ii", "var_error")]),
estim = tmp[c("GMA", "SMA_ij", "SMA_ii", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10.0 10.0083128 0.08312812
## var_SMA_ij 1.5 1.6172868 7.81911753
## var_SMA_ii 0.8 0.8345907 4.32383996
## var_error 1.0 0.9453832 -5.46167587
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA_ij", "var_SMA_ii", "var_error")]),
estim = do.call(c, fits2pp$TMB$report[c("var_GMA", "var_SMA1", "var_SMA2", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10.0 10.0082750 0.08274965
## var_SMA_ij 1.5 1.6172864 7.81909464
## var_SMA_ii 0.8 0.8345879 4.32348766
## var_error 1.0 0.9453837 -5.46163333
if ("MM4LMM" %in% names(fits2pp)) {
print(fits2pp$MM4LMM$Sigma2)
}form <- pheno3p ~ 1 + block
listZ <- list(
"GMA" = allZ$GMA,
"SMA_ij" = allZ$SMA_mod3p_ij,
"SMA_ii" = allZ$SMA_mod3p_ii
)
listVCov <- list(
"GMA" = GRM,
"SMA_ij" = diag(ncol(listZ$SMA_ij)),
"SMA_ii" = diag(ncol(listZ$SMA_ii))
)
listContr <- list(block = "contr.sum")
fits3p <- runAllPkgs(pkgs, form, dat, listZ, listVCov, listContr)
## [1] "fit with lme4..."
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## Warning in split.default(x, g): data length is not a multiple of split variable
## Warning in split.default(seq_along(x), f, drop = drop, ...): data length is not
## a multiple of split variable
## user system elapsed
## 1.031 1.658 0.708
## [1] "fit with TMB..."
## user system elapsed
## 1.674 1.797 1.420print("lme4:")
## [1] "lme4:"
tmp <- as.data.frame(VarCorr(fits3p$lme4))
tmp <- setNames(tmp$vcov, tmp$grp)
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA_ij", "var_SMA_ii", "var_error")]),
estim = tmp[c("GMA", "SMA_ij", "SMA_ii", "Residual")]
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10.0 10.0390280 0.3902804
## var_SMA_ij 1.5 1.6726172 11.5078114
## var_SMA_ii 0.8 0.9481630 18.5203751
## var_error 1.0 0.9440191 -5.5980891
print("TMB:")
## [1] "TMB:"
checks <- data.frame(
truth = do.call(c, truth[c("var_GMA", "var_SMA_ij", "var_SMA_ii", "var_error")]),
estim = do.call(c, fits3p$TMB$report[c("var_GMA", "var_SMA1", "var_SMA2", "var_err")])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
## truth estim nBE
## var_GMA 10.0 10.0390775 0.3907746
## var_SMA_ij 1.5 1.6726225 11.5081642
## var_SMA_ii 0.8 0.9481763 18.5220427
## var_error 1.0 0.9440195 -5.5980522
if ("MM4LMM" %in% names(fits3p)) {
print(fits3p$MM4LMM$Sigma2)
}t1 <- proc.time()
t1 - t0
## user system elapsed
## 37.260 34.380 34.265
print(sessionInfo(), locale = FALSE)
## R version 4.6.0 (2026-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plantmix_1.0.2 lme4_2.0-1 Matrix_1.7-5 ggplot2_4.0.3 knitr_1.51
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 lattice_0.22-9 digest_0.6.39
## [5] magrittr_2.0.5 evaluate_1.0.5 grid_4.6.0 RColorBrewer_1.1-3
## [9] fastmap_1.2.0 jsonlite_2.0.0 scales_1.4.0 jquerylib_0.1.4
## [13] reformulas_0.4.4 Rdpack_2.6.6 cli_3.6.6 rlang_1.2.0
## [17] rbibutils_2.4.1 splines_4.6.0 withr_3.0.2 cachem_1.1.0
## [21] yaml_2.3.12 otel_0.2.0 tools_4.6.0 nloptr_2.2.1
## [25] minqa_1.2.8 dplyr_1.2.1 boot_1.3-32 buildtools_1.0.0
## [29] vctrs_0.7.3 R6_2.6.1 lifecycle_1.0.5 MASS_7.3-65
## [33] pkgconfig_2.0.3 pillar_1.11.1 bslib_0.11.0 gtable_0.3.6
## [37] glue_1.8.1 Rcpp_1.1.1-1.1 xfun_0.58 tibble_3.3.1
## [41] tidyselect_1.2.1 sys_3.4.3 farver_2.1.2 htmltools_0.5.9
## [45] nlme_3.1-169 igraph_2.3.2 rmarkdown_2.31 maketools_1.3.2
## [49] TMB_1.9.21 compiler_4.6.0 S7_0.2.2