10-11 Dec. 2015, breedR version: 0.11
data("douglas") str(douglas)
'data.frame': 9630 obs. of 15 variables: $ self : num 135 136 137 138 139 140 141 142 143 144 ... $ dad : num 41 41 41 41 41 41 41 41 41 41 ... $ mum : num 21 21 21 21 21 21 21 21 21 21 ... $ orig : Factor w/ 11 levels "pA","pB","pC",..: 1 1 1 1 1 1 1 1 1 1 ... $ site : Factor w/ 3 levels "s1","s2","s3": 1 1 1 1 1 1 1 1 1 1 ... $ block: Factor w/ 127 levels "s1:1","s1:2",..: 11 44 24 28 13 35 8 40 3 15 ... $ x : num 6 27 45 57 57 60 63 66 66 75 ... $ y : num 81 135 90 45 327 474 450 21 234 483 ... $ H02 : int NA NA NA NA NA NA NA NA NA NA ... $ H03 : int NA NA NA NA NA NA NA NA NA NA ... $ H04 : int NA NA NA NA NA NA NA NA NA NA ... $ H05 : int 634 581 611 370 721 488 574 498 528 620 ... $ C13 : int 586 474 715 372 665 558 490 372 527 612 ... $ AN : Factor w/ 5 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ... $ BR : Factor w/ 5 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ...
with(douglas, table(orig, site))
site orig s1 s2 s3 pA 1381 1326 635 pB 870 815 111 pC 930 1030 80 pD 28 1 0 pE 265 0 0 pF 502 673 388 pG 0 116 26 pH 0 78 54 pI 0 78 52 pJ 0 78 55 pK 0 0 58
ggplot(douglas, aes(x, y, color = block)) + geom_point(show_guide = FALSE) + facet_wrap(~ site)
Excercise: write the following model in breedR:
\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + fam + bl_\mathrm{site} + fsi + \varepsilon\\ fam \sim & \mathcal{N}(0, \sigma_f^2) \\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ fsi \sim & \mathcal{N}(0, \sigma_{i}^2) \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
## create site-wise block variables (i.e. bl_i = bl * Ind(i)) dat <- transform(douglas, bl1 = block, bl2 = block, bl3 = block) dat$bl1[dat$site != "s1"] <- NA dat$bl2[dat$site != "s2"] <- NA dat$bl3[dat$site != "s3"] <- NA dat <- droplevels(dat) ## variable family (taken as a maternal effect) dat$fam <- factor(dat$mum) ## family-site interaction variable dat <- transform(dat, famxsite = factor(fam:site))
Model with origin and site as fixed effects, and family, blocks and family-site interaction as random effects
reml.tree <- remlf90(fixed = C13 ~ site + orig, random = ~ fam + bl1 + bl2 + bl3 + famxsite, data = dat)
Linear Mixed Model with pedigree and spatial effects fit by AI-REMLF90 ver. 1.122 Data: dat AIC BIC logLik 109427 109470 -54708 Parameters of special components: Variance components: Estimated variances S.E. fam 1339.70 235.07 bl1 736.37 182.76 bl2 66.46 50.84 bl3 1233.80 385.47 famxsite 118.43 61.54 Residual 13653.00 211.56 Fixed effects: value s.e. site.s1 503.4873 7.4887 site.s2 550.2785 6.7923 site.s3 474.9609 9.6581 orig.pA -8.9503 4.4835 orig.pB 0.0000 0.0000 orig.pC -42.4425 9.4899 orig.pD 52.2017 44.8236 orig.pE -26.2352 18.7055 orig.pF -37.0728 11.3151 orig.pG -47.1587 23.2942 orig.pH -14.7085 26.4218 orig.pI -49.8486 29.8747 orig.pJ -57.2313 26.3383 orig.pK -53.7943 28.7336
Excercise: produce the following plots:
Excercise: produce the following interaction plot:
Consider only families present in all the 3 sites
famxsite.tbl <- table(dat$fam, dat$site) > 0 fam3.idx <- apply(famxsite.tbl, 1, all) table(fam3.idx)
fam3.idx FALSE TRUE 64 45
Sum of squares of interaction effects by family (across sites) divided by total sum of squares (for all families)
dplyr
)(dat.ecov <- dat.fsi %>% group_by(fam) %>% summarise(ssfam = sum(pred_fsi**2)) %>% filter(fam3.idx) %>% mutate(Ecovalence=ssfam/sum(ssfam))) ## Check stopifnot( all.equal(sum(dat.ecov$Ecovalence), 1) )
Source: local data frame [45 x 3] fam ssfam Ecovalence (fctr) (dbl) (dbl) 1 1 18.447072 0.006589986 2 2 8.286972 0.002960417 3 3 58.993908 0.021074836 4 4 109.377567 0.039073769 5 5 4.886922 0.001745792 6 7 20.096726 0.007179304 7 8 33.322147 0.011903920 8 9 10.683869 0.003816679 9 10 18.413615 0.006578034 10 11 67.250109 0.024024261 .. ... ... ...
threshold <- 0.05 # arbitrarily chosen dat.ecov$hi <- dat.ecov$Ecovalence > threshold ggplot(dat.ecov, aes(Ecovalence, fill = hi)) + geom_histogram(show_guide = FALSE)
Most interactive families: 24 28 38 48 105 106
Represent the families which interact the most with the sites:
\[ \rho_{B} = \frac{\sigma_f^2 - \sigma_{fs}^2/3}{\sigma_f^2 + 2*\sigma_{fs}^2/3} \]
se_covar_function
in the argument progsf90.options
.r_B.fml <- "(G_3_3_1_1-G_7_7_1_1/3)/(G_3_3_1_1+2*G_7_7_1_1/3)" reml.tree <- remlf90( fixed = C13 ~ site + orig, random = ~ fam + bl1 + bl2 + bl3 + famxsite, progsf90.options = paste("se_covar_function r_B", r_B.fml), data = dat )
Linear Mixed Model with pedigree and spatial effects fit by AI-REMLF90 ver. 1.122 Data: dat AIC BIC logLik 109427 109470 -54708 Parameters of special components: Variance components: Estimated variances S.E. fam 1339.70 235.07 bl1 736.37 182.76 bl2 66.46 50.84 bl3 1233.80 385.47 famxsite 118.43 61.54 Residual 13653.00 211.56 Estimate S.E. r_B 0.9157 0.04774 Fixed effects: value s.e. site.s1 503.4873 7.4887 site.s2 550.2785 6.7923 site.s3 474.9609 9.6581 orig.pA -8.9503 4.4835 orig.pB 0.0000 0.0000 orig.pC -42.4425 9.4899 orig.pD 52.2017 44.8236 orig.pE -26.2352 18.7055 orig.pF -37.0728 11.3151 orig.pG -47.1587 23.2942 orig.pH -14.7085 26.4218 orig.pI -49.8486 29.8747 orig.pJ -57.2313 26.3383 orig.pK -53.7943 28.7336
The predicted genetic effect is the sum of the family effect and the family-site interaction (Excercise).
## reshape the genetic values ## by family (dat.gen <- dat.fsi %>% select(fam, site, pred_gen) %>% tidyr::spread(site, pred_gen) %>% tbl_df() )
Source: local data frame [109 x 4] fam s1 s2 s3 (fctr) (dbl) (dbl) (dbl) 1 1 0.7708066 4.629670 6.755107 2 2 -54.7085993 -55.876654 -55.599421 3 3 -27.5767262 -17.541736 -19.202094 4 4 18.6370661 22.462160 32.831870 5 5 -9.5945562 -9.072929 -6.714122 6 6 -36.1778425 -31.010280 NA 7 7 44.9607625 39.364452 42.434652 8 8 41.1998076 44.446619 48.712357 9 9 40.6242870 36.905050 39.053858 10 10 14.6311835 10.742742 8.711473 .. ... ... ... ...
Kendall (top triangle) and Spearman (bottom triangle)
s1 | s2 | s3 | |
---|---|---|---|
s1 | 1.000 | 0.875 | 0.834 |
s2 | 0.970 | 1.000 | 0.851 |
s3 | 0.951 | 0.959 | 1.000 |
\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + bl_\mathrm{site} + f_\mathrm{site} + \varepsilon\\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ f_s \sim & \mathcal{N}(0, \sigma_{f(s)}^2), \quad s = 1, 2, 3 \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
Things improve a bit wrt the previous model with a single variance. It seems that in site 2 there is not much variation, neither from blocks nor for GxE.
Linear Mixed Model with pedigree and spatial effects fit by AI-REMLF90 ver. 1.122 Data: dat AIC BIC logLik 109520 109570 -54753 Parameters of special components: Variance components: Estimated variances S.E. bl1 741.74 184.06 bl2 64.86 50.54 bl3 1234.30 385.93 f1 1550.50 283.46 f2 907.78 201.97 f3 1968.80 561.03 Residual 13658.00 211.81 Fixed effects: value s.e. site.s1 465.1287 8.1128 site.s2 512.7597 6.7871 site.s3 439.4925 10.7214 orig.pA 29.9486 7.3873 orig.pB 37.6115 7.7293 orig.pC -2.1595 7.9720 orig.pD 90.4319 45.9454 orig.pE 12.7877 19.3840 orig.pF 0.0000 0.0000 orig.pG 8.5822 22.9116 orig.pH 9.4322 27.9432 orig.pI -8.4477 27.9889 orig.pJ -1.3922 28.0138 orig.pK 6.2548 47.8731
The most effective way is to introduce the (expected) correlation between interaction effects across sites into a model parameter.
Among other things, this allows us to predict the genetic value of a family in a site where it was not observed.
\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + bl_\mathrm{site} + fsi + \varepsilon\\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ fsi = \begin{pmatrix} f_1 \\ f_2 \\ f_3 \end{pmatrix} \sim & \mathcal{N}(0, \Sigma_i \otimes \mathrm{I}) \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
— {.columns-2 .smaller}
Linear Mixed Model with pedigree and spatial effects fit by AI-REMLF90 ver. 1.122 Data: dat AIC BIC logLik 109026 109119 -54500 Parameters of special components: Variance components: $bl1 [,1] [1,] 737.6 $bl2 [,1] [1,] 67.39 $bl3 [,1] [1,] 1238 $fsi f1 f2 f3 f1 1611 1315 1552 f2 1315 1083 1353 f3 1552 1353 2268 $Residual [,1] [1,] 13633 Fixed effects: value s.e. site.s1 464.26089 10.1003 site.s2 510.67266 9.1865 site.s3 435.06084 12.1328 orig.pA 30.82334 10.8268 orig.pB 38.63057 10.9586 orig.pC -3.98224 11.3373 orig.pD 91.40232 46.9774 orig.pE 21.03562 20.4085 orig.pF 0.00000 0.0000 orig.pG 10.56924 21.5596 orig.pH 60.05614 24.4392 orig.pI 6.04216 27.6052 orig.pJ 0.38203 24.5737 orig.pK -3.76085 38.3847
[,1] [,2] [,3] [1,] 1.000 0.996 0.812 [2,] 0.996 1.000 0.863 [3,] 0.812 0.863 1.000
## Use breedR internal functions to compute splines models on each site sp1 <- breedR:::breedr_splines(dat[dat$site == 's1', c('x', 'y')]) sp2 <- breedR:::breedr_splines(dat[dat$site == 's2' & !is.na(dat$x) & !is.na(dat$y), c('x', 'y')]) sp3 <- breedR:::breedr_splines(dat[dat$site == 's3', c('x', 'y')]) ## Manually build the full incidence matrices with 0 ## and the values computed before mm1 <- model.matrix(sp1) inc.sp1 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm1)) inc.sp1[dat$site == 's1', ] <- mm1 mm2 <- model.matrix(sp2) inc.sp2 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm2)) inc.sp2[dat$site == 's2' & !is.na(dat$x) & !is.na(dat$y), ] <- mm2 mm3 <- model.matrix(sp3) inc.sp3 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm3)) inc.sp3[dat$site == 's3', ] <- mm3
reml.spl <- remlf90( fixed = C13 ~ site + orig, random = ~ fam + famxsite, generic = list(sp1 = list(inc.sp1, breedR:::get_structure(sp1)), sp2 = list(inc.sp2, breedR:::get_structure(sp2)), sp3 = list(inc.sp3, breedR:::get_structure(sp3))), data = dat, method = 'em' )
Yamada, Yukio. 1962. “Genotype by Environment Interaction and Genetic Correlation of the Same Trait Under Different Environments.” The Japanese Journal of Genetics 37 (6): 498–509. doi:10.1266/jjg.37.498.