2015-07-01 breedR version: 0.10.8
data("douglas")
str(douglas)
'data.frame': 9630 obs. of 15 variables: $ self : num 135 136 137 138 139 140 141 142 143 144 ... $ dad : num 41 41 41 41 41 41 41 41 41 41 ... $ mum : num 21 21 21 21 21 21 21 21 21 21 ... $ orig : Factor w/ 11 levels "pA","pB","pC",..: 1 1 1 1 1 1 1 1 1 1 ... $ site : Factor w/ 3 levels "s1","s2","s3": 1 1 1 1 1 1 1 1 1 1 ... $ block: Factor w/ 127 levels "s1:1","s1:2",..: 11 44 24 28 13 35 8 40 3 15 ... $ x : num 6 27 45 57 57 60 63 66 66 75 ... $ y : num 81 135 90 45 327 474 450 21 234 483 ... $ H02 : int NA NA NA NA NA NA NA NA NA NA ... $ H03 : int NA NA NA NA NA NA NA NA NA NA ... $ H04 : int NA NA NA NA NA NA NA NA NA NA ... $ H05 : int 634 581 611 370 721 488 574 498 528 620 ... $ C13 : int 586 474 715 372 665 558 490 372 527 612 ... $ AN : Factor w/ 5 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ... $ BR : Factor w/ 5 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ...
with(douglas, table(orig, site))
site orig s1 s2 s3 pA 1381 1326 635 pB 870 815 111 pC 930 1030 80 pD 28 1 0 pE 265 0 0 pF 502 673 388 pG 0 116 26 pH 0 78 54 pI 0 78 52 pJ 0 78 55 pK 0 0 58
ggplot(douglas, aes(x, y, color = block)) + geom_point(show_guide = FALSE) + facet_wrap(~ site)
Excercise: write the following model in breedR:
\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + fam + bl_\mathrm{site} + fsi + \varepsilon\\ fam \sim & \mathcal{N}(0, \sigma_f^2) \\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ fsi \sim & \mathcal{N}(0, \sigma_{i}^2) \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
## create site-wise block variables (i.e. bl_i = bl * Ind(i)) dat <- transform(douglas, bl1 = block, bl2 = block, bl3 = block) dat$bl1[dat$site != "s1"] <- NA dat$bl2[dat$site != "s2"] <- NA dat$bl3[dat$site != "s3"] <- NA dat <- droplevels(dat) ## variable family (taken as a maternal effect) dat$fam <- factor(dat$mum) ## family-site interaction variable dat <- transform(dat, famxsite = factor(fam:site))
Model with origin and site as fixed effects, and family, blocks and family-site interaction as random effects
reml.tree <- remlf90(fixed = C13 ~ site + orig,
random = ~ fam + bl1 + bl2 + bl3 + famxsite,
data = dat, method = 'ai')
Linear Mixed Model with pedigree and spatial effects fit by AI-REMLF90 ver. 1.110
Data: dat
AIC BIC logLik
109427 109470 -54708
Parameters of special components:
Variance components:
Estimated variances S.E.
fam 1339.70 235.07
bl1 736.37 182.76
bl2 66.46 50.84
bl3 1233.80 385.47
famxsite 118.43 61.54
Residual 13653.00 211.56
Fixed effects:
value s.e.
site.s1 503.4873 7.4887
site.s2 550.2785 6.7923
site.s3 474.9609 9.6581
orig.pA -8.9503 4.4835
orig.pB 0.0000 0.0000
orig.pC -42.4425 9.4899
orig.pD 52.2017 44.8236
orig.pE -26.2352 18.7055
orig.pF -37.0728 11.3151
orig.pG -47.1587 23.2942
orig.pH -14.7085 26.4218
orig.pI -49.8486 29.8747
orig.pJ -57.2313 26.3383
orig.pK -53.7943 28.7336
Excercise: produce the following plots:

reml.tree$var
Estimated variances S.E. fam 1339.700 235.070 bl1 736.370 182.760 bl2 66.463 50.844 bl3 1233.800 385.470 famxsite 118.430 61.543 Residual 13653.000 211.560
with(reml.tree,
var['fam', 1] / (var['fam', 1] + var['famxsite', 1])) %>%
round(2)
[1] 0.92
Excercise: produce the following interaction plot:
Consider only families present in all the 3 sites
famxsite.tbl <- table(dat$fam, dat$site) fam3.idx <- apply(famxsite.tbl, 1, function(x) all(x>0)) table(fam3.idx)
fam3.idx FALSE TRUE 64 45
Sum of squares by family (across sites) divided by total sum of squares
dplyr)(dat.ecov <-
dat.fsi %>%
group_by(fam) %>%
summarise(ssfam =
sum(pred_fsi**2)) %>%
filter(fam3.idx) %>%
mutate(fratio = ssfam / sum(ssfam)))
## Check
stopifnot(
all.equal(sum(dat.ecov$fratio), 1)
)
Source: local data frame [45 x 3] fam ssfam fratio 1 1 18.447072 0.006589986 2 2 8.286972 0.002960417 3 3 58.993908 0.021074836 4 4 109.377567 0.039073769 5 5 4.886922 0.001745792 6 7 20.096726 0.007179304 7 8 33.322147 0.011903920 8 9 10.683869 0.003816679 9 10 18.413615 0.006578034 10 11 67.250109 0.024024261 .. ... ... ...
threshold <- 0.05 dat.ecov$hi <- dat.ecov$fratio > threshold ggplot(dat.ecov, aes(fratio, fill = hi)) + geom_histogram(show_guide = FALSE)
Most interactive families: 24 28 38 48 105 106
Represent the families which interact the most with the sites:
The predicted genetic effect is the sum of the family effect and the family-site interaction (Excercise).
## reshape the genetic values ## by family (dat.gen <- dat.fsi %>% select(fam, site, pred_gen) %>% tidyr::spread(site, pred_gen) %>% tbl_df() )
Source: local data frame [109 x 4] fam s1 s2 s3 1 1 0.7708066 4.629670 6.755107 2 2 -54.7085993 -55.876654 -55.599421 3 3 -27.5767262 -17.541736 -19.202094 4 4 18.6370661 22.462160 32.831870 5 5 -9.5945562 -9.072929 -6.714122 6 6 -36.1778425 -31.010280 NA 7 7 44.9607625 39.364452 42.434652 8 8 41.1998076 44.446619 48.712357 9 9 40.6242870 36.905050 39.053858 10 10 14.6311835 10.742742 8.711473 .. ... ... ... ...
Kendall (top) and Spearman (bottom)
| s1 | s2 | s3 | |
|---|---|---|---|
| s1 | 1.000 | 0.875 | 0.834 |
| s2 | 0.970 | 1.000 | 0.851 |
| s3 | 0.951 | 0.959 | 1.000 |

\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + fam + bl_\mathrm{site} + f_\mathrm{site} + \varepsilon\\ fam \sim & \mathcal{N}(0, \sigma_f^2) \\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ f_s \sim & \mathcal{N}(0, \sigma_{f(s)}^2), \quad s = 1, 2, 3 \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
Things improve a bit wrt the previous model with a single variance. It seems that in site 2 there is not much variation, nor from blocks or for GxE.
Linear Mixed Model with pedigree and spatial effects fit by REMLF90 ver. 1.78
Data: dat
AIC BIC logLik
109421 unknown -54703
Parameters of special components:
Variance components:
Estimated variances
fam 1323.000
bl1 738.100
bl2 66.610
bl3 1237.000
f1 93.980
f2 5.789
f3 662.900
Residual 13630.000
Fixed effects:
value s.e.
site.s1 503.3411 7.4347
site.s2 549.4791 6.6367
site.s3 474.4960 10.2681
orig.pA -8.6398 4.4893
orig.pB 0.0000 0.0000
orig.pC -42.1887 9.3639
orig.pD 52.3152 44.3471
orig.pE -25.6809 18.4917
orig.pF -37.6245 11.2208
orig.pG -37.9558 22.4106
orig.pH 10.1126 25.9501
orig.pI -40.8323 29.5035
orig.pJ -48.0630 26.0029
orig.pK -49.6943 36.9587


The most effective way is to introduce the (expected) correlation between interaction effects across sites into a model parameter.
Among other things, this allows us to predict the genetic value of a family in a site where it was not observed.
\[ \begin{aligned} \mathrm{CR13} = & \mathrm{site} + \mathrm{orig} + fam + bl_\mathrm{site} + fsi + \varepsilon\\ fam \sim & \mathcal{N}(0, \sigma_f^2) \\ bl_s \sim & \mathcal{N}(0, \sigma_{b(s)}^2), \quad s = 1, 2, 3 \\ fsi = \begin{pmatrix} f_1 \\ f_2 \\ f_3 \end{pmatrix} \sim & \mathcal{N}(0, \Sigma_i \otimes \mathrm{I}) \\ \varepsilon \sim & \mathcal{N}(0, \sigma^2) \end{aligned} \]
Linear Mixed Model with pedigree and spatial effects fit by REMLF90 ver. 1.78
Data: dat
AIC BIC logLik
109022 unknown -54500
Parameters of special components:
Variance components:
$fam
[1] 74.67
$bl1
[1] 737.6
$bl2
[1] 67.36
$bl3
[1] 1238
$fsi
f1 f2 f3
f1 1538 1239 1480
f2 1239 1009 1277
f3 1480 1277 2194
$Residual
[1] 13630
Fixed effects:
value s.e.
site.s1 464.26362 10.1012
site.s2 510.67612 9.1863
site.s3 435.08511 12.1330
orig.pA 30.80217 10.8268
orig.pB 38.62992 10.9585
orig.pC -3.97525 11.3365
orig.pD 91.39946 46.9925
orig.pE 21.13727 20.4114
orig.pF 0.00000 0.0000
orig.pG 10.58885 21.5913
orig.pH 59.81861 24.5012
orig.pI 6.00987 27.6507
orig.pJ 0.37163 24.6342
orig.pK -3.75109 38.3734
[,1] [,2] [,3] [1,] 1.00 0.99 0.81 [2,] 0.99 1.00 0.86 [3,] 0.81 0.86 1.00


## Use breedR internal functions to compute splines models on each site
sp1 <- breedR:::breedr_splines(dat[dat$site == 's1', c('x', 'y')])
sp2 <- breedR:::breedr_splines(dat[dat$site == 's2' & !is.na(dat$x) & !is.na(dat$y), c('x', 'y')])
sp3 <- breedR:::breedr_splines(dat[dat$site == 's3', c('x', 'y')])
## Manually build the full incidence matrices with 0
## and the values computed before
mm1 <- model.matrix(sp1)
inc.sp1 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm1))
inc.sp1[dat$site == 's1', ] <- mm1
mm2 <- model.matrix(sp2)
inc.sp2 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm2))
inc.sp2[dat$site == 's2' & !is.na(dat$x) & !is.na(dat$y), ] <- mm2
mm3 <- model.matrix(sp3)
inc.sp3 <- Matrix::Matrix(0, nrow = nrow(dat), ncol = ncol(mm3))
inc.sp3[dat$site == 's3', ] <- mm3
reml.spl <- remlf90(fixed = C13 ~ site + orig,
random = ~ fam + famxsite,
generic = list(sp1 = list(inc.sp1,
breedR:::get_structure(sp1)),
sp2 = list(inc.sp2,
breedR:::get_structure(sp2)),
sp3 = list(inc.sp3,
breedR:::get_structure(sp3))),
data = dat,
method = 'em')
