# Q1: 9.12

S <- 10^(-3)*matrix(c(11.072, 8.019, 8.160,
8.019, 6.417, 6.005,
8.160, 6.005, 6.773),
nrow = 3, ncol = 3)

Sn <- (23/24)*S # n = 24, Convert to Sn for M.L.E.

$$\mathbf{S_n} = \frac{23}{24} \mathbf{S} = \begin{pmatrix} 0.01061 & 0.00768 & 0.00782 \\ 0.00768 & 0.00615 & 0.00575 \\ 0.00782 & 0.00575 & 0.00649 \\ \end{pmatrix}$$

## (a) Specific Variances

L <- c(.1022, .0752, .0765)
LL <- matrix(L, ncol = 1) %*% matrix(L, nrow = 1)
Psi <- diag(Sn - matrix(L, ncol = 1) %*% matrix(L, nrow = 1))

$$\mathbf{S_n} \approx \hat{\mathbf{L}} \hat{\mathbf{L}}^T + \hat{\mathbf{\Psi}}$$, where $$diag(\mathbf{S_n}) = diag(\hat{\mathbf{L}} \hat{\mathbf{L}}^T) + diag(\hat{\mathbf{\Psi}})$$

Hence, $$\hat{\mathbf{\Psi}} = diag(\hat{\mathbf{\Psi}}) = diag(\mathbf{S_n} - \hat{\mathbf{L}} \hat{\mathbf{L}}^T)$$

$$\hat{\mathbf{\Psi}} = \begin{pmatrix} 0.000166 & 0.000000 & 0.000000 \\ 0.000000 & 0.000495 & 0.000000 \\ 0.000000 & 0.000000 & 0.000639 \\ \end{pmatrix}$$

## (b) Communalities

$$\sigma_{ii} = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 + \psi_i$$

$$h_i^2 = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 = 0.022$$

## (c) Proportion of variance explained by the factor

p <-  sum(L^2) / (diag(Sn) %>% sum())

$$\frac{s_{11} + s_{22} + ~...~ + s_{pp}}{h_i^2} = 0.9441$$

## (d) Residual Matrix

Residual <- Sn - LL - Psi 

$- ^T - = xm(Residual, 6) %_% "$"
$\begin{pmatrix} 0.000000 & -0.000166 & -0.000164 \\ -0.000495 & 0.000000 & -0.000493 \\ -0.000637 & -0.000637 & 0.000000 \\ \end{pmatrix}$

# Q2: 9.32 library(readr) library(psych) bulls <- read_table2("T1-10.dat", col_names = FALSE) colnames(bulls) <- c("Breed", "SalePr", "YrHgt", "FtFrBody", "PrctFFB", "Frame", "BkFat", "SaleHt", "SaleWt") bulls <- bulls[,-c(1,2)] source("fa_functions.R") # for formating loading mt ## (a) S PC fa_S_pc <- principal(bulls, nfactors = 3, rotate = 'varimax', covar = TRUE) fa_S_pc[["loadings"]][,] %>% round(3) %>% format_loading_matrix(criterion = 30, markdown = T) %>% kable("markdown",align = 'c', escape = F) RC1 RC2 RC3 YrHgt 0.566 0.734 0.870 FtFrBody 31.179 83.935 24.022 PrctFFB 0.601 1.488 2.742 Frame 0.296 0.39 0.415 BkFat 0.014 -0.006 -0.055 SaleHt 1.021 0.934 0.820 SaleWt 122.551 39.068 -17.480 ## (b) S ML fa_S_ml <- fa(bulls, nfactors=3, rotate="varimax", covar = T, fm="ml", scores="regression") fa_S_ml[["loadings"]][,] %>% round(3) %>% format_loading_matrix(criterion = 30, markdown = T) %>% kable("markdown",align = 'c', escape = F) ML1 ML2 ML3 YrHgt -0.001 0.581 1.629 FtFrBody 21.802 84.458 31.383 PrctFFB -0.253 2.155 1.056 Frame 0.019 0.307 0.817 BkFat 0.033 -0.015 -0.027 SaleHt 0.498 0.841 1.532 SaleWt 119.136 33.925 38.8 ## (c) R PC fa_R_pc <- principal(bulls, nfactors = 3, rotate = 'varimax', covar = F) fa_R_pc[["loadings"]][,] %>% round(3) %>% format_loading_matrix(criterion = 0.65, markdown = T) %>% kable("markdown",align = 'c', escape = F) RC1 RC3 RC2 YrHgt 0.941 0.27 -0.082 FtFrBody 0.447 0.794 0.205 PrctFFB 0.262 0.859 -0.295 Frame 0.938 0.219 -0.028 BkFat -0.231 -0.339 0.812 SaleHt 0.833 0.419 0.109 SaleWt 0.352 0.43 0.722 ## (d) R ML fa_R_ml <- fa(bulls, nfactors=3, rotate="varimax", covar = F, fm="ml", scores="regression") fa_R_ml[["loadings"]][,] %>% round(3) %>% format_loading_matrix(criterion = 0.6, markdown = T) %>% kable("markdown",align = 'c', escape = F) ML1 ML2 ML3 YrHgt 0.941 0.286 0.164 FtFrBody 0.414 0.505 0.553 PrctFFB 0.231 0.947 0.212 Frame 0.891 0.251 0.18 BkFat -0.256 -0.514 0.273 SaleHt 0.755 0.269 0.434 SaleWt 0.253 -0.05 0.879 ## (e) Compare S & R ## (f) scatter plots of factor2 vs factor1 in (a) and (c) ### (a) library(ggplot2) fa_S_pc[["scores"]] %>% as.data.frame() %>% ggplot()+ geom_point(aes(x=RC1, y=RC2), size=1)+ labs(x="Factor 1", y="Factor 2", title="FA with S", caption="Principle component method") ### (c) library(ggplot2) fa_R_pc[["scores"]] %>% as.data.frame() %>% ggplot()+ geom_point(aes(x=RC1, y=RC2), size=1)+ labs(x="Factor 1", y="Factor 2", title="FA with R", caption="Principle component method") # Q3 ## (a) Appropriate number of factors library(readr) drinks <- read_table2("drinks.DAT", col_names = FALSE, skip = 2) colnames(drinks) <- c("ID", "BRAND", paste("X",1:10, sep="")) drinks <- drinks[-nrow(drinks),] library(psych) fa_R_pc2 <- principal(drinks[,3:12], nfactors = 4, rotate = 'varimax', covar = F) fa_R_pc2[["values"]] %>% cbind(X=1:10) %>% as.data.frame() %>% ggplot()+ geom_point(aes(x=X, y=.), size=2) + scale_x_continuous(breaks = 1:10)+ labs(x="i", y="eigenvalue", title="Scree Plot") By the unity criterion we use 4 factors1. source("fa_functions.R") fa_R_pc2[["loadings"]][,] %>% round(3) %>% format_loading_matrix(criterion = 0.5, markdown = T) %>% kable("markdown",align = 'c', escape = F) RC4 RC1 RC2 RC3 X1 0.131 -0.215 0.905 -0.044 X2 0.953 0.212 0.05 -0.023 X3 0.293 0.899 -0.165 -0.043 X4 0.027 -0.091 0.955 0.054 X5 0.933 0.261 0.111 -0.076 X6 -0.083 -0.043 0.041 0.994 X7 0.228 0.918 -0.23 -0.028 X8 0.057 -0.224 0.935 0.051 X9 0.935 0.276 0.073 -0.048 X10 0.273 0.896 -0.199 -0.016 ## (b) Label the Factors ## (c) Average factor scores of each brand br1 <- which(drinksBRAND == "1") #index for brand 1
br2 <- which(drinks$BRAND == "2") br3 <- which(drinks$BRAND == "3")
br4 <- which(drinks$BRAND == "4") br5 <- which(drinks$BRAND == "5")
br6 <- which(drinks\$BRAND == "6")
fs1 <- fa_R_pc2[["scores"]][br1,] %>% colMeans() %>% t() %>%
as.data.frame()
fs2 <- fa_R_pc2[["scores"]][br2,] %>% colMeans() %>% t() %>%
as.data.frame()
fs3 <- fa_R_pc2[["scores"]][br3,] %>% colMeans() %>% t() %>%
as.data.frame()
fs4 <- fa_R_pc2[["scores"]][br4,] %>% colMeans() %>% t() %>%
as.data.frame()
fs5 <- fa_R_pc2[["scores"]][br5,] %>% colMeans() %>% t() %>%
as.data.frame()
fs6 <- fa_R_pc2[["scores"]][br6,] %>% colMeans() %>% t() %>%
as.data.frame()

avg_fs <- rbind(fs1, fs2, fs3, fs4, fs5, fs6) %>%
as.data.frame()
"Allsport", "Lipton tea", "Nestea")

### Average factor scores

kable(avg_fs, "markdown", align = 'c',
digits = 3,
col.names= c("F1", "F2", "F3", "F4"),
row.names = T,
caption = "Average Factor Scores of each brand")
F1 F2 F3 F4
Pepsi -0.629 -1.008 -0.107 0.354
Coke -1.215 -0.403 0.194 -0.177
Allsport -0.177 0.666 -1.074 -0.181
Lipton tea 0.885 0.085 0.613 0.031
Nestea 0.615 0.122 0.821 -0.044

## (d)

avg_fs <- cbind(avg_fs, rownames(avg_fs))
colnames(avg_fs) <- c("F1", "F2", "F3", "F4", "Brand")

### F1, F2
ggplot(avg_fs[,c(1,2, 5)],aes(x=F1, y=F2, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')

### F1, F3
ggplot(avg_fs[,c(1,3, 5)],aes(x=F1, y=F3, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')