S <- 10^(-3)*matrix(c(11.072, 8.019, 8.160,
8.019, 6.417, 6.005,
8.160, 6.005, 6.773),
nrow = 3, ncol = 3)
Sn <- (23/24)*S # n = 24, Convert to Sn for M.L.E.
\(\mathbf{S_n} = \frac{23}{24} \mathbf{S} = \begin{pmatrix} 0.01061 & 0.00768 & 0.00782 \\ 0.00768 & 0.00615 & 0.00575 \\ 0.00782 & 0.00575 & 0.00649 \\ \end{pmatrix}\)
L <- c(.1022, .0752, .0765)
LL <- matrix(L, ncol = 1) %*% matrix(L, nrow = 1)
Psi <- diag(Sn - matrix(L, ncol = 1) %*% matrix(L, nrow = 1))
\(\mathbf{S_n} \approx \hat{\mathbf{L}} \hat{\mathbf{L}}^T + \hat{\mathbf{\Psi}}\), where \(diag(\mathbf{S_n}) = diag(\hat{\mathbf{L}} \hat{\mathbf{L}}^T) + diag(\hat{\mathbf{\Psi}})\)
Hence, \(\hat{\mathbf{\Psi}} = diag(\hat{\mathbf{\Psi}}) = diag(\mathbf{S_n} - \hat{\mathbf{L}} \hat{\mathbf{L}}^T)\)
\(\hat{\mathbf{\Psi}} = \begin{pmatrix} 0.000166 & 0.000000 & 0.000000 \\ 0.000000 & 0.000495 & 0.000000 \\ 0.000000 & 0.000000 & 0.000639 \\ \end{pmatrix}\)
\(\sigma_{ii} = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 + \psi_i\)
\(h_i^2 = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 = 0.022\)
p <- sum(L^2) / (diag(Sn) %>% sum())
\(\frac{s_{11} + s_{22} + ~...~ + s_{pp}}{h_i^2} = 0.9441\)
Residual <- Sn - LL - Psi
$ - ^T - =
xm(Residual, 6) %_% "$"
\[\begin{pmatrix}
0.000000 & -0.000166 & -0.000164 \\
-0.000495 & 0.000000 & -0.000493 \\
-0.000637 & -0.000637 & 0.000000 \\
\end{pmatrix}\]
$
library(readr)
library(psych)
bulls <- read_table2("T1-10.dat", col_names = FALSE)
colnames(bulls) <- c("Breed", "SalePr", "YrHgt",
"FtFrBody", "PrctFFB", "Frame",
"BkFat", "SaleHt", "SaleWt")
bulls <- bulls[,-c(1,2)]
source("fa_functions.R") # for formating loading mt
fa_S_pc <- principal(bulls,
nfactors = 3,
rotate = 'varimax',
covar = TRUE)
fa_S_pc[["loadings"]][,] %>% round(3) %>%
format_loading_matrix(criterion = 30, markdown = T) %>%
kable("markdown",align = 'c', escape = F)
RC1 | RC2 | RC3 | |
---|---|---|---|
YrHgt | 0.566 | 0.734 | 0.870 |
FtFrBody | 31.179 | 83.935 | 24.022 |
PrctFFB | 0.601 | 1.488 | 2.742 |
Frame | 0.296 | 0.39 | 0.415 |
BkFat | 0.014 | -0.006 | -0.055 |
SaleHt | 1.021 | 0.934 | 0.820 |
SaleWt | 122.551 | 39.068 | -17.480 |
fa_S_ml <- fa(bulls, nfactors=3,
rotate="varimax", covar = T,
fm="ml", scores="regression")
fa_S_ml[["loadings"]][,] %>% round(3) %>%
format_loading_matrix(criterion = 30, markdown = T) %>%
kable("markdown",align = 'c', escape = F)
ML1 | ML2 | ML3 | |
---|---|---|---|
YrHgt | -0.001 | 0.581 | 1.629 |
FtFrBody | 21.802 | 84.458 | 31.383 |
PrctFFB | -0.253 | 2.155 | 1.056 |
Frame | 0.019 | 0.307 | 0.817 |
BkFat | 0.033 | -0.015 | -0.027 |
SaleHt | 0.498 | 0.841 | 1.532 |
SaleWt | 119.136 | 33.925 | 38.8 |
fa_R_pc <- principal(bulls,
nfactors = 3,
rotate = 'varimax',
covar = F)
fa_R_pc[["loadings"]][,] %>% round(3) %>%
format_loading_matrix(criterion = 0.65, markdown = T) %>%
kable("markdown",align = 'c', escape = F)
RC1 | RC3 | RC2 | |
---|---|---|---|
YrHgt | 0.941 | 0.27 | -0.082 |
FtFrBody | 0.447 | 0.794 | 0.205 |
PrctFFB | 0.262 | 0.859 | -0.295 |
Frame | 0.938 | 0.219 | -0.028 |
BkFat | -0.231 | -0.339 | 0.812 |
SaleHt | 0.833 | 0.419 | 0.109 |
SaleWt | 0.352 | 0.43 | 0.722 |
fa_R_ml <- fa(bulls, nfactors=3,
rotate="varimax", covar = F,
fm="ml", scores="regression")
fa_R_ml[["loadings"]][,] %>% round(3) %>%
format_loading_matrix(criterion = 0.6, markdown = T) %>%
kable("markdown",align = 'c', escape = F)
ML1 | ML2 | ML3 | |
---|---|---|---|
YrHgt | 0.941 | 0.286 | 0.164 |
FtFrBody | 0.414 | 0.505 | 0.553 |
PrctFFB | 0.231 | 0.947 | 0.212 |
Frame | 0.891 | 0.251 | 0.18 |
BkFat | -0.256 | -0.514 | 0.273 |
SaleHt | 0.755 | 0.269 | 0.434 |
SaleWt | 0.253 | -0.05 | 0.879 |
library(ggplot2)
fa_S_pc[["scores"]] %>% as.data.frame() %>%
ggplot()+
geom_point(aes(x=RC1, y=RC2), size=1)+
labs(x="Factor 1", y="Factor 2", title="FA with S",
caption="Principle component method")
library(ggplot2)
fa_R_pc[["scores"]] %>% as.data.frame() %>%
ggplot()+
geom_point(aes(x=RC1, y=RC2), size=1)+
labs(x="Factor 1", y="Factor 2", title="FA with R",
caption="Principle component method")
library(readr)
drinks <- read_table2("drinks.DAT", col_names = FALSE, skip = 2)
colnames(drinks) <- c("ID", "BRAND",
paste("X",1:10, sep=""))
drinks <- drinks[-nrow(drinks),]
library(psych)
fa_R_pc2 <- principal(drinks[,3:12],
nfactors = 4,
rotate = 'varimax',
covar = F)
fa_R_pc2[["values"]] %>%
cbind(X=1:10) %>% as.data.frame() %>%
ggplot()+
geom_point(aes(x=X, y=.), size=2) +
scale_x_continuous(breaks = 1:10)+
labs(x="i", y="eigenvalue",
title="Scree Plot")
By the unity criterion we use 4 factors1.
source("fa_functions.R")
fa_R_pc2[["loadings"]][,] %>% round(3) %>%
format_loading_matrix(criterion = 0.5, markdown = T) %>%
kable("markdown",align = 'c', escape = F)
RC4 | RC1 | RC2 | RC3 | |
---|---|---|---|---|
X1 | 0.131 | -0.215 | 0.905 | -0.044 |
X2 | 0.953 | 0.212 | 0.05 | -0.023 |
X3 | 0.293 | 0.899 | -0.165 | -0.043 |
X4 | 0.027 | -0.091 | 0.955 | 0.054 |
X5 | 0.933 | 0.261 | 0.111 | -0.076 |
X6 | -0.083 | -0.043 | 0.041 | 0.994 |
X7 | 0.228 | 0.918 | -0.23 | -0.028 |
X8 | 0.057 | -0.224 | 0.935 | 0.051 |
X9 | 0.935 | 0.276 | 0.073 | -0.048 |
X10 | 0.273 | 0.896 | -0.199 | -0.016 |
br1 <- which(drinks$BRAND == "1") #index for brand 1
br2 <- which(drinks$BRAND == "2")
br3 <- which(drinks$BRAND == "3")
br4 <- which(drinks$BRAND == "4")
br5 <- which(drinks$BRAND == "5")
br6 <- which(drinks$BRAND == "6")
fs1 <- fa_R_pc2[["scores"]][br1,] %>% colMeans() %>% t() %>%
as.data.frame()
fs2 <- fa_R_pc2[["scores"]][br2,] %>% colMeans() %>% t() %>%
as.data.frame()
fs3 <- fa_R_pc2[["scores"]][br3,] %>% colMeans() %>% t() %>%
as.data.frame()
fs4 <- fa_R_pc2[["scores"]][br4,] %>% colMeans() %>% t() %>%
as.data.frame()
fs5 <- fa_R_pc2[["scores"]][br5,] %>% colMeans() %>% t() %>%
as.data.frame()
fs6 <- fa_R_pc2[["scores"]][br6,] %>% colMeans() %>% t() %>%
as.data.frame()
avg_fs <- rbind(fs1, fs2, fs3, fs4, fs5, fs6) %>%
as.data.frame()
rownames(avg_fs) <- c("Pepsi", "Coke", "Gatorade",
"Allsport", "Lipton tea", "Nestea")
kable(avg_fs, "markdown", align = 'c',
digits = 3,
col.names= c("F1", "F2", "F3", "F4"),
row.names = T,
caption = "Average Factor Scores of each brand")
F1 | F2 | F3 | F4 | |
---|---|---|---|---|
Pepsi | -0.629 | -1.008 | -0.107 | 0.354 |
Coke | -1.215 | -0.403 | 0.194 | -0.177 |
Gatorade | 0.420 | 0.928 | -0.980 | -0.077 |
Allsport | -0.177 | 0.666 | -1.074 | -0.181 |
Lipton tea | 0.885 | 0.085 | 0.613 | 0.031 |
Nestea | 0.615 | 0.122 | 0.821 | -0.044 |
avg_fs <- cbind(avg_fs, rownames(avg_fs))
colnames(avg_fs) <- c("F1", "F2", "F3", "F4", "Brand")
### F1, F2
ggplot(avg_fs[,c(1,2, 5)],aes(x=F1, y=F2, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
### F1, F3
ggplot(avg_fs[,c(1,3, 5)],aes(x=F1, y=F3, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
### F1, F4
ggplot(avg_fs[,c(1,4, 5)],aes(x=F1, y=F4, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
### F2, F3
ggplot(avg_fs[,c(2,3, 5)], aes(x=F2, y=F3, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
### F2, F4
ggplot(avg_fs[,c(2,4, 5)],aes(x=F2, y=F4, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
### F3, F4
ggplot(avg_fs[,c(3,4, 5)], aes(x=F3, y=F4, color=Brand)) +
geom_point() +
geom_text(aes(label=Brand),
hjust=0.5, vjust=-0.4, size = 3) +
theme(legend.position = 'none')
Although factor 4 is less than 1, it’s eigenvalue is very close to factor 3, hence we retain factor 4.↩