knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
# ==================================
# N736 Lesson 24 - Computing Reliability
# and Factor Analysis
#
# dated 11/20/2017
# Melinda Higgins, PhD
#
# ==================================
# ==================================
# we're be working with the
# helpmkh dataset
# ==================================
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(haven)
helpdat <- haven::read_spss("helpmkh.sav")
h1 <- helpdat %>%
select(id,f1a,f1b,f1c,f1d,f1e,f1f,f1g,
f1h,f1i,f1j,f1k,f1l,f1m,f1n,f1o,
f1p,f1q,f1r,f1s,f1t,cesd)
# run a summary on all 20 items - check amounts of missing
summary(h1)
## id f1a f1b f1c
## Min. : 1.0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:119.0 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000
## Median :233.0 Median :2.000 Median :1.000 Median :2.000
## Mean :233.4 Mean :1.634 Mean :1.391 Mean :1.923
## 3rd Qu.:348.0 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :470.0 Max. :3.000 Max. :3.000 Max. :3.000
##
## f1d f1e f1f f1g
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.00
## Median :1.000 Median :2.000 Median :2.000 Median :2.00
## Mean :1.565 Mean :1.695 Mean :2.018 Mean :1.73
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.00
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.00
## NA's :1
## f1h f1i f1j f1k
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.481 Mean :1.883 Mean :1.525 Mean :2.035
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.000
## NA's :2
## f1l f1m f1n f1o
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.000
## Median :1.000 Median :1.000 Median :2.000 Median :1.000
## Mean :1.084 Mean :1.507 Mean :2.009 Mean :1.219
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.000
## NA's :1 NA's :1
## f1p f1q f1r f1s
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000
## Median :1.000 Median :1.000 Median :2.000 Median :1.000
## Mean :1.338 Mean :1.073 Mean :1.945 Mean :1.225
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.000
## NA's :1 NA's :1
## f1t cesd
## Min. :0.00 Min. : 1.00
## 1st Qu.:1.00 1st Qu.:25.00
## Median :1.00 Median :34.00
## Mean :1.53 Mean :32.85
## 3rd Qu.:3.00 3rd Qu.:41.00
## Max. :3.00 Max. :60.00
##
# let's go ahead and reverse code the 4 items
h2 <- h1 %>%
mutate(f1dr = ifelse(f1d==0,3,
ifelse(f1d==1,2,
ifelse(f1d==2,1,0))),
f1hr = ifelse(f1h==0,3,
ifelse(f1h==1,2,
ifelse(f1h==2,1,0))),
f1lr = ifelse(f1l==0,3,
ifelse(f1l==1,2,
ifelse(f1l==2,1,0))),
f1pr = ifelse(f1p==0,3,
ifelse(f1p==1,2,
ifelse(f1p==2,1,0))))
# add the useNA="always" to check
# that missing data was handled correctly
table(h2$f1d, useNA="always")
##
## 0 1 2 3 <NA>
## 116 115 72 150 0
table(h2$f1dr, useNA="always")
##
## 0 1 2 3 <NA>
## 150 72 115 116 0
table(h2$f1h, useNA="always")
##
## 0 1 2 3 <NA>
## 124 108 100 121 0
table(h2$f1hr, useNA="always")
##
## 0 1 2 3 <NA>
## 121 100 108 124 0
table(h2$f1l, useNA="always")
##
## 0 1 2 3 <NA>
## 159 151 89 54 0
table(h2$f1lr, useNA="always")
##
## 0 1 2 3 <NA>
## 54 89 151 159 0
table(h2$f1p, useNA="always")
##
## 0 1 2 3 <NA>
## 137 129 82 104 1
table(h2$f1pr, useNA="always")
##
## 0 1 2 3 <NA>
## 104 82 129 137 1
# select the 20 cesd items
# with the 4 reverse coded ones
h2.cesd20 <- h2 %>%
select(f1a,f1b,f1c,f1dr,f1e,f1f,f1g,
f1hr,f1i,f1j,f1k,f1lr,f1m,f1n,f1o,
f1pr,f1q,f1r,f1s,f1t)
# compute the number missing by row (id)
nmiss_cesd <- rowSums(is.na(h2.cesd20))
table(nmiss_cesd)
## nmiss_cesd
## 0 1
## 446 7
# compute the sum with mean substitution
sum_cesd <- rowMeans(h2.cesd20, na.rm=TRUE)*20
# compute the sum without mean substitution
sum2_cesd <- rowSums(h2.cesd20, na.rm=TRUE)
h3 <- data.frame(h2,nmiss_cesd,sum_cesd,sum2_cesd)
# compare stats
h3 %>%
select(nmiss_cesd, cesd, sum_cesd, sum2_cesd) %>%
summary()
## nmiss_cesd cesd sum_cesd sum2_cesd
## Min. :0.00000 Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.:0.00000 1st Qu.:25.00 1st Qu.:25.00 1st Qu.:25.00
## Median :0.00000 Median :34.00 Median :34.00 Median :34.00
## Mean :0.01545 Mean :32.85 Mean :32.87 Mean :32.85
## 3rd Qu.:0.00000 3rd Qu.:41.00 3rd Qu.:41.00 3rd Qu.:41.00
## Max. :1.00000 Max. :60.00 Max. :60.00 Max. :60.00
# compute the correlation matrix
# using the reverse coded items
# remove missing values
cmtx <- cor(h2.cesd20, use="complete.obs",
method="pearson")
# load the psych package
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# compute alpha
a1 <- psych::alpha(cmtx)
a1
##
## Reliability analysis
## Call: psych::alpha(x = cmtx)
##
## raw_alpha std.alpha G6(smc) average_r S/N
## 0.89 0.89 0.9 0.29 8
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## f1a 0.88 0.88 0.90 0.29 7.6
## f1b 0.88 0.88 0.90 0.29 7.7
## f1c 0.88 0.88 0.89 0.28 7.2
## f1dr 0.89 0.89 0.90 0.29 7.7
## f1e 0.88 0.88 0.90 0.29 7.6
## f1f 0.88 0.88 0.89 0.27 7.2
## f1g 0.89 0.89 0.90 0.29 7.9
## f1hr 0.89 0.89 0.90 0.29 7.9
## f1i 0.88 0.88 0.89 0.28 7.4
## f1j 0.88 0.88 0.89 0.28 7.4
## f1k 0.88 0.88 0.90 0.29 7.7
## f1lr 0.88 0.88 0.90 0.29 7.6
## f1m 0.89 0.89 0.90 0.30 8.1
## f1n 0.88 0.88 0.89 0.28 7.4
## f1o 0.89 0.89 0.90 0.29 7.8
## f1pr 0.89 0.89 0.90 0.29 7.7
## f1q 0.88 0.88 0.90 0.29 7.7
## f1r 0.88 0.88 0.89 0.27 7.2
## f1s 0.88 0.88 0.89 0.28 7.5
## f1t 0.88 0.88 0.89 0.28 7.3
##
## Item statistics
## r r.cor r.drop
## f1a 0.54 0.50 0.47
## f1b 0.53 0.49 0.46
## f1c 0.71 0.70 0.67
## f1dr 0.52 0.48 0.45
## f1e 0.55 0.52 0.48
## f1f 0.73 0.73 0.69
## f1g 0.42 0.37 0.35
## f1hr 0.45 0.41 0.37
## f1i 0.64 0.62 0.58
## f1j 0.62 0.60 0.56
## f1k 0.54 0.50 0.47
## f1lr 0.54 0.52 0.48
## f1m 0.38 0.32 0.30
## f1n 0.63 0.61 0.57
## f1o 0.49 0.45 0.42
## f1pr 0.50 0.47 0.43
## f1q 0.52 0.49 0.46
## f1r 0.72 0.72 0.68
## f1s 0.59 0.57 0.53
## f1t 0.70 0.68 0.65
# use on complete cases
# 7 subjects with missing data are removed here
pc1 <- princomp(na.omit(h2.cesd20), cor=TRUE)
pc1
## Call:
## princomp(x = na.omit(h2.cesd20), cor = TRUE)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## 2.5758718 1.2949887 1.0848300 1.0048097 0.9757006 0.9411651 0.9247039
## Comp.8 Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14
## 0.8701675 0.8581877 0.8206986 0.7814483 0.7535941 0.7391315 0.7027879
## Comp.15 Comp.16 Comp.17 Comp.18 Comp.19 Comp.20
## 0.6856643 0.6558814 0.6485918 0.6238266 0.6138129 0.5793109
##
## 20 variables and 446 observations.
summary(pc1)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 2.5758718 1.29498867 1.08483003 1.00480975
## Proportion of Variance 0.3317558 0.08384978 0.05884281 0.05048213
## Cumulative Proportion 0.3317558 0.41560555 0.47444836 0.52493049
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.97570055 0.94116512 0.92470386 0.87016746
## Proportion of Variance 0.04759958 0.04428959 0.04275386 0.03785957
## Cumulative Proportion 0.57253007 0.61681966 0.65957352 0.69743309
## Comp.9 Comp.10 Comp.11 Comp.12
## Standard deviation 0.8581877 0.82069858 0.78144829 0.7535941
## Proportion of Variance 0.0368243 0.03367731 0.03053307 0.0283952
## Cumulative Proportion 0.7342574 0.76793470 0.79846777 0.8268630
## Comp.13 Comp.14 Comp.15 Comp.16
## Standard deviation 0.73913154 0.70278786 0.68566431 0.65588145
## Proportion of Variance 0.02731577 0.02469554 0.02350678 0.02150902
## Cumulative Proportion 0.85417875 0.87887428 0.90238106 0.92389009
## Comp.17 Comp.18 Comp.19 Comp.20
## Standard deviation 0.64859180 0.62382660 0.61381286 0.57931089
## Proportion of Variance 0.02103357 0.01945798 0.01883831 0.01678006
## Cumulative Proportion 0.94492365 0.96438163 0.98321994 1.00000000
loadings(pc1)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## f1a -0.208 -0.273 0.239 -0.235 0.464 0.469
## f1b -0.200 -0.379 0.267 0.126 -0.182 0.406
## f1c -0.283 0.155 0.115
## f1dr -0.198 -0.308 -0.138 -0.209 -0.526 0.122
## f1e -0.212 0.165 -0.267 0.168 -0.104 -0.128 -0.669
## f1f -0.291 -0.195 -0.121 0.197 -0.135
## f1g -0.159 0.293 -0.239 -0.255 0.566 -0.202 -0.282 0.210
## f1hr -0.165 -0.483 0.271 -0.114 -0.102 -0.180 -0.196
## f1i -0.253 0.161 -0.186 0.136 0.502 -0.148
## f1j -0.247 0.201 -0.329 0.201 0.299 -0.293
## f1k -0.207 0.154 -0.358 -0.188 0.270 -0.109 -0.112
## f1lr -0.209 -0.391 -0.112 -0.105 0.261 0.166
## f1m -0.136 0.169 0.141 0.782 0.114 -0.371 -0.154
## f1n -0.247 -0.332 0.407 0.122 0.203
## f1o -0.187 0.211 0.427 0.394 0.144 0.254
## f1pr -0.190 -0.482 0.148 0.236 -0.215
## f1q -0.205 0.182 0.121 -0.219 -0.200 -0.536 -0.363
## f1r -0.288 -0.290 -0.126 -0.115 -0.272 0.159
## f1s -0.230 0.128 0.457 0.274 -0.120 0.108
## f1t -0.273 0.261 -0.170
## Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## f1a 0.256 -0.356 0.132 -0.116 0.277 -0.121
## f1b 0.596 0.138 0.171 -0.113 0.137
## f1c 0.108 -0.525 0.302 0.209 -0.307 0.191
## f1dr -0.346 0.255 0.317 0.113 -0.151 -0.177
## f1e 0.327 -0.275 -0.193 0.132 -0.128
## f1f -0.122 -0.140 0.616 0.270
## f1g -0.177 0.330 0.154 0.189 -0.185 -0.171
## f1hr -0.183 -0.205 0.207 -0.375 0.285 -0.110
## f1i 0.176 0.332 -0.272 0.149 -0.151 -0.115 0.449
## f1j -0.159 0.168 0.250 0.345 0.115 -0.467
## f1k -0.608 -0.387 0.140 -0.156 0.211
## f1lr 0.322 -0.205 0.182 0.302 -0.259 -0.245 -0.355
## f1m 0.248 0.210 0.120
## f1n 0.113 -0.242 -0.392 -0.147 -0.205 -0.402
## f1o -0.314 0.137 0.379 -0.372 0.142 -0.205
## f1pr 0.103 0.148 0.415 0.297
## f1q 0.309 0.324 0.180 -0.109
## f1r -0.240 0.177
## f1s -0.102 0.130 -0.156 -0.165 0.432 -0.293 0.282
## f1t 0.143 -0.154 -0.712 -0.312 0.106 -0.154
## Comp.18 Comp.19 Comp.20
## f1a
## f1b -0.256
## f1c 0.543
## f1dr -0.364
## f1e -0.266 0.117
## f1f -0.124 -0.329 0.415
## f1g
## f1hr 0.381 -0.249 0.101
## f1i 0.277
## f1j 0.190 -0.243
## f1k -0.102 0.184
## f1lr -0.263 -0.281
## f1m
## f1n -0.158 0.126 0.322
## f1o 0.117
## f1pr 0.533
## f1q 0.210 0.293
## f1r -0.106 -0.761
## f1s -0.248 -0.315 0.108
## f1t 0.320 0.133
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## Proportion Var 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Cumulative Var 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## SS loadings 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## Proportion Var 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Cumulative Var 0.45 0.50 0.55 0.60 0.65 0.70 0.75
## Comp.16 Comp.17 Comp.18 Comp.19 Comp.20
## SS loadings 1.00 1.00 1.00 1.00 1.00
## Proportion Var 0.05 0.05 0.05 0.05 0.05
## Cumulative Var 0.80 0.85 0.90 0.95 1.00
# scree plot of eigenvalues/variances
plot(pc1,type="lines")
# plot of scores with items overlaid (red arrows)
# plot of factors 1 and 2
biplot(pc1)
# make a loadings plot
plot(pc1$loadings[,1], pc1$loadings[,2],
xlim=c(-0.5,0.5), ylim=c(-0.5,0.5),
main="Loadings Plot of Factors 1 and 2",
xlab="Factor 1", ylab="Factor 2",
pch=19)
lines(c(-0.5,0.5),c(0,0),col="red")
lines(c(0,0),c(-0.5,0.5),col="red")
text(pc1$loadings[,1], pc1$loadings[,2],
labels=names(h2.cesd20),
pos=4, offset=0.5)