knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)

# ==================================
# N736 Lesson 24 - Computing Reliability
# and Factor Analysis
#
# dated 11/20/2017
# Melinda Higgins, PhD
# 
# ==================================

# ==================================
# we're be working with the 
# helpmkh dataset
# ==================================

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(haven)

helpdat <- haven::read_spss("helpmkh.sav")

h1 <- helpdat %>%
  select(id,f1a,f1b,f1c,f1d,f1e,f1f,f1g,
         f1h,f1i,f1j,f1k,f1l,f1m,f1n,f1o,
         f1p,f1q,f1r,f1s,f1t,cesd)

# run a summary on all 20 items - check amounts of missing
summary(h1)
##        id             f1a             f1b             f1c       
##  Min.   :  1.0   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:119.0   1st Qu.:1.000   1st Qu.:0.000   1st Qu.:1.000  
##  Median :233.0   Median :2.000   Median :1.000   Median :2.000  
##  Mean   :233.4   Mean   :1.634   Mean   :1.391   Mean   :1.923  
##  3rd Qu.:348.0   3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.000  
##  Max.   :470.0   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##                                                                 
##       f1d             f1e             f1f             f1g      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.00  
##  Median :1.000   Median :2.000   Median :2.000   Median :2.00  
##  Mean   :1.565   Mean   :1.695   Mean   :2.018   Mean   :1.73  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.00  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.00  
##                                                  NA's   :1     
##       f1h             f1i             f1j             f1k       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:0.000   1st Qu.:1.000  
##  Median :1.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.481   Mean   :1.883   Mean   :1.525   Mean   :2.035  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##                                  NA's   :2                      
##       f1l             f1m             f1n             f1o       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.000  
##  Median :1.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :1.084   Mean   :1.507   Mean   :2.009   Mean   :1.219  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##                  NA's   :1       NA's   :1                      
##       f1p             f1q             f1r             f1s       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:1.000   1st Qu.:0.000  
##  Median :1.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :1.338   Mean   :1.073   Mean   :1.945   Mean   :1.225  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##  NA's   :1       NA's   :1                                      
##       f1t            cesd      
##  Min.   :0.00   Min.   : 1.00  
##  1st Qu.:1.00   1st Qu.:25.00  
##  Median :1.00   Median :34.00  
##  Mean   :1.53   Mean   :32.85  
##  3rd Qu.:3.00   3rd Qu.:41.00  
##  Max.   :3.00   Max.   :60.00  
## 
# let's go ahead and reverse code the 4 items
h2 <- h1 %>%
  mutate(f1dr = ifelse(f1d==0,3,
                       ifelse(f1d==1,2,
                              ifelse(f1d==2,1,0))),
         f1hr = ifelse(f1h==0,3,
                       ifelse(f1h==1,2,
                              ifelse(f1h==2,1,0))),
         f1lr = ifelse(f1l==0,3,
                       ifelse(f1l==1,2,
                              ifelse(f1l==2,1,0))),
         f1pr = ifelse(f1p==0,3,
                       ifelse(f1p==1,2,
                              ifelse(f1p==2,1,0))))

# add the useNA="always" to check
# that missing data was handled correctly
table(h2$f1d, useNA="always")
## 
##    0    1    2    3 <NA> 
##  116  115   72  150    0
table(h2$f1dr, useNA="always")
## 
##    0    1    2    3 <NA> 
##  150   72  115  116    0
table(h2$f1h, useNA="always")
## 
##    0    1    2    3 <NA> 
##  124  108  100  121    0
table(h2$f1hr, useNA="always")
## 
##    0    1    2    3 <NA> 
##  121  100  108  124    0
table(h2$f1l, useNA="always")
## 
##    0    1    2    3 <NA> 
##  159  151   89   54    0
table(h2$f1lr, useNA="always")
## 
##    0    1    2    3 <NA> 
##   54   89  151  159    0
table(h2$f1p, useNA="always")
## 
##    0    1    2    3 <NA> 
##  137  129   82  104    1
table(h2$f1pr, useNA="always")
## 
##    0    1    2    3 <NA> 
##  104   82  129  137    1
# select the 20 cesd items
# with the 4 reverse coded ones
h2.cesd20 <- h2 %>%
  select(f1a,f1b,f1c,f1dr,f1e,f1f,f1g,
         f1hr,f1i,f1j,f1k,f1lr,f1m,f1n,f1o,
         f1pr,f1q,f1r,f1s,f1t)

# compute the number missing by row (id)
nmiss_cesd <- rowSums(is.na(h2.cesd20))
table(nmiss_cesd)
## nmiss_cesd
##   0   1 
## 446   7
# compute the sum with mean substitution
sum_cesd <- rowMeans(h2.cesd20, na.rm=TRUE)*20

# compute the sum without mean substitution
sum2_cesd <- rowSums(h2.cesd20, na.rm=TRUE)

h3 <- data.frame(h2,nmiss_cesd,sum_cesd,sum2_cesd)

# compare stats
h3 %>%
  select(nmiss_cesd, cesd, sum_cesd, sum2_cesd) %>%
  summary()
##    nmiss_cesd           cesd          sum_cesd       sum2_cesd    
##  Min.   :0.00000   Min.   : 1.00   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:0.00000   1st Qu.:25.00   1st Qu.:25.00   1st Qu.:25.00  
##  Median :0.00000   Median :34.00   Median :34.00   Median :34.00  
##  Mean   :0.01545   Mean   :32.85   Mean   :32.87   Mean   :32.85  
##  3rd Qu.:0.00000   3rd Qu.:41.00   3rd Qu.:41.00   3rd Qu.:41.00  
##  Max.   :1.00000   Max.   :60.00   Max.   :60.00   Max.   :60.00
# compute the correlation matrix
# using the reverse coded items
# remove missing values
cmtx <- cor(h2.cesd20, use="complete.obs",
            method="pearson")
         
# load the psych package
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# compute alpha
a1 <- psych::alpha(cmtx)
a1
## 
## Reliability analysis   
## Call: psych::alpha(x = cmtx)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N
##       0.89      0.89     0.9      0.29   8
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N
## f1a       0.88      0.88    0.90      0.29 7.6
## f1b       0.88      0.88    0.90      0.29 7.7
## f1c       0.88      0.88    0.89      0.28 7.2
## f1dr      0.89      0.89    0.90      0.29 7.7
## f1e       0.88      0.88    0.90      0.29 7.6
## f1f       0.88      0.88    0.89      0.27 7.2
## f1g       0.89      0.89    0.90      0.29 7.9
## f1hr      0.89      0.89    0.90      0.29 7.9
## f1i       0.88      0.88    0.89      0.28 7.4
## f1j       0.88      0.88    0.89      0.28 7.4
## f1k       0.88      0.88    0.90      0.29 7.7
## f1lr      0.88      0.88    0.90      0.29 7.6
## f1m       0.89      0.89    0.90      0.30 8.1
## f1n       0.88      0.88    0.89      0.28 7.4
## f1o       0.89      0.89    0.90      0.29 7.8
## f1pr      0.89      0.89    0.90      0.29 7.7
## f1q       0.88      0.88    0.90      0.29 7.7
## f1r       0.88      0.88    0.89      0.27 7.2
## f1s       0.88      0.88    0.89      0.28 7.5
## f1t       0.88      0.88    0.89      0.28 7.3
## 
##  Item statistics 
##         r r.cor r.drop
## f1a  0.54  0.50   0.47
## f1b  0.53  0.49   0.46
## f1c  0.71  0.70   0.67
## f1dr 0.52  0.48   0.45
## f1e  0.55  0.52   0.48
## f1f  0.73  0.73   0.69
## f1g  0.42  0.37   0.35
## f1hr 0.45  0.41   0.37
## f1i  0.64  0.62   0.58
## f1j  0.62  0.60   0.56
## f1k  0.54  0.50   0.47
## f1lr 0.54  0.52   0.48
## f1m  0.38  0.32   0.30
## f1n  0.63  0.61   0.57
## f1o  0.49  0.45   0.42
## f1pr 0.50  0.47   0.43
## f1q  0.52  0.49   0.46
## f1r  0.72  0.72   0.68
## f1s  0.59  0.57   0.53
## f1t  0.70  0.68   0.65
# use on complete cases
# 7 subjects with missing data are removed here
pc1 <- princomp(na.omit(h2.cesd20), cor=TRUE)
pc1
## Call:
## princomp(x = na.omit(h2.cesd20), cor = TRUE)
## 
## Standard deviations:
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6    Comp.7 
## 2.5758718 1.2949887 1.0848300 1.0048097 0.9757006 0.9411651 0.9247039 
##    Comp.8    Comp.9   Comp.10   Comp.11   Comp.12   Comp.13   Comp.14 
## 0.8701675 0.8581877 0.8206986 0.7814483 0.7535941 0.7391315 0.7027879 
##   Comp.15   Comp.16   Comp.17   Comp.18   Comp.19   Comp.20 
## 0.6856643 0.6558814 0.6485918 0.6238266 0.6138129 0.5793109 
## 
##  20  variables and  446 observations.
summary(pc1)
## Importance of components:
##                           Comp.1     Comp.2     Comp.3     Comp.4
## Standard deviation     2.5758718 1.29498867 1.08483003 1.00480975
## Proportion of Variance 0.3317558 0.08384978 0.05884281 0.05048213
## Cumulative Proportion  0.3317558 0.41560555 0.47444836 0.52493049
##                            Comp.5     Comp.6     Comp.7     Comp.8
## Standard deviation     0.97570055 0.94116512 0.92470386 0.87016746
## Proportion of Variance 0.04759958 0.04428959 0.04275386 0.03785957
## Cumulative Proportion  0.57253007 0.61681966 0.65957352 0.69743309
##                           Comp.9    Comp.10    Comp.11   Comp.12
## Standard deviation     0.8581877 0.82069858 0.78144829 0.7535941
## Proportion of Variance 0.0368243 0.03367731 0.03053307 0.0283952
## Cumulative Proportion  0.7342574 0.76793470 0.79846777 0.8268630
##                           Comp.13    Comp.14    Comp.15    Comp.16
## Standard deviation     0.73913154 0.70278786 0.68566431 0.65588145
## Proportion of Variance 0.02731577 0.02469554 0.02350678 0.02150902
## Cumulative Proportion  0.85417875 0.87887428 0.90238106 0.92389009
##                           Comp.17    Comp.18    Comp.19    Comp.20
## Standard deviation     0.64859180 0.62382660 0.61381286 0.57931089
## Proportion of Variance 0.02103357 0.01945798 0.01883831 0.01678006
## Cumulative Proportion  0.94492365 0.96438163 0.98321994 1.00000000
loadings(pc1)
## 
## Loadings:
##      Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## f1a  -0.208        -0.273  0.239        -0.235         0.464  0.469
## f1b  -0.200        -0.379  0.267  0.126 -0.182  0.406              
## f1c  -0.283                                            0.155  0.115
## f1dr -0.198 -0.308               -0.138 -0.209 -0.526  0.122       
## f1e  -0.212  0.165 -0.267  0.168 -0.104        -0.128        -0.669
## f1f  -0.291               -0.195 -0.121  0.197 -0.135              
## f1g  -0.159  0.293 -0.239        -0.255  0.566 -0.202 -0.282  0.210
## f1hr -0.165 -0.483         0.271        -0.114 -0.102 -0.180 -0.196
## f1i  -0.253         0.161 -0.186         0.136         0.502 -0.148
## f1j  -0.247         0.201 -0.329                0.201  0.299 -0.293
## f1k  -0.207  0.154 -0.358               -0.188  0.270 -0.109 -0.112
## f1lr -0.209 -0.391 -0.112 -0.105         0.261  0.166              
## f1m  -0.136  0.169         0.141  0.782  0.114 -0.371        -0.154
## f1n  -0.247               -0.332  0.407         0.122         0.203
## f1o  -0.187  0.211  0.427  0.394         0.144  0.254              
## f1pr -0.190 -0.482                       0.148  0.236 -0.215       
## f1q  -0.205  0.182  0.121 -0.219 -0.200 -0.536        -0.363       
## f1r  -0.288               -0.290        -0.126 -0.115 -0.272  0.159
## f1s  -0.230  0.128  0.457  0.274                      -0.120  0.108
## f1t  -0.273                0.261 -0.170                            
##      Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## f1a   0.256  -0.356   0.132  -0.116           0.277          -0.121 
## f1b           0.596   0.138           0.171  -0.113   0.137         
## f1c   0.108          -0.525   0.302   0.209          -0.307   0.191 
## f1dr -0.346   0.255           0.317   0.113  -0.151          -0.177 
## f1e   0.327          -0.275  -0.193           0.132          -0.128 
## f1f          -0.122  -0.140                           0.616   0.270 
## f1g  -0.177           0.330           0.154   0.189  -0.185  -0.171 
## f1hr -0.183  -0.205   0.207  -0.375   0.285          -0.110         
## f1i           0.176   0.332  -0.272   0.149  -0.151  -0.115   0.449 
## f1j  -0.159           0.168   0.250           0.345   0.115  -0.467 
## f1k  -0.608  -0.387           0.140  -0.156                   0.211 
## f1lr  0.322  -0.205   0.182   0.302  -0.259  -0.245  -0.355         
## f1m                   0.248   0.210                           0.120 
## f1n           0.113  -0.242  -0.392          -0.147  -0.205  -0.402 
## f1o          -0.314           0.137   0.379  -0.372   0.142  -0.205 
## f1pr  0.103                   0.148           0.415   0.297         
## f1q   0.309           0.324   0.180                  -0.109         
## f1r                          -0.240                   0.177         
## f1s  -0.102   0.130  -0.156          -0.165   0.432  -0.293   0.282 
## f1t           0.143          -0.154  -0.712  -0.312   0.106  -0.154 
##      Comp.18 Comp.19 Comp.20
## f1a                         
## f1b          -0.256         
## f1c   0.543                 
## f1dr -0.364                 
## f1e  -0.266   0.117         
## f1f  -0.124  -0.329   0.415 
## f1g                         
## f1hr  0.381  -0.249   0.101 
## f1i           0.277         
## f1j   0.190  -0.243         
## f1k  -0.102   0.184         
## f1lr -0.263  -0.281         
## f1m                         
## f1n  -0.158   0.126   0.322 
## f1o           0.117         
## f1pr          0.533         
## f1q           0.210   0.293 
## f1r          -0.106  -0.761 
## f1s  -0.248  -0.315   0.108 
## f1t   0.320   0.133         
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings      1.00   1.00   1.00   1.00   1.00   1.00   1.00   1.00
## Proportion Var   0.05   0.05   0.05   0.05   0.05   0.05   0.05   0.05
## Cumulative Var   0.05   0.10   0.15   0.20   0.25   0.30   0.35   0.40
##                Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## SS loadings      1.00    1.00    1.00    1.00    1.00    1.00    1.00
## Proportion Var   0.05    0.05    0.05    0.05    0.05    0.05    0.05
## Cumulative Var   0.45    0.50    0.55    0.60    0.65    0.70    0.75
##                Comp.16 Comp.17 Comp.18 Comp.19 Comp.20
## SS loadings       1.00    1.00    1.00    1.00    1.00
## Proportion Var    0.05    0.05    0.05    0.05    0.05
## Cumulative Var    0.80    0.85    0.90    0.95    1.00
# scree plot of eigenvalues/variances
plot(pc1,type="lines")

# plot of scores with items overlaid (red arrows)
# plot of factors 1 and 2
biplot(pc1)

# make a loadings plot
plot(pc1$loadings[,1], pc1$loadings[,2],
     xlim=c(-0.5,0.5), ylim=c(-0.5,0.5),
     main="Loadings Plot of Factors 1 and 2",
     xlab="Factor 1", ylab="Factor 2",
     pch=19)
lines(c(-0.5,0.5),c(0,0),col="red")
lines(c(0,0),c(-0.5,0.5),col="red")
text(pc1$loadings[,1], pc1$loadings[,2],
     labels=names(h2.cesd20),
     pos=4, offset=0.5)