Load data and create subset

Using tidyverse and haven load the dataset and create a new subset for this lesson

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(haven)

helpdat <- haven::read_spss("helpmkh.sav")

h1 <- helpdat %>%
  select(id, pcs, pcs1, pcs2, pcs3, pcs4)

Running RM-ANOVA in R

There are a few helpful websites with information and examples on running repeated meausures analysis of variance (RM-ANOVA) in R. See these 2 websites:

The code I show below is based on these 2 examples - these use the car package.

# extract only the 5 pcs columns 
# for times 1-5
# and convert it to matrix format which is needed for lm()
h2 <- h1[,2:6]
h2 <- as.matrix(h2)

# use lm to get a model of 5 pcs columns
m1 <- lm(h2 ~ 1)
m1
## 
## Call:
## lm(formula = h2 ~ 1)
## 
## Coefficients:
##              pcs    pcs1   pcs2   pcs3   pcs4 
## (Intercept)  47.79  48.78  49.92  50.32  49.43
# create a time factor for the design
tfactor <- factor(c("t0","t1","t2","t3","t4"))

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
m1.aov <- car::Anova(m1, 
                idata = data.frame(tfactor),
                idesign = ~tfactor, 
                type="III")

#summary(m1.aov, multivariate=FALSE)

# print complete results of the RM-ANOVA
summary(m1.aov, multivariate=TRUE)
## 
## Type III Repeated Measures MANOVA Tests:
## 
## ------------------------------------------
##  
## Term: (Intercept) 
## 
##  Response transformation matrix:
##      (Intercept)
## pcs            1
## pcs1           1
## pcs2           1
## pcs3           1
## pcs4           1
## 
## Sum of squares and products for the hypothesis:
##             (Intercept)
## (Intercept)     5942384
## 
## Multivariate Tests: (Intercept)
##                  Df test stat approx F num Df den Df     Pr(>F)    
## Pillai            1   0.96968 3102.153      1     97 < 2.22e-16 ***
## Wilks             1   0.03032 3102.153      1     97 < 2.22e-16 ***
## Hotelling-Lawley  1  31.98095 3102.153      1     97 < 2.22e-16 ***
## Roy               1  31.98095 3102.153      1     97 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: tfactor 
## 
##  Response transformation matrix:
##      tfactor1 tfactor2 tfactor3 tfactor4
## pcs         1        0        0        0
## pcs1        0        1        0        0
## pcs2        0        0        1        0
## pcs3        0        0        0        1
## pcs4       -1       -1       -1       -1
## 
## Sum of squares and products for the hypothesis:
##           tfactor1  tfactor2  tfactor3   tfactor4
## tfactor1  262.5720 104.40586 -78.44330 -142.78632
## tfactor2  104.4059  41.51464 -31.19121  -56.77576
## tfactor3  -78.4433 -31.19121  23.43491   42.65736
## tfactor4 -142.7863 -56.77576  42.65736   77.64701
## 
## Multivariate Tests: tfactor
##                  Df test stat approx F num Df den Df  Pr(>F)
## Pillai            1 0.0684915 1.727897      4     94 0.15031
## Wilks             1 0.9315085 1.727897      4     94 0.15031
## Hotelling-Lawley  1 0.0735275 1.727897      4     94 0.15031
## Roy               1 0.0735275 1.727897      4     94 0.15031
## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                  SS num Df Error SS den Df         F  Pr(>F)    
## (Intercept) 1188477      1    37162     97 3102.1526 < 2e-16 ***
## tfactor         389      4    17944    388    2.1028 0.07981 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##         Test statistic  p-value
## tfactor        0.84572 0.067154
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##          GG eps Pr(>F[GG])  
## tfactor 0.93062    0.08495 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            HF eps Pr(>F[HF])
## tfactor 0.9723709 0.08181374

Plots of Means Over Time with Confidence Intervals

This next set of code comes from an example at the Cookbook for R website:

The code below creates a new function called summarySE for plotting the means +/- the SE (standard error)

# restructure into long format

h1long <- h1 %>%
  gather(key=item,
         value=value,
         -c(id))
## Warning: attributes are not identical across measure variables;
## they will be dropped
names(h1long) <- c("id","pcsitem","pcsvalue")

# add a time variable to long format
h1long <- h1long %>%
  mutate(time=c(rep(0,453),
                rep(1,453),
                rep(2,453),
                rep(3,453),
                rep(4,453)))

# from the cookbook for R
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}

h1long_nomiss <- na.omit(h1long)
table(h1long_nomiss$time)
## 
##   0   1   2   3   4 
## 453 246 211 248 266
h1se <- summarySE(h1long_nomiss, 
                  measurevar="pcsvalue", 
                  groupvars=c("time"))
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
ggplot(h1se, aes(x=time, y=pcsvalue)) + 
  geom_errorbar(aes(ymin=pcsvalue-se, ymax=pcsvalue+se), width=.1) +
  geom_line() +
  geom_point() +
  xlab("Time Points") +
  ylab("Physical Component Score (SF-36 PCS)") +
  ggtitle("PCS Means and CI's Over Time")