Using tidyverse
and haven
load the dataset and create a new subset for this lesson
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(haven)
helpdat <- haven::read_spss("helpmkh.sav")
h1 <- helpdat %>%
select(id, pcs, pcs1, pcs2, pcs3, pcs4)
There are a few helpful websites with information and examples on running repeated meausures analysis of variance (RM-ANOVA) in R. See these 2 websites:
The code I show below is based on these 2 examples - these use the car
package.
# extract only the 5 pcs columns
# for times 1-5
# and convert it to matrix format which is needed for lm()
h2 <- h1[,2:6]
h2 <- as.matrix(h2)
# use lm to get a model of 5 pcs columns
m1 <- lm(h2 ~ 1)
m1
##
## Call:
## lm(formula = h2 ~ 1)
##
## Coefficients:
## pcs pcs1 pcs2 pcs3 pcs4
## (Intercept) 47.79 48.78 49.92 50.32 49.43
# create a time factor for the design
tfactor <- factor(c("t0","t1","t2","t3","t4"))
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
m1.aov <- car::Anova(m1,
idata = data.frame(tfactor),
idesign = ~tfactor,
type="III")
#summary(m1.aov, multivariate=FALSE)
# print complete results of the RM-ANOVA
summary(m1.aov, multivariate=TRUE)
##
## Type III Repeated Measures MANOVA Tests:
##
## ------------------------------------------
##
## Term: (Intercept)
##
## Response transformation matrix:
## (Intercept)
## pcs 1
## pcs1 1
## pcs2 1
## pcs3 1
## pcs4 1
##
## Sum of squares and products for the hypothesis:
## (Intercept)
## (Intercept) 5942384
##
## Multivariate Tests: (Intercept)
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.96968 3102.153 1 97 < 2.22e-16 ***
## Wilks 1 0.03032 3102.153 1 97 < 2.22e-16 ***
## Hotelling-Lawley 1 31.98095 3102.153 1 97 < 2.22e-16 ***
## Roy 1 31.98095 3102.153 1 97 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: tfactor
##
## Response transformation matrix:
## tfactor1 tfactor2 tfactor3 tfactor4
## pcs 1 0 0 0
## pcs1 0 1 0 0
## pcs2 0 0 1 0
## pcs3 0 0 0 1
## pcs4 -1 -1 -1 -1
##
## Sum of squares and products for the hypothesis:
## tfactor1 tfactor2 tfactor3 tfactor4
## tfactor1 262.5720 104.40586 -78.44330 -142.78632
## tfactor2 104.4059 41.51464 -31.19121 -56.77576
## tfactor3 -78.4433 -31.19121 23.43491 42.65736
## tfactor4 -142.7863 -56.77576 42.65736 77.64701
##
## Multivariate Tests: tfactor
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0684915 1.727897 4 94 0.15031
## Wilks 1 0.9315085 1.727897 4 94 0.15031
## Hotelling-Lawley 1 0.0735275 1.727897 4 94 0.15031
## Roy 1 0.0735275 1.727897 4 94 0.15031
##
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
##
## SS num Df Error SS den Df F Pr(>F)
## (Intercept) 1188477 1 37162 97 3102.1526 < 2e-16 ***
## tfactor 389 4 17944 388 2.1028 0.07981 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Mauchly Tests for Sphericity
##
## Test statistic p-value
## tfactor 0.84572 0.067154
##
##
## Greenhouse-Geisser and Huynh-Feldt Corrections
## for Departure from Sphericity
##
## GG eps Pr(>F[GG])
## tfactor 0.93062 0.08495 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## HF eps Pr(>F[HF])
## tfactor 0.9723709 0.08181374
This next set of code comes from an example at the Cookbook for R website:
The code below creates a new function called summarySE
for plotting the means +/- the SE (standard error)
# restructure into long format
h1long <- h1 %>%
gather(key=item,
value=value,
-c(id))
## Warning: attributes are not identical across measure variables;
## they will be dropped
names(h1long) <- c("id","pcsitem","pcsvalue")
# add a time variable to long format
h1long <- h1long %>%
mutate(time=c(rep(0,453),
rep(1,453),
rep(2,453),
rep(3,453),
rep(4,453)))
# from the cookbook for R
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
## data: a data frame.
## measurevar: the name of a column that contains the variable to be summariezed
## groupvars: a vector containing names of columns that contain grouping variables
## na.rm: a boolean that indicates whether to ignore NA's
## conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
conf.interval=.95, .drop=TRUE) {
library(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm),
sd = sd (xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" column
datac <- rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval/2 + .5, datac$N-1)
datac$ci <- datac$se * ciMult
return(datac)
}
h1long_nomiss <- na.omit(h1long)
table(h1long_nomiss$time)
##
## 0 1 2 3 4
## 453 246 211 248 266
h1se <- summarySE(h1long_nomiss,
measurevar="pcsvalue",
groupvars=c("time"))
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
ggplot(h1se, aes(x=time, y=pcsvalue)) +
geom_errorbar(aes(ymin=pcsvalue-se, ymax=pcsvalue+se), width=.1) +
geom_line() +
geom_point() +
xlab("Time Points") +
ylab("Physical Component Score (SF-36 PCS)") +
ggtitle("PCS Means and CI's Over Time")