Hierarchical Data

hurr.herp <- read.csv("./Data/hurricane_herp_data.csv", header=TRUE, na.strings= "") %>% clean_names() %>% filter(!str_detect(sitio_estadios, "NA")) %>% filter(!str_detect(grupo, "TORTUGA"))
hurr.herp$grupo <- as.factor(hurr.herp$grupo)
levels(hurr.herp$grupo) <- c("amphibian","lizard","lizard","snake","turtle")
hurr.herp$sitio_estadios <- as.factor(hurr.herp$sitio_estadios)
levels(hurr.herp$sitio_estadios) <- c("A","B","C","D","E","F","G","H","I","J","K","L","M","N","O")
hurr.herp <- select(hurr.herp, sitio_estadios, pre_post, grupo, especie)
colnames(hurr.herp) <- c("site","pre_post","group","species")
hurr.herp[469, "species"] <- "Phrynohyas venulosa" # Fix a typo in the data
hurr.herp.group <- hurr.herp %>% count(site, pre_post, group)
hurr.herp.group$pre_post = factor(hurr.herp.group$pre_post, levels = c("Pre", "Post")) # Reorder factor to be pre then post
hurr.herp.snake <- hurr.herp %>% count(site, pre_post, group) %>% filter(str_detect(group, "snake"))
hurr.herp.snake$pre_post = factor(hurr.herp.snake$pre_post, levels = c("Pre", "Post"))
hurr.herp.amph <- hurr.herp %>% count(site, pre_post, group) %>% filter(str_detect(group, "amphibian"))
hurr.herp.amph$pre_post = factor(hurr.herp.amph$pre_post, levels = c("Pre", "Post"))
hurr.herp.liz <- hurr.herp %>% count(site, pre_post, group) %>% filter(str_detect(group, "lizard"))
hurr.herp.liz$pre_post = factor(hurr.herp.liz$pre_post, levels = c("Pre", "Post"))

getPalette = colorRampPalette(brewer.pal(9, "Oranges"))

ggplot(hurr.herp.group, aes(group, n, colour= site, shape= pre_post)) + geom_jitter(width =0.15, size=5) + xlab("Taxonomic Group") +
  ylab("Number of Individuals") + scale_color_manual(name= "Site", values= getPalette(15)) + scale_shape_discrete(name= "Time Relative\n to Hurricane") +
  ggtitle("Herpetofauna Counts Before and After a Hurricane")

ggplot(hurr.herp.snake, aes(x=pre_post, y=n)) + xlab("Time Relative to Hurricane") + ylab("Number of Individuals") +
  geom_line(aes(group=site), color= "chocolate2") +
  geom_point(size=3, color= "burlywood4") +
  ggtitle("Effect of a Hurricane on Snake Counts")

ggplot(hurr.herp.liz, aes(x=pre_post, y=n)) + xlab("Time Relative to Hurricane") + ylab("Number of Individuals") +
  geom_line(aes(group=site), color= "chocolate2") +
  geom_point(size=3, color= "burlywood4") +
  ggtitle("Effect of a Hurricane on Lizard Counts")

ggplot(hurr.herp.amph, aes(x=pre_post, y=n)) + xlab("Time Relative to Hurricane") + ylab("Number of Individuals") +
  geom_line(aes(group=site), color= "chocolate2") +
  geom_point(size=3, color= "burlywood4") +
  ggtitle("Effect of a Hurricane on Amphibian Counts")

lmer.herp <- lmer(n ~ group*pre_post + (1|site), data= hurr.herp.group)

performance::check_model(lmer.herp)

anova(lmer.herp)

## Type III Analysis of Variance Table with Satterthwaite's method
##                Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## group          108716   54358     2    70 62.2130 2.965e-16 ***
## pre_post          947     947     1    70  1.0843  0.301325    
## group:pre_post  10023    5011     2    70  5.7356  0.004935 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Let’s calculate model-adjusted means.

emmeans(lmer.herp, "pre_post")

## NOTE: Results may be misleading due to involvement in interactions

##  pre_post emmean   SE   df lower.CL upper.CL
##  Pre        42.2 5.25 31.5     31.5     53.0
##  Post       48.7 5.25 31.5     38.0     59.4
## 
## Results are averaged over the levels of: group 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

Pre-hurricane means are lower than post-hurricane means, but this doesn’t tell us much. We know from our ANOVA that this is statistically non-significant.

herp.emm <- emmeans(lmer.herp, "pre_post", "group")
herp.emm

## group = amphibian:
##  pre_post emmean   SE   df lower.CL upper.CL
##  Pre        49.1 8.15 78.1     32.8     65.3
##  Post       34.1 8.15 78.1     17.9     50.4
## 
## group = lizard:
##  pre_post emmean   SE   df lower.CL upper.CL
##  Pre        72.3 8.15 78.1     56.0     88.5
##  Post      107.5 8.15 78.1     91.2    123.7
## 
## group = snake:
##  pre_post emmean   SE   df lower.CL upper.CL
##  Pre         5.4 8.15 78.1    -10.8     21.6
##  Post        4.6 8.15 78.1    -11.6     20.8
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

data.emm <- as.data.frame(summary(herp.emm))
data.emm

##   pre_post     group    emmean       SE       df  lower.CL  upper.CL
## 1      Pre amphibian  49.06667 8.150301 78.08248  32.84093  65.29240
## 2     Post amphibian  34.13333 8.150301 78.08248  17.90760  50.35907
## 3      Pre    lizard  72.26667 8.150301 78.08248  56.04093  88.49240
## 4     Post    lizard 107.46667 8.150301 78.08248  91.24093 123.69240
## 5      Pre     snake   5.40000 8.150301 78.08248 -10.82573  21.62573
## 6     Post     snake   4.60000 8.150301 78.08248 -11.62573  20.82573

Model-adjusted means are the same as raw means because the experiment is fully balanced, but look, I’m proving I can run the code for them!

data.emm$group <- factor(data.emm$group, levels= c("lizard","amphibian","snake"))
ggplot(data.emm, aes(x=pre_post, y=emmean, group=group, color=group)) + scale_color_manual(name= "Taxonomic Group", values= c("darkgoldenrod1","burlywood4","chocolate2")) +
  geom_line() +
  geom_point(size=4)+
  geom_errorbar(aes(ymin=emmean-SE, ymax=emmean+SE), width=.2)+
  labs(title="Herpetofauna Counts Before and After a Hurricane", x= "Time Relative to Hurricane", y= "Number of Individuals")

Repeated Measures Data

chimp <- read.csv("./Data/chimp_personality.csv", header=TRUE) %>% clean_names()
chimp$sex <- as.factor(chimp$sex)

ggplot(chimp, aes(x=time, y=extraversion, color=sex)) + scale_x_discrete(labels= c("Test 1", "Test 2")) +
  scale_color_manual(values= c("chocolate2","burlywood4"), name= "Sex", labels= c("Male","Female")) + labs(title= "Change in Extraversion of Chimps", x= "Test Number", y= "Extraversion Rating") + geom_line(aes(group=id)) + geom_point(size=3)

chimp.lmer <- lmer(extraversion ~ sex*time + (1|id), data= chimp)
anova(chimp.lmer)

## Type III Analysis of Variance Table with Satterthwaite's method
##           Sum Sq Mean Sq NumDF DenDF F value   Pr(>F)   
## sex      0.89656 0.89656     1    48  9.5284 0.003356 **
## time     0.17057 0.17057     1    48  1.8128 0.184501   
## sex:time 0.00001 0.00001     1    48  0.0001 0.992237   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(chimp.lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: extraversion ~ sex * time + (1 | id)
##    Data: chimp
## 
## REML criterion at convergence: 109.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.93589 -0.60564  0.04399  0.56280  1.86287 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.08914  0.2986  
##  Residual             0.09409  0.3067  
## Number of obs: 100, groups:  id, 50
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  4.93240    0.08561 77.62766  57.613  < 2e-16 ***
## sex2        -0.32160    0.12107 77.62766  -2.656  0.00959 ** 
## timet2      -0.08200    0.08676 48.00000  -0.945  0.34933    
## sex2:timet2 -0.00120    0.12270 48.00000  -0.010  0.99224    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sex2   timet2
## sex2        -0.707              
## timet2      -0.507  0.358       
## sex2:timet2  0.358 -0.507 -0.707

performance::check_model(chimp.lmer)

chimp.emm <- emmeans(chimp.lmer, "sex")

## NOTE: Results may be misleading due to involvement in interactions

chimp.emm

##  sex emmean     SE df lower.CL upper.CL
##  1     4.89 0.0738 48     4.74     5.04
##  2     4.57 0.0738 48     4.42     4.72
## 
## Results are averaged over the levels of: time 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

chimp.emm.time <- emmeans(chimp.lmer, "time", "sex")
chimp.emm.time

## sex = 1:
##  time emmean     SE   df lower.CL upper.CL
##  t1     4.93 0.0856 77.6     4.76     5.10
##  t2     4.85 0.0856 77.6     4.68     5.02
## 
## sex = 2:
##  time emmean     SE   df lower.CL upper.CL
##  t1     4.61 0.0856 77.6     4.44     4.78
##  t2     4.53 0.0856 77.6     4.36     4.70
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

data.chimp.emm <- as.data.frame(chimp.emm.time)

Interesting… all chimps, regardless of sex, got less extraverted over time. Males were more extraverted than females.

chimp.means <- chimp %>% group_by(time, sex) %>% summarise(mean_extraversion= mean(extraversion), se_rating =sd(extraversion)/sqrt(n()))

## `summarise()` has grouped output by 'time'. You can override using the `.groups` argument.

chimp.means

## # A tibble: 4 × 4
## # Groups:   time [2]
##   time  sex   mean_extraversion se_rating
##   <chr> <fct>             <dbl>     <dbl>
## 1 t1    1                  4.93    0.0837
## 2 t1    2                  4.61    0.0936
## 3 t2    1                  4.85    0.0845
## 4 t2    2                  4.53    0.0801

ggplot(data.chimp.emm, aes(x= time, y= emmean, color= sex)) + geom_point(size= 4) +
  geom_errorbar(aes(ymin= emmean-SE, ymax= emmean+SE), width=.2) +
  labs(title= "Means and Corrected Means for Extraversion in Chimps", x= "Test Number", y= "Mean Extraversion Rating") +
  scale_color_manual(values= c("chocolate2","burlywood4"), name= "Sex", labels= c("Male","Female")) +
  scale_x_discrete(labels= c("Test 1","Test 2")) +
  geom_point(data= chimp.means, size=2, x= chimp.means$time, y= chimp.means$mean_extraversion, color="darkgoldenrod1")

emmeans are the same as actual means.

Mixed Modeling for Repeated Measures and Hierarchical Data

Jocelyn Stalker

9/23/2021

Hierarchical Data

Repeated Measures Data