5  Hierarchical Model

Code
# Load libraries
library(tidyverse)
library(dagitty)
library(ggdag)
library(GGally)
library(ggpmisc)
library(gt)
library(cmdstanr)
library(tidybayes)
library(ggridges)
library(loo)
#library(posterior)

# Set a minimal, consistent ggplot theme for every figure in this chapter
theme_set(theme_classic())

# Convert a proportion (e.g. 0.82) to a percentage (e.g. 82), rounded to
# `digits` decimal places. Vectorized over `x`.
to_percent <- function(x, digits = 0) {
  round(100 * x, digits = digits)
}

# Custom GGally lower panel: open-circle scatter, an OLS fit with its
# fitted equation (via ggpmisc::stat_poly_eq), and a dashed y = x
# reference line. Axes are formatted as percentages.
lower_fun <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(shape = 1, alpha = 0.4, size = 1.5) +
    # Bare formula instead of the string 'y ~ x', matching stat_poly_eq below
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "steelblue") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    stat_poly_eq(
      formula = y ~ x,
      aes(label = after_stat(eq.label)),
      parse = TRUE,
      size = 3
    ) +
    scale_x_continuous(labels = scales::percent, breaks = seq(-1, 1, 0.25)) +
    scale_y_continuous(labels = scales::percent, breaks = seq(-1, 1, 0.25))
}

It’s sensible to assume that the teacher has a systematic impact on student achievement for two reasons. First, they are often the ones grading students and some teachers can be more lenient or harsher graders than others. Second, some teachers may be better or worse than others at teaching which would impact performance on tests. Given that it’s easy to collect information on who the teacher was, let’s add it to our DAG as seen in Figure 5.1.

Code
# Define the DAG: the teacher influences grading/instruction in every
# year, and each year's grade feeds forward into the later courses.
# bb sets the bounding box; pos places each node for plotting.
dag <- dagitty('dag {
bb="0,0,1,1"
teacher [exposure,pos="0.5,0.8"]
math_8 [exposure,pos="0.3,0.3"]
mth1w [exposure,pos="0.500,0.5"]
mpm2d [outcome,pos="0.700,0.5"]
teacher -> math_8
teacher -> mth1w
teacher -> mpm2d
math_8 -> mth1w
math_8 -> mpm2d
mth1w -> mpm2d
}
')
# NOTE(review): math_8 and mth1w are tagged [exposure] alongside teacher,
# although the surrounding text treats them as mediators — confirm the
# tags are intentional.

# Plot the DAG with nodes colored by status (exposure/outcome)
ggdag_status(dag, node_size = 20) +
  guides(color = "none") +  # Turn off legend
  theme_dag()
Figure 5.1: Hierarchical DAG with the effect of the teacher.

5.1 Mathematical Model

\[\text{logit}(\mu_{8i}) = \tau_{8j} + \alpha_8\]

\[\phi_8, \phi_9, \phi_{10} \sim N^+(10, 3)\]

\[x_{8i} \sim \text{Beta}(\mu_{8i} \cdot \phi_8, \, (1 - \mu_{8i}) \cdot \phi_8)\] \[\text{logit}(\mu_{9i}) = \tau_{9j} + \alpha_9 + \beta_1 \cdot (x_{8i} - \bar{x}_8)\] \[x_{9i} \sim \text{Beta}(\mu_{9i} \cdot \phi_9,\ (1 - \mu_{9i}) \cdot \phi_9)\] \[\text{logit}(\mu_{10i}) = \tau_{10j} + \alpha_{10} + \beta_2 \cdot (x_{8i} - \bar{x}_8) + \beta_3 \cdot (x_{9i} - \bar{x}_9)\] \[y_{10i} \sim \text{Beta}(\mu_{10i} \cdot \phi_{10},\ (1 - \mu_{10i}) \cdot \phi_{10})\] Let’s try to write some reasonable priors for the \(\alpha\) and \(\tau\) parameters. Alpha represents the expected log-odds grade for the average student. In the previous chapters we assumed that the average student would finish with 80%, 77%, and 72% in years 8, 9, and 10 respectively. Thus, we could set the following priors.

Code
# Candidate alpha values on the log-odds scale, and the average grade
# each one implies on the probability scale.
tibble(x = seq(0.95, 1.4, by = 0.05)) |>
  mutate(y = plogis(x))
# A tibble: 10 × 2
       x     y
   <dbl> <dbl>
 1  0.95 0.721
 2  1    0.731
 3  1.05 0.741
 4  1.1  0.750
 5  1.15 0.760
 6  1.2  0.769
 7  1.25 0.777
 8  1.3  0.786
 9  1.35 0.794
10  1.4  0.802
Code
# Logistic curve with a reference line at alpha = 1.4 (roughly an 80%
# average grade), used to pick plausible intercept priors.
ggplot() +
  stat_function(fun = plogis, xlim = c(-3, 3)) +
  geom_vline(xintercept = 1.4) +
  scale_x_continuous(breaks = -6:6) +
  scale_y_continuous(
    breaks = seq(-1, 1, 0.1),
    labels = scales::percent
  ) +
  labs(x = "Alpha", y = "Average Grade")
Figure 5.2: Visualizing the log-odds to set plausible priors.
Code
# Overlay the three alpha priors — N(1.4, 0.03), N(1.2, 0.03), and
# N(0.95, 0.03) for years 8, 9, and 10 respectively. A single lapply()
# builds the layers instead of repeating stat_function() three times;
# ggplot accepts a list of layers added with `+`.
ggplot() +
  lapply(
    c(1.4, 1.2, 0.95),
    \(m) stat_function(
      fun = dnorm,
      args = list(mean = m, sd = 0.03),
      xlim = c(0.8, 1.5)
    )
  ) +
  scale_x_continuous(breaks = seq(0, 5, 0.1)) +
  labs(
    x = "Alpha",
    y = "Density"
  )

\[\alpha_8 \sim N(1.4, 0.03)\] \[\alpha_9 \sim N(1.2, 0.03)\] \[\alpha_{10} \sim N(0.95, 0.03)\] Now let’s pivot to focus on the \(\tau\) parameters which represent the teacher effects. We know some teachers are harsher graders than others, but we can assume that the average teacher is a relatively unbiased grader. In other words, we assume that teacher bias is a random error of measurement instead of a systematic one. That said, it’s entirely possible that teachers are systematically too generous in their grading because they want their students to succeed.

As seen in Figure 5.2, a one unit increase in log-odds represents roughly a 20% difference in student grades. We don’t expect teacher effects to be this strong, so we might set the following prior.

\[\tau_{8j}, \tau_{9j}, \tau_{10j} \sim N(0, 0.3)\]

Code
# Visualize the teacher-effect prior: tau ~ N(0, 0.3) on the log-odds
# scale, concentrating most effects well inside one log-odds unit.
ggplot() +
  stat_function(
    fun = dnorm,
    args = list(mean = 0, sd = 0.3),
    xlim = c(-1, 1)
  ) +
  labs(x = "Teacher Effect on log-odds", y = "Density")

5.2 Simulated Teacher Data

Let’s simulate some data based on our mathematical model and our priors.

Code
# Number of simulated students
N <- 250

# Year 8 roster: one entry per student; teachers A-D (C teaches three
# sections, D five). The `times` class sizes sum to N = 250.
teacher_8 <- rep(
  c("A", "B", "C", "C", "C", "D", "D", "D", "D", "D"),
  times = c(19, 29, 23, 30, 27, 22, 25, 28, 18, 29)
)

# Year 9 roster: teachers E-G.
teacher_9 <- rep(
  c("E", "E", "E", "E", "F", "F", "F", "F", "G", "G"),
  times = c(21, 27, 31, 20, 22, 26, 25, 28, 24, 26)
)

# Year 10 roster: teachers F-H (F and G also teach in year 9).
teacher_10 <- rep(
  c("F", "F", "F", "G", "G", "G", "G", "G", "H", "H"),
  times = c(27, 24, 29, 28, 22, 20, 23, 26, 26, 25)
)

# Randomly assign students to teachers each year, and attach each
# teacher's fixed grading bias on the log-odds scale. The seed pins the
# shuffles; the order of the sample() calls matters for reproducibility.
set.seed(2025)
teachers <- tibble(
  student = 1:N,
  teacher_8 = sample(teacher_8),
  bias_8 = case_when(
    teacher_8 == "A" ~ -0.1,
    teacher_8 == "B" ~ 0,
    teacher_8 == "C" ~ -0.3,
    teacher_8 == "D" ~ 0.25
  ),
  teacher_9 = sample(teacher_9),
  bias_9 = case_when(
    teacher_9 == "E" ~ 0,
    teacher_9 == "F" ~ -0.5,
    teacher_9 == "G" ~ 0.2
  ),
  teacher_10 = sample(teacher_10),
  bias_10 = case_when(
    teacher_10 == "F" ~ -0.3,
    teacher_10 == "G" ~ 0.2,
    teacher_10 == "H" ~ 0.4
  )
)
Code
# Simulate grades year by year following the mathematical model: each
# year's mean (on the logit scale) is the intercept plus the teacher
# bias plus effects of the centred earlier grades; grades are Beta
# draws with precision phi = 10. The seed pins the rbeta() draws, whose
# order within mutate() matters for reproducibility.
set.seed(2025)
sim_grades <- teachers |>
  mutate(
    mu8 = plogis(1.39 + bias_8),
    y8 = rbeta(n = N, shape1 = mu8*10, shape2 = (1-mu8)*10),
    mu9 = plogis(1.21 + bias_9 + 5 * (y8 - mean(y8))),
    y9 = rbeta(n = N, shape1 = mu9 * 10, shape2 = (1 - mu9) * 10),
    mu10 = plogis(
      0.94 + bias_10 + 1.2 * (y8 - mean(y8)) + 4.35 * (y9 - mean(y9))
      ),
    y10 = rbeta(n = N, shape1 = mu10 * 10, shape2 = (1 - mu10) * 10),
)
Code
# Density of the simulated grades by course, with the 50% pass mark
# shown as a dashed vertical line.
sim_grades |>
  pivot_longer(
    cols = c(y8, y9, y10),
    names_to = "year",
    values_to = "grade"
  ) |>
  mutate(
    year = factor(
      year,
      levels = c("y8", "y9", "y10"),
      labels = c("year 8", "MTH1W", "MPM2D")
    )
  ) |>
  ggplot(aes(x = grade, fill = year)) +
  geom_density(alpha = 0.2) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent) +
  labs(x = "Simulated Grades", y = "Probability Density")

Code
# Pairwise relationships between the simulated grades across years,
# using the custom lower panel defined above.
sim_grades |>
  select(y8, y9, y10) |>
  ggpairs(lower = list(continuous = lower_fun))

Code
# Quick numeric overview of every simulated column
summary(sim_grades)
    student        teacher_8             bias_8         teacher_9        
 Min.   :  1.00   Length:250         Min.   :-0.3000   Length:250        
 1st Qu.: 63.25   Class :character   1st Qu.:-0.3000   Class :character  
 Median :125.50   Mode  :character   Median : 0.0000   Mode  :character  
 Mean   :125.50                      Mean   : 0.0184                     
 3rd Qu.:187.75                      3rd Qu.: 0.2500                     
 Max.   :250.00                      Max.   : 0.2500                     
     bias_9        teacher_10           bias_10             mu8        
 Min.   :-0.500   Length:250         Min.   :-0.3000   Min.   :0.7484  
 1st Qu.:-0.500   Class :character   1st Qu.:-0.3000   1st Qu.:0.7484  
 Median : 0.000   Mode  :character   Median : 0.2000   Median :0.8006  
 Mean   :-0.162                      Mean   : 0.0808   Mean   :0.8007  
 3rd Qu.: 0.000                      3rd Qu.: 0.2000   3rd Qu.:0.8375  
 Max.   : 0.200                      Max.   : 0.4000   Max.   :0.8375  
       y8              mu9               y9              mu10       
 Min.   :0.4064   Min.   :0.2449   Min.   :0.2017   Min.   :0.1585  
 1st Qu.:0.7011   1st Qu.:0.6279   1st Qu.:0.6307   1st Qu.:0.5927  
 Median :0.8125   Median :0.7600   Median :0.7699   Median :0.7753  
 Mean   :0.7912   Mean   :0.7193   Mean   :0.7292   Mean   :0.7057  
 3rd Qu.:0.8915   3rd Qu.:0.8353   3rd Qu.:0.8697   3rd Qu.:0.8512  
 Max.   :0.9933   Max.   :0.9184   Max.   :0.9899   Max.   :0.9323  
      y10        
 Min.   :0.1111  
 1st Qu.:0.5641  
 Median :0.7668  
 Mean   :0.7096  
 3rd Qu.:0.8868  
 Max.   :1.0000  
Code
# Reshape to one row per (student, year) with that year's result,
# teacher, and bias. The three stacked pivot_longer() calls form a
# cartesian product (27 rows per student); the filter() keeps only the
# rows where result, teacher, and bias all refer to the same year.
sim_grades_by_teacher <- sim_grades |>
  pivot_longer(
    cols = starts_with("y"),
    names_to = "student_grade",
    names_prefix = "y",
    values_to = "result",
  ) |>
  pivot_longer(
    cols = starts_with("teacher"),
    names_to = "teacher_grade",
    names_prefix = "teacher_",
    values_to = "teacher",
  ) |>
  pivot_longer(
    cols = starts_with("bias"),
    names_to = "bias_grade",
    names_prefix = "bias_",
    values_to = "bias",
  ) |>
  filter(student_grade == teacher_grade, student_grade == bias_grade) |>
  mutate(
    teacher_grade = factor(teacher_grade, levels = c("8", "9", "10")),
    teacher = factor(teacher, levels = c("A", "B", "C", "D", "E", "F", "G", "H"))
  )

# Per-year summary of the simulated results
sim_grades_by_teacher |>
  group_by(teacher_grade) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher_grade n mean_bias mean median sd
8 250.00 0.02 0.79 0.81 0.13
9 250.00 −0.16 0.73 0.77 0.19
10 250.00 0.08 0.71 0.77 0.22
Code
# Pooled per-teacher summary across every year the teacher appears in;
# F and G teach in both year 9 and year 10, so their rows pool two
# years (and two different biases) together.
sim_grades_by_teacher |>
  group_by(teacher) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher n mean_bias mean median sd
A 19.00 −0.10 0.76 0.80 0.14
B 29.00 0.00 0.78 0.81 0.13
C 80.00 −0.30 0.74 0.76 0.14
D 122.00 0.25 0.83 0.86 0.12
E 99.00 0.00 0.77 0.78 0.16
F 181.00 −0.41 0.65 0.69 0.21
G 169.00 0.20 0.75 0.81 0.20
H 51.00 0.40 0.77 0.83 0.22
Code
# Per-teacher summary within each year. summarise() drops only the last
# grouping variable, so the result stays grouped by teacher_grade;
# gt() uses that remaining grouping to render the year row groups.
sim_grades_by_teacher |>
  group_by(teacher_grade, teacher) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher n mean_bias mean median sd
8
A 19.00 −0.10 0.76 0.80 0.14
B 29.00 0.00 0.78 0.81 0.13
C 80.00 −0.30 0.74 0.76 0.14
D 122.00 0.25 0.83 0.86 0.12
9
E 99.00 0.00 0.77 0.78 0.16
F 101.00 −0.50 0.65 0.70 0.20
G 50.00 0.20 0.80 0.83 0.15
10
F 80.00 −0.30 0.64 0.69 0.22
G 119.00 0.20 0.73 0.79 0.22
H 51.00 0.40 0.77 0.83 0.22

We see that our data is realistic and that teacher effects do indeed shift the distributions. Let’s focus our attention on the three teachers in year 9.

Code
# Year 9 grade densities per teacher, against the theoretical
# (bias-free) Beta density for mu = 0.77, phi = 10 in orange.
sim_grades_by_teacher |>
  filter(student_grade == 9) |>
  ggplot(aes(x = result, fill = teacher)) +
  geom_density(alpha = 0.5) +
  stat_function(
    fun = dbeta,
    args = list(shape1 = 0.77*10, shape2 = (1-0.77)*10),
    xlim = c(0,1),
    # `linewidth` replaces the deprecated `size` for line widths (ggplot2 >= 3.4)
    linewidth = 3, color = "darkorange"
  ) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(0, 1, 0.1),labels = scales::percent) +
  labs(
    # The original had a stray double comma after `title`
    title = "Bias: E = 0, F = -0.5, G = 0.2",
    subtitle = "The orange line is the theoretical density",
    x = "Simulated Year 9 Grades",
    y = "Probability Density"
  )

We see that the green distribution is shifted to the left relative to the orange curve because of the negative bias of -0.5 on the log-odds scale. Conversely, the blue distribution is slightly shifted to the right, while the red distribution roughly follows the theoretical distribution.

5.3 Consistent Teacher Effects

The previous simulation assumed that the teacher effects were independent between years. That is, a harsh grader in year 9 was no more likely to be a harsh grader in year 10 than an easy grader. It seems reasonable to assume instead that some teachers are systematically harsher graders than others across grades, and that their bias may differ for different courses.

\[ \zeta_j \sim \mathcal{N}(0, \sigma_\zeta) \]

\[ \tau_{8j} \sim \mathcal{N}(\zeta_j, \sigma_8) \]

\[ \tau_{9j} \sim \mathcal{N}(\zeta_j, \sigma_9) \]

\[ \tau_{10j} \sim \mathcal{N}(\zeta_j, \sigma_{10}) \]

\[ \phi_8, \phi_9, \phi_{10} \sim \mathcal{N}^+(10, 3) \]

\[ \text{logit}(\mu_{8i}) = \tau_{8j[i]} + \alpha_8 \]

\[ x_{8i} \sim \text{Beta}(\mu_{8i} \cdot \phi_8,\ (1 - \mu_{8i}) \cdot \phi_8) \]

\[ \text{logit}(\mu_{9i}) = \tau_{9j[i]} + \alpha_9 + \beta_1 \cdot (x_{8i} - \bar{x}_8) \]

\[ x_{9i} \sim \text{Beta}(\mu_{9i} \cdot \phi_9,\ (1 - \mu_{9i}) \cdot \phi_9) \]

\[ \text{logit}(\mu_{10i}) = \tau_{10j[i]} + \alpha_{10} + \beta_2 \cdot (x_{8i} - \bar{x}_8) + \beta_3 \cdot (x_{9i} - \bar{x}_9) \]

\[ y_{10i} \sim \text{Beta}(\mu_{10i} \cdot \phi_{10},\ (1 - \mu_{10i}) \cdot \phi_{10}) \]

Code
# Re-assign students to teachers using the same rosters, this time
# without fixed per-year biases: teacher effects are drawn
# hierarchically below. The seed pins the sample() shuffles.
set.seed(2025)

teachers <- tibble(
  student = 1:N,
  teacher_8 = sample(teacher_8),
  teacher_9 = sample(teacher_9),
  teacher_10 = sample(teacher_10)
)

# One row per distinct teacher across all years, with an integer id
# (used later to index teacher effects in Stan).
unique_teachers <- teachers |>
  pivot_longer(
    cols = starts_with("teacher"),
    names_to = "teacher_grade",
    names_prefix = "teacher_",
    values_to = "teacher",
  ) |>
  distinct(teacher) |>
  arrange(teacher) |>
  mutate(
    teacher_id = row_number()
  )

K <- nrow(unique_teachers)

# Draw each teacher's overall grading tendency: zeta_j ~ N(0, 0.3)
set.seed(2025)
unique_teachers <- unique_teachers |>
  arrange(teacher) |>
  mutate(zeta = rnorm(K, 0, 0.3))

# One row per (year, teacher) pair; teachers appearing in two years
# (F and G) get one row per year.
unique_teachers_grades <- teachers |>
  pivot_longer(
    cols = starts_with("teacher"),
    names_to = "teacher_grade",
    names_prefix = "teacher_",
    values_to = "teacher",
  ) |>
  distinct(teacher_grade, teacher) |>
  arrange(teacher_grade, teacher) |>
  inner_join(unique_teachers, by = join_by(teacher)) 

L <- nrow(unique_teachers_grades)

# Year-specific effects tau ~ N(zeta_j, 0.1): centred on the teacher's
# overall tendency, varying a little from course to course.
set.seed(2025)
unique_teachers_grades <- unique_teachers_grades|>
  mutate(
    tau = rnorm(n= L, mean = zeta, sd = 0.1)
  )

unique_teachers_grades
# A tibble: 10 × 5
   teacher_grade teacher teacher_id    zeta     tau
   <chr>         <chr>        <int>   <dbl>   <dbl>
 1 10            F                6 -0.0489  0.0132
 2 10            G                7  0.119   0.123 
 3 10            H                8 -0.0240  0.0533
 4 8             A                1  0.186   0.313 
 5 8             B                2  0.0107  0.0478
 6 8             C                3  0.232   0.216 
 7 8             D                4  0.382   0.421 
 8 9             E                5  0.111   0.103 
 9 9             F                6 -0.0489 -0.0834
10 9             G                7  0.119   0.189 
Code
# Attach each teacher's overall tendency (zeta) and year-specific
# effect (tau) to every student, then spread back to one row per
# student with *_8 / *_9 / *_10 columns. (teacher_id is not listed in
# values_from, so it is dropped by pivot_wider.)
teachers_with_effects <- teachers |>
  pivot_longer(
    cols = starts_with("teacher_"),
    names_to = "teacher_grade",
    names_prefix = "teacher_",
    values_to = "teacher"
  ) |>
  # names_to already yields a character column, so the original's
  # as.character() re-coercion was redundant and has been removed.
  left_join(unique_teachers_grades, by = join_by(teacher_grade, teacher)) |>
  pivot_wider(
    id_cols = student,
    names_from = teacher_grade,
    values_from = c(teacher, zeta, tau),
    names_sep = "_"
  )

teachers_with_effects
# A tibble: 250 × 10
   student teacher_8 teacher_9 teacher_10 zeta_8  zeta_9 zeta_10  tau_8   tau_9
     <int> <chr>     <chr>     <chr>       <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
 1       1 D         F         H          0.382  -0.0489 -0.0240 0.421  -0.0834
 2       2 D         E         H          0.382   0.111  -0.0240 0.421   0.103 
 3       3 D         F         H          0.382  -0.0489 -0.0240 0.421  -0.0834
 4       4 D         F         G          0.382  -0.0489  0.119  0.421  -0.0834
 5       5 D         F         H          0.382  -0.0489 -0.0240 0.421  -0.0834
 6       6 B         G         G          0.0107  0.119   0.119  0.0478  0.189 
 7       7 D         F         F          0.382  -0.0489 -0.0489 0.421  -0.0834
 8       8 A         E         F          0.186   0.111  -0.0489 0.313   0.103 
 9       9 D         F         H          0.382  -0.0489 -0.0240 0.421  -0.0834
10      10 C         E         G          0.232   0.111   0.119  0.216   0.103 
# ℹ 240 more rows
# ℹ 1 more variable: tau_10 <dbl>
Code
# Simulate grades exactly as before, but with the hierarchical
# year-specific teacher effects (tau) in place of the fixed biases.
# The seed pins the rbeta() draws; their order matters.
set.seed(2025)

sim_grades <- teachers_with_effects |>
  mutate(
    mu8 = plogis(1.39 + tau_8),
    y8 = rbeta(n = N, shape1 = mu8*10, shape2 = (1-mu8)*10),
    mu9 = plogis(1.21 + tau_9 + 5 * (y8 - mean(y8))),
    y9 = rbeta(n = N, shape1 = mu9 * 10, shape2 = (1 - mu9) * 10),
    mu10 = plogis(
      0.94 + tau_10 + 1.2 * (y8 - mean(y8)) + 4.35 * (y9 - mean(y9))
      ),
    y10 = rbeta(n = N, shape1 = mu10 * 10, shape2 = (1 - mu10) * 10),
)
Code
# Density of the hierarchically simulated grades by course, with the
# 50% pass mark as a dashed vertical line.
sim_grades |>
  pivot_longer(
    cols = c(y8, y9, y10),
    names_to = "year",
    values_to = "grade"
  ) |>
  mutate(
    year = factor(
      year,
      levels = c("y8", "y9", "y10"),
      labels = c("year 8", "MTH1W", "MPM2D")
    )
  ) |>
  ggplot(aes(x = grade, fill = year)) +
  geom_density(alpha = 0.2) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent) +
  labs(x = "Simulated Grades", y = "Probability Density")

Code
# Pairwise relationships between the hierarchically simulated grades
sim_grades |>
  select(y8, y9, y10) |>
  ggpairs(lower = list(continuous = lower_fun))

Code
# Quick numeric overview of every column, including zeta/tau effects
summary(sim_grades)
    student        teacher_8          teacher_9          teacher_10       
 Min.   :  1.00   Length:250         Length:250         Length:250        
 1st Qu.: 63.25   Class :character   Class :character   Class :character  
 Median :125.50   Mode  :character   Mode  :character   Mode  :character  
 Mean   :125.50                                                           
 3rd Qu.:187.75                                                           
 Max.   :250.00                                                           
     zeta_8            zeta_9            zeta_10             tau_8        
 Min.   :0.01069   Min.   :-0.04886   Min.   :-0.04886   Min.   :0.04779  
 1st Qu.:0.23195   1st Qu.:-0.04886   1st Qu.:-0.04886   1st Qu.:0.21566  
 Median :0.23195   Median : 0.11129   Median :-0.02400   Median :0.31348  
 Mean   :0.27591   Mean   : 0.04816   Mean   : 0.03618   Mean   :0.30405  
 3rd Qu.:0.38175   3rd Qu.: 0.11129   3rd Qu.: 0.11913   3rd Qu.:0.42146  
 Max.   :0.38175   Max.   : 0.11913   Max.   : 0.11913   Max.   :0.42146  
     tau_9              tau_10             mu8               y8        
 Min.   :-0.08335   Min.   :0.01322   Min.   :0.8081   Min.   :0.4241  
 1st Qu.:-0.08335   1st Qu.:0.01322   1st Qu.:0.8328   1st Qu.:0.7625  
 Median : 0.10329   Median :0.05332   Median :0.8460   Median :0.8640  
 Mean   : 0.04510   Mean   :0.07351   Mean   :0.8440   Mean   :0.8346  
 3rd Qu.: 0.10329   3rd Qu.:0.12270   3rd Qu.:0.8595   3rd Qu.:0.9323  
 Max.   : 0.18935   Max.   :0.12270   Max.   :0.8595   Max.   :0.9927  
      mu9               y9              mu10              y10        
 Min.   :0.2838   Min.   :0.1072   Min.   :0.08653   Min.   :0.0379  
 1st Qu.:0.7091   1st Qu.:0.6789   1st Qu.:0.63449   1st Qu.:0.5636  
 Median :0.8019   Median :0.8044   Median :0.77314   Median :0.7685  
 Mean   :0.7617   Mean   :0.7641   Mean   :0.71230   Mean   :0.7057  
 3rd Qu.:0.8520   3rd Qu.:0.9004   3rd Qu.:0.83975   3rd Qu.:0.8688  
 Max.   :0.8913   Max.   :0.9948   Max.   :0.89775   Max.   :0.9993  
Code
# As before, reshape to one row per (student, year); here the "bias"
# column holds the hierarchical year-specific tau effect rather than a
# fixed bias. The stacked pivots form a cartesian product that the
# filter() trims back to matching years.
sim_grades_by_teacher <- sim_grades |>
  pivot_longer(
    cols = starts_with("y"),
    names_to = "student_grade",
    names_prefix = "y",
    values_to = "result",
  ) |>
  pivot_longer(
    cols = starts_with("teacher"),
    names_to = "teacher_grade",
    names_prefix = "teacher_",
    values_to = "teacher",
  ) |>
  pivot_longer(
    cols = starts_with("tau"),
    names_to = "bias_grade",
    names_prefix = "tau_",
    values_to = "bias",
  ) |>
  filter(student_grade == teacher_grade, student_grade == bias_grade) |>
  mutate(
    teacher_grade = factor(teacher_grade, levels = c("8", "9", "10")),
    teacher = factor(teacher, levels = c("A", "B", "C", "D", "E", "F", "G", "H"))
  )

# Per-year summary of the hierarchically simulated results
sim_grades_by_teacher |>
  group_by(teacher_grade) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher_grade n mean_bias mean median sd
8 250.00 0.30 0.83 0.86 0.12
9 250.00 0.05 0.76 0.80 0.17
10 250.00 0.07 0.71 0.77 0.22
Code
# Pooled per-teacher summary across years; for F and G the mean_bias
# averages two different year-specific tau values.
sim_grades_by_teacher |>
  group_by(teacher) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher n mean_bias mean median sd
A 19.00 0.31 0.87 0.92 0.13
B 29.00 0.05 0.78 0.81 0.10
C 80.00 0.22 0.82 0.85 0.14
D 122.00 0.42 0.85 0.88 0.11
E 99.00 0.10 0.79 0.81 0.14
F 181.00 −0.04 0.70 0.74 0.20
G 169.00 0.14 0.73 0.80 0.21
H 51.00 0.05 0.73 0.79 0.21
Code
# Per-teacher summary within each year. The output remains grouped by
# teacher_grade (summarise drops only the last group), which gt() uses
# to render the year row groups.
sim_grades_by_teacher |>
  group_by(teacher_grade, teacher) |>
  summarise(
    n = n(),
    mean_bias = mean(bias),
    mean = mean(result),
    median = median(result),
    sd = sd(result)
  ) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
teacher n mean_bias mean median sd
8
A 19.00 0.31 0.87 0.92 0.13
B 29.00 0.05 0.78 0.81 0.10
C 80.00 0.22 0.82 0.85 0.14
D 122.00 0.42 0.85 0.88 0.11
9
E 99.00 0.10 0.79 0.81 0.14
F 101.00 −0.08 0.72 0.77 0.20
G 50.00 0.19 0.80 0.85 0.14
10
F 80.00 0.01 0.69 0.71 0.20
G 119.00 0.12 0.71 0.78 0.23
H 51.00 0.05 0.73 0.79 0.21

5.4 Stan Hierarchical Model

Code
# Map each year's teacher letter to its integer id for Stan. Join only
# the narrow (teacher, teacher_id) lookup: joining the full
# unique_teachers table three times would also drag in three duplicated
# zeta columns with .x/.y suffixes.
teacher_lookup <- unique_teachers |>
  select(teacher, teacher_id)

sim_grades <- sim_grades |>
  left_join(teacher_lookup, by = c("teacher_8" = "teacher")) |>
  rename(teacher_8_id = teacher_id) |>
  left_join(teacher_lookup, by = c("teacher_9" = "teacher")) |>
  rename(teacher_9_id = teacher_id) |>
  left_join(teacher_lookup, by = c("teacher_10" = "teacher")) |>
  rename(teacher_10_id = teacher_id)

# sim_grades |>
#   select(
#     student,
#     starts_with("teacher_"),
#     starts_with("y")
#   ) |>
#   write_csv(file = "data/sim_grades_teacher.csv")
Code
# Assemble the data passed to Stan: counts, per-year teacher index
# vectors, and the grade vectors.
data_list <- list(
  N = nrow(sim_grades),
  # Number of distinct teachers across all three years
  J = length(unique(c(
    sim_grades$teacher_8, sim_grades$teacher_9, sim_grades$teacher_10
  ))),
  teacher_8 = sim_grades$teacher_8_id,
  teacher_9 = sim_grades$teacher_9_id,
  teacher_10 = sim_grades$teacher_10_id,
  x8 = sim_grades$y8,
  x9 = sim_grades$y9,
  y10 = sim_grades$y10
)
Code
# Compile the Stan Model
teacher_mod <- cmdstan_model(stan_file = 'stan_models/teacher_model.stan')

# Fit the model. The sample() argument is `data`; the original `dat =`
# only worked through partial argument matching.
fit_teacher_mod <- teacher_mod$sample(
  data = data_list,
  seed = 2025
  )

# Save the fitted model so later chunks can reload it without resampling
fit_teacher_mod$save_object(file = "stan_models/fit_teacher_mod.RDS")

# Run CmdStan's built-in diagnostics
fit_teacher_mod$cmdstan_diagnose()

# Free memory; the fit is reloaded from disk in the next chunk
rm("teacher_mod", "fit_teacher_mod")
Code
# Load the previously fitted Stan model from disk (saved above), so the
# chapter can be re-rendered without re-sampling
fit_teacher_mod <- readRDS("stan_models/fit_teacher_mod.RDS")
Code
# Chain-level diagnostics: divergences, max-treedepth hits, and E-BFMI
fit_teacher_mod$diagnostic_summary()
$num_divergent
[1] 0 0 0 0

$num_max_treedepth
[1] 0 0 0 0

$ebfmi
[1] 1.0031725 0.9940134 1.0209796 0.9982838
Code
# Per-draw sampler diagnostics as a data frame
fit_teacher_mod$sampler_diagnostics(format = "df")
# A draws_df: 1000 iterations, 4 chains, and 6 variables
   treedepth__ divergent__ energy__ accept_stat__ stepsize__ n_leapfrog__
1            4           0     -618          0.91       0.28           15
2            5           0     -622          0.92       0.28           31
3            5           0     -612          0.73       0.28           31
4            4           0     -611          0.98       0.28           31
5            4           0     -613          0.75       0.28           15
6            5           0     -612          0.98       0.28           31
7            5           0     -619          0.99       0.28           31
8            4           0     -618          0.90       0.28           15
9            5           0     -617          0.77       0.28           31
10           4           0     -621          0.85       0.28           15
# ... with 3990 more draws
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
Code
# Posterior summaries (mean, median, sd, quantiles, R-hat, ESS) for the
# intercepts, precisions, slopes, and teacher effects, rendered as a
# formatted table.
fit_teacher_mod$summary(
  variables = c(
    "alpha8", "alpha9", "alpha10",
    "phi8", "phi9", "phi10",
    "beta1", "beta2", "beta3",
    "zeta",
    "tau_8", "tau_9", "tau_10"
  )
) |>
  gt() |>
  fmt_number(columns = where(is.numeric), decimals = 2)
Table 5.1: Summary Statistics of Posterior Parameters
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
alpha8 1.41 1.41 0.03 0.03 1.36 1.46 1.00 4,840.51 2,924.18
alpha9 1.21 1.21 0.03 0.03 1.16 1.25 1.00 4,305.96 3,231.97
alpha10 0.95 0.95 0.03 0.03 0.90 1.00 1.00 5,219.41 3,346.93
phi8 9.15 9.14 0.79 0.80 7.89 10.48 1.00 5,678.47 3,300.23
phi9 11.16 11.14 0.94 0.95 9.66 12.76 1.00 6,299.27 2,797.44
phi10 10.92 10.90 0.90 0.93 9.46 12.43 1.00 6,904.81 2,820.62
beta1 5.23 5.23 0.33 0.32 4.69 5.78 1.00 6,228.16 2,860.43
beta2 1.56 1.57 0.44 0.44 0.84 2.28 1.00 3,944.21 2,189.14
beta3 4.51 4.50 0.34 0.33 3.96 5.07 1.00 4,947.37 3,187.98
zeta[1] 0.38 0.38 0.17 0.17 0.10 0.67 1.00 2,895.36 2,478.26
zeta[2] −0.17 −0.17 0.15 0.15 −0.42 0.07 1.00 2,759.26 2,907.13
zeta[3] 0.12 0.12 0.12 0.12 −0.07 0.32 1.00 3,037.27 3,103.58
zeta[4] 0.28 0.27 0.12 0.12 0.08 0.47 1.00 3,360.04 2,801.41
zeta[5] 0.10 0.09 0.12 0.12 −0.09 0.29 1.00 3,692.65 2,968.36
zeta[6] −0.08 −0.08 0.08 0.08 −0.22 0.05 1.00 3,800.52 3,315.84
zeta[7] 0.18 0.18 0.09 0.09 0.03 0.34 1.00 4,252.14 3,282.62
zeta[8] 0.08 0.08 0.13 0.13 −0.13 0.30 1.00 3,419.04 2,957.83
tau_8[1] 0.42 0.42 0.16 0.15 0.17 0.68 1.00 3,002.52 2,534.37
tau_8[2] −0.19 −0.19 0.13 0.13 −0.41 0.02 1.00 3,262.42 3,214.41
tau_8[3] 0.14 0.14 0.09 0.09 −0.01 0.28 1.00 3,943.73 3,357.03
tau_8[4] 0.31 0.30 0.08 0.08 0.18 0.43 1.00 4,146.37 3,409.61
tau_8[5] 0.10 0.10 0.15 0.15 −0.15 0.35 1.00 4,144.39 3,038.90
tau_8[6] −0.08 −0.08 0.13 0.13 −0.30 0.13 1.00 4,897.63 3,363.67
tau_8[7] 0.18 0.18 0.13 0.13 −0.04 0.40 1.00 5,009.63 3,237.57
tau_8[8] 0.08 0.08 0.16 0.16 −0.19 0.34 1.00 3,757.63 3,161.71
tau_9[1] 0.38 0.38 0.20 0.20 0.07 0.71 1.00 3,129.41 3,219.67
tau_9[2] −0.17 −0.17 0.18 0.18 −0.47 0.13 1.00 3,095.72 2,795.46
tau_9[3] 0.12 0.12 0.16 0.16 −0.13 0.38 1.00 3,828.58 3,233.30
tau_9[4] 0.28 0.27 0.15 0.15 0.03 0.53 1.00 4,045.58 3,280.71
tau_9[5] 0.11 0.11 0.08 0.08 −0.01 0.24 1.00 4,580.00 3,577.73
tau_9[6] −0.06 −0.06 0.07 0.06 −0.17 0.05 1.00 4,306.85 3,588.73
tau_9[7] 0.21 0.21 0.09 0.09 0.07 0.35 1.00 4,789.40 3,318.40
tau_9[8] 0.09 0.09 0.16 0.17 −0.18 0.36 1.00 3,608.47 3,163.09
tau_10[1] 0.38 0.38 0.20 0.20 0.06 0.71 1.00 3,199.43 3,034.73
tau_10[2] −0.17 −0.17 0.18 0.19 −0.46 0.13 1.00 3,010.15 2,937.50
tau_10[3] 0.12 0.12 0.16 0.16 −0.14 0.39 1.00 3,697.43 2,971.59
tau_10[4] 0.28 0.27 0.15 0.15 0.03 0.53 1.00 3,794.41 3,136.77
tau_10[5] 0.10 0.10 0.15 0.15 −0.15 0.36 1.00 3,897.73 3,286.00
tau_10[6] −0.12 −0.11 0.07 0.07 −0.23 0.00 1.00 5,080.42 3,675.51
tau_10[7] 0.18 0.18 0.07 0.06 0.07 0.29 1.00 5,278.78 3,291.34
tau_10[8] 0.09 0.09 0.10 0.10 −0.06 0.25 1.00 4,010.96 3,006.18
Code
# One row per (posterior draw, student): posterior predictive draws of
# y10_hat joined to the simulated data.
post_long <- fit_teacher_mod |>
  spread_draws(y10_hat[student]) |>
  inner_join(sim_grades, by = join_by(student))

# Per-student summaries of the posterior predictive distribution.
# Later entries in summarise() can reference earlier ones (dplyr
# evaluates them sequentially), e.g. `residuals` uses the y10 and
# y10_hat_median computed just above it.
grades_pred_summary <- post_long |>
  group_by(student) |>
  summarise(
    n = n(),
    y8 = first(y8),
    y9 = first(y9),
    y10 = first(y10),
    y10_hat_mean = mean(y10_hat),
    y10_hat_median = median(y10_hat),
    residuals = y10 - y10_hat_median,
    y10_hat_sd = sd(y10_hat),
    lower_95 = quantile(y10_hat, 0.025),
    upper_95 = quantile(y10_hat, 0.975),
    # Probability of landing in each (mutually exclusive) grade band
    p_fail = mean(y10_hat < 0.5),
    p_1 = mean(y10_hat >= 0.5 & y10_hat < 0.6),
    p_2 = mean(y10_hat >= 0.6 & y10_hat < 0.7),
    p_3 = mean(y10_hat >= 0.7 & y10_hat < 0.8),
    p_4 = mean(y10_hat >= 0.8),
    # Cumulative "or more" band probabilities
    # NOTE(review): p_1_om duplicates p_pass below, and p_4_om equals
    # p_4 — possibly intentional for naming symmetry; confirm.
    p_1_om = mean(y10_hat >= 0.5),
    p_2_om = mean(y10_hat >= 0.6),
    p_3_om = mean(y10_hat >= 0.7),
    p_4_om = mean(y10_hat >= 0.8),
    p_pass = mean(y10_hat >= 0.5)
  )

# Attach the per-student summaries back onto the draw-level rows
post_long <- post_long |>
  inner_join(grades_pred_summary, by = join_by(student, y8, y9, y10))

# Randomly select 25 student IDs
set.seed(2025)
random_25_students <- grades_pred_summary |>
  slice_sample(n = 25) |>
  pull(student)
Code
# Posterior predictive distributions for 25 randomly chosen students,
# drawn as gradient-filled density ridges with median HDIs and the true
# observed grade overlaid in blue.
post_long |>
  filter(student %in% random_25_students) |>
  ggplot(aes(x = y10_hat, y = fct_reorder(factor(student), y10_hat_mean))) +
  geom_density_ridges_gradient(
    # after_stat() replaces the deprecated `..density..` / `..x..` forms
    aes(height = after_stat(density), fill = after_stat(x)),
    stat = "density",
    scale = 0.95,
    rel_min_height = 0.01
  ) +
  stat_pointinterval(.width = c(0.66, 0.90), point_interval = median_hdi) +
  geom_point(aes(x = y10), color = "blue", size = 2) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_x_continuous(
    breaks = seq(0, 1, 0.1), limits = c(0, 1),
    labels = scales::percent
  ) +
  # Red below the pass mark, grading through yellow/green above it
  scale_fill_gradientn(
    colours = c("red", "red", "yellow", "lightgreen", "forestgreen"),
    values = c(0, 0.45, 0.6, 0.8, 1),
    limits = c(0, 1),
    guide = "none"
  ) +
  labs(
    x = "Posterior Predicted Year 10 Result",
    y = "Student",
    title = "Posterior Predictive Distributions for 25 Random Students",
    subtitle = "66% and 90% median HDI ; Blue point = true observed value"
  ) +
  theme(
    # Vertical gridlines only, to read percentages off the x axis
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

#ggsave("posterior_plot_teacher.png", width = 8, height = 10)
Figure 5.3: geom_density_ridges_gradient()
Code
# Approximate leave-one-out cross-validation (PSIS-LOO)
fit_teacher_mod$loo()

Computed from 4000 by 250 log-likelihood matrix.

         Estimate   SE
elpd_loo    202.0 11.1
p_loo         4.9  0.5
looic      -404.0 22.1
------
MCSE of elpd_loo is 0.0.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.6]).

All Pareto k estimates are good (k < 0.7).
See help('pareto-k-diagnostic') for details.
Code
# Fit the two baseline models for comparison. `data` (not `dat`) is the
# sample() argument name, and a seed is set for reproducibility to
# match the teacher-model fit above.
# NOTE(review): these paths have no "stan_models/" prefix, unlike
# teacher_model.stan earlier — confirm where the .stan files live.
linear_mod <- cmdstan_model(stan_file = 'linear_model.stan')
fit_linear_mod <- linear_mod$sample(
  data = data_list,
  seed = 2025
  )

logit_mod <- cmdstan_model(stan_file = 'logit_model.stan')

fit_logit_mod <- logit_mod$sample(
  data = data_list,
  seed = 2025
  )

# Compare the three models on expected log predictive density (PSIS-LOO)
loo_compare(x = list(
  linear_model = fit_linear_mod$loo(),
  logit_model = fit_logit_mod$loo(),
  teacher_model = fit_teacher_mod$loo()
))