install.packages("rmarkdown")
library(readxl)
library(tidyverse)
library(psych) 
library(ggplot2)
library(effects)
library(dplyr)
library(rmarkdown)
library(readxl)

setwd("/Users/kimchaewon/Desktop/QRMSEMINARS/i. data/")

data <- read_excel("dataLondon.xlsx") %>%
  filter(language == "EAL") 

View(data)
str(data)
tibble [500 × 7] (S3: tbl_df/tbl/data.frame)
 $ year            : num [1:500] 202425 202425 202425 202425 202425 ...
 $ country         : chr [1:500] "England" "England" "England" "England" ...
 $ region          : chr [1:500] "London" "London" "London" "London" ...
 $ localAuthority  : chr [1:500] "City of London" "Camden" "Camden" "Camden" ...
 $ schoolType      : chr [1:500] "State-funded primary" "State-funded nursery" "State-funded primary" "State-funded secondary" ...
 $ language        : chr [1:500] "EAL" "EAL" "EAL" "EAL" ...
 $ percentagePupils: num [1:500] 60.2 14.7 52.5 36.8 46.7 ...
data$year <- as.factor(data$year)
data$country <- as.factor(data$country)
data$region <- as.factor(data$region)
data$localAuthority <- as.factor(data$localAuthority)
data$schoolType <- as.factor(data$schoolType)
data$language <- as.factor(data$language)
data$percentagePupils <- as.numeric(data$percentagePupils)
library(dplyr)

EAL_data <- EAL_data %>%
  mutate(
    year = recode(year,
      "202122" = "2021/22",
      "202223" = "2022/23",
      "202324" = "2023/24",
      "202425" = "2024/25"
    )
  )

print(levels(EAL_data$year))
[1] "2021/22" "2022/23" "2023/24" "2024/25"
library(readr)
library(dplyr)
library(ggplot2)

overall_trend <- data %>%
  group_by(year) %>%
  summarise(
    mean = mean(percentagePupils, na.rm = TRUE),
    sd = sd(percentagePupils, na.rm = TRUE),    
    min = min(percentagePupils, na.rm = TRUE),  
    max = max(percentagePupils, na.rm = TRUE)) #na.rm = TRUE excludes the N/A data

print(overall_trend)
overall_trend <- EAL_data %>%
  group_by(year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(overall_trend, aes(x = factor(year), y = mean_EAL, group = 1)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(title = "Overall EAL Proportion Trend in London (2021-2025)",
       x = "Academic Year",
       y = "Mean EAL Percentage (%)") +
  theme_minimal() +
  ylim(40, 45)

library(dplyr)

print(levels(EAL_data$year))
[1] "2021/22" "2022/23" "2023/24" "2024/25"
London_overview <- data %>%
  group_by(schoolType, year) %>%
   summarize(
    mean = mean(percentagePupils),
    sd = sd(percentagePupils),
    median = median(percentagePupils),
    min = min(percentagePupils),
    max = max(percentagePupils)
  )
print(London_overview)
View(London_overview)
library(dplyr)
library(tidyr)

print(levels(EAL_data$year))
[1] "2021/22" "2022/23" "2023/24" "2024/25"
London_overview <- EAL_data %>%
  group_by(year, schoolType) %>%
  summarise(mean_EAL = mean(`percentagePupils`, na.rm = TRUE), .groups = "drop")

summary_table <- London_overview %>%
  pivot_wider(
    names_from = schoolType,
    values_from = mean_EAL
  ) %>%
  arrange(year)

print(summary_table)
library(ggplot2)
library(dplyr)

London_overview <- EAL_data %>%
  group_by(year, schoolType) %>%
   summarize(
     count = n(),
     mean = mean(percentagePupils, na.rm = TRUE), # <-- This creates the 'mean' column
     sd = sd(percentagePupils, na.rm = TRUE),
     median = median(percentagePupils, na.rm = TRUE),
     min = min(percentagePupils, na.rm = TRUE),
     max = max(percentagePupils, na.rm = TRUE)
   )

print(levels(EAL_data$year))
[1] "2021/22" "2022/23" "2023/24" "2024/25"
ggplot(London_overview, aes(x = schoolType, y = mean, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Overall EAL Proportions by School Type and Year in London",
    x = "School Type",
    y = "Mean EAL Percentage (%)",
    fill = "Academic Year" 
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +

ylim (0, 60)

library(dplyr)
library(readxl)

EAL_data <- data %>%
  filter(language == "EAL")

EAL_summary <- EAL_data %>%
  group_by(localAuthority) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE)) %>%
  arrange(desc(mean_EAL))

head(EAL_summary, 10)
tail(EAL_summary, 10)
EAL_data <- data %>%
  filter(language == "EAL")

EAL_summary <- EAL_data %>%
  filter(!is.na(localAuthority)) %>%
  group_by(localAuthority) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE)) %>%
  arrange(desc(mean_EAL))

n <- nrow(EAL_summary)

middle_EAL <- EAL_summary %>%
  slice(round(n/2 - 5):round(n/2 + 5))

middle_EAL
library(dplyr)
library(readxl)

desired_order <- c("Harrow", "Haringey", "Bromley")

London_EAL <- data %>%
  filter(language == "EAL",
         localAuthority %in% desired_order) %>%
 mutate(localAuthority = factor(localAuthority, levels = desired_order)) %>%
  group_by(localAuthority, year) %>%
  summarise(
    mean_EAL = mean(percentagePupils, na.rm = TRUE),
    median_EAL = median(percentagePupils, na.rm = TRUE),
    sd_EAL = sd(percentagePupils, na.rm = TRUE),.groups = "drop") %>%
  arrange(localAuthority, year)

print(London_EAL)
EAL_data <- EAL_data %>%
  mutate(
    year = recode(year,
      "202122" = "2021/22",
      "202223" = "2022/23",
      "202324" = "2023/24",
      "202425" = "2024/25"
    )
  )

LA_choices <- c("Harrow", "Haringey", "Bromley")
LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(LA_comparison, aes(x = year, y = mean_EAL, group = localAuthority, color = localAuthority)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "EAL Proportions of Selected Local Authorities",
    x = "Academic Year",
    y = "Mean EAL Percentage (%)",
    color = "Local Authority"
  ) +
  theme_minimal() +
  ylim(0, NA)

library(dplyr)
library(ggplot2)

LA_choices <- c("Harrow", "Haringey", "Bromley")

LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, schoolType, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop") %>%

print(LA_comparison)
View(LA_comparison)
library(dplyr)
library(ggplot2)

LA_choices <- c("Harrow", "Haringey", "Bromley")

three_LA_summary <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(year, localAuthority, schoolType) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

for (LA in LA_choices) {

  plot_data <- three_LA_summary %>%
    filter(localAuthority == LA)
  
  g <- ggplot(plot_data, aes(x = year, y = mean_EAL, group = schoolType, color = schoolType)) +
    geom_line(linewidth = 1) +
    geom_point(size = 3) +
    labs(
      title = paste("EAL Porportion Trends in", LA), 
      x = "Academic Year",
      y = "Mean EAL Percentage (%)",
      color = "School Type"
    ) +
    theme_minimal() +
    ylim(0, 100) 
  
  print(g)
}

library(ggplot2)

overall_trend <- EAL_data %>%
  group_by(year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

LA_choices <- c("Harrow", "Haringey", "Bromley")
LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(LA_comparison, aes(x = factor(year), y = mean_EAL)) +
  geom_line(aes(group = localAuthority, color = localAuthority), linewidth = 1) +
  geom_point(aes(group = localAuthority, color = localAuthority), size = 2) +
  
  geom_line(data = overall_trend, aes(group = 1), color = "black", linewidth = 1.3) +
  geom_point(data = overall_trend, aes(group = 1), color = "black", size = 2) +

  labs(
    title = "EAL Proportions: Selected LAs vs. Overall EAL Proportions in London",
    subtitle = "the black line indicates the overall London trend",
    x = "Academic Year",
    y = "Mean EAL Percentage (%)",
    color = "Local Authority"
  ) +
  theme_minimal() +
  ylim(0, NA)

---
title: "R Notebook"
output:
  html_notebook: default
  word_document: default
---

```{r}
install.packages("rmarkdown")
```

```{r}
library(readxl)
library(tidyverse)
library(psych) 
library(ggplot2)
library(effects)
library(dplyr)
library(rmarkdown)
```

```{r}
library(readxl)

setwd("/Users/kimchaewon/Desktop/QRMSEMINARS/i. data/")

data <- read_excel("dataLondon.xlsx") %>%
  filter(language == "EAL") 

View(data)
```

```{r}
str(data)
```
```{r}
data$year <- as.factor(data$year)
data$country <- as.factor(data$country)
data$region <- as.factor(data$region)
data$localAuthority <- as.factor(data$localAuthority)
data$schoolType <- as.factor(data$schoolType)
data$language <- as.factor(data$language)
data$percentagePupils <- as.numeric(data$percentagePupils)
```

```{r}
library(dplyr)

EAL_data <- EAL_data %>%
  mutate(
    year = recode(year,
      "202122" = "2021/22",
      "202223" = "2022/23",
      "202324" = "2023/24",
      "202425" = "2024/25"
    )
  )

print(levels(EAL_data$year))
```

```{r}
library(readr)
library(dplyr)
library(ggplot2)

overall_trend <- data %>%
  group_by(year) %>%
  summarise(
    mean = mean(percentagePupils, na.rm = TRUE),
    sd = sd(percentagePupils, na.rm = TRUE),    
    min = min(percentagePupils, na.rm = TRUE),  
    max = max(percentagePupils, na.rm = TRUE)) #na.rm = TRUE excludes the N/A data

print(overall_trend)
```

```{r}
overall_trend <- EAL_data %>%
  group_by(year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(overall_trend, aes(x = factor(year), y = mean_EAL, group = 1)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(title = "Overall EAL Proportion Trend in London (2021-2025)",
       x = "Academic Year",
       y = "Mean EAL Percentage (%)") +
  theme_minimal() +
  ylim(40, 45)
```


```{r}
library(dplyr)

print(levels(EAL_data$year))
London_overview <- data %>%
  group_by(schoolType, year) %>%
   summarize(
    mean = mean(percentagePupils),
    sd = sd(percentagePupils),
    median = median(percentagePupils),
    min = min(percentagePupils),
    max = max(percentagePupils)
  )
print(London_overview)
View(London_overview)
```
```{r}
library(dplyr)
library(tidyr)

print(levels(EAL_data$year))
London_overview <- EAL_data %>%
  group_by(year, schoolType) %>%
  summarise(mean_EAL = mean(`percentagePupils`, na.rm = TRUE), .groups = "drop")

summary_table <- London_overview %>%
  pivot_wider(
    names_from = schoolType,
    values_from = mean_EAL
  ) %>%
  arrange(year)

print(summary_table)
```


```{r}
library(ggplot2)
library(dplyr)

London_overview <- EAL_data %>%
  group_by(year, schoolType) %>%
   summarize(
     count = n(),
     mean = mean(percentagePupils, na.rm = TRUE), # <-- This creates the 'mean' column
     sd = sd(percentagePupils, na.rm = TRUE),
     median = median(percentagePupils, na.rm = TRUE),
     min = min(percentagePupils, na.rm = TRUE),
     max = max(percentagePupils, na.rm = TRUE)
   )

print(levels(EAL_data$year))
ggplot(London_overview, aes(x = schoolType, y = mean, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Overall EAL Proportions by School Type and Year in London",
    x = "School Type",
    y = "Mean EAL Percentage (%)",
    fill = "Academic Year" 
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +

ylim (0, 60)
```

```{r}
library(dplyr)
library(readxl)

EAL_data <- data %>%
  filter(language == "EAL")

EAL_summary <- EAL_data %>%
  group_by(localAuthority) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE)) %>%
  arrange(desc(mean_EAL))

head(EAL_summary, 10)
tail(EAL_summary, 10)
```

```{r}
EAL_data <- data %>%
  filter(language == "EAL")

EAL_summary <- EAL_data %>%
  filter(!is.na(localAuthority)) %>%
  group_by(localAuthority) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE)) %>%
  arrange(desc(mean_EAL))

n <- nrow(EAL_summary)

middle_EAL <- EAL_summary %>%
  slice(round(n/2 - 5):round(n/2 + 5))

middle_EAL
```

```{r}
library(dplyr)
library(readxl)

desired_order <- c("Harrow", "Haringey", "Bromley")

London_EAL <- data %>%
  filter(language == "EAL",
         localAuthority %in% desired_order) %>%
 mutate(localAuthority = factor(localAuthority, levels = desired_order)) %>%
  group_by(localAuthority, year) %>%
  summarise(
    mean_EAL = mean(percentagePupils, na.rm = TRUE),
    median_EAL = median(percentagePupils, na.rm = TRUE),
    sd_EAL = sd(percentagePupils, na.rm = TRUE),.groups = "drop") %>%
  arrange(localAuthority, year)

print(London_EAL)
```

```{r}
EAL_data <- EAL_data %>%
  mutate(
    year = recode(year,
      "202122" = "2021/22",
      "202223" = "2022/23",
      "202324" = "2023/24",
      "202425" = "2024/25"
    )
  )

LA_choices <- c("Harrow", "Haringey", "Bromley")
LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(LA_comparison, aes(x = year, y = mean_EAL, group = localAuthority, color = localAuthority)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "EAL Proportions of Selected Local Authorities",
    x = "Academic Year",
    y = "Mean EAL Percentage (%)",
    color = "Local Authority"
  ) +
  theme_minimal() +
  ylim(0, NA)
```

```{r}
library(dplyr)
library(ggplot2)

LA_choices <- c("Harrow", "Haringey", "Bromley")

LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, schoolType, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop") %>%

print(LA_comparison)
View(LA_comparison)
```

```{r}
library(dplyr)
library(ggplot2)

LA_choices <- c("Harrow", "Haringey", "Bromley")

three_LA_summary <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(year, localAuthority, schoolType) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

for (LA in LA_choices) {

  plot_data <- three_LA_summary %>%
    filter(localAuthority == LA)
  
  g <- ggplot(plot_data, aes(x = year, y = mean_EAL, group = schoolType, color = schoolType)) +
    geom_line(linewidth = 1) +
    geom_point(size = 3) +
    labs(
      title = paste("EAL Porportion Trends in", LA), 
      x = "Academic Year",
      y = "Mean EAL Percentage (%)",
      color = "School Type"
    ) +
    theme_minimal() +
    ylim(0, 100) 
  
  print(g)
}
```

```{r}
library(ggplot2)

overall_trend <- EAL_data %>%
  group_by(year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

LA_choices <- c("Harrow", "Haringey", "Bromley")
LA_comparison <- EAL_data %>%
  filter(localAuthority %in% LA_choices) %>%
  group_by(localAuthority, year) %>%
  summarise(mean_EAL = mean(percentagePupils, na.rm = TRUE), .groups = "drop")

ggplot(LA_comparison, aes(x = factor(year), y = mean_EAL)) +
  geom_line(aes(group = localAuthority, color = localAuthority), linewidth = 1) +
  geom_point(aes(group = localAuthority, color = localAuthority), size = 2) +
  
  geom_line(data = overall_trend, aes(group = 1), color = "black", linewidth = 1.3) +
  geom_point(data = overall_trend, aes(group = 1), color = "black", size = 2) +

  labs(
    title = "EAL Proportions: Selected LAs vs. Overall EAL Proportions in London",
    subtitle = "the black line indicates the overall London trend",
    x = "Academic Year",
    y = "Mean EAL Percentage (%)",
    color = "Local Authority"
  ) +
  theme_minimal() +
  ylim(0, NA)
```

