Laura has asked that I do some basic coding and logisitc regression for a chapter of hers.
It involves the dataset on pregnancies in Anglican churchs in Cape Town.
The instructions are included in a document called, “instructions.doc” in the data folder of this project.
library(pacman)
pacman::p_load(tidyverse, lubridate, glue, stringr, broom)
library(readxl)
lr <- read_excel("data/LR.xlsx")
## New names:
## * `` -> ...8
# creating date of marriage
lr <- lr %>%
mutate(date_married = glue("{Day_married}-{Month_married}-{Year_married}"),
date_married = lubridate::dmy(date_married))
# check for failures
lr %>%
filter(is.na(date_married))
## # A tibble: 0 x 35
## # ... with 35 variables: `Parish name` <chr>, Year_baptized <chr>,
## # Month_baptized_written <chr>, Month_baptized <chr>, Day_baptized <chr>,
## # Childs_name <chr>, Year_of_birth <chr>, ...8 <chr>, Month_of_birth <chr>,
## # Day_of_birth <chr>, Male_name_parent <chr>, Female_name_parent <chr>,
## # Surname_atbap <chr>, Year_married <dbl>, Month_married_written <chr>,
## # Month_married <dbl>, Day_married <chr>, Male_name <chr>,
## # Male_surname <chr>, Male_age <chr>, Male_race <chr>, Male_condition <chr>,
## # Male_Profession <chr>, Prof_code <chr>, Female_name <chr>,
## # Female_surname <chr>, Female_age <chr>, Female_race <chr>,
## # Female_profession <chr>, Female_prof_rank <chr>, Male_signed <chr>,
## # Female_signed <chr>, Race <chr>, Breadwinner_class <chr>,
## # date_married <date>
# all good, no missing
# creating date of birth
lr <- lr %>%
mutate(date_birth = glue("{Day_of_birth}-{Month_of_birth}-{Year_of_birth}"),
date_birth = lubridate::dmy(date_birth))
## Warning: 6 failed to parse.
# check for failures
lr %>%
filter(is.na(date_birth))
## # A tibble: 6 x 36
## `Parish name` Year_baptized Month_baptized_~ Month_baptized Day_baptized
## <chr> <chr> <chr> <chr> <chr>
## 1 Cape Town, M~ 1907 December 12 17
## 2 Cape Town, A~ 1952 Mar 3 14
## 3 Cape Town, L~ 1943 October 10 20
## 4 Cape Town,Be~ 1955 Sep 9 4
## 5 Cape Town, A~ 1940 Jan 1 16
## 6 Cape Town, M~ 1919 October 10 17
## # ... with 31 more variables: Childs_name <chr>, Year_of_birth <chr>,
## # ...8 <chr>, Month_of_birth <chr>, Day_of_birth <chr>,
## # Male_name_parent <chr>, Female_name_parent <chr>, Surname_atbap <chr>,
## # Year_married <dbl>, Month_married_written <chr>, Month_married <dbl>,
## # Day_married <chr>, Male_name <chr>, Male_surname <chr>, Male_age <chr>,
## # Male_race <chr>, Male_condition <chr>, Male_Profession <chr>,
## # Prof_code <chr>, Female_name <chr>, Female_surname <chr>, Female_age <chr>,
## # Female_race <chr>, Female_profession <chr>, Female_prof_rank <chr>,
## # Male_signed <chr>, Female_signed <chr>, Race <chr>,
## # Breadwinner_class <chr>, date_married <date>, date_birth <date>
# 6 failures. 3 missing days. 3 days which are incorrect, e.g. day 232 and day 31
# creating date of baptism
lr <- lr %>%
mutate(date_baptism = glue("{Day_baptized}-{Month_baptized}-{Year_baptized}"),
date_baptism = lubridate::dmy(date_baptism))
## Warning: 11 failed to parse.
# 11 failed to parse. why?
lr %>%
filter(is.na(date_baptism))
## # A tibble: 11 x 37
## `Parish name` Year_baptized Month_baptized_~ Month_baptized Day_baptized
## <chr> <chr> <chr> <chr> <chr>
## 1 Cape Town, A~ 1937 Jun 6 .
## 2 Cape Town, A~ 1937 Sep 9 .
## 3 Cape Town, M~ <NA> September 9 30
## 4 Cape Town, M~ 1920 October <NA> 8
## 5 Cape Town, M~ 1926 June <NA> 6
## 6 Cape Town, M~ 1925 December <NA> 9
## 7 Cape Town, M~ 1923 March <NA> 4
## 8 Cape Town, M~ 1924 October <NA> 5
## 9 Cape Town, M~ 1925 November <NA> 1
## 10 Cape Town, M~ 1921 October <NA> 7
## 11 Cape Town, M~ 1921 August <NA> 7
## # ... with 32 more variables: Childs_name <chr>, Year_of_birth <chr>,
## # ...8 <chr>, Month_of_birth <chr>, Day_of_birth <chr>,
## # Male_name_parent <chr>, Female_name_parent <chr>, Surname_atbap <chr>,
## # Year_married <dbl>, Month_married_written <chr>, Month_married <dbl>,
## # Day_married <chr>, Male_name <chr>, Male_surname <chr>, Male_age <chr>,
## # Male_race <chr>, Male_condition <chr>, Male_Profession <chr>,
## # Prof_code <chr>, Female_name <chr>, Female_surname <chr>, Female_age <chr>,
## # Female_race <chr>, Female_profession <chr>, Female_prof_rank <chr>,
## # Male_signed <chr>, Female_signed <chr>, Race <chr>,
## # Breadwinner_class <chr>, date_married <date>, date_birth <date>,
## # date_baptism <date>
# two have missing day, one has missing year, 8 have missing month.
# Month_baptized
lr <- lr %>%
mutate(Month_baptized = as.numeric(Month_baptized),
Month_baptized= month(Month_baptized, label = T))
lr %>%
count(Month_baptized, sort = T)
## Warning: Factor `Month_baptized` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## # A tibble: 13 x 2
## Month_baptized n
## <ord> <int>
## 1 Nov 205
## 2 Feb 201
## 3 Dec 201
## 4 Mar 199
## 5 Sep 189
## 6 Aug 186
## 7 Oct 183
## 8 Jun 176
## 9 Apr 168
## 10 May 167
## 11 Jul 162
## 12 Jan 138
## 13 <NA> 8
# plot to check distribution of births and baptisims and marriages
lr %>%
ggplot(aes()) +
geom_histogram(aes(date_birth), alpha = 0.3, fill = 'blue') +
geom_histogram(aes(date_baptism), alpha = 0.3, fill = 'red') +
geom_histogram(aes(date_married), alpha = 0.3, fill = 'green')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 6 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 11 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
lr <- lr %>%
mutate(days_marriage_to_birth = as.numeric(as.duration(date_birth - date_married), "days"))
# NB there is no-one in the dataset with an illigitmate birth
# plots
# density plot shows proportions for each group
lr %>%
filter(days_marriage_to_birth > 0,
days_marriage_to_birth < 700,
Race != ".",
Race != "NA") %>%
ggplot(aes(days_marriage_to_birth, fill = Female_race)) +
geom_density(alpha = .3) +
geom_vline(lty = 2, xintercept = 240)
# histogram shows distribtuion, not proportions
lr %>%
filter(days_marriage_to_birth > 0,
days_marriage_to_birth < 700,
Race != ".",
Race != "NA") %>%
ggplot(aes(days_marriage_to_birth, fill = Female_race)) +
geom_histogram(alpha = .3, position = "dodge") +
geom_vline(lty = 2, xintercept = 240)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
lr <- lr %>%
mutate(days_birth_to_baptism = as.numeric(as.duration(date_baptism- date_birth), "days"))
# plots
# density plot shows proportions for each group
lr %>%
filter(days_birth_to_baptism > 0,
days_birth_to_baptism < 700,
Race != ".",
Race != "NA") %>%
ggplot(aes(days_birth_to_baptism, fill = Female_race)) +
geom_density(alpha = .3)
# histogram shows distribtuion, not proportions
lr %>%
filter(days_birth_to_baptism > 0,
days_birth_to_baptism < 700,
Race != ".",
Race != "NA") %>%
ggplot(aes(days_birth_to_baptism, fill = Female_race)) +
geom_histogram(alpha = .3, position = "dodge")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# here we don't worry about illigitamte births as there are none
lr <- lr %>%
filter(days_marriage_to_birth >= 0) %>%
mutate(birth_type = ifelse(days_marriage_to_birth >= 240, yes = 0, no = 1))
# plot to check for sensibility
lr %>%
ggplot(aes(birth_type)) +
geom_bar() +
facet_wrap(~ Female_race)
# THERE IS A LOT OF DATA THAT WE LOSE FROM MISSING RACE INFORMATION
lr <- lr %>%
mutate(bridal_pregnancy = birth_type)
# day of week of baptism
lr <- lr %>%
mutate(baptism_dow = lubridate::wday(date_baptism),
baptism_sunday = ifelse(baptism_dow == 1, 1, 0))
lr %>%
ggplot(aes(baptism_dow)) +
geom_bar() +
facet_wrap( ~ Female_race)
## Warning: Removed 9 rows containing non-finite values (stat_count).
# comment is that babies of coloured mothers are more likely to have baptism on Fridays
# what about the interaction between Sunday and bridal pregnancy?
lr %>%
filter(Female_race != ".") %>%
mutate(bridal_pregnancy = recode(bridal_pregnancy,
`0` = "Conventional pregnancy",
`1` = "Bridal pregnancy"),
baptism_dow = wday(baptism_dow, label = T)) %>%
filter(baptism_dow != "NA") %>%
ggplot(aes(baptism_dow)) +
geom_bar() +
facet_grid(bridal_pregnancy ~ Female_race) +
labs(title = "Plot of day of week of baptism by mother's race and type of pregnancy",
x = "Baptism day of the week",
y = "Count")
# getting rid of the dots
lr_clean <- lr %>%
filter(Race != ".",
Breadwinner_class != ".",
Male_age != ".",
Female_age != ".",
Male_signed != ".",
Female_signed != ".",
Month_baptized != ".")
# what is largest parish?
lr %>%
count(`Parish name`, sort = T)
## # A tibble: 6 x 2
## `Parish name` n
## <chr> <int>
## 1 Cape Town, Athlone, St Mark 821
## 2 Cape Town, Mowbray, St Peter 422
## 3 Cape Town, Kenilworth, Christ Church 229
## 4 Cape Town, Sea Point, St James the Great 199
## 5 Cape Town, Maitland, St. Peters 136
## 6 Cape Town,Bellville,Parow,St Margaret 74
# Athlone, St Mark
# treat variables as numeric
lr_clean <- lr_clean %>%
mutate(Race = recode(Race, `0` = "White", `2` = "Mixed race", `1` = "Coloured"),
Race = as.factor(Race),
Breadwinner_class = as.numeric(Breadwinner_class),
Male_age = as.numeric(Male_age),
Female_age = as.numeric(Female_age),
Male_signed = recode(Male_signed, "Yes" = 1, "No" = 0),
Female_signed = recode(Female_signed, "Yes" = 1, "No" = 0),
Year_baptized = as.numeric(Year_baptized),
Parish = as.factor(`Parish name`),
Parish = fct_relevel(Parish, "Cape Town, Athlone, St Mark"))
months <- lr %>%
count(Month_baptized, sort =T)
## Warning: Factor `Month_baptized` contains implicit NA, consider using
## `forcats::fct_explicit_na`
model_bridal_pregnancy <- lr_clean %>%
glm(bridal_pregnancy ~ Race + Breadwinner_class + Male_age + Female_age + Male_signed + Female_signed + Year_baptized + Parish, family = "binomial", data = .) %>%
tidy(conf.int = T)
# Here is the output
model_bridal_pregnancy
## # A tibble: 14 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.71 11.8 -0.145 8.85e-1 -24.8 21.3
## 2 RaceMixed race -0.635 0.523 -1.22 2.24e-1 -1.73 0.357
## 3 RaceWhite -1.37 0.242 -5.64 1.70e-8 -1.85 -0.900
## 4 Breadwinner_class 0.684 0.205 3.33 8.61e-4 0.286 1.09
## 5 Male_age -0.0357 0.0167 -2.14 3.20e-2 -0.0690 -0.00355
## 6 Female_age -0.0977 0.0205 -4.76 1.91e-6 -0.138 -0.0580
## 7 Male_signed 0.342 0.224 1.52 1.27e-1 -0.0953 0.786
## 8 Female_signed 0.476 0.239 1.99 4.63e-2 0.0128 0.951
## 9 Year_baptized 0.00163 0.00607 0.268 7.89e-1 -0.0103 0.0136
## 10 ParishCape Town, Ke~ -0.410 0.359 -1.14 2.53e-1 -1.14 0.280
## 11 ParishCape Town, Ma~ 0.321 0.291 1.10 2.70e-1 -0.250 0.892
## 12 ParishCape Town, Mo~ 0.344 0.161 2.14 3.22e-2 0.0303 0.661
## 13 ParishCape Town, Se~ 0.143 0.435 0.328 7.43e-1 -0.730 0.984
## 14 ParishCape Town,Bel~ -0.499 0.360 -1.39 1.66e-1 -1.23 0.188
# coefficient plot including the parish and year terms
model_bridal_pregnancy %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(subtitle = "Coefficient plot for logistic regression of determinants of bridal pregnancy",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_bridal_pregnancy_excl <- model_bridal_pregnancy %>%
filter(term != "Year_baptized",
term != "ParishCape Town, Kenilworth, Christ Church",
term != "ParishCape Town, Maitland, St. Peters",
term != "ParishCape Town, Mowbray, St Peter",
term != "ParishCape Town, Sea Point, St James the Great",
term != "ParishCape Town,Bellville,Parow,St Margaret")
# coefficient plot excluding the parish and year terms
model_bridal_pregnancy_excl %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants of bridal pregnancy",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
Does this look correct, Laura?
model_sunday <- lr_clean %>%
mutate(Month_baptized = as.character(Month_baptized)) %>%
glm(baptism_sunday ~ Race + bridal_pregnancy + Race*bridal_pregnancy +
Parish + Month_baptized + Year_baptized,
family = "binomial", data = .) %>%
tidy(conf.int = T)
# here is the output
model_sunday
## # A tibble: 23 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 43.2 13.1 3.29 9.94e- 4 17.7 69.2
## 2 RaceMixed race -0.492 1.09 -0.450 6.53e- 1 -2.28 2.46
## 3 RaceWhite -2.68 0.327 -8.22 2.02e-16 -3.34 -2.05
## 4 bridal_pregnancy -2.17 0.181 -12.0 2.91e-33 -2.54 -1.83
## 5 ParishCape Town, Ke~ 1.03 0.322 3.20 1.35e- 3 0.408 1.67
## 6 ParishCape Town, Ma~ -1.16 0.323 -3.58 3.47e- 4 -1.79 -0.520
## 7 ParishCape Town, Mo~ 1.68 0.247 6.79 1.12e-11 1.21 2.18
## 8 ParishCape Town, Se~ 1.09 0.432 2.53 1.13e- 2 0.261 1.96
## 9 ParishCape Town,Bel~ 2.37 0.449 5.26 1.41e- 7 1.53 3.30
## 10 Month_baptizedAug 0.0677 0.334 0.203 8.39e- 1 -0.587 0.723
## # ... with 13 more rows
model_sunday %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_sunday_excl <- model_sunday %>%
filter(term != "Year_baptized",
term != "ParishCape Town, Kenilworth, Christ Church",
term != "ParishCape Town, Maitland, St. Peters",
term != "ParishCape Town, Mowbray, St Peter",
term != "ParishCape Town, Sea Point, St James the Great",
term != "ParishCape Town,Bellville,Parow,St Margaret",
term!= "Month_baptizedMay",
term!= "Month_baptizedAug",
term!= "Month_baptizedJun",
term!= "Month_baptizedFeb",
term!= "Month_baptizedNov",
term!= "Month_baptizedSep",
term!= "Month_baptizedMar",
term!= "Month_baptizedOct",
term!= "Month_baptizedJul",
term!= "Month_baptizedJan",
term!= "Month_baptizedDec")
model_sunday_excl %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_sunday_binary <- lr_clean %>%
filter(Race != "Mixed race") %>%
mutate(Race = recode(Race, "White" = 0, "Coloured" = 1)) %>%
mutate(Month_baptized = as.character(Month_baptized)) %>%
glm(baptism_sunday ~ Race + bridal_pregnancy + Race*bridal_pregnancy +
Parish + Month_baptized + Year_baptized,
family = "binomial", data = .) %>%
tidy(conf.int = T)
# here is the output
model_sunday_binary
## # A tibble: 21 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 40.0 13.2 3.04 2.39e- 3 14.4 66.1
## 2 Race 2.71 0.330 8.21 2.17e-16 2.07 3.36
## 3 bridal_pregnancy 0.413 0.383 1.08 2.81e- 1 -0.314 1.20
## 4 ParishCape Town, Ke~ 1.05 0.326 3.23 1.23e- 3 0.422 1.70
## 5 ParishCape Town, Ma~ -1.15 0.323 -3.55 3.86e- 4 -1.78 -0.511
## 6 ParishCape Town, Mo~ 1.71 0.251 6.82 9.21e-12 1.24 2.23
## 7 ParishCape Town, Se~ 1.12 0.434 2.59 9.62e- 3 0.287 1.99
## 8 ParishCape Town,Bel~ 2.38 0.452 5.27 1.34e- 7 1.54 3.32
## 9 Month_baptizedAug 0.0601 0.334 0.180 8.57e- 1 -0.596 0.717
## 10 Month_baptizedDec -0.436 0.306 -1.43 1.54e- 1 -1.04 0.159
## # ... with 11 more rows
model_sunday_binary %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_sunday_binary_excl <- model_sunday_binary %>%
filter(term != "Year_baptized",
term != "ParishCape Town, Kenilworth, Christ Church",
term != "ParishCape Town, Maitland, St. Peters",
term != "ParishCape Town, Mowbray, St Peter",
term != "ParishCape Town, Sea Point, St James the Great",
term != "ParishCape Town,Bellville,Parow,St Margaret",
term!= "Month_baptizedMay",
term!= "Month_baptizedAug",
term!= "Month_baptizedJun",
term!= "Month_baptizedFeb",
term!= "Month_baptizedNov",
term!= "Month_baptizedSep",
term!= "Month_baptizedMar",
term!= "Month_baptizedOct",
term!= "Month_baptizedJul",
term!= "Month_baptizedJan",
term!= "Month_baptizedDec")
model_sunday_binary_excl %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_sunday_binary_white <- lr_clean %>%
filter(Race != "Mixed race") %>%
mutate(Race = fct_relevel(Race, "White")) %>%
mutate(Month_baptized = as.character(Month_baptized)) %>%
glm(baptism_sunday ~ Race + bridal_pregnancy + Race*bridal_pregnancy +
Parish + Month_baptized + Year_baptized,
family = "binomial", data = .) %>%
tidy(conf.int = T)
# here is the output
model_sunday_binary_white
## # A tibble: 21 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 40.0 13.2 3.04 2.39e- 3 14.4 66.1
## 2 RaceColoured 2.71 0.330 8.21 2.17e-16 2.07 3.36
## 3 bridal_pregnancy 0.413 0.383 1.08 2.81e- 1 -0.314 1.20
## 4 ParishCape Town, Ke~ 1.05 0.326 3.23 1.23e- 3 0.422 1.70
## 5 ParishCape Town, Ma~ -1.15 0.323 -3.55 3.86e- 4 -1.78 -0.511
## 6 ParishCape Town, Mo~ 1.71 0.251 6.82 9.21e-12 1.24 2.23
## 7 ParishCape Town, Se~ 1.12 0.434 2.59 9.62e- 3 0.287 1.99
## 8 ParishCape Town,Bel~ 2.38 0.452 5.27 1.34e- 7 1.54 3.32
## 9 Month_baptizedAug 0.0601 0.334 0.180 8.57e- 1 -0.596 0.717
## 10 Month_baptizedDec -0.436 0.306 -1.43 1.54e- 1 -1.04 0.159
## # ... with 11 more rows
model_sunday_binary_white %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")
model_sunday_binary_white_excl <- model_sunday_binary_white %>%
filter(term != "Year_baptized",
term != "ParishCape Town, Kenilworth, Christ Church",
term != "ParishCape Town, Maitland, St. Peters",
term != "ParishCape Town, Mowbray, St Peter",
term != "ParishCape Town, Sea Point, St James the Great",
term != "ParishCape Town,Bellville,Parow,St Margaret",
term!= "Month_baptizedMay",
term!= "Month_baptizedAug",
term!= "Month_baptizedJun",
term!= "Month_baptizedFeb",
term!= "Month_baptizedNov",
term!= "Month_baptizedSep",
term!= "Month_baptizedMar",
term!= "Month_baptizedOct",
term!= "Month_baptizedJul",
term!= "Month_baptizedJan",
term!= "Month_baptizedDec")
model_sunday_binary_white_excl %>%
filter(term != "(Intercept)") %>%
arrange(desc(estimate)) %>%
mutate(term = fct_reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(lty =2, xintercept = 0) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(title = "Coefficient plot for logistic regression of determinants baptism on Sunday",
x = "Estimate",
y = "",
caption = "Note: error bars show 95% confidence interval")