library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.4
# Point the session at the assignment folder so the relative CSV path below
# resolves. NOTE(review): this absolute path is specific to one machine; on any
# other computer it must be edited (or R started from the data folder) first.
setwd("c:/Users/Laptop/Documents/CEES MRes/LisbonTreatyAssignment/Lisbon Treaty/Assignment One Quantative Methods - Lisbon Treaty")
# Print the working directory to confirm the change took effect.
getwd()
## [1] "C:/Users/Laptop/Documents/CEES MRes/LisbonTreatyAssignment/Lisbon Treaty/Assignment One Quantative Methods - Lisbon Treaty"
# Read the CSV from the working directory into a base data.frame.
LTR09 <- read.csv("lisbon-treaty-2-data (2).csv")

#Question One

#The Lisbon Treaty dataset reflects the voting attitudes and voting behaviours of the varying demographics of the Irish population during the second Lisbon Treaty referendum (October 2009). This dataset was collected to understand public support for European integration before, during, and after the second referendum. Hence, the dataset is evidence of the trajectories of the Irish population’s perceptions, trends, and European identity in relation to the policies of the European Union in this time and space (Sinnott et al. 2009).

#Fieldwork for the poll was conducted by Millward Brown Lansdowne between 20th and 23rd November 2009. The poll consisted of a sample of 1,002 respondents, representative of all persons aged 18+ who were eligible to vote. Participants were selected according to the 2006 census, based on region, sex, age, and socio-economic group (Sinnott et al. 2009).

#In this context, the referendums on the Treaty of Nice in 2001 and the Lisbon Treaty in 2008 were initially defeated, but both treaties were subsequently ratified within a year of the initial referendums. Thus, following the 2008 No vote and the subsequent Yes vote in 2009, the data collected about voting behaviours provided the Irish Government with data to monitor the public’s varying attitudes towards integration within the European Union (Sinnott et al. 2009).

#i) Subset of the dataset for analysis:

# Keep only the six columns used in the questions that follow.
Lisbon_Treaty2 <- LTR09 %>%
  select(q1b, class, tea, q8, q9b, exage)

#ii) Data management

# Show the column types and first few values of the subset.
str(object = Lisbon_Treaty2)
## 'data.frame':    1002 obs. of  6 variables:
##  $ q1b  : int  NA 3 NA 2 3 NA 2 2 2 2 ...
##  $ class: int  4 6 6 2 4 5 6 5 5 3 ...
##  $ tea  : int  3 3 3 3 3 3 2 4 2 4 ...
##  $ q8   : int  10 7 7 7 7 8 8 7 6 5 ...
##  $ q9b  : int  2 2 3 3 3 4 4 3 3 3 ...
##  $ exage: int  23 47 22 38 44 25 76 52 69 39 ...

#The subset contains 6 variables and 1002 observations. In order to gain a deeper understanding of the variables, the Lisbon Treaty code books will be used in conjunction with RStudio.

#Question Two

#Variable q1b - the code book indicates that participants were asked how they voted in the referendum ‘in favour’ or ‘against’ the second referendum.

# Pull the raw vote codes out as a plain vector and count each code.
Voting_Preference <- Lisbon_Treaty2[["q1b"]]
table(Voting_Preference)
## Voting_Preference
##   2   3 
## 485 271

#The measurement of this variable is: nominal, dummy, binary variable. #The code book indicates that variable levels are: #2 = Yes In favour #3 = No Against

# Five-number summary of the raw vote codes, including the count of NAs.
summary(Lisbon_Treaty2[["q1b"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.000   2.000   2.000   2.358   3.000   3.000     246

#246 respondents have no recorded vote (NA), which may distort the overall analysis; these missing values will therefore be removed from the analysis, reducing the number of observations to 756.

#Variable levels changed:
#2.“In_Favour” #3.“Against”

# Recode the raw vote as a labelled factor (2 -> "In_Favour", 3 -> "Against").
# Any value outside 2:3, including a missing value, becomes NA.
Lisbon_Treaty2 <- mutate(
  Lisbon_Treaty2,
  Voting_Preference = factor(
    q1b,
    levels = 2:3,
    labels = c("In_Favour", "Against")
  )
)
# Keep only respondents with a recorded vote. is.na() is used instead of
# comparing with the string "NA": that comparison only removed the missing rows
# by accident (it evaluates to NA, and filter() drops rows whose condition is
# NA), and it would also discard a genuine level that happened to be named "NA".
Voting_Preference <- filter(Lisbon_Treaty2, !is.na(Voting_Preference))
glimpse(Voting_Preference)
## Rows: 756
## Columns: 7
## $ q1b               <int> 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2, 2...
## $ class             <int> 6, 2, 4, 6, 5, 5, 3, 4, 5, 3, 4, 4, 5, 5, 5, 3, 5...
## $ tea               <int> 3, 3, 3, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4, 4...
## $ q8                <int> 7, 7, 7, 8, 7, 6, 5, 7, 8, 8, 6, 4, 2, 5, 7, 5, 6...
## $ q9b               <int> 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3...
## $ exage             <int> 47, 38, 44, 76, 52, 69, 39, 37, 26, 69, 51, 52, 2...
## $ Voting_Preference <fct> Against, In_Favour, Against, In_Favour, In_Favour...
# Confirm the recoded column is stored as a factor.
class(Lisbon_Treaty2[["Voting_Preference"]])
## [1] "factor"
# Count of respondents per voting preference, plus each count as a percentage
# of the total (two decimals).
Voting_Preference %>%
  count(Voting_Preference) %>%
  mutate(percent = round(n / sum(n) * 100, 2))
##   Voting_Preference   n percent
## 1         In_Favour 485   64.15
## 2           Against 271   35.85

#Of the 756 respondents with a recorded vote, 485 voted in favour of the Lisbon Treaty.

#Question 2

# Bar chart of the share of respondents voting each way, with every bar
# labelled by its percentage.
ggplot(
  data = Voting_Preference,
  aes(
    x = factor(""),
    # after_stat() replaces the deprecated stat(). prop.table() converts the
    # per-bar counts into each bar's share of all plotted respondents.
    y = prop.table(after_stat(count)),
    fill = Voting_Preference,
    label = scales::percent(prop.table(after_stat(count)))
  )
) +
  geom_bar(position = "dodge") +
  # Same dodge width as the bars so each label sits above its own bar.
  geom_text(
    stat = "count",
    position = position_dodge(.9),
    vjust = -0.1,
    size = 3
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Voting preference",
    caption = "Source: Lisbon Treaty Second Referendum, Oct. 2009",
    # NOTE(review): these counts are hard-coded and must be edited by hand if
    # the filtered data ever change.
    x = "485 Voters                   271 Voters",
    y = "percentage %",
    fill = ""
  ) +
  scale_fill_colorblind()

#This bar chart clearly demonstrates that 64% of participants (485 voters) voted in favour in the second referendum, as opposed to the 36% (271 voters) who voted against.

#Question 3.

#As the variable exage is a continuous variable it is suitable for this analysis. Firstly, the variable is renamed to provide clarity.

# Keep a standalone copy of the respondents' ages under a descriptive name.
Age_of_Voter <- Voting_Preference[["exage"]]
# Mean age within each voting preference.
Voting_Preference %>%
  group_by(Voting_Preference) %>%
  summarise(mean_age = mean(exage))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##   Voting_Preference mean_age
##   <fct>                <dbl>
## 1 In_Favour             46.3
## 2 Against               43.9
# Box plot of respondent age (log10 y-axis) for each voting preference.
Voting_Preference %>%
  # y is mapped to the exage column of the piped data rather than to a
  # free-standing vector, so the plot cannot fall out of step with the rows
  # actually being drawn.
  ggplot(aes(Voting_Preference, exage)) +
  geom_boxplot(aes(fill = Voting_Preference)) +
  labs(
    title = "Distribution by average age and voting preference",
    x = "Voting Preference",
    # The axis shows individual ages (summarised by the boxes), not averages.
    y = "Age (log scale)",
    caption = "Source:Lisbon Treaty Second Referendum. Oct. 2009"
  ) +
  scale_y_log10() +
  theme(legend.title = element_blank()) +
  scale_fill_colorblind()

#This Box Plot was chosen as it highlights the median age of voting preferences by quartile groups, whilst also visualising the outliers. The majority voting in favour falls between the first and third quartile between the age of 35 to 60 years with a median age of 46. The first quartile line for against voters were between the age of 35 to 55 with a median age of 43. Outliers existed in the same age range of both voting preferences below the age of 18 and over the age of 75.

#Question Four

#I renamed and classified the variable levels in accordance with the UK Geographics website and the Irish Social Science Data Archives.

# Pull the raw socio-economic class codes out as a vector and count each code.
Socio_economic_Class <- Voting_Preference[["class"]]
table(Socio_economic_Class)
## Socio_economic_Class
##   2   3   4   5   6   7 
## 108 248 165 184  37  14

#2 = AB - Higher and intermediate managerial, administrative, professional occupations #3 = C1 - Supervisory, clerical, junior managerial, administrative, professional occupations #4 = C2 - Skilled manual occupations #5 = DE - Semi-skilled and unskilled manual occupations, unemployed and lowest grade occupations #6 = F50+ Farmers with more than 50 acres of land #7 = F50- Farmers with less than 50 acres of land #This variable is ordinal: the numbers have an order, reflecting increasing levels of social mobility, although the distance between the levels is not meaningful.

# Add a labelled socio-economic class factor (codes 2..7) to the data frame.
# The class column is recoded directly: previously no column of the target name
# existed yet, so mutate() silently fell back to a same-named global vector,
# which is only correct while that vector stays row-aligned with the data.
Voting_Preference <- mutate(
  Voting_Preference,
  Socio_economic_Class = factor(
    class,
    levels = 2:7,
    labels = c(
      "Professional", "Managerial", "Skilled Manual",
      "Semi/Unskilled/Unemp", "Large Farmers", "Smaller Farmers"
    )
  )
)
glimpse(Voting_Preference)
## Rows: 756
## Columns: 8
## $ q1b                  <int> 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2...
## $ class                <int> 6, 2, 4, 6, 5, 5, 3, 4, 5, 3, 4, 4, 5, 5, 5, 3...
## $ tea                  <int> 3, 3, 3, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4...
## $ q8                   <int> 7, 7, 7, 8, 7, 6, 5, 7, 8, 8, 6, 4, 2, 5, 7, 5...
## $ q9b                  <int> 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3...
## $ exage                <int> 47, 38, 44, 76, 52, 69, 39, 37, 26, 69, 51, 52...
## $ Voting_Preference    <fct> Against, In_Favour, Against, In_Favour, In_Fav...
## $ Socio_economic_Class <fct> Large Farmers, Professional, Skilled Manual, L...
# Horizontal dodged bar chart of voting preference within each socio-economic
# class. NOTE: prop.table() is applied to the counts of all bars together, so
# each percentage is a bar's share of all plotted respondents, not of its class.
ggplot(
  data = Voting_Preference,
  aes(
    x = Socio_economic_Class,
    # after_stat() replaces the deprecated stat().
    y = prop.table(after_stat(count)),
    fill = Voting_Preference,
    label = scales::percent(prop.table(after_stat(count)))
  )
) +
  geom_bar(position = "dodge") +
  # Same dodge width as the bars so each label lines up with its own bar.
  geom_text(
    stat = "count",
    position = position_dodge(.9),
    vjust = -0.1,
    size = 3
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Voting preference by socio-economic class",
    caption = "Source: Lisbon Treaty Second Referendum, Oct. 2009",
    x = "",
    y = "Percentage % ",
    fill = ""
  ) +
  theme_bw() +
  coord_flip()

#This bar chart exemplifies the percentage of the population’s voting preferences qualified by socio-economic class. Middle management were more likely to vote in favour of the second referendum compared to the groups classified as lower social economic groups. Although the highest professional group did not follow this same trend. By contrast, the skilled manual and managerial groups only varied slightly in their opposing voting trend by a small percentage. Although equally both small and large farmers have a very similar voting pattern in relation to rejecting the second referendum.

#Question 5.

#The code book indicates that the ‘tea’ variable represents the finished education levels. #2 = Primary #3 = Secondary #4 = Third level #5 = Still at School/College

#The level of education can be ordered from primary to higher education. However, as the distance between the levels has no numeric meaning, it is an ordinal, categorical variable. Again, this variable and its levels are renamed for clarity.

# Standalone copy of the raw education codes (kept for any later use).
Education_level <- Voting_Preference$tea
# Add a labelled education factor (codes 2..5). The tea column is recoded
# directly instead of going through the global Education_level vector, whose
# name differs from the new Education_Level column only by letter case.
Voting_Preference <- mutate(
  Voting_Preference,
  Education_Level = factor(
    tea,
    levels = 2:5,
    labels = c("Primary", "Secondary", "Third_Level", "In_education")
  )
)
glimpse(Voting_Preference)
## Rows: 756
## Columns: 9
## $ q1b                  <int> 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2...
## $ class                <int> 6, 2, 4, 6, 5, 5, 3, 4, 5, 3, 4, 4, 5, 5, 5, 3...
## $ tea                  <int> 3, 3, 3, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4...
## $ q8                   <int> 7, 7, 7, 8, 7, 6, 5, 7, 8, 8, 6, 4, 2, 5, 7, 5...
## $ q9b                  <int> 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3...
## $ exage                <int> 47, 38, 44, 76, 52, 69, 39, 37, 26, 69, 51, 52...
## $ Voting_Preference    <fct> Against, In_Favour, Against, In_Favour, In_Fav...
## $ Socio_economic_Class <fct> Large Farmers, Professional, Skilled Manual, L...
## $ Education_Level      <fct> Secondary, Secondary, Secondary, Primary, Thir...
# Column-wise proportions: within each education level, the share voting each
# way, rounded to two decimals.
table(
  Voting_Preference$Voting_Preference,
  Voting_Preference$Education_Level
) %>%
  prop.table(margin = 2) %>%
  round(digits = 2)
##            
##             Primary Secondary Third_Level In_education
##   In_Favour    0.59      0.59        0.75         0.59
##   Against      0.41      0.41        0.25         0.41
# Overlaid age densities for each voting preference, one panel per education
# level, with age on a log10 x-axis.
Voting_Preference %>%
  ggplot(aes(exage, fill = Voting_Preference)) +
  geom_density(alpha = 0.3) +
  scale_x_log10() +
  # The complete theme goes first so the theme() tweaks below are not reset.
  theme_minimal() +
  facet_wrap(~Education_Level) +
  theme(
    legend.position = "top",
    legend.title = element_blank()
  ) +
  labs(
    title = "Voting Preference distribution by population and Education Level",
    caption = "Source: Lisbon Treaty Second Referendum, Oct. 2009",
    # The x-axis plots exage; education level is shown by the facets, so the
    # previous axis label "Education Level" was wrong.
    x = "Age of voter (log scale)",
    y = "Density"
  )

#The prop table shows a 50-percentage-point disparity at third level education (75% in favour against 25% against). However, the density plot shows more detail, as the divergences appear between the ages of 20 and 30 and in a small group aged 60 to 70. The prop table demonstrates similarities between the primary, secondary and still-in-education groups. However, visualising the data confirms that these relationships are not linear. For example, in-favour voting among primary-educated respondents aged 60-70 is skewed within this class of voters. #The overarching trends indicate that those who attained the highest level of education were more likely to vote in favour of the second referendum compared to those who left in the early stages or who were still in education.

#Question 6

#The code book indicates that variable q8 asks participants how much they feel they know about the European Union, its policies, and institutions – measured on a Likert scale:

#0 = Don’t know #1 - Nothing at all #2-9 #10 - Know a great deal #measurement: As this variable has more than 5 values the purpose of this question it will be classed as a continuous variable. To display the proportions of this Likert Scale a Histogram will be used to plot the proportion of the Voting preferences of the voters to visualise the patterns of this variable with a voters knowledge of the European Union.

# Copy the raw q8 answers into a descriptively named column.
Voting_Preference[["Knowledge_of_EU"]] <- Voting_Preference[["q8"]]

#The labels are renamed and reordered to correspond to the ‘valid values’ within the code book.

# Frequency of each raw code before relabelling.
table(Voting_Preference[["Knowledge_of_EU"]])
## 
##   2   3   4   5   6   7   8   9  10  11  12 
##  20  19  86 104 148 151  95  66  30  33   4
# Recode the numeric knowledge score (codes 2..12) as a labelled factor.
# NOTE(review): the labels run in the opposite direction to the codes
# (2 -> "Vast" ... 11 -> "None", 12 -> "Unknown"), whereas the code book notes
# in this script list 10 as "Know a great deal" - confirm the mapping.
Voting_Preference <- mutate(
  Voting_Preference,
  Knowledge_of_EU = factor(
    Knowledge_of_EU,
    levels = 2:12,
    # The last label was previously misspelt "Unknow", so the "Unknown" filter
    # below never matched and those respondents stayed in the data.
    labels = c(
      "Vast", "9", "8", "7", "6", "5", "4", "3", "2", "None", "Unknown"
    )
  )
)
class(Voting_Preference$Knowledge_of_EU)
## [1] "factor"
# Drop the categories excluded from the analysis in a single pass. Rows with a
# missing value are dropped as well, matching what the != comparisons did.
Voting_Preference <- filter(
  Voting_Preference,
  !is.na(Knowledge_of_EU),
  !Knowledge_of_EU %in% c("Unknown", "None", "9")
)
# NOTE: output recorded further down in this script was produced while the
# "Unknown" rows were still present; re-run to refresh those figures.

#Column percentages of ‘Knowledge of the EU’ within each voting preference are used to analyse the proportions of the voting population.

# Column-wise proportions: within each voting preference, the share of
# respondents at each knowledge level, rounded to two decimals.
table(
  Voting_Preference$Knowledge_of_EU,
  Voting_Preference$Voting_Preference
) %>%
  prop.table(margin = 2) %>%
  round(digits = 2)
##         
##          In_Favour Against
##   Vast        0.03    0.03
##   9           0.00    0.00
##   8           0.13    0.11
##   7           0.16    0.12
##   6           0.24    0.14
##   5           0.20    0.24
##   4           0.11    0.17
##   3           0.08    0.11
##   2           0.03    0.06
##   None        0.00    0.00
##   Unknow      0.00    0.01

#data below 0.03 will be removed from the analysis as these values are extremely small.

# Check the row count and column types after the filtering step.
Voting_Preference %>% glimpse()
## Rows: 704
## Columns: 10
## $ q1b                  <int> 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 3, 2, 3, 2, 3, 2...
## $ class                <int> 6, 2, 4, 6, 5, 5, 3, 4, 5, 3, 4, 4, 5, 5, 5, 3...
## $ tea                  <int> 3, 3, 3, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4...
## $ q8                   <int> 7, 7, 7, 8, 7, 6, 5, 7, 8, 8, 6, 4, 2, 5, 7, 5...
## $ q9b                  <int> 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3...
## $ exage                <int> 47, 38, 44, 76, 52, 69, 39, 37, 26, 69, 51, 52...
## $ Voting_Preference    <fct> Against, In_Favour, Against, In_Favour, In_Fav...
## $ Socio_economic_Class <fct> Large Farmers, Professional, Skilled Manual, L...
## $ Education_Level      <fct> Secondary, Secondary, Secondary, Primary, Thir...
## $ Knowledge_of_EU      <fct> 5, 5, 5, 4, 5, 6, 7, 5, 4, 4, 6, 8, Vast, 7, 5...
# Horizontal box plots of respondent age (log10 scale) at each knowledge level,
# split by voting preference.
Voting_Preference %>%
  ggplot(aes(Knowledge_of_EU, exage)) +
  geom_boxplot(aes(fill = Voting_Preference)) +
  labs(
    title = "Voting preference by knowledge of the EU",
    subtitle = "Logarithmic scale",
    x = "Knowledge of the EU",
    # The y aesthetic is exage, so the axis shows age rather than a count of
    # voters as the previous label "Voting population" implied.
    y = "Age of Voter",
    caption = "Source: Lisbon Treaty Referendum - October 2009"
  ) +
  # A complete theme replaces every earlier theme() setting, so it must come
  # before the legend tweaks; in the old order they were silently discarded.
  theme_minimal() +
  # NOTE(review): the position was previously "", which is not a documented
  # value. "top" matches the other plots in this script; use "none" instead if
  # the legend was meant to be hidden.
  theme(
    legend.position = "top",
    legend.title = element_blank()
  ) +
  scale_y_log10() +
  scale_fill_colorblind() +
  coord_flip()

#I have called the Socio-economic variable to demonstrate that there is a relationship between the voting preferences of this population and its knowledge of the European Union and its policies, except for those pertaining to have a vast knowledge and no knowledge of the European Union and its policies.

#Question 7 #These patterns indicate that there is only a relationship for participants who have a mid-range knowledge of the EU and the perception that Switzerland is a member of the EU. #All the levels of variable q9b are relevant for this analysis. The variable is nominal as it is unordered but also categorical.

# Frequency of each raw q9b code across the full, unfiltered dataset.
table(LTR09[["q9b"]])
## 
##   2   3   4 
## 243 524 235

#The code book identifies the levels of this variable: #2 = True #3 = False #4 = Don’t know

# Add a labelled factor for the q9b answer (2 -> "True", 3 -> "False",
# 4 -> "Don't know"), recoding the raw column in one step.
Voting_Preference <- mutate(
  Voting_Preference,
  Switzerland_Member_of_EU = factor(
    q9b,
    levels = 2:4,
    labels = c("True", "False", "Don't know")
  )
)
# Horizontal box plots of respondent age (log10 scale) at each knowledge level,
# split by the answer to the Switzerland question.
Voting_Preference %>%
  ggplot(aes(Knowledge_of_EU, exage)) +
  geom_boxplot(aes(fill = Switzerland_Member_of_EU)) +
  labs(
    title = "EU knowledge and perception that Switzerland is an EU member",
    x = "Knowledge of the European Union",
    y = "Age of Voter",
    caption = "Source: Lisbon Treaty Referendum - October 2009"
  ) +
  # A complete theme replaces every earlier theme() setting, so it must come
  # before the legend tweaks; in the old order they were silently discarded.
  theme_minimal() +
  theme(
    legend.position = "top",
    legend.title = element_blank()
  ) +
  scale_y_log10() +
  scale_fill_colorblind() +
  coord_flip()