## the dataset used in this work is taken from the 1997-2001 British Election Panel Study (BEPS).
## A data frame with 1525 observations on the following 10 variables:
## vote, age, economic.cond.national, economic.cond.household, Blair, Hague, Kennedy, Europe, political.knowledge, gender
## Load the BEPS survey from beps.csv in the working directory and print a
## per-column summary.
## stringsAsFactors = TRUE is set explicitly: the recorded output below shows
## `vote` and `gender` summarised as level counts, which requires factor
## columns and is only the default behaviour before R 4.0.0.
beps <- read.csv("beps.csv", header = TRUE, stringsAsFactors = TRUE)
summary(beps) #Summary of the csv data
## X vote age
## Min. : 1 Conservative :462 Min. :24.00
## 1st Qu.: 382 Labour :720 1st Qu.:41.00
## Median : 763 Liberal Democrat:343 Median :53.00
## Mean : 763 Mean :54.18
## 3rd Qu.:1144 3rd Qu.:67.00
## Max. :1525 Max. :93.00
## economic.cond.national economic.cond.household Blair
## Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:2.000
## Median :3.000 Median :3.00 Median :4.000
## Mean :3.246 Mean :3.14 Mean :3.334
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.000 Max. :5.00 Max. :5.000
## Hague Kennedy Europe political.knowledge
## Min. :1.000 Min. :1.000 Min. : 1.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 4.000 1st Qu.:0.000
## Median :2.000 Median :3.000 Median : 6.000 Median :2.000
## Mean :2.747 Mean :3.135 Mean : 6.729 Mean :1.542
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:10.000 3rd Qu.:2.000
## Max. :5.000 Max. :5.000 Max. :11.000 Max. :3.000
## gender
## female:812
## male :713
##
##
##
##
## Mean and median of the age and economic.cond.national columns
## over the full BEPS data frame
mean_age <- mean(beps[["age"]])
paste0("Mean age: ", mean_age)
## [1] "Mean age: 54.1822950819672"
median_age <- median(beps[["age"]])
paste0("Median age: ", median_age)
## [1] "Median age: 53"
mean_national_condition <- mean(beps[["economic.cond.national"]])
paste0("Mean Nationa Economic condition: ", mean_national_condition)
## [1] "Mean Nationa Economic condition: 3.24590163934426"
median_national_condition <- median(beps[["economic.cond.national"]])
paste0("Median Nationa Economic condition: ", median_national_condition)
## [1] "Median Nationa Economic condition: 3"
## Build a smaller data frame from the BEPS data: keep the rows whose index
## column X is below 21 (20 rows) and five of the columns.
## which() drops rows where the comparison is NA, as subset() would.
df <- beps[
  which(beps$X < 21),
  c(
    "vote", "age", "economic.cond.national",
    "economic.cond.household", "Europe"
  ),
  drop = FALSE
]
df
## vote age economic.cond.national economic.cond.household
## 1 Liberal Democrat 43 3 3
## 2 Labour 36 4 4
## 3 Labour 35 4 4
## 4 Labour 24 4 2
## 5 Labour 41 2 2
## 6 Labour 47 3 4
## 7 Liberal Democrat 57 2 2
## 8 Labour 77 3 4
## 9 Labour 39 3 3
## 10 Labour 70 3 2
## 11 Labour 39 3 3
## 12 Labour 66 4 3
## 13 Labour 59 4 4
## 14 Labour 66 3 3
## 15 Labour 77 2 3
## 16 Labour 51 4 4
## 17 Labour 43 2 4
## 18 Labour 41 4 4
## 19 Labour 79 3 3
## 20 Labour 37 3 1
## Europe
## 1 2
## 2 5
## 3 3
## 4 4
## 5 6
## 6 4
## 7 11
## 8 1
## 9 11
## 10 11
## 11 7
## 12 9
## 13 10
## 14 8
## 15 11
## 16 5
## 17 8
## 18 7
## 19 1
## 20 5
## Give the five columns new names, in column order. Three of the new names
## contain a space, so they need backticks (or [[ ]]) when accessed later.
colnames(df) <- c(
  "voter", "age group", "national worth", "individual worth", "Euro"
)
df
## voter age group national worth individual worth Euro
## 1 Liberal Democrat 43 3 3 2
## 2 Labour 36 4 4 5
## 3 Labour 35 4 4 3
## 4 Labour 24 4 2 4
## 5 Labour 41 2 2 6
## 6 Labour 47 3 4 4
## 7 Liberal Democrat 57 2 2 11
## 8 Labour 77 3 4 1
## 9 Labour 39 3 3 11
## 10 Labour 70 3 2 11
## 11 Labour 39 3 3 7
## 12 Labour 66 4 3 9
## 13 Labour 59 4 4 10
## 14 Labour 66 3 3 8
## 15 Labour 77 2 3 11
## 16 Labour 51 4 4 5
## 17 Labour 43 2 4 8
## 18 Labour 41 4 4 7
## 19 Labour 79 3 3 1
## 20 Labour 37 3 1 5
## Per-column summary of the renamed 20-row subset
summary(object = df)
## voter age group national worth individual worth
## Conservative : 0 Min. :24.00 Min. :2.00 Min. :1.00
## Labour :18 1st Qu.:39.00 1st Qu.:3.00 1st Qu.:2.75
## Liberal Democrat: 2 Median :45.00 Median :3.00 Median :3.00
## Mean :51.35 Mean :3.15 Mean :3.10
## 3rd Qu.:66.00 3rd Qu.:4.00 3rd Qu.:4.00
## Max. :79.00 Max. :4.00 Max. :4.00
## Euro
## Min. : 1.00
## 1st Qu.: 4.00
## Median : 6.50
## Mean : 6.45
## 3rd Qu.: 9.25
## Max. :11.00
## Mean and median age, and mean national economic condition, recomputed on
## df (the 20-row subset) using its renamed columns
mean_age <- mean(df[["age group"]])
paste0("New Mean age: ", mean_age)
## [1] "New Mean age: 51.35"
median_age <- median(df[["age group"]])
paste0("New Median age: ", median_age)
## [1] "New Median age: 45"
mean_national_worth <- mean(df[["national worth"]])
paste0("New Mean Nationa Economic condition: ", mean_national_worth)
## [1] "New Mean Nationa Economic condition: 3.15"
## Median national economic condition in df (the 20-row subset).
## Read from `national worth` (the renamed economic.cond.national column),
## not `individual worth` (the renamed economic.cond.household column), so the
## value matches its label and is comparable with the full-data median.
median_national_condition <- median(df$`national worth`)
paste0("New Median Nationa Economic condition: ", median_national_condition)
## [1] "New Median Nationa Economic condition: 3"
## The mean and median ages of the original data (about 54 and 53 respectively) are higher than those of the new data frame (about 51 and 45 respectively).
## The mean national economic condition is almost unchanged (3.25 in the original data versus 3.15 in the new data frame), while the median is the same (3) in both.
## Relabel voter values that are exactly "Labour" as "excellent"; the anchored
## pattern leaves every other value untouched.
df[["voter"]] <- gsub(
  pattern = "^Labour$",
  replacement = "excellent",
  x = df[["voter"]]
)
df
## voter age group national worth individual worth Euro
## 1 Liberal Democrat 43 3 3 2
## 2 excellent 36 4 4 5
## 3 excellent 35 4 4 3
## 4 excellent 24 4 2 4
## 5 excellent 41 2 2 6
## 6 excellent 47 3 4 4
## 7 Liberal Democrat 57 2 2 11
## 8 excellent 77 3 4 1
## 9 excellent 39 3 3 11
## 10 excellent 70 3 2 11
## 11 excellent 39 3 3 7
## 12 excellent 66 4 3 9
## 13 excellent 59 4 4 10
## 14 excellent 66 3 3 8
## 15 excellent 77 2 3 11
## 16 excellent 51 4 4 5
## 17 excellent 43 2 4 8
## 18 excellent 41 4 4 7
## 19 excellent 79 3 3 1
## 20 excellent 37 3 1 5
## Show only the first 10 of the 20 rows in the subset
head(df, n = 10L)
## voter age group national worth individual worth Euro
## 1 Liberal Democrat 43 3 3 2
## 2 excellent 36 4 4 5
## 3 excellent 35 4 4 3
## 4 excellent 24 4 2 4
## 5 excellent 41 2 2 6
## 6 excellent 47 3 4 4
## 7 Liberal Democrat 57 2 2 11
## 8 excellent 77 3 4 1
## 9 excellent 39 3 3 11
## 10 excellent 70 3 2 11
## Load the same BEPS data from its copy on GitHub and print a per-column
## summary.
## read.csv() is used rather than read.csv2(): read.csv2() targets
## semicolon-separated files and treats "," as the decimal mark, which clashes
## with "," as the field separator and would stop any column containing "."
## decimals from being read as numeric.
## stringsAsFactors = TRUE keeps `vote` and `gender` as factors so the summary
## shows level counts, as in the recorded output below, on R >= 4.0.0 too.
beps_git <- read.csv(
  "https://raw.githubusercontent.com/henryvalentine/MSDS2019/master/beps.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
summary(beps_git) #Summary of the csv data from github.com
## X vote age
## Min. : 1 Conservative :462 Min. :24.00
## 1st Qu.: 382 Labour :720 1st Qu.:41.00
## Median : 763 Liberal Democrat:343 Median :53.00
## Mean : 763 Mean :54.18
## 3rd Qu.:1144 3rd Qu.:67.00
## Max. :1525 Max. :93.00
## economic.cond.national economic.cond.household Blair
## Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:2.000
## Median :3.000 Median :3.00 Median :4.000
## Mean :3.246 Mean :3.14 Mean :3.334
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.000 Max. :5.00 Max. :5.000
## Hague Kennedy Europe political.knowledge
## Min. :1.000 Min. :1.000 Min. : 1.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 4.000 1st Qu.:0.000
## Median :2.000 Median :3.000 Median : 6.000 Median :2.000
## Mean :2.747 Mean :3.135 Mean : 6.729 Mean :1.542
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:10.000 3rd Qu.:2.000
## Max. :5.000 Max. :5.000 Max. :11.000 Max. :3.000
## gender
## female:812
## male :713
##
##
##
##