# Show the working directory: "Data1.csv" is read further down through a
# relative path, so this records where it is resolved from.
getwd()
## [1] "/Users/mac/bigdata"
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes whatever objects the caller had, yet leaves attached packages and
# options in place, so it does not produce a clean session. Restart R for a
# genuinely fresh run.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Read the survey data and relabel the 0/1 gender codes as a two-level factor,
# then tabulate the result as a sanity check.
df <- read.csv(file = "Data1.csv")
df[["Gender"]] <- plyr::revalue(
  as.factor(df[["Gender"]]),
  replace = c("0" = "female", "1" = "male")
)
table(df[["Gender"]])
##
## female male
## 1136 789
# Number of missing values in each column.
df %>%
  is.na() %>%
  colSums()
## X Q1 Q2 Q3 Q4 Q5 Q6 Q7
## 0 0 0 0 0 0 0 0
## Q8 Q9 Q10 Q11 Q12 Q13 Q14 Q15
## 0 0 0 0 0 0 0 0
## Q16 Q17 Q18 Q19 Q20 Gender EDU BF
## 0 0 0 0 0 0 0 0
## BM Happiness Peace
## 0 0 0
# Preview the first six rows.
df %>% head(n = 6)
## X Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 Q12 Q13 Q14 Q15 Q16 Q17 Q18 Q19 Q20
## 1 1 4 4 2 3 4 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4
## 2 2 4 4 4 4 4 3 2 4 4 4 4 4 4 4 4 4 3 4 2 1
## 3 3 4 4 4 4 2 4 4 4 4 2 4 4 4 4 3 4 4 4 4 3
## 4 4 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 5 5 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 4 4
## 6 6 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## Gender EDU BF BM Happiness Peace
## 1 female 1 3.4 3.2 4.0 4.0
## 2 female 1 4.0 3.4 4.0 2.8
## 3 female 2 3.6 3.6 3.8 3.8
## 4 female 1 4.2 4.0 4.0 4.0
## 5 female 2 4.0 3.6 4.0 4.0
## 6 female 1 4.0 4.0 4.0 4.0
# Per-column summary statistics for the raw data frame.
df %>% summary()
## X Q1 Q2 Q3 Q4
## Min. : 1 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 482 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median : 963 Median :4.000 Median :3.000 Median :3.000 Median :3.000
## Mean : 963 Mean :3.536 Mean :3.291 Mean :2.928 Mean :3.061
## 3rd Qu.:1444 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1925 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q5 Q6 Q7 Q8
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.041 Mean :2.796 Mean :3.086 Mean :3.049
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q9 Q10 Q11 Q12 Q13
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :4.00 Median :4.000 Median :4.000
## Mean :3.066 Mean :2.883 Mean :3.47 Mean :3.421 Mean :3.588
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## Q14 Q15 Q16 Q17
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :4.000 Median :4.000
## Mean :3.716 Mean :3.542 Mean :3.791 Mean :3.516
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q18 Q19 Q20 Gender EDU
## Min. :1.000 Min. :1.000 Min. :1.000 female:1136 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 male : 789 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.804 Mean :3.364 Mean :3.349 Mean :2.616
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :4.000
## BF BM Happiness Peace
## Min. :1.000 Min. :1.000 Min. :1.400 Min. :1.200
## 1st Qu.:2.600 1st Qu.:2.400 1st Qu.:3.000 1st Qu.:3.200
## Median :3.200 Median :3.000 Median :3.600 Median :3.600
## Mean :3.172 Mean :2.976 Mean :3.547 Mean :3.564
## 3rd Qu.:3.800 3rd Qu.:3.600 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
# Compact structure listing: column types and leading values.
df %>% str()
## 'data.frame': 1925 obs. of 27 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Q1 : int 4 4 4 5 4 4 4 4 4 4 ...
## $ Q2 : int 4 4 4 4 4 4 2 2 4 4 ...
## $ Q3 : int 2 4 4 4 4 4 4 4 4 2 ...
## $ Q4 : int 3 4 4 4 4 4 4 4 4 2 ...
## $ Q5 : int 4 4 2 4 4 4 4 4 2 4 ...
## $ Q6 : int 2 3 4 4 4 4 4 4 1 2 ...
## $ Q7 : int 2 2 4 4 4 4 4 4 3 4 ...
## $ Q8 : int 4 4 4 4 4 4 5 5 2 2 ...
## $ Q9 : int 4 4 4 4 2 4 5 5 3 4 ...
## $ Q10 : int 4 4 2 4 4 4 5 5 2 4 ...
## $ Q11 : int 4 4 4 4 4 4 5 5 4 4 ...
## $ Q12 : int 4 4 4 4 4 4 5 5 3 4 ...
## $ Q13 : int 4 4 4 4 4 4 5 5 4 4 ...
## $ Q14 : int 4 4 4 4 4 4 5 5 5 4 ...
## $ Q15 : int 4 4 3 4 4 4 4 2 3 4 ...
## $ Q16 : int 4 4 4 4 4 4 5 2 4 4 ...
## $ Q17 : int 4 3 4 4 4 4 2 2 4 4 ...
## $ Q18 : int 4 4 4 4 4 4 4 4 4 4 ...
## $ Q19 : int 4 2 4 4 4 4 4 2 4 2 ...
## $ Q20 : int 4 1 3 4 4 4 4 2 4 2 ...
## $ Gender : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 2 1 ...
## $ EDU : int 1 1 2 1 2 1 1 1 4 3 ...
## $ BF : num 3.4 4 3.6 4.2 4 4 3.6 3.6 3.6 3.2 ...
## $ BM : num 3.2 3.4 3.6 4 3.6 4 4.6 4.6 2.2 3.2 ...
## $ Happiness: num 4 4 3.8 4 4 4 4.8 4.4 3.8 4 ...
## $ Peace : num 4 2.8 3.8 4 4 4 3.8 2.4 4 3.2 ...
# Column names of df, as listed by ls().
ls(name = df)
## [1] "BF" "BM" "EDU" "Gender" "Happiness" "Peace"
## [7] "Q1" "Q10" "Q11" "Q12" "Q13" "Q14"
## [13] "Q15" "Q16" "Q17" "Q18" "Q19" "Q2"
## [19] "Q20" "Q3" "Q4" "Q5" "Q6" "Q7"
## [25] "Q8" "Q9" "X"
# Factor levels of Gender after relabelling.
levels(df[["Gender"]])
## [1] "female" "male"
# Happiness is numeric, so it carries no levels (NULL).
levels(df[["Happiness"]])
## NULL
# Overall means of the two composite scores.
mean(df[["Happiness"]])
## [1] 3.547065
mean(df[["Peace"]])
## [1] 3.56426
# Respondent count per gender.
table(df[["Gender"]])
##
## female male
## 1136 789
# df1: gender with the happiness score only.
df1 <- dplyr::select(df, Gender, Happiness)
head(df1)
## Gender Happiness
## 1 female 4.0
## 2 female 4.0
## 3 female 3.8
## 4 female 4.0
## 5 female 4.0
## 6 female 4.0
# df2: gender with both the happiness and peace scores.
df2 <- dplyr::select(df, Gender, Happiness, Peace)
head(df2)
## Gender Happiness Peace
## 1 female 4.0 4.0
## 2 female 4.0 2.8
## 3 female 3.8 3.8
## 4 female 4.0 4.0
## 5 female 4.0 4.0
## 6 female 4.0 4.0
# df3: male respondents scoring at least 3.0 on both scales.
df3 <- dplyr::filter(
  df2,
  Gender == "male" & Happiness >= 3.0 & Peace >= 3.0
)
head(df3)
## Gender Happiness Peace
## 1 male 3.8 4.0
## 2 male 3.8 3.2
## 3 male 4.0 4.1
## 4 male 4.0 4.6
## 5 male 3.9 4.0
## 6 male 3.4 3.2
# Frequency of each distinct (Gender, Happiness, Peace) combination in df3.
# plyr::count() is called explicitly: dplyr::count() shares the name but, with
# no grouping columns, returns only a single row count, so the result must not
# depend on which of the two packages was attached last.
plyr::count(df3)
## Gender Happiness Peace freq
## 1 male 3.0 3.0 10
## 2 male 3.0 3.2 8
## 3 male 3.0 3.4 3
## 4 male 3.0 3.6 5
## 5 male 3.0 3.8 3
## 6 male 3.0 4.0 9
## 7 male 3.0 4.2 3
## 8 male 3.2 3.0 2
## 9 male 3.2 3.2 5
## 10 male 3.2 3.4 9
## 11 male 3.2 3.6 8
## 12 male 3.2 3.8 3
## 13 male 3.2 4.0 6
## 14 male 3.2 4.2 1
## 15 male 3.2 4.4 3
## 16 male 3.3 3.8 1
## 17 male 3.4 3.0 10
## 18 male 3.4 3.2 7
## 19 male 3.4 3.4 11
## 20 male 3.4 3.6 11
## 21 male 3.4 3.8 6
## 22 male 3.4 4.0 7
## 23 male 3.4 4.2 4
## 24 male 3.4 4.4 2
## 25 male 3.4 4.6 2
## 26 male 3.4 4.8 1
## 27 male 3.5 3.6 1
## 28 male 3.6 3.0 5
## 29 male 3.6 3.2 8
## 30 male 3.6 3.4 7
## 31 male 3.6 3.6 18
## 32 male 3.6 3.8 11
## 33 male 3.6 3.9 1
## 34 male 3.6 4.0 15
## 35 male 3.6 4.2 6
## 36 male 3.6 4.4 3
## 37 male 3.6 4.6 2
## 38 male 3.6 4.8 1
## 39 male 3.6 5.0 2
## 40 male 3.8 3.0 8
## 41 male 3.8 3.2 3
## 42 male 3.8 3.4 6
## 43 male 3.8 3.6 15
## 44 male 3.8 3.8 10
## 45 male 3.8 3.9 1
## 46 male 3.8 4.0 12
## 47 male 3.8 4.2 1
## 48 male 3.8 4.4 5
## 49 male 3.8 4.6 2
## 50 male 3.8 4.8 1
## 51 male 3.9 3.8 1
## 52 male 3.9 4.0 1
## 53 male 4.0 3.0 7
## 54 male 4.0 3.2 11
## 55 male 4.0 3.4 13
## 56 male 4.0 3.5 1
## 57 male 4.0 3.6 26
## 58 male 4.0 3.8 19
## 59 male 4.0 3.9 1
## 60 male 4.0 4.0 66
## 61 male 4.0 4.1 1
## 62 male 4.0 4.2 8
## 63 male 4.0 4.4 3
## 64 male 4.0 4.6 5
## 65 male 4.0 5.0 2
## 66 male 4.2 3.0 2
## 67 male 4.2 3.2 8
## 68 male 4.2 3.6 2
## 69 male 4.2 3.8 6
## 70 male 4.2 4.0 7
## 71 male 4.2 4.2 5
## 72 male 4.2 4.4 4
## 73 male 4.2 4.6 1
## 74 male 4.2 5.0 3
## 75 male 4.4 3.0 1
## 76 male 4.4 3.4 2
## 77 male 4.4 3.6 1
## 78 male 4.4 3.8 2
## 79 male 4.4 4.0 3
## 80 male 4.4 4.2 4
## 81 male 4.4 4.4 3
## 82 male 4.4 4.6 1
## 83 male 4.4 4.8 1
## 84 male 4.4 5.0 3
## 85 male 4.6 3.0 1
## 86 male 4.6 3.2 2
## 87 male 4.6 3.6 1
## 88 male 4.6 4.0 3
## 89 male 4.6 4.2 2
## 90 male 4.6 4.4 1
## 91 male 4.6 4.6 2
## 92 male 4.8 3.8 2
## 93 male 4.8 4.0 1
## 94 male 4.8 4.2 3
## 95 male 4.8 4.4 2
## 96 male 4.8 5.0 1
## 97 male 5.0 3.0 1
## 98 male 5.0 3.6 3
## 99 male 5.0 3.8 1
## 100 male 5.0 4.0 2
## 101 male 5.0 4.2 5
## 102 male 5.0 4.4 4
## 103 male 5.0 4.6 10
## 104 male 5.0 4.8 2
## 105 male 5.0 5.0 11
# Overall mean happiness and peace across all respondents.
# summarise() is namespaced because plyr is attached after dplyr in this
# script and masks the unqualified name; the output column name also had a
# typo ("AvgHappines"), corrected here and in the recorded header below.
df %>%
  dplyr::summarise(AvgHappiness = mean(Happiness), AvgPeace = mean(Peace))
## AvgHappiness AvgPeace
## 1 3.547065 3.56426
# Band the numeric happiness score into four labels:
#   >= 4.0 "Very Happy", >= 3.5 "Happy", >= 3.0 "Unhappy", else "Very Unhappy".
# case_when() replaces the nested ifelse() chain; conditions are tested in
# order, and the final explicit `< 3.0` branch (rather than a catch-all) keeps
# a missing score as NA, matching the behaviour of ifelse().
# dplyr:: is spelled out because plyr, attached later, masks mutate().
df4 <- df %>%
  dplyr::mutate(
    Happiness = dplyr::case_when(
      Happiness >= 4.0 ~ "Very Happy",
      Happiness >= 3.5 ~ "Happy",
      Happiness >= 3.0 ~ "Unhappy",
      Happiness < 3.0 ~ "Very Unhappy"
    )
  )
dplyr::glimpse(df4)
## Rows: 1,925
## Columns: 27
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ Q1 <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2 <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3 <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4 <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5 <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6 <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7 <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8 <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9 <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10 <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12 <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14 <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15 <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16 <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17 <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18 <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19 <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20 <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender <fct> female, female, female, female, female, female, female, fema…
## $ EDU <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <chr> "Very Happy", "Very Happy", "Happy", "Very Happy", "Very Hap…
## $ Peace <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
# Convert the happiness band from character to factor (levels are ordered
# alphabetically), then re-inspect the frame.
df4[["Happiness"]] <- factor(df4[["Happiness"]])
dplyr::glimpse(df4)
## Rows: 1,925
## Columns: 27
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ Q1 <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2 <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3 <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4 <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5 <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6 <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7 <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8 <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9 <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10 <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12 <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14 <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15 <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16 <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17 <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18 <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19 <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20 <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender <fct> female, female, female, female, female, female, female, fema…
## $ EDU <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <fct> Very Happy, Very Happy, Happy, Very Happy, Very Happy, Very …
## $ Peace <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
# Number of respondents in each happiness band.
# plyr::count() accepts a bare vector; dplyr::count() does not, so the call is
# namespaced instead of relying on plyr having been attached after dplyr.
plyr::count(df4$Happiness)
## x freq
## 1 Happy 414
## 2 Unhappy 408
## 3 Very Happy 734
## 4 Very Unhappy 369
library(ggplot2)
# Structure of the banded frame; Happiness is now a 4-level factor.
df4 %>% str()
## 'data.frame': 1925 obs. of 27 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Q1 : int 4 4 4 5 4 4 4 4 4 4 ...
## $ Q2 : int 4 4 4 4 4 4 2 2 4 4 ...
## $ Q3 : int 2 4 4 4 4 4 4 4 4 2 ...
## $ Q4 : int 3 4 4 4 4 4 4 4 4 2 ...
## $ Q5 : int 4 4 2 4 4 4 4 4 2 4 ...
## $ Q6 : int 2 3 4 4 4 4 4 4 1 2 ...
## $ Q7 : int 2 2 4 4 4 4 4 4 3 4 ...
## $ Q8 : int 4 4 4 4 4 4 5 5 2 2 ...
## $ Q9 : int 4 4 4 4 2 4 5 5 3 4 ...
## $ Q10 : int 4 4 2 4 4 4 5 5 2 4 ...
## $ Q11 : int 4 4 4 4 4 4 5 5 4 4 ...
## $ Q12 : int 4 4 4 4 4 4 5 5 3 4 ...
## $ Q13 : int 4 4 4 4 4 4 5 5 4 4 ...
## $ Q14 : int 4 4 4 4 4 4 5 5 5 4 ...
## $ Q15 : int 4 4 3 4 4 4 4 2 3 4 ...
## $ Q16 : int 4 4 4 4 4 4 5 2 4 4 ...
## $ Q17 : int 4 3 4 4 4 4 2 2 4 4 ...
## $ Q18 : int 4 4 4 4 4 4 4 4 4 4 ...
## $ Q19 : int 4 2 4 4 4 4 4 2 4 2 ...
## $ Q20 : int 4 1 3 4 4 4 4 2 4 2 ...
## $ Gender : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 2 1 ...
## $ EDU : int 1 1 2 1 2 1 1 1 4 3 ...
## $ BF : num 3.4 4 3.6 4.2 4 4 3.6 3.6 3.6 3.2 ...
## $ BM : num 3.2 3.4 3.6 4 3.6 4 4.6 4.6 2.2 3.2 ...
## $ Happiness: Factor w/ 4 levels "Happy","Unhappy",..: 3 3 1 3 3 3 3 3 1 3 ...
## $ Peace : num 4 2.8 3.8 4 4 4 3.8 2.4 4 3.2 ...
# Per-column summary of the banded frame (Happiness shown as level counts).
df4 %>% summary()
## X Q1 Q2 Q3 Q4
## Min. : 1 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 482 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median : 963 Median :4.000 Median :3.000 Median :3.000 Median :3.000
## Mean : 963 Mean :3.536 Mean :3.291 Mean :2.928 Mean :3.061
## 3rd Qu.:1444 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1925 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q5 Q6 Q7 Q8
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.041 Mean :2.796 Mean :3.086 Mean :3.049
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q9 Q10 Q11 Q12 Q13
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :4.00 Median :4.000 Median :4.000
## Mean :3.066 Mean :2.883 Mean :3.47 Mean :3.421 Mean :3.588
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## Q14 Q15 Q16 Q17
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :4.000 Median :4.000
## Mean :3.716 Mean :3.542 Mean :3.791 Mean :3.516
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q18 Q19 Q20 Gender EDU
## Min. :1.000 Min. :1.000 Min. :1.000 female:1136 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 male : 789 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.804 Mean :3.364 Mean :3.349 Mean :2.616
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :4.000
## BF BM Happiness Peace
## Min. :1.000 Min. :1.000 Happy :414 Min. :1.200
## 1st Qu.:2.600 1st Qu.:2.400 Unhappy :408 1st Qu.:3.200
## Median :3.200 Median :3.000 Very Happy :734 Median :3.600
## Mean :3.172 Mean :2.976 Very Unhappy:369 Mean :3.564
## 3rd Qu.:3.800 3rd Qu.:3.600 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
# Scatter plot of peace against happiness on the numeric scores in df,
# coloured by gender. Point size is also mapped to Peace, and alpha = 0.5
# makes the points semi-transparent.
ggplot(
  data = df,
  mapping = aes(x = Happiness, y = Peace, colour = Gender, size = Peace)
) +
  geom_point(alpha = 0.5)

library(mlbench)
library(caret)
## Loading required package: lattice
library(dplyr)
# Fit a random forest that predicts Gender from the remaining columns and
# report the variable importance of each predictor.

# Fix the RNG seed so the resampling folds and the forest are reproducible.
set.seed(123)

# "repeatedcv" performs a single repetition unless `repeats` is supplied, so
# the number of repetitions is stated explicitly.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# X looks like the row index written into the CSV (it runs 1..1925), not a
# survey measurement. Left in the formula it becomes a predictor and lets the
# forest learn from row order, so it is excluded from the training data.
model <- train(
  Gender ~ .,
  data = df4[, names(df4) != "X", drop = FALSE],
  method = "rf",
  preProcess = c("center", "scale"),
  trControl = control
)

# Unscaled importance scores from the fitted model.
importance <- varImp(model, scale = FALSE)
# NOTE: the importance table recorded below was produced before X was
# excluded (X is its top entry); re-run the script to refresh it.
print(importance)
## rf variable importance
##
## only 20 most important variables shown (out of 28)
##
## Overall
## X 76.10
## Peace 49.57
## BM 48.79
## BF 48.07
## EDU 36.06
## Q6 31.60
## Q17 28.71
## Q5 27.80
## Q20 27.77
## Q19 27.54
## Q9 27.53
## Q7 26.67
## Q10 26.52
## Q2 26.22
## Q8 26.20
## Q11 25.83
## Q18 25.57
## Q1 25.26
## Q4 25.09
## Q3 24.75
# Plot the variable-importance scores computed above.
plot(x = importance)
