# Show the working directory; Data1.csv is read from it via a relative path.
getwd()
## [1] "/Users/mac/bigdata"
# The global environment is intentionally NOT cleared here. rm(list = ls())
# deletes every object of whoever sources this script, while leaving attached
# packages and options in place, so it does not give a clean session anyway.
# Run the script in a fresh R session instead.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Read the survey data from the working directory.
df <- read.csv("Data1.csv")

# Gender is stored as 0/1 in the file: convert it to a factor and relabel the
# levels ("0" -> "female", "1" -> "male").
df$Gender <- df$Gender %>%
  as.factor() %>%
  revalue(replace = c("0" = "female", "1" = "male"))

# Frequency of each Gender level after recoding.
table(df$Gender)
## 
## female   male 
##   1136    789
# Number of missing values in each column.
df %>%
  is.na() %>%
  colSums()
##         X        Q1        Q2        Q3        Q4        Q5        Q6        Q7 
##         0         0         0         0         0         0         0         0 
##        Q8        Q9       Q10       Q11       Q12       Q13       Q14       Q15 
##         0         0         0         0         0         0         0         0 
##       Q16       Q17       Q18       Q19       Q20    Gender       EDU        BF 
##         0         0         0         0         0         0         0         0 
##        BM Happiness     Peace 
##         0         0         0
# Preview the first six rows of the data.
head(df, n = 6L)
##   X Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 Q12 Q13 Q14 Q15 Q16 Q17 Q18 Q19 Q20
## 1 1  4  4  2  3  4  2  2  4  4   4   4   4   4   4   4   4   4   4   4   4
## 2 2  4  4  4  4  4  3  2  4  4   4   4   4   4   4   4   4   3   4   2   1
## 3 3  4  4  4  4  2  4  4  4  4   2   4   4   4   4   3   4   4   4   4   3
## 4 4  5  4  4  4  4  4  4  4  4   4   4   4   4   4   4   4   4   4   4   4
## 5 5  4  4  4  4  4  4  4  4  2   4   4   4   4   4   4   4   4   4   4   4
## 6 6  4  4  4  4  4  4  4  4  4   4   4   4   4   4   4   4   4   4   4   4
##   Gender EDU  BF  BM Happiness Peace
## 1 female   1 3.4 3.2       4.0   4.0
## 2 female   1 4.0 3.4       4.0   2.8
## 3 female   2 3.6 3.6       3.8   3.8
## 4 female   1 4.2 4.0       4.0   4.0
## 5 female   2 4.0 3.6       4.0   4.0
## 6 female   1 4.0 4.0       4.0   4.0
# Per-column summary: quantiles and mean for numeric columns, counts for
# the Gender factor.
df %>% summary()
##        X              Q1              Q2              Q3              Q4       
##  Min.   :   1   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 482   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median : 963   Median :4.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   : 963   Mean   :3.536   Mean   :3.291   Mean   :2.928   Mean   :3.061  
##  3rd Qu.:1444   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1925   Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##        Q5              Q6              Q7              Q8       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :3.041   Mean   :2.796   Mean   :3.086   Mean   :3.049  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##        Q9             Q10             Q11            Q12             Q13       
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :4.00   Median :4.000   Median :4.000  
##  Mean   :3.066   Mean   :2.883   Mean   :3.47   Mean   :3.421   Mean   :3.588  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000  
##       Q14             Q15             Q16             Q17       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :3.716   Mean   :3.542   Mean   :3.791   Mean   :3.516  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       Q18             Q19             Q20           Gender          EDU       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   female:1136   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:3.000   1st Qu.:3.000   male  : 789   1st Qu.:2.000  
##  Median :4.000   Median :3.000   Median :3.000                 Median :3.000  
##  Mean   :3.804   Mean   :3.364   Mean   :3.349                 Mean   :2.616  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000                 3rd Qu.:3.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000                 Max.   :4.000  
##        BF              BM          Happiness         Peace      
##  Min.   :1.000   Min.   :1.000   Min.   :1.400   Min.   :1.200  
##  1st Qu.:2.600   1st Qu.:2.400   1st Qu.:3.000   1st Qu.:3.200  
##  Median :3.200   Median :3.000   Median :3.600   Median :3.600  
##  Mean   :3.172   Mean   :2.976   Mean   :3.547   Mean   :3.564  
##  3rd Qu.:3.800   3rd Qu.:3.600   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000
# Compact view of the column types and the first few values of each column.
df %>% str()
## 'data.frame':    1925 obs. of  27 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Q1       : int  4 4 4 5 4 4 4 4 4 4 ...
##  $ Q2       : int  4 4 4 4 4 4 2 2 4 4 ...
##  $ Q3       : int  2 4 4 4 4 4 4 4 4 2 ...
##  $ Q4       : int  3 4 4 4 4 4 4 4 4 2 ...
##  $ Q5       : int  4 4 2 4 4 4 4 4 2 4 ...
##  $ Q6       : int  2 3 4 4 4 4 4 4 1 2 ...
##  $ Q7       : int  2 2 4 4 4 4 4 4 3 4 ...
##  $ Q8       : int  4 4 4 4 4 4 5 5 2 2 ...
##  $ Q9       : int  4 4 4 4 2 4 5 5 3 4 ...
##  $ Q10      : int  4 4 2 4 4 4 5 5 2 4 ...
##  $ Q11      : int  4 4 4 4 4 4 5 5 4 4 ...
##  $ Q12      : int  4 4 4 4 4 4 5 5 3 4 ...
##  $ Q13      : int  4 4 4 4 4 4 5 5 4 4 ...
##  $ Q14      : int  4 4 4 4 4 4 5 5 5 4 ...
##  $ Q15      : int  4 4 3 4 4 4 4 2 3 4 ...
##  $ Q16      : int  4 4 4 4 4 4 5 2 4 4 ...
##  $ Q17      : int  4 3 4 4 4 4 2 2 4 4 ...
##  $ Q18      : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ Q19      : int  4 2 4 4 4 4 4 2 4 2 ...
##  $ Q20      : int  4 1 3 4 4 4 4 2 4 2 ...
##  $ Gender   : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 2 1 ...
##  $ EDU      : int  1 1 2 1 2 1 1 1 4 3 ...
##  $ BF       : num  3.4 4 3.6 4.2 4 4 3.6 3.6 3.6 3.2 ...
##  $ BM       : num  3.2 3.4 3.6 4 3.6 4 4.6 4.6 2.2 3.2 ...
##  $ Happiness: num  4 4 3.8 4 4 4 4.8 4.4 3.8 4 ...
##  $ Peace    : num  4 2.8 3.8 4 4 4 3.8 2.4 4 3.2 ...
# List the column names of df; ls() returns them in sorted order rather than
# in file order.
ls(df)
##  [1] "BF"        "BM"        "EDU"       "Gender"    "Happiness" "Peace"    
##  [7] "Q1"        "Q10"       "Q11"       "Q12"       "Q13"       "Q14"      
## [13] "Q15"       "Q16"       "Q17"       "Q18"       "Q19"       "Q2"       
## [19] "Q20"       "Q3"        "Q4"        "Q5"        "Q6"        "Q7"       
## [25] "Q8"        "Q9"        "X"
# Confirm the labels of the recoded Gender factor.
levels(df$Gender)
## [1] "female" "male"
# Happiness is a numeric column at this point, not a factor, so it has no
# levels and levels() returns NULL.
levels(df$Happiness)
## NULL
# Overall sample means of the two numeric scores.
mean(df$Happiness)
## [1] 3.547065
mean(df$Peace)
## [1] 3.56426
# Frequency of each Gender level.
table(df$Gender)
## 
## female   male 
##   1136    789
# df1: Gender and Happiness only.
df1 <- dplyr::select(df, Gender, Happiness)
head(df1)
##   Gender Happiness
## 1 female       4.0
## 2 female       4.0
## 3 female       3.8
## 4 female       4.0
## 5 female       4.0
## 6 female       4.0
# df2: Gender together with both scores.
df2 <- dplyr::select(df, Gender, Happiness, Peace)
head(df2)
##   Gender Happiness Peace
## 1 female       4.0   4.0
## 2 female       4.0   2.8
## 3 female       3.8   3.8
## 4 female       4.0   4.0
## 5 female       4.0   4.0
## 6 female       4.0   4.0
# df3: male respondents whose Happiness and Peace are both at least 3.0.
df3 <- dplyr::filter(
  df2,
  Gender == "male" & Happiness >= 3.0 & Peace >= 3.0
)
head(df3)
##   Gender Happiness Peace
## 1   male       3.8   4.0
## 2   male       3.8   3.2
## 3   male       4.0   4.1
## 4   male       4.0   4.6
## 5   male       3.9   4.0
## 6   male       3.4   3.2
# Frequency of every distinct (Gender, Happiness, Peace) combination in df3.
# plyr and dplyr both export count() with different semantics, and which one
# an unqualified call reaches depends on package attach order. The `freq`
# table wanted here is plyr's, so name it explicitly.
plyr::count(df3)
##     Gender Happiness Peace freq
## 1     male       3.0   3.0   10
## 2     male       3.0   3.2    8
## 3     male       3.0   3.4    3
## 4     male       3.0   3.6    5
## 5     male       3.0   3.8    3
## 6     male       3.0   4.0    9
## 7     male       3.0   4.2    3
## 8     male       3.2   3.0    2
## 9     male       3.2   3.2    5
## 10    male       3.2   3.4    9
## 11    male       3.2   3.6    8
## 12    male       3.2   3.8    3
## 13    male       3.2   4.0    6
## 14    male       3.2   4.2    1
## 15    male       3.2   4.4    3
## 16    male       3.3   3.8    1
## 17    male       3.4   3.0   10
## 18    male       3.4   3.2    7
## 19    male       3.4   3.4   11
## 20    male       3.4   3.6   11
## 21    male       3.4   3.8    6
## 22    male       3.4   4.0    7
## 23    male       3.4   4.2    4
## 24    male       3.4   4.4    2
## 25    male       3.4   4.6    2
## 26    male       3.4   4.8    1
## 27    male       3.5   3.6    1
## 28    male       3.6   3.0    5
## 29    male       3.6   3.2    8
## 30    male       3.6   3.4    7
## 31    male       3.6   3.6   18
## 32    male       3.6   3.8   11
## 33    male       3.6   3.9    1
## 34    male       3.6   4.0   15
## 35    male       3.6   4.2    6
## 36    male       3.6   4.4    3
## 37    male       3.6   4.6    2
## 38    male       3.6   4.8    1
## 39    male       3.6   5.0    2
## 40    male       3.8   3.0    8
## 41    male       3.8   3.2    3
## 42    male       3.8   3.4    6
## 43    male       3.8   3.6   15
## 44    male       3.8   3.8   10
## 45    male       3.8   3.9    1
## 46    male       3.8   4.0   12
## 47    male       3.8   4.2    1
## 48    male       3.8   4.4    5
## 49    male       3.8   4.6    2
## 50    male       3.8   4.8    1
## 51    male       3.9   3.8    1
## 52    male       3.9   4.0    1
## 53    male       4.0   3.0    7
## 54    male       4.0   3.2   11
## 55    male       4.0   3.4   13
## 56    male       4.0   3.5    1
## 57    male       4.0   3.6   26
## 58    male       4.0   3.8   19
## 59    male       4.0   3.9    1
## 60    male       4.0   4.0   66
## 61    male       4.0   4.1    1
## 62    male       4.0   4.2    8
## 63    male       4.0   4.4    3
## 64    male       4.0   4.6    5
## 65    male       4.0   5.0    2
## 66    male       4.2   3.0    2
## 67    male       4.2   3.2    8
## 68    male       4.2   3.6    2
## 69    male       4.2   3.8    6
## 70    male       4.2   4.0    7
## 71    male       4.2   4.2    5
## 72    male       4.2   4.4    4
## 73    male       4.2   4.6    1
## 74    male       4.2   5.0    3
## 75    male       4.4   3.0    1
## 76    male       4.4   3.4    2
## 77    male       4.4   3.6    1
## 78    male       4.4   3.8    2
## 79    male       4.4   4.0    3
## 80    male       4.4   4.2    4
## 81    male       4.4   4.4    3
## 82    male       4.4   4.6    1
## 83    male       4.4   4.8    1
## 84    male       4.4   5.0    3
## 85    male       4.6   3.0    1
## 86    male       4.6   3.2    2
## 87    male       4.6   3.6    1
## 88    male       4.6   4.0    3
## 89    male       4.6   4.2    2
## 90    male       4.6   4.4    1
## 91    male       4.6   4.6    2
## 92    male       4.8   3.8    2
## 93    male       4.8   4.0    1
## 94    male       4.8   4.2    3
## 95    male       4.8   4.4    2
## 96    male       4.8   5.0    1
## 97    male       5.0   3.0    1
## 98    male       5.0   3.6    3
## 99    male       5.0   3.8    1
## 100   male       5.0   4.0    2
## 101   male       5.0   4.2    5
## 102   male       5.0   4.4    4
## 103   male       5.0   4.6   10
## 104   male       5.0   4.8    2
## 105   male       5.0   5.0   11
# Overall mean of each score as a one-row data frame. The verb is qualified
# because plyr (attached after dplyr) masks summarise()/summarize().
df %>%
  dplyr::summarise(AvgHappiness = mean(Happiness), AvgPeace = mean(Peace))
##   AvgHappiness AvgPeace
## 1     3.547065  3.56426
# df4: copy of df in which the numeric Happiness score is replaced by a text
# band. Conditions are checked top to bottom and the first match wins:
#   >= 4.0 "Very Happy", >= 3.5 "Happy", >= 3.0 "Unhappy", below 3.0
#   "Very Unhappy". A missing score matches no condition and stays NA.
df4 <- df
df4$Happiness <- with(
  df,
  case_when(
    Happiness >= 4.0 ~ "Very Happy",
    Happiness >= 3.5 ~ "Happy",
    Happiness >= 3.0 ~ "Unhappy",
    Happiness < 3.0 ~ "Very Unhappy"
  )
)

glimpse(df4)
## Rows: 1,925
## Columns: 27
## $ X         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender    <fct> female, female, female, female, female, female, female, fema…
## $ EDU       <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <chr> "Very Happy", "Very Happy", "Happy", "Very Happy", "Very Hap…
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
# Convert the Happiness bands to a factor with the levels in their natural
# order. as.factor() would sort them alphabetically (Happy, Unhappy,
# Very Happy, Very Unhappy), which scrambles tables, summaries and plot axes.
# NOTE(review): output recorded in this file before this change lists the
# bands in alphabetical order; the counts themselves are unaffected.
df4$Happiness <- factor(
  df4$Happiness,
  levels = c("Very Unhappy", "Unhappy", "Happy", "Very Happy")
)
glimpse(df4)
## Rows: 1,925
## Columns: 27
## $ X         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender    <fct> female, female, female, female, female, female, female, fema…
## $ EDU       <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <fct> Very Happy, Very Happy, Happy, Very Happy, Very Happy, Very …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
# Frequency of each Happiness band. This relies on plyr::count(), which
# accepts a bare vector; dplyr::count() only works on data frames, so an
# unqualified call breaks if the package attach order ever changes.
plyr::count(df4$Happiness)
##              x freq
## 1        Happy  414
## 2      Unhappy  408
## 3   Very Happy  734
## 4 Very Unhappy  369
library(ggplot2)
# Column types of df4; Happiness is a factor here rather than a number.
df4 %>% str()
## 'data.frame':    1925 obs. of  27 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Q1       : int  4 4 4 5 4 4 4 4 4 4 ...
##  $ Q2       : int  4 4 4 4 4 4 2 2 4 4 ...
##  $ Q3       : int  2 4 4 4 4 4 4 4 4 2 ...
##  $ Q4       : int  3 4 4 4 4 4 4 4 4 2 ...
##  $ Q5       : int  4 4 2 4 4 4 4 4 2 4 ...
##  $ Q6       : int  2 3 4 4 4 4 4 4 1 2 ...
##  $ Q7       : int  2 2 4 4 4 4 4 4 3 4 ...
##  $ Q8       : int  4 4 4 4 4 4 5 5 2 2 ...
##  $ Q9       : int  4 4 4 4 2 4 5 5 3 4 ...
##  $ Q10      : int  4 4 2 4 4 4 5 5 2 4 ...
##  $ Q11      : int  4 4 4 4 4 4 5 5 4 4 ...
##  $ Q12      : int  4 4 4 4 4 4 5 5 3 4 ...
##  $ Q13      : int  4 4 4 4 4 4 5 5 4 4 ...
##  $ Q14      : int  4 4 4 4 4 4 5 5 5 4 ...
##  $ Q15      : int  4 4 3 4 4 4 4 2 3 4 ...
##  $ Q16      : int  4 4 4 4 4 4 5 2 4 4 ...
##  $ Q17      : int  4 3 4 4 4 4 2 2 4 4 ...
##  $ Q18      : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ Q19      : int  4 2 4 4 4 4 4 2 4 2 ...
##  $ Q20      : int  4 1 3 4 4 4 4 2 4 2 ...
##  $ Gender   : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 2 1 ...
##  $ EDU      : int  1 1 2 1 2 1 1 1 4 3 ...
##  $ BF       : num  3.4 4 3.6 4.2 4 4 3.6 3.6 3.6 3.2 ...
##  $ BM       : num  3.2 3.4 3.6 4 3.6 4 4.6 4.6 2.2 3.2 ...
##  $ Happiness: Factor w/ 4 levels "Happy","Unhappy",..: 3 3 1 3 3 3 3 3 1 3 ...
##  $ Peace    : num  4 2.8 3.8 4 4 4 3.8 2.4 4 3.2 ...
# Per-column summary of df4; factor columns are reported as level counts.
df4 %>% summary()
##        X              Q1              Q2              Q3              Q4       
##  Min.   :   1   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 482   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median : 963   Median :4.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   : 963   Mean   :3.536   Mean   :3.291   Mean   :2.928   Mean   :3.061  
##  3rd Qu.:1444   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1925   Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##        Q5              Q6              Q7              Q8       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :3.041   Mean   :2.796   Mean   :3.086   Mean   :3.049  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##        Q9             Q10             Q11            Q12             Q13       
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :4.00   Median :4.000   Median :4.000  
##  Mean   :3.066   Mean   :2.883   Mean   :3.47   Mean   :3.421   Mean   :3.588  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000  
##       Q14             Q15             Q16             Q17       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :3.716   Mean   :3.542   Mean   :3.791   Mean   :3.516  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       Q18             Q19             Q20           Gender          EDU       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   female:1136   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:3.000   1st Qu.:3.000   male  : 789   1st Qu.:2.000  
##  Median :4.000   Median :3.000   Median :3.000                 Median :3.000  
##  Mean   :3.804   Mean   :3.364   Mean   :3.349                 Mean   :2.616  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000                 3rd Qu.:3.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000                 Max.   :4.000  
##        BF              BM               Happiness       Peace      
##  Min.   :1.000   Min.   :1.000   Happy       :414   Min.   :1.200  
##  1st Qu.:2.600   1st Qu.:2.400   Unhappy     :408   1st Qu.:3.200  
##  Median :3.200   Median :3.000   Very Happy  :734   Median :3.600  
##  Mean   :3.172   Mean   :2.976   Very Unhappy:369   Mean   :3.564  
##  3rd Qu.:3.800   3rd Qu.:3.600                      3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000                      Max.   :5.000
# Scatter plot of Peace against Happiness, coloured by Gender. This uses df
# (numeric Happiness), not df4. Point size is also mapped to Peace, and the
# points are semi-transparent so overlapping observations remain visible.
ggplot(
  data = df,
  mapping = aes(x = Happiness, y = Peace, colour = Gender, size = Peace)
) +
  geom_point(alpha = 0.5)

library(mlbench)
library(caret)
## Loading required package: lattice
library(dplyr)
# Fit a random forest that predicts Gender from the other columns of df4 and
# rank the predictors by importance.

# Fix the RNG so the CV fold assignment and the forest are reproducible.
set.seed(123)

# 10-fold cross-validation repeated 3 times. Without an explicit `repeats`,
# method = "repeatedcv" runs a single repeat, i.e. plain 10-fold CV.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# X runs 1..1925 and looks like the row index saved with the CSV, not a
# survey measurement. Keeping it lets the forest split on row order (it was
# the top-ranked "predictor"), so it is excluded from the model data.
# NOTE(review): the importance table recorded in this file was produced with
# X included, one CV repeat and no seed; re-run to refresh it.
model <- train(
  Gender ~ .,
  data = df4[, names(df4) != "X"],
  method = "rf",
  preProcess = c("center", "scale"),
  trControl = control
)

# Unscaled variable importance from the fitted model.
importance <- varImp(model, scale = FALSE)
print(importance)
## rf variable importance
## 
##   only 20 most important variables shown (out of 28)
## 
##       Overall
## X       76.10
## Peace   49.57
## BM      48.79
## BF      48.07
## EDU     36.06
## Q6      31.60
## Q17     28.71
## Q5      27.80
## Q20     27.77
## Q19     27.54
## Q9      27.53
## Q7      26.67
## Q10     26.52
## Q2      26.22
## Q8      26.20
## Q11     25.83
## Q18     25.57
## Q1      25.26
## Q4      25.09
## Q3      24.75
# Plot the variable-importance ranking computed above.
plot(x = importance)