This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
rm(list = ls())
setwd("C:/Users/hp/OneDrive - Higher Education Commission")
library(foreign)
library(haven)
library(tidyverse)
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
LFS<-read.spss("LFS_labels.sav",use.value.labels = TRUE,to.data.frame = TRUE)
## re-encoding from CP1252
## Warning in read.spss("LFS_labels.sav", use.value.labels = TRUE,
## to.data.frame = TRUE): Undeclared level(s) 3 added in variable: S04C04
## Warning in read.spss("LFS_labels.sav", use.value.labels = TRUE,
## to.data.frame = TRUE): Undeclared level(s) 11, 12, 13, 14, 15 added in
## variable: S04C09
## Warning in read.spss("LFS_labels.sav", use.value.labels = TRUE,
## to.data.frame = TRUE): Undeclared level(s) 11, 12, 13, 14, 15 added in
## variable: S04C10
## Warning in read.spss("LFS_labels.sav", use.value.labels = TRUE,
## to.data.frame = TRUE): Undeclared level(s) 42 added in variable: occupation
## Warning in read.spss("LFS_labels.sav", use.value.labels = TRUE,
## to.data.frame = TRUE): Undeclared level(s) 0 added in variable:
## under_employed
#View(LFS)
## Select variables which will be used in this analysis
## Select variables which will be used in this analysis
LFS1<-LFS %>% select(S04C05,S04C06,S04C07,S04C08, S04C09, S04C11,S04C13,S07C042, S07C043, S05C11,S07C053, education_level,occupation, industry,Region,PROVINCE,prov6)
## S05C11 variable measures professional categories, we ll drop all values with NA
LFS1<-LFS1 %>% filter(!is.na(S05C11))
#LFS1
dim(LFS1)
## [1] 46440 17
## No we recode Profession categories
LFS1<-LFS1 %>%
mutate(Emp_cat=recode(S05C11,'1'="Federal",'2'="Provincial",'3'="Local_body",'4'="Pub_Ent", '5'="Pub_Lim",'6'="Pvt_lim",'7'="Coop_Soc",'8'="Inv_own",'9'="Partnership",'10'="Other"))
dim(LFS1)
## [1] 46440 18
## We declare some variables as factors
LFS1<-mutate_at(LFS1, vars(S04C05,S04C07,S04C08, S04C09, S04C11,S04C13, education_level,occupation, industry,Region,PROVINCE),as.factor)
## To calculate experience we subtract 6 from age
LFS1<-mutate_at(LFS1, vars(S04C06),as.numeric)
LFS1<- mutate(LFS1,Exp=S04C06-6)
LFS1 %>%
group_by(education_level) %>%
summarise(avg=mean(S07C042)) %>%
arrange(avg)
## # A tibble: 15 x 2
## education_level avg
## <fct> <dbl>
## 1 No formal education NA
## 2 Nursery but below KG NA
## 3 KG but below Primary NA
## 4 Primary but below Middle NA
## 5 Middle But below Matric NA
## 6 Mattic but below intermediate NA
## 7 Intermediate below Degree NA
## 8 Degree in engineering NA
## 9 Degree in Medicine NA
## 10 Degree in computer NA
## 11 Degree in agriculture NA
## 12 Degree in other subjects NA
## 13 M.A/ M.SC NA
## 14 MPhil NA
## 15 Ph.D NA
LFS1 %>%
group_by(S04C07) %>%
summarise(count=n())
## # A tibble: 4 x 2
## S04C07 count
## <fct> <int>
## 1 Never married 12937
## 2 Married 32532
## 3 Widow/Widower 771
## 4 Divorced 200
LFS %>%filter(S04C06>10) %>%
group_by(S04C07) %>%
summarise(n = n()) %>%
mutate(freq = n*100 /sum(n),freq=round(freq,2)) #Round to two decimal points
## # A tibble: 4 x 3
## S04C07 n freq
## <fct> <int> <dbl>
## 1 Never married 72241 39.6
## 2 Married 102575 56.3
## 3 Widow/Widower 6847 3.76
## 4 Divorced 547 0.3
##Sex ratio
LFS %>%
group_by(S04C05) %>%
summarise(n = n()) %>%
mutate(freq = n*100 /sum(n),freq=round(freq,2)) #Round to two decimal points
## # A tibble: 2 x 3
## S04C05 n freq
## <fct> <int> <dbl>
## 1 Male 139125 51.1
## 2 Female 133365 48.9
LFS1<- mutate(LFS1,Exp=S04C06-6)
#View(LFS1$Exp)
#logit<-glm(S04C05~Emp_cat+Region,data = LFS1,family = "binomial")
#summary(logit)
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
#Variable S04C09 measure education but some numbers are not labeled properly
## We label missing values as below
LFS1<-LFS1 %>%
mutate(Edu_Yrs=recode(S04C09,'12'="Intermediate but below degree",'11'="Matric but below Intermediate",'13'="Intermediate but below degree",'14'="Bachelor", '15'="Degree but below master"))
# We verify whether now all values are labelled as desired
LFS1 %>%
group_by(occupation) %>%
summarise(count=n())
## Warning: Factor `occupation` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## # A tibble: 39 x 2
## occupation count
## <fct> <int>
## 1 Chief Exective, senior officials and legislators 141
## 2 Admistrative and Commercial Managers 217
## 3 Production and specialized service Managers 746
## 4 Hospitality retail and other service Managers 521
## 5 Science and engineering profesioanls 135
## 6 Health professionals 264
## 7 Teaching professionals 2773
## 8 Business and administration professionals 274
## 9 Information and communication technology professional 750
## 10 legal, social and cultural professional 588
## # ... with 29 more rows
Table1<-LFS1 %>%
group_by(occupation) %>%
summarise(count=n())
## Warning: Factor `occupation` contains implicit NA, consider using
## `forcats::fct_explicit_na`
LFS1<- LFS1 %>% mutate(Main_occ=recode(occupation, 'Chief Exective, senior officials and legislators'="Managerial",'Admistrative and Commercial Managers'="Managerial",'Production and specialized service Managers'="Managerial",'Hospitality retail and other service Managers'="Managerial",'Science and engineering profesioanls'="Professionals",'Health professionals'="Professionals",'Teaching professionals'="Professionals",'Business and administration professionals'="Professionals", 'Informaion and communication technology professional'="Professionals",'Information and communication technology professional'="Professionals",'legal, social and cultural professional'="Professionals",'Scinnce and engineering associate professional'="Technician", 'Business and adminstration assocaite professionals'="Technician", 'Legal, social cultural and related associate professional'="Technician", 'Health associate professional'="Technician",'Informtion and communication Technician'="Technician",'Informtion and communication technicians'="Clerks" ,'Clerical and keyboard clerks'="Clerks", 'Business and adminstration assocaite professionals'="Clerks", 'Legal, social cultural and related associate professional'="Clerks", 'Informtion and communication Technician'="Clerks",'Clerical and keyboard clerks'="Clerks",'42'="Clerks", 'Numerical and material recording clerks'="Clerks", 'other clerical support workers'="Clerks",'personal service workers'="Services", 'salers workers'="Services", 'personal care workers'="Services",'protective service workers'="Services",'craft and related trade workers'="Craft",'Metal, machinary and related trade workers'="Craft", 'Handicraft and printing workers'="Craft", 'electrical and electronic trade workers'="Craft",'Food processing, wood working, garments and other craft related trade workers'="Craft",'stationery, plant and machine operators'="Operators", 'Assemblers'="Operators", 'Drivers and mobile plant operators'="Operators",'Cleaners and helpers'="Elementary", 'Agricultural, forestery and fishery labourers'="Elementary", 'Labourer in Mining, construction, manufacturing and transport'="Elementary", 'Food preparation Assistant'="Elementary",'Street and related sales and service workers'="Elementary",'Refuse workers and Elementary workers'="Elementary",'subsistence farmers, fishers, hunters a'="Elementary", 'msrlte oriented skilled fishery, forestery and hunting workers'="Elementary",'Market orineted skilled agriculture workers'="Skilled",'Refuse workers and elementary workers'="Elementary",'msrlte oriented skilled fishery, forestery and hunting workers
'="Skilled",'subsistence farmers, fishers, hunters and gatherers'="Elementary",'armed forces'="Services",'Hospitality retail and other service Managers'="Managerial"))
LFS1<-as_tibble(LFS1)
LFS11<-LFS1 %>% mutate(Main_occu=recode(Main_occ,'Managerial'=1, 'Professionals'=2,
'Technician'=3, 'Informtion and communication technicians'=4, 'Clerks'=4,
'Services'=5, 'Skilled'=9, 'Elementary'=6, 'Craft'=7, 'Operators'=8, 'Elementary'=5
))
LFS11<-mutate_at(LFS11, vars(Main_occu),as.factor)
LFS11<-LFS11 %>% filter(!is.na(Main_occu))
tbl1<-LFS11 %>%
group_by(Main_occu) %>%
summarise(count=n())
tbl1
## # A tibble: 9 x 2
## Main_occu count
## <fct> <int>
## 1 1 1625
## 2 2 4784
## 3 3 1946
## 4 4 1220
## 5 5 12430
## 6 6 8845
## 7 7 10465
## 8 8 5004
## 9 9 120
save(LFS11,file = "LabourForceSurvery.RDATA")
dim(LFS1)
## [1] 46440 21
df1 <-
model.matrix(~ -1 + LFS1$Main_occ) %>%
tibble::as_tibble() %>%
purrr::set_names(levels(LFS1$Main_occ))
library(nnet)
#LFS1$Main_occ <- relevel(LFS1$Main_occ, ref = "academic")
#m1<-multinom(LFS1$Main_occu~LFS1$education_level+LFS1$Exp,data = LFS1)
#summary(m1)
#exp(coef(m1))
wage<-LFS11 %>% filter(!is.na(S07C043))
#wage
p1<-wage %>%group_by(education_level,S04C05) %>%
summarize(avg=round(mean(S07C043,0))) %>%
arrange(avg)
p1
## # A tibble: 30 x 3
## # Groups: education_level [15]
## education_level S04C05 avg
## <fct> <fct> <dbl>
## 1 KG but below Primary Female 6221
## 2 No formal education Female 7889
## 3 Primary but below Middle Female 8952
## 4 Nursery but below KG Female 9500
## 5 Middle But below Matric Female 13843
## 6 KG but below Primary Male 15271
## 7 Mattic but below intermediate Female 15564
## 8 Primary but below Middle Male 15955
## 9 Intermediate below Degree Female 16356
## 10 Middle But below Matric Male 16558
## # ... with 20 more rows
p2<-ggplot(p1,aes(x=education_level,y=avg, fill=S04C05))+geom_bar(stat = "identity",width=0.7,position = 'dodge') +scale_y_continuous(labels = function(x) format(x, scientific = FALSE))+coord_flip()
p2+ggtitle("Average monthly earnings by education_level and genderwise")
## To display numbers instead of 1+e00 use following
##p2+scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
#Label Bar charts theme(legend.title = element_blank()) .
p3<-p2+geom_text(aes(label=avg), vjust =1.6, size = 3.0) + theme(legend.title = element_blank())
p3
## Histogram and boxplot
#ggplot(wage)+aes(log(S07C043))+geom_histogram()
m<-lm(S07C043~Edu_Yrs,data = wage)
summary(m)
##
## Call:
## lm(formula = S07C043 ~ Edu_Yrs, data = wage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -99842 -8369 -2141 4770 1268778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15230.0 375.6 40.547 < 2e-16
## Edu_YrsDegree in Computer 19715.3 2603.6 7.572 3.85e-14
## Edu_YrsMatric but below Intermediate 5139.0 524.4 9.800 < 2e-16
## Edu_YrsIntermediate but below degree 15992.4 462.6 34.569 < 2e-16
## Edu_YrsBachelor 33343.8 2065.3 16.145 < 2e-16
## Edu_YrsDegree but below master 96112.1 4743.3 20.263 < 2e-16
## Edu_YrsNursery but below KG 4351.1 3409.1 1.276 0.2019
## Edu_YrsKG but below Primary -523.6 1042.7 -0.502 0.6156
## Edu_YrsPrimary but below Middel 411.1 595.9 0.690 0.4903
## Edu_YrsMiddle but below Matric 1224.3 595.7 2.055 0.0399
## Edu_YrsDegree in Engineering 35488.3 1557.2 22.789 < 2e-16
## Edu_YrsDegree in medicine 58594.4 1839.5 31.853 < 2e-16
##
## (Intercept) ***
## Edu_YrsDegree in Computer ***
## Edu_YrsMatric but below Intermediate ***
## Edu_YrsIntermediate but below degree ***
## Edu_YrsBachelor ***
## Edu_YrsDegree but below master ***
## Edu_YrsNursery but below KG
## Edu_YrsKG but below Primary
## Edu_YrsPrimary but below Middel
## Edu_YrsMiddle but below Matric *
## Edu_YrsDegree in Engineering ***
## Edu_YrsDegree in medicine ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20610 on 16956 degrees of freedom
## Multiple R-squared: 0.1749, Adjusted R-squared: 0.1744
## F-statistic: 326.9 on 11 and 16956 DF, p-value: < 2.2e-16
#ggplot(wage)+aes(log(S07C043))+geom_histogram()+facet_wrap(S04C05)
#glimpse(wage)
p<-ggplot(wage,aes(x=S04C05,y=log(S07C043)))+geom_boxplot()
p+ stat_summary(fun.y=mean, geom="point", shape=23, size=4) ## diplay statistics
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## Warning: Removed 17 rows containing non-finite values (stat_summary).
p+coord_flip()
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
# Box plot with mean points
p + stat_summary(fun.y=mean, geom="point", shape=23, size=4)
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).
## Warning: Removed 17 rows containing non-finite values (stat_summary).