firms<-read.csv("F2019_Section_5.csv")
my_firm<-firms %>%
filter(firmid==2)
```
my_firm<-my_firm %>%
mutate(age= 2019- birthyear) %>%
mutate(old = ifelse(age>=55, 1, 0))
my_firm$old <- factor(my_firm$old, labels = c("Young", "Old"))
view(my_firm)
my_firm$age[1:10]
## [1] 39 39 57 63 54 39 53 63 33 49
my_firm$old[1:10]
## [1] Young Young Old Old Young Young Young Old Young Young
## Levels: Young Old
my_firm<-my_firm %>%
mutate(age_group= cut(age, breaks = c(19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74), labels=c("20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74")))
ggplot(my_firm, aes(x=age))+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
my_firm%>%
group_by(age) %>%
filter(age>=50, age<55)
## # A tibble: 561 x 22
## # Groups: age [5]
## firmid personid birthyear dailywage date hiredate engagement unit
## <int> <int> <int> <int> <fct> <fct> <int> <int>
## 1 2 584106 1965 219 25-F… 14-Dec-… 5 24
## 2 2 488991 1966 224 25-F… 23-Jul-… 2 14
## 3 2 164314 1969 291 25-F… 3-Oct-06 2 21
## 4 2 119400 1968 231 25-F… 16-Mar-… 3 54
## 5 2 584103 1965 299 25-F… 14-Dec-… 5 24
## 6 2 28903 1967 267 25-F… 8-Nov-17 5 24
## 7 2 118503 1969 90 25-F… 25-Feb-… 4 54
## 8 2 119593 1966 472 25-F… 16-Mar-… 1 54
## 9 2 119475 1967 314 25-F… 11-Mar-… 2 54
## 10 2 119183 1967 248 25-F… 30-Sep-… 2 54
## # … with 551 more rows, and 14 more variables: region <fct>, gender <fct>,
## # married <fct>, children <fct>, education <fct>, taskcomplexity <fct>,
## # workschedule <fct>, contract <fct>, industry <fct>, nationality <fct>,
## # occupation <fct>, age <dbl>, old <fct>, age_group <fct>
##561 people will become 'older workers' within the next five years
my_firm%>%
group_by(age) %>%
filter(age>=55)
## # A tibble: 1,207 x 22
## # Groups: age [15]
## firmid personid birthyear dailywage date hiredate engagement unit
## <int> <int> <int> <int> <fct> <fct> <int> <int>
## 1 2 119872 1962 121 25-F… 10-Sep-… 5 54
## 2 2 164437 1956 803 25-F… 12-Nov-… 1 21
## 3 2 584032 1956 280 25-F… 18-Jun-… 4 24
## 4 2 355693 1960 247 25-F… 16-Apr-… 3 15
## 5 2 584530 1955 258 25-F… 6-Nov-89 4 24
## 6 2 119868 1962 106 25-F… 10-Sep-… 5 54
## 7 2 584185 1954 242 25-F… 12-Jul-… 3 24
## 8 2 584608 1952 264 25-F… 11-Feb-… 4 24
## 9 2 584252 1962 161 25-F… 12-Aug-… 2 24
## 10 2 119867 1962 41 25-F… 10-Sep-… 3 54
## # … with 1,197 more rows, and 14 more variables: region <fct>,
## # gender <fct>, married <fct>, children <fct>, education <fct>,
## # taskcomplexity <fct>, workschedule <fct>, contract <fct>,
## # industry <fct>, nationality <fct>, occupation <fct>, age <dbl>,
## # old <fct>, age_group <fct>
##1207 workers are currently considered 'older workers'
my_firm%>%
group_by(age) %>%
filter(age>=60)
## # A tibble: 705 x 22
## # Groups: age [10]
## firmid personid birthyear dailywage date hiredate engagement unit
## <int> <int> <int> <int> <fct> <fct> <int> <int>
## 1 2 164437 1956 803 25-F… 12-Nov-… 1 21
## 2 2 584032 1956 280 25-F… 18-Jun-… 4 24
## 3 2 584530 1955 258 25-F… 6-Nov-89 4 24
## 4 2 584185 1954 242 25-F… 12-Jul-… 3 24
## 5 2 584608 1952 264 25-F… 11-Feb-… 4 24
## 6 2 164504 1949 760 25-F… 19-Oct-… 1 21
## 7 2 584022 1956 237 25-F… 18-Jun-… 4 24
## 8 2 583786 1956 228 25-F… 10-Nov-… 5 24
## 9 2 118325 1958 202 25-F… 11-Mar-… 3 54
## 10 2 119265 1952 153 25-F… 11-Dec-… 3 54
## # … with 695 more rows, and 14 more variables: region <fct>, gender <fct>,
## # married <fct>, children <fct>, education <fct>, taskcomplexity <fct>,
## # workschedule <fct>, contract <fct>, industry <fct>, nationality <fct>,
## # occupation <fct>, age <dbl>, old <fct>, age_group <fct>
##705 workers will be retiring within the next 5 years
```
my_firm<- my_firm%>%
mutate(retired = ifelse(age>= 60, 1, 0))
my_firm$retired <- factor(my_firm$retired, labels = c("Not Retired", "Retired"))
view(my_firm)
my_firm$retired[1:10]
## [1] Not Retired Not Retired Not Retired Retired Not Retired
## [6] Not Retired Not Retired Retired Not Retired Not Retired
## Levels: Not Retired Retired
my_firm%>%
group_by(retired)
## # A tibble: 2,852 x 23
## # Groups: retired [2]
## firmid personid birthyear dailywage date hiredate engagement unit
## <int> <int> <int> <int> <fct> <fct> <int> <int>
## 1 2 119844 1980 88 25-F… 25-Feb-… 4 54
## 2 2 119842 1980 75 25-F… 25-Feb-… 4 54
## 3 2 119872 1962 121 25-F… 10-Sep-… 5 54
## 4 2 164437 1956 803 25-F… 12-Nov-… 1 21
## 5 2 584106 1965 219 25-F… 14-Dec-… 5 24
## 6 2 355723 1980 37 25-F… 25-Feb-… 3 15
## 7 2 488991 1966 224 25-F… 23-Jul-… 2 14
## 8 2 584032 1956 280 25-F… 18-Jun-… 4 24
## 9 2 584992 1986 37 25-F… 7-May-16 3 24
## 10 2 584403 1970 169 25-F… 25-Feb-… 4 24
## # … with 2,842 more rows, and 15 more variables: region <fct>,
## # gender <fct>, married <fct>, children <fct>, education <fct>,
## # taskcomplexity <fct>, workschedule <fct>, contract <fct>,
## # industry <fct>, nationality <fct>, occupation <fct>, age <dbl>,
## # old <fct>, age_group <fct>, retired <fct>
My_firmSpecialty <- my_firm%>%
filter(occupation == "Sales"|occupation== "Management, Business and Financial" | occupation == "Legal" | occupation == "Computer, Engineering and Science")
ggplot(My_firmSpecialty, aes(x= occupation, fill= retired))+
geom_bar(position="dodge")+
coord_flip()
my_firmNonSpecialty <- my_firm%>%
filter(occupation == "Service" | occupation == "Production" | occupation == "Office and Administrative")
ggplot(my_firmNonSpecialty, aes(x= occupation, fill= retired))+
geom_bar(position="dodge")+
coord_flip()
my_firm<- my_firm%>%
mutate(retired = ifelse(age>= 60, 1, 0))
my_firm$retired <- factor(my_firm$retired, labels = c("Not Retired", "Retired"))
view(my_firm)
my_firm$retired[1:10]
## [1] Not Retired Not Retired Not Retired Retired Not Retired
## [6] Not Retired Not Retired Retired Not Retired Not Retired
## Levels: Not Retired Retired
table(my_firm$occupation)
##
## Computer, Engineering and Science Legal
## 16 14
## Management, Business and Financial Office and Administrative
## 244 322
## Production Sales
## 1043 708
## Service
## 505
ret_firm <- my_firm %>%
filter(retired == "Retired")
table(ret_firm$occupation)
##
## Computer, Engineering and Science Legal
## 13 14
## Management, Business and Financial Office and Administrative
## 124 28
## Production Sales
## 197 323
## Service
## 6
NonCritDF <- data.frame(group = c("Retired", "Non-Retired"), value = c(231, 1639))
head(NonCritDF)
## group value
## 1 Retired 231
## 2 Non-Retired 1639
NonCriticalOccupations <- ggplot(NonCritDF, aes(x="", y=value, fill=group))+
geom_bar(width=1, stat="identity")
NonCriticalOccupations
NonCriticalPie <- NonCriticalOccupations + coord_polar("y", start = 0)
NonCriticalPie
CritDF <- data.frame(group = c("Retired", "Non-retired"), value = c(474, 508))
head(CritDF)
## group value
## 1 Retired 474
## 2 Non-retired 508
CriticalOccupations <- ggplot(CritDF, aes(x="", y=value, fill=group))+
geom_bar(width=1, stat="identity")
CriticalOccupations
CriticalOccupationPie <- CriticalOccupations + coord_polar("y", start = 0)
CriticalOccupationPie
chisq.test(table(my_firm$age_group), p=c(.0778, .0825, .0935, .0928, .0865, .0887, .1142, .1164, .0995, .0825, .0656))
##
## Chi-squared test for given probabilities
##
## data: table(my_firm$age_group)
## X-squared = 997.71, df = 10, p-value < 2.2e-16