## Show the current working directory of the R session.
getwd()
[1] "C:/Users/Hariharan/Documents"
## Attach dplyr: it supplies %>%, group_by(), filter(), summarise(), select(),
## arrange(), tally(), desc() and n(), all of which the analyses in this file
## rely on. Without it, filter() would resolve to stats::filter().
library(dplyr)

## Read the adult data set through an explicit path instead of calling setwd(),
## so the session's working directory is left untouched.
## NOTE(review): the directory is machine-specific -- swap it for a
## project-relative path before sharing this file.
adult.dataset <- read.csv(
  file.path("C:/Users/Hariharan/Documents", "adult.csv")
)
## How many people of each gender have never married?
## Keep only the "Never-married" rows, then count them per gender.
## An explicit `.groups` value stops summarise() from emitting its regrouping
## message, so no blanket suppressMessages() wrapper is needed. "drop_last" is
## summarise()'s default, so the result is still grouped by gender.
adult.dataset %>%
  filter(marital.status == "Never-married") %>%
  group_by(gender, marital.status) %>%
  summarise(gender_count = n(), .groups = "drop_last")
## Per-column overview of the data set: quartiles and mean for numeric
## columns, length/class/mode for character columns.
summary(object = adult.dataset)
age workclass fnlwgt education educational.num marital.status
Min. :17.00 Length:48842 Min. : 12285 Length:48842 Min. : 1.00 Length:48842
1st Qu.:28.00 Class :character 1st Qu.: 117551 Class :character 1st Qu.: 9.00 Class :character
Median :37.00 Mode :character Median : 178145 Mode :character Median :10.00 Mode :character
Mean :38.64 Mean : 189664 Mean :10.08
3rd Qu.:48.00 3rd Qu.: 237642 3rd Qu.:12.00
Max. :90.00 Max. :1490400 Max. :16.00
occupation relationship race gender capital.gain
Length:48842 Length:48842 Length:48842 Length:48842 Min. : 0
Class :character Class :character Class :character Class :character 1st Qu.: 0
Mode :character Mode :character Mode :character Mode :character Median : 0
Mean : 1079
3rd Qu.: 0
Max. :99999
capital.loss hours.per.week native.country income
Min. : 0.0 Min. : 1.00 Length:48842 Length:48842
1st Qu.: 0.0 1st Qu.:40.00 Class :character Class :character
Median : 0.0 Median :40.00 Mode :character Mode :character
Mean : 87.5 Mean :40.42
3rd Qu.: 0.0 3rd Qu.:45.00
Max. :4356.0 Max. :99.00
## How many citizens reached the top education level, broken down by race?
## educational.num >= 16 keeps only the highest level (16 is the maximum that
## summary(adult.dataset) reports for this column).
## An explicit `.groups` value replaces the suppressMessages() wrapper;
## "drop_last" is summarise()'s default, so the result stays grouped by race.
adult.dataset %>%
  filter(educational.num >= 16) %>%
  group_by(race, educational.num) %>%
  summarise(citizen_count = n(), .groups = "drop_last")
NA
## Number of citizens per race and education level, restricted to the more
## highly qualified rows (educational.num of 10 or above).
adult.dataset %>%
  filter(educational.num >= 10) %>%
  group_by(race, educational.num) %>%
  tally()
## Average number of hours worked per week across all records.
mean(adult.dataset[["hours.per.week"]])
[1] 40.42238
## Average number of hours worked per week, computed separately per gender.
## `.groups = "drop"` replaces the suppressMessages() wrapper. With a single
## grouping column the result was already ungrouped, so the output is the same.
## The summary column is left unnamed on purpose to keep its existing
## auto-generated name, `mean(hours.per.week)`.
adult.dataset %>%
  group_by(gender) %>%
  summarise(mean(hours.per.week), .groups = "drop")
NA
## How many records are there for each gender?
## These counts are the inputs for a female-to-male ratio; the ratio itself is
## not computed here.
## `.groups = "drop"` replaces the suppressMessages() wrapper. With a single
## grouping column the result was already ungrouped, so the output is the same.
adult.dataset %>%
  group_by(gender) %>%
  summarise(adult = n(), .groups = "drop")
NA
## Share of records whose gender is "Male", expressed as a percentage of all
## rows in the data set.
per_male <- sum(adult.dataset[["gender"]] == "Male") / nrow(adult.dataset) * 100
per_male
[1] 66.8482
## Share of records whose gender is "Female", expressed as a percentage of all
## rows in the data set.
per_female <- sum(adult.dataset[["gender"]] == "Female") / nrow(adult.dataset) * 100
per_female
[1] 33.1518
## Records of people older than 30 who have never married.
adult.dataset %>%
  filter(age > 30 & marital.status == "Never-married")
## Whose occupation is Prof-specialty or Sales?
## String comparison is case-sensitive. The lower-case "sales" used previously
## does not match the capitalised category label "Sales", so no sales rows
## were returned.
## NOTE(review): confirm the exact labels with unique(adult.dataset$occupation).
filter(adult.dataset, occupation %in% c("Prof-specialty", "Sales"))
## Weekly working hours of the male records: keep the "Male" rows and show
## only the gender and hours.per.week columns.
## NOTE(review): only "Male" is selected here; female records are not listed.
adult.dataset %>%
  filter(gender == "Male") %>%
  select(gender, hours.per.week)
## List each record's education label next to its numeric education level,
## highest level first, to see how the two columns correspond.
adult.dataset %>%
  arrange(desc(educational.num)) %>%
  select(education, educational.num)
## Number of adults for every (education, educational.num) pair, with the
## most frequent pair listed first.
adult.dataset %>%
  group_by(education, educational.num) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  arrange(desc(n))
LS0tDQp0aXRsZTogImFkdWx0IGRhdGEiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCg0KYGBge3J9DQpnZXR3ZCgpDQpzZXR3ZCgiQzovVXNlcnMvSGFyaWhhcmFuL0RvY3VtZW50cyIpDQphZHVsdC5kYXRhc2V0PC1yZWFkLmNzdignYWR1bHQuY3N2JykNCmBgYA0KDQpgYGB7cn0NCiMjSG93IG1hbnkgbWFsZSBhbmQgZmVtYWxlIGFyZSBub3QgbWFycmllZD8NCnN1cHByZXNzTWVzc2FnZXMoDQphZHVsdC5kYXRhc2V0ICU+JQ0KICBncm91cF9ieShnZW5kZXIsbWFyaXRhbC5zdGF0dXMpJT4lDQogIGZpbHRlcihtYXJpdGFsLnN0YXR1cz09Ik5ldmVyLW1hcnJpZWQiKSU+JQ0KICBzdW1tYXJpc2UoZ2VuZGVyX2NvdW50ID0gbigpKSkNCmBgYA0KYGBge3J9DQojI2ZpbmQgdGhlIHN1bW1hcnkgb2YgdGhlIGRhdGENCnN1bW1hcnkoYWR1bHQuZGF0YXNldCkNCmBgYA0KYGBge3J9DQojI2NpdGl6ZW5zIGNvbXBsZXRlZCB0aGVpciBkb2N0b3JhdGUgYW5kIHRoZWlyIHJhY2UNCnN1cHByZXNzTWVzc2FnZXMoDQphZHVsdC5kYXRhc2V0JT4lDQogIGdyb3VwX2J5KHJhY2UsZWR1Y2F0aW9uYWwubnVtKSU+JQ0KICBmaWx0ZXIoZWR1Y2F0aW9uYWwubnVtPj0xNiklPiUNCiAgc3VtbWFyaXNlKGNpdGl6ZW5fY291bnQgPSBuKCkpKQ0KICANCmBgYA0KYGBge3J9DQojI2hvdyBtYW55IGNpdGl6ZW5zIGFyZSBoaWdoIHF1YWxpZmllZCBhbmQgd2hhdCBpcyB0aGVpciByYWNlPw0KYWR1bHQuZGF0YXNldCU+JQ0KICBncm91cF9ieShyYWNlLGVkdWNhdGlvbmFsLm51bSklPiUNCiAgZmlsdGVyKGVkdWNhdGlvbmFsLm51bT49MTApJT4lDQogIHRhbGx5KCkNCmBgYA0KDQpgYGB7cn0NCiMjd2hhdCBpcyB0aGUgYXZlcmFnZSB3b3JraW5nIGhvdXIgcGVyIHdlZWsgb2YgYSBwZXJzb24/DQptZWFuKGFkdWx0LmRhdGFzZXQkaG91cnMucGVyLndlZWspDQpgYGANCmBgYHtyfQ0KIyN3aGF0IGlzIHRoZSBhdmVyYWdlIHdvcmtpbmcgaG91ciBwZXIgd2VlayBmb3IgbWFsZSBhbmQgZmVtYWxlPw0Kc3VwcHJlc3NNZXNzYWdlcygNCmFkdWx0LmRhdGFzZXQlPiUNCiAgZ3JvdXBfYnkoZ2VuZGVyKSU+JQ0KICBzdW1tYXJpc2UobWVhbihob3Vycy5wZXIud2VlaykpKQ0KDQpgYGANCg0KYGBge3J9DQojI2hvdyBtYW55IG1hbGUgYW5kIGZlbWFsZSBhcmUgdGhlcmU/DQpzdXBwcmVzc01lc3NhZ2VzKA0KYWR1bHQuZGF0YXNldCU+JQ0KICBncm91cF9ieShnZW5kZXIpJT4lDQogIHN1bW1hcmlzZShhZHVsdCA9IG4oKSkpDQoNCmBgYA0KYGBge3J9DQojI3BlcmNlbnRhZ2Ugb2YgbWFsZSBjaXRpemVuDQpwZXJfbWFsZSA9IHN1bShhZHVsdC5kYXRhc2V0JGdlbmRlcj09Ik1hbGUiKS9sZW5ndGgoYWR1bHQuZGF0YXNldCRnZW5kZXIpKjEwMA0KcGVyX21hbGUNCg0KYGBgDQpgYGB7cn0NCiMjcGVyY2VudGFnZSBvZiBmZW1hbGUNCnBlcl9mZW1hbGUgPSBzdW0oYWR1bHQuZGF0YXNldCRnZW5kZXI9PSJGZW1hbGUiKS9sZW5ndGgoYWR1bHQuZGF0YXNldCRn
ZW5kZXIpKjEwMA0KcGVyX2ZlbWFsZQ0KDQpgYGANCg0KYGBge3J9DQojI3dobyBhcmUgbmV2ZXIgbWFycmllZCB3aGVyZSB0aGVpciBhZ2UgaXMgYWJvdmUgMzANCmZpbHRlcihhZHVsdC5kYXRhc2V0LGFnZT4zMCxtYXJpdGFsLnN0YXR1cz09Ik5ldmVyLW1hcnJpZWQiKQ0KYGBgDQoNCmBgYHtyfQ0KIyN3aG9zZSBvY2N1cGFzaW9uIGlzIFByb2Ytc3BlY2lhbHR5IGFuZCBzYWxlcw0KZmlsdGVyKGFkdWx0LmRhdGFzZXQsb2NjdXBhdGlvbj09IlByb2Ytc3BlY2lhbHR5IiB8IG9jY3VwYXRpb249PSJzYWxlcyIpDQpgYGANCg0KYGBge3J9DQojI3Nob3cgdGhlIGluY29tZSB3aXRoIGVkdWNhdGlvbg0Kc2VsZWN0KGFkdWx0LmRhdGFzZXQsZWR1Y2F0aW9uLGluY29tZSkNCmBgYA0KDQoNCmBgYHtyfQ0KIyN3b3JraW5nIGhvdXJzIGZvciBtYWxlICYgZmVtYWxlDQphZHVsdC5kYXRhc2V0ICU+JQ0KICBzZWxlY3QoZ2VuZGVyLGhvdXJzLnBlci53ZWVrKSAlPiUNCiAgZmlsdGVyKGdlbmRlcj09Ik1hbGUiKQ0KYGBgDQoNCmBgYHtyfQ0KI0lzIEVkdWNhdGlvbiBhbmQgZWR1Y2F0aW9uIG51bWJlcnMgYm90aCBhcmUgcmVsYXRlZD8NCmFkdWx0LmRhdGFzZXQgJT4lDQogIHNlbGVjdChlZHVjYXRpb24sZWR1Y2F0aW9uYWwubnVtKSAlPiUNCiAgYXJyYW5nZShkZXNjKGVkdWNhdGlvbmFsLm51bSkpDQpgYGANCg0KYGBge3J9DQojI2xpc3QgdGhlIG5vLiBvZiBhZHVsdHMgYnkgZWR1Y2F0aW9uDQphZHVsdC5kYXRhc2V0JT4lDQogIGdyb3VwX2J5KGVkdWNhdGlvbixlZHVjYXRpb25hbC5udW0pJT4lDQogIHRhbGx5KHNvcnQgPSBUUlVFKQ0KYGBgDQoNCg0K