| dplyr | tidyr |
|---|---|
| Â mutate | Â gather |
| Â filter | Â spread |
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library(ggplot2)
disability_df = read.csv(file="03_DisabilityData.csv", header=TRUE, sep=",", skip = 0)
colnames(disability_df) = c("Age", "Disability_Type", "CT", "MA", "NY")
disability_df = filter(disability_df, Disability_Type != "")
print(disability_df)
## Age Disability_Type CT MA NY
## 1 00--04 Hearing 990 1675 7937
## 2 Vision 916 1446 6179
## 3 05--17 Hearing 3959 5549 15268
## 4 Vision 5188 7754 18673
## 5 Cognitive 21841 49995 109496
## 6 Ambulatory 4750 7109 18549
## 7 Self_care 6775 13008 28969
## 8 18--64 Hearing 36730 70648 185731
## 9 Vision 25467 56577 172292
## 10 Cognitive 82632 178639 420953
## 11 Ambulatory 83191 171696 543684
## 12 Self_care 31956 68609 189646
## 13 Independent_Living 63518 144643 370959
## 14 65--125 Hearing 63780 127943 312478
## 15 Vision 26493 55416 161706
## 16 Cognitive 38388 79919 232410
## 17 Ambulatory 97804 190794 613547
## 18 Self_care 36798 75502 245282
## 19 Independent_Living 72678 143724 437172
print (colnames(disability_df))
## [1] "Age" "Disability_Type" "CT" "MA" "NY"
for (i in 1:nrow(disability_df)) {
if (disability_df$Age[i] == "") {
disability_df$Age[i] = disability_df$Age[i-1]
}
}
print(disability_df)
## Age Disability_Type CT MA NY
## 1 00--04 Hearing 990 1675 7937
## 2 00--04 Vision 916 1446 6179
## 3 05--17 Hearing 3959 5549 15268
## 4 05--17 Vision 5188 7754 18673
## 5 05--17 Cognitive 21841 49995 109496
## 6 05--17 Ambulatory 4750 7109 18549
## 7 05--17 Self_care 6775 13008 28969
## 8 18--64 Hearing 36730 70648 185731
## 9 18--64 Vision 25467 56577 172292
## 10 18--64 Cognitive 82632 178639 420953
## 11 18--64 Ambulatory 83191 171696 543684
## 12 18--64 Self_care 31956 68609 189646
## 13 18--64 Independent_Living 63518 144643 370959
## 14 65--125 Hearing 63780 127943 312478
## 15 65--125 Vision 26493 55416 161706
## 16 65--125 Cognitive 38388 79919 232410
## 17 65--125 Ambulatory 97804 190794 613547
## 18 65--125 Self_care 36798 75502 245282
## 19 65--125 Independent_Living 72678 143724 437172
disability_df = gather(disability_df, State, Disability_Count, CT:NY)
print(disability_df)
## Age Disability_Type State Disability_Count
## 1 00--04 Hearing CT 990
## 2 00--04 Vision CT 916
## 3 05--17 Hearing CT 3959
## 4 05--17 Vision CT 5188
## 5 05--17 Cognitive CT 21841
## 6 05--17 Ambulatory CT 4750
## 7 05--17 Self_care CT 6775
## 8 18--64 Hearing CT 36730
## 9 18--64 Vision CT 25467
## 10 18--64 Cognitive CT 82632
## 11 18--64 Ambulatory CT 83191
## 12 18--64 Self_care CT 31956
## 13 18--64 Independent_Living CT 63518
## 14 65--125 Hearing CT 63780
## 15 65--125 Vision CT 26493
## 16 65--125 Cognitive CT 38388
## 17 65--125 Ambulatory CT 97804
## 18 65--125 Self_care CT 36798
## 19 65--125 Independent_Living CT 72678
## 20 00--04 Hearing MA 1675
## 21 00--04 Vision MA 1446
## 22 05--17 Hearing MA 5549
## 23 05--17 Vision MA 7754
## 24 05--17 Cognitive MA 49995
## 25 05--17 Ambulatory MA 7109
## 26 05--17 Self_care MA 13008
## 27 18--64 Hearing MA 70648
## 28 18--64 Vision MA 56577
## 29 18--64 Cognitive MA 178639
## 30 18--64 Ambulatory MA 171696
## 31 18--64 Self_care MA 68609
## 32 18--64 Independent_Living MA 144643
## 33 65--125 Hearing MA 127943
## 34 65--125 Vision MA 55416
## 35 65--125 Cognitive MA 79919
## 36 65--125 Ambulatory MA 190794
## 37 65--125 Self_care MA 75502
## 38 65--125 Independent_Living MA 143724
## 39 00--04 Hearing NY 7937
## 40 00--04 Vision NY 6179
## 41 05--17 Hearing NY 15268
## 42 05--17 Vision NY 18673
## 43 05--17 Cognitive NY 109496
## 44 05--17 Ambulatory NY 18549
## 45 05--17 Self_care NY 28969
## 46 18--64 Hearing NY 185731
## 47 18--64 Vision NY 172292
## 48 18--64 Cognitive NY 420953
## 49 18--64 Ambulatory NY 543684
## 50 18--64 Self_care NY 189646
## 51 18--64 Independent_Living NY 370959
## 52 65--125 Hearing NY 312478
## 53 65--125 Vision NY 161706
## 54 65--125 Cognitive NY 232410
## 55 65--125 Ambulatory NY 613547
## 56 65--125 Self_care NY 245282
## 57 65--125 Independent_Living NY 437172
disability_df = spread(disability_df, Disability_Type, Disability_Count)
disability_df[is.na(disability_df)] = 0
print (disability_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision
## 1 00--04 CT 0 0 990 0 0 916
## 2 00--04 MA 0 0 1675 0 0 1446
## 3 00--04 NY 0 0 7937 0 0 6179
## 4 05--17 CT 4750 21841 3959 0 6775 5188
## 5 05--17 MA 7109 49995 5549 0 13008 7754
## 6 05--17 NY 18549 109496 15268 0 28969 18673
## 7 18--64 CT 83191 82632 36730 63518 31956 25467
## 8 18--64 MA 171696 178639 70648 144643 68609 56577
## 9 18--64 NY 543684 420953 185731 370959 189646 172292
## 10 65--125 CT 97804 38388 63780 72678 36798 26493
## 11 65--125 MA 190794 79919 127943 143724 75502 55416
## 12 65--125 NY 613547 232410 312478 437172 245282 161706
calcPercentage = function(value, totalValue) {
round((value/totalValue)*100, 2)
}
disability_df = mutate(disability_df, TotalDisability=(Ambulatory+Cognitive+Hearing+Independent_Living+Self_care+Vision),
PercentAmbulatory=calcPercentage(Ambulatory, TotalDisability),
PercentCognitive=calcPercentage(Cognitive, TotalDisability),
PercentHearing=calcPercentage(Hearing, TotalDisability),
PercentIndependent_Living=calcPercentage(Independent_Living, TotalDisability),
PercentSelf_care=calcPercentage(Self_care, TotalDisability),
PercentVision=calcPercentage(Vision, TotalDisability))
print(disability_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision TotalDisability PercentAmbulatory PercentCognitive PercentHearing PercentIndependent_Living PercentSelf_care PercentVision
## 1 00--04 CT 0 0 990 0 0 916 1906 0.00 0.00 51.94 0.00 0.00 48.06
## 2 00--04 MA 0 0 1675 0 0 1446 3121 0.00 0.00 53.67 0.00 0.00 46.33
## 3 00--04 NY 0 0 7937 0 0 6179 14116 0.00 0.00 56.23 0.00 0.00 43.77
## 4 05--17 CT 4750 21841 3959 0 6775 5188 42513 11.17 51.37 9.31 0.00 15.94 12.20
## 5 05--17 MA 7109 49995 5549 0 13008 7754 83415 8.52 59.94 6.65 0.00 15.59 9.30
## 6 05--17 NY 18549 109496 15268 0 28969 18673 190955 9.71 57.34 8.00 0.00 15.17 9.78
## 7 18--64 CT 83191 82632 36730 63518 31956 25467 323494 25.72 25.54 11.35 19.63 9.88 7.87
## 8 18--64 MA 171696 178639 70648 144643 68609 56577 690812 24.85 25.86 10.23 20.94 9.93 8.19
## 9 18--64 NY 543684 420953 185731 370959 189646 172292 1883265 28.87 22.35 9.86 19.70 10.07 9.15
## 10 65--125 CT 97804 38388 63780 72678 36798 26493 335941 29.11 11.43 18.99 21.63 10.95 7.89
## 11 65--125 MA 190794 79919 127943 143724 75502 55416 673298 28.34 11.87 19.00 21.35 11.21 8.23
## 12 65--125 NY 613547 232410 312478 437172 245282 161706 2002595 30.64 11.61 15.60 21.83 12.25 8.07
disability_state_group_df=sqldf("select State, sum(TotalDisability) As TotalDisability from disability_df group by State")
## Loading required package: tcltk
print(disability_state_group_df)
## State TotalDisability
## 1 CT 703854
## 2 MA 1450646
## 3 NY 4090931
ggplot(disability_state_group_df, aes(x=State, y=TotalDisability)) + geom_bar(fill="#8877aa", color="black", stat="identity")
disability_age_group_df=sqldf("select Age, sum(TotalDisability) As TotalDisability from disability_df group by Age")
print(disability_age_group_df)
## Age TotalDisability
## 1 00--04 19143
## 2 05--17 316883
## 3 18--64 2897571
## 4 65--125 3011834
ggplot(disability_age_group_df, aes(x=Age, y=TotalDisability)) + geom_bar(fill="#87ffaa", color="black", stat="identity")
graphHearing_df = filter (disability_df, (Age != "00--04"))
print(graphHearing_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision TotalDisability PercentAmbulatory PercentCognitive PercentHearing PercentIndependent_Living PercentSelf_care PercentVision
## 1 05--17 CT 4750 21841 3959 0 6775 5188 42513 11.17 51.37 9.31 0.00 15.94 12.20
## 2 05--17 MA 7109 49995 5549 0 13008 7754 83415 8.52 59.94 6.65 0.00 15.59 9.30
## 3 05--17 NY 18549 109496 15268 0 28969 18673 190955 9.71 57.34 8.00 0.00 15.17 9.78
## 4 18--64 CT 83191 82632 36730 63518 31956 25467 323494 25.72 25.54 11.35 19.63 9.88 7.87
## 5 18--64 MA 171696 178639 70648 144643 68609 56577 690812 24.85 25.86 10.23 20.94 9.93 8.19
## 6 18--64 NY 543684 420953 185731 370959 189646 172292 1883265 28.87 22.35 9.86 19.70 10.07 9.15
## 7 65--125 CT 97804 38388 63780 72678 36798 26493 335941 29.11 11.43 18.99 21.63 10.95 7.89
## 8 65--125 MA 190794 79919 127943 143724 75502 55416 673298 28.34 11.87 19.00 21.35 11.21 8.23
## 9 65--125 NY 613547 232410 312478 437172 245282 161706 2002595 30.64 11.61 15.60 21.83 12.25 8.07
ggplot(data=graphHearing_df, aes(x=State, y=PercentHearing, fill=Age)) + geom_bar(stat="identity", position="dodge") +
ylab("% of Hearing Disability") + ggtitle("% of Hearing Disability")
graphVision_df = filter (disability_df, (Age != "00--04"))
print(graphVision_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision TotalDisability PercentAmbulatory PercentCognitive PercentHearing PercentIndependent_Living PercentSelf_care PercentVision
## 1 05--17 CT 4750 21841 3959 0 6775 5188 42513 11.17 51.37 9.31 0.00 15.94 12.20
## 2 05--17 MA 7109 49995 5549 0 13008 7754 83415 8.52 59.94 6.65 0.00 15.59 9.30
## 3 05--17 NY 18549 109496 15268 0 28969 18673 190955 9.71 57.34 8.00 0.00 15.17 9.78
## 4 18--64 CT 83191 82632 36730 63518 31956 25467 323494 25.72 25.54 11.35 19.63 9.88 7.87
## 5 18--64 MA 171696 178639 70648 144643 68609 56577 690812 24.85 25.86 10.23 20.94 9.93 8.19
## 6 18--64 NY 543684 420953 185731 370959 189646 172292 1883265 28.87 22.35 9.86 19.70 10.07 9.15
## 7 65--125 CT 97804 38388 63780 72678 36798 26493 335941 29.11 11.43 18.99 21.63 10.95 7.89
## 8 65--125 MA 190794 79919 127943 143724 75502 55416 673298 28.34 11.87 19.00 21.35 11.21 8.23
## 9 65--125 NY 613547 232410 312478 437172 245282 161706 2002595 30.64 11.61 15.60 21.83 12.25 8.07
ggplot(data=graphVision_df, aes(x=State, y=PercentVision, fill=Age)) + geom_bar(stat="identity", position="dodge") +
ylab("% of Vision Disability") + ggtitle("% of Vision Disability")
graphAmbulatory_df = filter (disability_df, (Age != "00--04"))
print(graphAmbulatory_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision TotalDisability PercentAmbulatory PercentCognitive PercentHearing PercentIndependent_Living PercentSelf_care PercentVision
## 1 05--17 CT 4750 21841 3959 0 6775 5188 42513 11.17 51.37 9.31 0.00 15.94 12.20
## 2 05--17 MA 7109 49995 5549 0 13008 7754 83415 8.52 59.94 6.65 0.00 15.59 9.30
## 3 05--17 NY 18549 109496 15268 0 28969 18673 190955 9.71 57.34 8.00 0.00 15.17 9.78
## 4 18--64 CT 83191 82632 36730 63518 31956 25467 323494 25.72 25.54 11.35 19.63 9.88 7.87
## 5 18--64 MA 171696 178639 70648 144643 68609 56577 690812 24.85 25.86 10.23 20.94 9.93 8.19
## 6 18--64 NY 543684 420953 185731 370959 189646 172292 1883265 28.87 22.35 9.86 19.70 10.07 9.15
## 7 65--125 CT 97804 38388 63780 72678 36798 26493 335941 29.11 11.43 18.99 21.63 10.95 7.89
## 8 65--125 MA 190794 79919 127943 143724 75502 55416 673298 28.34 11.87 19.00 21.35 11.21 8.23
## 9 65--125 NY 613547 232410 312478 437172 245282 161706 2002595 30.64 11.61 15.60 21.83 12.25 8.07
ggplot(data=graphAmbulatory_df, aes(x=State, y=PercentAmbulatory, group=Age, color=Age)) + geom_line() +
geom_point(color = "Red") + ylab("% of Ambulatory Disability") + ggtitle("% of Ambulatory Disability")
graphCognitive_df = filter (disability_df, (Age != "00--04"))
print(graphCognitive_df)
## Age State Ambulatory Cognitive Hearing Independent_Living Self_care Vision TotalDisability PercentAmbulatory PercentCognitive PercentHearing PercentIndependent_Living PercentSelf_care PercentVision
## 1 05--17 CT 4750 21841 3959 0 6775 5188 42513 11.17 51.37 9.31 0.00 15.94 12.20
## 2 05--17 MA 7109 49995 5549 0 13008 7754 83415 8.52 59.94 6.65 0.00 15.59 9.30
## 3 05--17 NY 18549 109496 15268 0 28969 18673 190955 9.71 57.34 8.00 0.00 15.17 9.78
## 4 18--64 CT 83191 82632 36730 63518 31956 25467 323494 25.72 25.54 11.35 19.63 9.88 7.87
## 5 18--64 MA 171696 178639 70648 144643 68609 56577 690812 24.85 25.86 10.23 20.94 9.93 8.19
## 6 18--64 NY 543684 420953 185731 370959 189646 172292 1883265 28.87 22.35 9.86 19.70 10.07 9.15
## 7 65--125 CT 97804 38388 63780 72678 36798 26493 335941 29.11 11.43 18.99 21.63 10.95 7.89
## 8 65--125 MA 190794 79919 127943 143724 75502 55416 673298 28.34 11.87 19.00 21.35 11.21 8.23
## 9 65--125 NY 613547 232410 312478 437172 245282 161706 2002595 30.64 11.61 15.60 21.83 12.25 8.07
ggplot(data=graphCognitive_df, aes(x=State, y=PercentCognitive, group=Age, color=Age)) + geom_line() +
geom_point(color = "Red") + ylab("% of Cognitive Disability") + ggtitle("% of Cognitive Disability")
(4.a) From the graphs we can conclude that CT and MA have almost equal percentage of people with hearing disabilities in the age group 65 and over. And we also notice that CT has the highest percentage of children with the hearing disability in the age group 5-17 years.
From the first graph we can conclude that NY has the highest number of people with disabilities combined. This includes all age groups. Also, CT has the highest percentag of childern with Vision disability in the age group 5-17 years even though CT represents smaller population compared to NY.