Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("curl")
# Load the traffic-fatalities data set from the raw GitHub URL through a curl
# connection. The local-file alternative is left disabled below.
# trafficFatalities <- read.csv(file = 'TrafficFatalities.csv', header = TRUE, sep = ',')
trafficFatalities <- read.csv(
  file = curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/TrafficFatalities.csv")
)
# Column-by-column overview of the full data set.
summary(object = trafficFatalities)
## ColNum state year unemp
## Min. : 1.00 Length:336 Min. :1982 Min. : 2.400
## 1st Qu.: 84.75 Class :character 1st Qu.:1983 1st Qu.: 5.475
## Median :168.50 Mode :character Median :1985 Median : 7.000
## Mean :168.50 Mean :1985 Mean : 7.347
## 3rd Qu.:252.25 3rd Qu.:1987 3rd Qu.: 8.900
## Max. :336.00 Max. :1988 Max. :18.000
## income emppop drinkage youngdrivers
## Min. : 9514 Min. :42.99 Min. :18.00 Min. :0.07314
## 1st Qu.:12086 1st Qu.:57.69 1st Qu.:20.00 1st Qu.:0.17037
## Median :13763 Median :61.36 Median :21.00 Median :0.18539
## Mean :13880 Mean :60.81 Mean :20.46 Mean :0.18593
## 3rd Qu.:15175 3rd Qu.:64.41 3rd Qu.:21.00 3rd Qu.:0.20219
## Max. :22193 Max. :71.27 Max. :21.00 Max. :0.28163
## miles fatal nfatal fatal1517
## Min. : 4576 Min. : 79.0 Min. : 13.00 Min. : 3.00
## 1st Qu.: 7183 1st Qu.: 293.8 1st Qu.: 53.75 1st Qu.: 25.75
## Median : 7796 Median : 701.0 Median : 135.00 Median : 49.00
## Mean : 7891 Mean : 928.7 Mean : 182.58 Mean : 62.61
## 3rd Qu.: 8504 3rd Qu.:1063.5 3rd Qu.: 212.00 3rd Qu.: 77.00
## Max. :26148 Max. :5504.0 Max. :1049.00 Max. :318.00
## nfatal1517 fatal1820 nfatal1820 afatal
## Min. : 0.00 Min. : 7.0 Min. : 0.00 Min. : 24.6
## 1st Qu.: 4.00 1st Qu.: 38.0 1st Qu.: 11.00 1st Qu.: 90.5
## Median :10.00 Median : 82.0 Median : 24.00 Median : 211.6
## Mean :12.26 Mean :106.7 Mean : 33.53 Mean : 293.3
## 3rd Qu.:15.25 3rd Qu.:130.2 3rd Qu.: 44.00 3rd Qu.: 364.0
## Max. :76.00 Max. :601.0 Max. :196.00 Max. :2094.9
## pop pop1517 pop1820 milestot
## Min. : 479000 Min. : 21000 Min. : 21000 Min. : 3993
## 1st Qu.: 1545251 1st Qu.: 71750 1st Qu.: 76962 1st Qu.: 11692
## Median : 3310503 Median : 163000 Median : 170982 Median : 28484
## Mean : 4930272 Mean : 230816 Mean : 249090 Mean : 37101
## 3rd Qu.: 5751735 3rd Qu.: 270500 3rd Qu.: 308311 3rd Qu.: 44140
## Max. :28314028 Max. :1172000 Max. :1321004 Max. :241575
# Show the first ten rows of the full data set.
head(x = trafficFatalities, n = 10)
## ColNum state year unemp income emppop drinkage youngdrivers miles
## 1 1 al 1982 14.4 10544.15 50.69204 19.00 0.211572 7233.887
## 2 2 al 1983 13.7 10732.80 52.14703 19.00 0.210768 7836.348
## 3 3 al 1984 11.1 11108.79 54.16809 19.00 0.211484 8262.990
## 4 4 al 1985 8.9 11332.63 55.27114 19.67 0.211140 8726.917
## 5 5 al 1986 9.8 11661.51 56.51450 21.00 0.213400 8952.854
## 6 6 al 1987 7.8 11944.00 57.50988 21.00 0.215527 9166.302
## 7 7 al 1988 7.2 12368.62 56.83453 21.00 0.218328 9674.323
## 8 8 az 1982 9.9 12309.07 56.89330 19.00 0.209012 6810.157
## 9 9 az 1983 9.1 12693.81 57.55363 19.00 0.203855 6587.495
## 10 10 az 1984 5.0 13265.93 60.37902 19.00 0.209127 6709.970
## fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820 afatal pop
## 1 839 146 53 9 99 34 309.438 3942002
## 2 930 154 71 8 108 26 341.834 3960008
## 3 932 165 49 7 103 25 304.872 3988992
## 4 882 146 66 9 100 23 276.742 4021008
## 5 1081 172 82 10 120 23 360.716 4049994
## 6 1110 181 94 11 127 31 368.421 4082999
## 7 1023 139 66 8 105 24 298.322 4101992
## 8 724 131 40 7 81 16 173.668 2896997
## 9 675 112 40 7 83 19 196.890 2977004
## 10 869 149 51 8 118 34 212.361 3071996
## pop1517 pop1820 milestot
## 1 208999.6 221553.4 28516
## 2 202000.1 219125.5 31032
## 3 197000.0 216724.1 32961
## 4 194999.7 214349.0 35091
## 5 203999.9 212000.0 36259
## 6 204999.8 208998.5 37426
## 7 201000.1 193000.5 39684
## 8 141000.0 156378.7 19729
## 9 138999.9 157521.4 19611
## 10 138000.1 158672.5 20613
# Mean and median of total (fatal) and night-time (nfatal) fatalities over the
# whole data set. Columns are referenced by bare name so that summarise()
# resolves them through its data mask; `trafficFatalities$col` would always
# read the full, ungrouped column even if the input were later grouped or piped.
summarise(
  trafficFatalities,
  MeanFatalities = mean(fatal),
  MedianFatalities = median(fatal),
  MeanNightTimeFatalities = mean(nfatal),
  MedianNightTimeFatalities = median(nfatal)
)
## MeanFatalities MedianFatalities MeanNightTimeFatalities
## 1 928.6637 701 182.5833
## MedianNightTimeFatalities
## 1 135
Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Build the 1982 subset: keep only rows from that year and the six columns used
# in the later comparison. `year` is a numeric column, so it is compared with
# the number 1982 rather than the string "1982", which only matched through
# implicit coercion of the column to character.
trafficFatalities1982 <- subset(
  trafficFatalities,
  subset = year == 1982,
  select = c(ColNum, state, year, miles, fatal, nfatal)
)
# Show the first ten rows of the new data frame.
head(trafficFatalities1982, n = 10)
## ColNum state year miles fatal nfatal
## 1 1 al 1982 7233.887 839 146
## 8 8 az 1982 6810.157 724 131
## 15 15 ar 1982 7208.500 550 102
## 22 22 ca 1982 6858.677 4615 944
## 29 29 co 1982 7742.842 668 140
## 36 36 ct 1982 6440.054 515 158
## 43 43 de 1982 7651.654 122 34
## 50 50 fl 1982 7587.130 2653 587
## 57 57 ga 1982 8623.444 1229 225
## 64 64 id 1982 8033.752 256 47
Create new column names for the new data frame.
# Give the six columns of the 1982 subset new names, in their existing order.
names(trafficFatalities1982) <- c(
  "ColOrder",
  "St",
  "Yr",
  "Mls",
  "Fatalities",
  "NightFatalities"
)
# Show the first ten rows to confirm the new headers.
head(x = trafficFatalities1982, n = 10)
## ColOrder St Yr Mls Fatalities NightFatalities
## 1 1 al 1982 7233.887 839 146
## 8 8 az 1982 6810.157 724 131
## 15 15 ar 1982 7208.500 550 102
## 22 22 ca 1982 6858.677 4615 944
## 29 29 co 1982 7742.842 668 140
## 36 36 ct 1982 6440.054 515 158
## 43 43 de 1982 7651.654 122 34
## 50 50 fl 1982 7587.130 2653 587
## 57 57 ga 1982 8623.444 1229 225
## 64 64 id 1982 8033.752 256 47
Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Column-by-column overview of the 1982 subset.
summary(object = trafficFatalities1982)
## ColOrder St Yr Mls
## Min. : 1.00 Length:48 Min. :1982 Min. : 4576
## 1st Qu.: 83.25 Class :character 1st Qu.:1982 1st Qu.: 6728
## Median :165.50 Mode :character Median :1982 Median : 7178
## Mean :165.50 Mean :1982 Mean : 7227
## 3rd Qu.:247.75 3rd Qu.:1982 3rd Qu.: 7603
## Max. :330.00 Max. :1982 Max. :10355
## Fatalities NightFatalities
## Min. : 105.0 Min. : 18.00
## 1st Qu.: 291.2 1st Qu.: 52.25
## Median : 696.0 Median : 143.00
## Mean : 909.2 Mean : 202.65
## 3rd Qu.:1056.5 3rd Qu.: 234.25
## Max. :4615.0 Max. :1049.00
# Means of total and night-time fatalities: full data set first, then the
# 1982 subset (which uses the renamed columns).
meanTotalFatality <- mean(trafficFatalities[["fatal"]])
meanNightFatality <- mean(trafficFatalities[["nfatal"]])
meanTotalFatality82 <- mean(trafficFatalities1982[["Fatalities"]])
meanNightFatality82 <- mean(trafficFatalities1982[["NightFatalities"]])
# Report each full-data mean next to its subset counterpart.
print(
  paste(
    "Mean of Total Fatalities is ",
    meanTotalFatality,
    " and Mean of Total Fatalities for the subset is ",
    meanTotalFatality82
  )
)
## [1] "Mean of Total Fatalities is 928.66369047619 and Mean of Total Fatalities for the subset is 909.208333333333"
print(
  paste(
    "Mean of Total Night Fatalities is ",
    meanNightFatality,
    " and Mean of Total Night Fatalities for subset is ",
    meanNightFatality82
  )
)
## [1] "Mean of Total Night Fatalities is 182.583333333333 and Mean of Total Night Fatalities for subset is 202.645833333333"
# Medians of total and night-time fatalities: full data set first, then the
# 1982 subset (which uses the renamed columns).
medianTotalFatality <- median(trafficFatalities[["fatal"]])
medianNightFatality <- median(trafficFatalities[["nfatal"]])
medianTotalFatality82 <- median(trafficFatalities1982[["Fatalities"]])
medianNightFatality82 <- median(trafficFatalities1982[["NightFatalities"]])
# Report each full-data median next to its subset counterpart.
print(
  paste(
    "Median of Total Fatalities is ",
    medianTotalFatality,
    " and Median of Total Fatalities for the subset is ",
    medianTotalFatality82
  )
)
## [1] "Median of Total Fatalities is 701 and Median of Total Fatalities for the subset is 696"
print(
  paste(
    "Median of Total Night Fatalities is ",
    medianNightFatality,
    " and Median of Total Night Fatalities for subset is ",
    medianNightFatality82
  )
)
## [1] "Median of Total Night Fatalities is 135 and Median of Total Night Fatalities for subset is 143"
For at least 3 distinct values in a column, rename each value so that every occurrence of it in that column is replaced. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
# Replace three state abbreviations in the St column with full state names.
# The disabled variant below matches against every cell of the data frame
# rather than the St column only:
# trafficFatalities1982[trafficFatalities1982 == 'az'] <- 'Arizona'
# trafficFatalities1982[trafficFatalities1982 == 'ca'] <- 'California'
# trafficFatalities1982[trafficFatalities1982 == 'co'] <- 'Colorado'
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St,
  trafficFatalities1982$St == "az",
  "Arizona"
)
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St,
  trafficFatalities1982$St == "ca",
  "California"
)
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St,
  trafficFatalities1982$St == "co",
  "Colorado"
)
# Show the first ten rows to confirm the replacements.
head(x = trafficFatalities1982, n = 10)
## ColOrder St Yr Mls Fatalities NightFatalities
## 1 1 al 1982 7233.887 839 146
## 8 8 Arizona 1982 6810.157 724 131
## 15 15 ar 1982 7208.500 550 102
## 22 22 California 1982 6858.677 4615 944
## 29 29 Colorado 1982 7742.842 668 140
## 36 36 ct 1982 6440.054 515 158
## 43 43 de 1982 7651.654 122 34
## 50 50 fl 1982 7587.130 2653 587
## 57 57 ga 1982 8623.444 1229 225
## 64 64 id 1982 8033.752 256 47
Display enough rows to see examples of all of steps 1-5 above.
# First ten rows of the original, unmodified data set.
head(x = trafficFatalities, n = 10)
## ColNum state year unemp income emppop drinkage youngdrivers miles
## 1 1 al 1982 14.4 10544.15 50.69204 19.00 0.211572 7233.887
## 2 2 al 1983 13.7 10732.80 52.14703 19.00 0.210768 7836.348
## 3 3 al 1984 11.1 11108.79 54.16809 19.00 0.211484 8262.990
## 4 4 al 1985 8.9 11332.63 55.27114 19.67 0.211140 8726.917
## 5 5 al 1986 9.8 11661.51 56.51450 21.00 0.213400 8952.854
## 6 6 al 1987 7.8 11944.00 57.50988 21.00 0.215527 9166.302
## 7 7 al 1988 7.2 12368.62 56.83453 21.00 0.218328 9674.323
## 8 8 az 1982 9.9 12309.07 56.89330 19.00 0.209012 6810.157
## 9 9 az 1983 9.1 12693.81 57.55363 19.00 0.203855 6587.495
## 10 10 az 1984 5.0 13265.93 60.37902 19.00 0.209127 6709.970
## fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820 afatal pop
## 1 839 146 53 9 99 34 309.438 3942002
## 2 930 154 71 8 108 26 341.834 3960008
## 3 932 165 49 7 103 25 304.872 3988992
## 4 882 146 66 9 100 23 276.742 4021008
## 5 1081 172 82 10 120 23 360.716 4049994
## 6 1110 181 94 11 127 31 368.421 4082999
## 7 1023 139 66 8 105 24 298.322 4101992
## 8 724 131 40 7 81 16 173.668 2896997
## 9 675 112 40 7 83 19 196.890 2977004
## 10 869 149 51 8 118 34 212.361 3071996
## pop1517 pop1820 milestot
## 1 208999.6 221553.4 28516
## 2 202000.1 219125.5 31032
## 3 197000.0 216724.1 32961
## 4 194999.7 214349.0 35091
## 5 203999.9 212000.0 36259
## 6 204999.8 208998.5 37426
## 7 201000.1 193000.5 39684
## 8 141000.0 156378.7 19729
## 9 138999.9 157521.4 19611
## 10 138000.1 158672.5 20613
# First ten rows of the 1982 subset, with its renamed columns and recoded states.
head(x = trafficFatalities1982, n = 10)
## ColOrder St Yr Mls Fatalities NightFatalities
## 1 1 al 1982 7233.887 839 146
## 8 8 Arizona 1982 6810.157 724 131
## 15 15 ar 1982 7208.500 550 102
## 22 22 California 1982 6858.677 4615 944
## 29 29 Colorado 1982 7742.842 668 140
## 36 36 ct 1982 6440.054 515 158
## 43 43 de 1982 7651.654 122 34
## 50 50 fl 1982 7587.130 2653 587
## 57 57 ga 1982 8623.444 1229 225
## 64 64 id 1982 8033.752 256 47
BONUS – place the original .csv file in a GitHub repository and have R read it from the link.
# Bonus: read the CSV directly from its raw GitHub URL through a curl connection.
library(curl)
trafficFatailityfromGit <- read.csv(
  file = curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/TrafficFatalities.csv")
)
# Show the first ten rows of the downloaded data.
head(x = trafficFatailityfromGit, n = 10)
## ColNum state year unemp income emppop drinkage youngdrivers miles
## 1 1 al 1982 14.4 10544.15 50.69204 19.00 0.211572 7233.887
## 2 2 al 1983 13.7 10732.80 52.14703 19.00 0.210768 7836.348
## 3 3 al 1984 11.1 11108.79 54.16809 19.00 0.211484 8262.990
## 4 4 al 1985 8.9 11332.63 55.27114 19.67 0.211140 8726.917
## 5 5 al 1986 9.8 11661.51 56.51450 21.00 0.213400 8952.854
## 6 6 al 1987 7.8 11944.00 57.50988 21.00 0.215527 9166.302
## 7 7 al 1988 7.2 12368.62 56.83453 21.00 0.218328 9674.323
## 8 8 az 1982 9.9 12309.07 56.89330 19.00 0.209012 6810.157
## 9 9 az 1983 9.1 12693.81 57.55363 19.00 0.203855 6587.495
## 10 10 az 1984 5.0 13265.93 60.37902 19.00 0.209127 6709.970
## fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820 afatal pop
## 1 839 146 53 9 99 34 309.438 3942002
## 2 930 154 71 8 108 26 341.834 3960008
## 3 932 165 49 7 103 25 304.872 3988992
## 4 882 146 66 9 100 23 276.742 4021008
## 5 1081 172 82 10 120 23 360.716 4049994
## 6 1110 181 94 11 127 31 368.421 4082999
## 7 1023 139 66 8 105 24 298.322 4101992
## 8 724 131 40 7 81 16 173.668 2896997
## 9 675 112 40 7 83 19 196.890 2977004
## 10 869 149 51 8 118 34 212.361 3071996
## pop1517 pop1820 milestot
## 1 208999.6 221553.4 28516
## 2 202000.1 219125.5 31032
## 3 197000.0 216724.1 32961
## 4 194999.7 214349.0 35091
## 5 203999.9 212000.0 36259
## 6 204999.8 208998.5 37426
## 7 201000.1 193000.5 39684
## 8 141000.0 156378.7 19729
## 9 138999.9 157521.4 19611
## 10 138000.1 158672.5 20613