Assignment 1

Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("curl")
#trafficFatalities <- read.csv(file ='TrafficFatalities.csv',header = TRUE,sep =',' )
# Read the CSV through a curl connection opened on the raw GitHub URL.
trafficFatalities <- read.csv(
  file = curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/TrafficFatalities.csv")
)

# Print per-column summary statistics for the loaded data frame.
summary(object = trafficFatalities)
##      ColNum          state                year          unemp       
##  Min.   :  1.00   Length:336         Min.   :1982   Min.   : 2.400  
##  1st Qu.: 84.75   Class :character   1st Qu.:1983   1st Qu.: 5.475  
##  Median :168.50   Mode  :character   Median :1985   Median : 7.000  
##  Mean   :168.50                      Mean   :1985   Mean   : 7.347  
##  3rd Qu.:252.25                      3rd Qu.:1987   3rd Qu.: 8.900  
##  Max.   :336.00                      Max.   :1988   Max.   :18.000  
##      income          emppop         drinkage      youngdrivers    
##  Min.   : 9514   Min.   :42.99   Min.   :18.00   Min.   :0.07314  
##  1st Qu.:12086   1st Qu.:57.69   1st Qu.:20.00   1st Qu.:0.17037  
##  Median :13763   Median :61.36   Median :21.00   Median :0.18539  
##  Mean   :13880   Mean   :60.81   Mean   :20.46   Mean   :0.18593  
##  3rd Qu.:15175   3rd Qu.:64.41   3rd Qu.:21.00   3rd Qu.:0.20219  
##  Max.   :22193   Max.   :71.27   Max.   :21.00   Max.   :0.28163  
##      miles           fatal            nfatal          fatal1517     
##  Min.   : 4576   Min.   :  79.0   Min.   :  13.00   Min.   :  3.00  
##  1st Qu.: 7183   1st Qu.: 293.8   1st Qu.:  53.75   1st Qu.: 25.75  
##  Median : 7796   Median : 701.0   Median : 135.00   Median : 49.00  
##  Mean   : 7891   Mean   : 928.7   Mean   : 182.58   Mean   : 62.61  
##  3rd Qu.: 8504   3rd Qu.:1063.5   3rd Qu.: 212.00   3rd Qu.: 77.00  
##  Max.   :26148   Max.   :5504.0   Max.   :1049.00   Max.   :318.00  
##    nfatal1517      fatal1820       nfatal1820         afatal      
##  Min.   : 0.00   Min.   :  7.0   Min.   :  0.00   Min.   :  24.6  
##  1st Qu.: 4.00   1st Qu.: 38.0   1st Qu.: 11.00   1st Qu.:  90.5  
##  Median :10.00   Median : 82.0   Median : 24.00   Median : 211.6  
##  Mean   :12.26   Mean   :106.7   Mean   : 33.53   Mean   : 293.3  
##  3rd Qu.:15.25   3rd Qu.:130.2   3rd Qu.: 44.00   3rd Qu.: 364.0  
##  Max.   :76.00   Max.   :601.0   Max.   :196.00   Max.   :2094.9  
##       pop              pop1517           pop1820           milestot     
##  Min.   :  479000   Min.   :  21000   Min.   :  21000   Min.   :  3993  
##  1st Qu.: 1545251   1st Qu.:  71750   1st Qu.:  76962   1st Qu.: 11692  
##  Median : 3310503   Median : 163000   Median : 170982   Median : 28484  
##  Mean   : 4930272   Mean   : 230816   Mean   : 249090   Mean   : 37101  
##  3rd Qu.: 5751735   3rd Qu.: 270500   3rd Qu.: 308311   3rd Qu.: 44140  
##  Max.   :28314028   Max.   :1172000   Max.   :1321004   Max.   :241575
# Show the first 10 rows of the full data set.
head(trafficFatalities, n = 10)
##    ColNum state year unemp   income   emppop drinkage youngdrivers    miles
## 1       1    al 1982  14.4 10544.15 50.69204    19.00     0.211572 7233.887
## 2       2    al 1983  13.7 10732.80 52.14703    19.00     0.210768 7836.348
## 3       3    al 1984  11.1 11108.79 54.16809    19.00     0.211484 8262.990
## 4       4    al 1985   8.9 11332.63 55.27114    19.67     0.211140 8726.917
## 5       5    al 1986   9.8 11661.51 56.51450    21.00     0.213400 8952.854
## 6       6    al 1987   7.8 11944.00 57.50988    21.00     0.215527 9166.302
## 7       7    al 1988   7.2 12368.62 56.83453    21.00     0.218328 9674.323
## 8       8    az 1982   9.9 12309.07 56.89330    19.00     0.209012 6810.157
## 9       9    az 1983   9.1 12693.81 57.55363    19.00     0.203855 6587.495
## 10     10    az 1984   5.0 13265.93 60.37902    19.00     0.209127 6709.970
##    fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820  afatal     pop
## 1    839    146        53          9        99         34 309.438 3942002
## 2    930    154        71          8       108         26 341.834 3960008
## 3    932    165        49          7       103         25 304.872 3988992
## 4    882    146        66          9       100         23 276.742 4021008
## 5   1081    172        82         10       120         23 360.716 4049994
## 6   1110    181        94         11       127         31 368.421 4082999
## 7   1023    139        66          8       105         24 298.322 4101992
## 8    724    131        40          7        81         16 173.668 2896997
## 9    675    112        40          7        83         19 196.890 2977004
## 10   869    149        51          8       118         34 212.361 3071996
##     pop1517  pop1820 milestot
## 1  208999.6 221553.4    28516
## 2  202000.1 219125.5    31032
## 3  197000.0 216724.1    32961
## 4  194999.7 214349.0    35091
## 5  203999.9 212000.0    36259
## 6  204999.8 208998.5    37426
## 7  201000.1 193000.5    39684
## 8  141000.0 156378.7    19729
## 9  138999.9 157521.4    19611
## 10 138000.1 158672.5    20613
# Mean and median of total (`fatal`) and night-time (`nfatal`) fatalities over
# the whole data set, returned as a one-row data frame.
# Columns are referenced by bare name so that summarise() evaluates them
# against the data frame it was given (dplyr data masking) instead of reaching
# back into `trafficFatalities$...`, which would ignore any grouping or
# filtering applied to the input.
summarise(
  trafficFatalities,
  MeanFatalities = mean(fatal),
  MedianFatalities = median(fatal),
  MeanNightTimeFatalities = mean(nfatal),
  MedianNightTimeFatalities = median(nfatal)
)
##   MeanFatalities MedianFatalities MeanNightTimeFatalities
## 1       928.6637              701                182.5833
##   MedianNightTimeFatalities
## 1                       135

Assignment 2

Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Build a separately named data frame holding only the 1982 rows and six of
# the original columns.
# `year` is a numeric column, so it is compared against the number 1982; a
# character literal ("1982") would force an implicit character coercion of the
# whole column before comparing.
trafficFatalities1982 <- subset(
  trafficFatalities,
  year == 1982,
  select = c(ColNum, state, year, miles, fatal, nfatal)
)
head(trafficFatalities1982, n = 10)
##    ColNum state year    miles fatal nfatal
## 1       1    al 1982 7233.887   839    146
## 8       8    az 1982 6810.157   724    131
## 15     15    ar 1982 7208.500   550    102
## 22     22    ca 1982 6858.677  4615    944
## 29     29    co 1982 7742.842   668    140
## 36     36    ct 1982 6440.054   515    158
## 43     43    de 1982 7651.654   122     34
## 50     50    fl 1982 7587.130  2653    587
## 57     57    ga 1982 8623.444  1229    225
## 64     64    id 1982 8033.752   256     47

Assignment 3

Create new column names for the new data frame.

# Replace all six column names of the 1982 subset, in column order.
names(trafficFatalities1982) <- c(
  "ColOrder", "St", "Yr", "Mls", "Fatalities", "NightFatalities"
)
head(trafficFatalities1982, n = 10)
##    ColOrder St   Yr      Mls Fatalities NightFatalities
## 1         1 al 1982 7233.887        839             146
## 8         8 az 1982 6810.157        724             131
## 15       15 ar 1982 7208.500        550             102
## 22       22 ca 1982 6858.677       4615             944
## 29       29 co 1982 7742.842        668             140
## 36       36 ct 1982 6440.054        515             158
## 43       43 de 1982 7651.654        122              34
## 50       50 fl 1982 7587.130       2653             587
## 57       57 ga 1982 8623.444       1229             225
## 64       64 id 1982 8033.752        256              47

Assignment 4

Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

# Per-column summary statistics for the renamed 1982 subset.
summary(trafficFatalities1982)
##     ColOrder           St                  Yr            Mls       
##  Min.   :  1.00   Length:48          Min.   :1982   Min.   : 4576  
##  1st Qu.: 83.25   Class :character   1st Qu.:1982   1st Qu.: 6728  
##  Median :165.50   Mode  :character   Median :1982   Median : 7178  
##  Mean   :165.50                      Mean   :1982   Mean   : 7227  
##  3rd Qu.:247.75                      3rd Qu.:1982   3rd Qu.: 7603  
##  Max.   :330.00                      Max.   :1982   Max.   :10355  
##    Fatalities     NightFatalities  
##  Min.   : 105.0   Min.   :  18.00  
##  1st Qu.: 291.2   1st Qu.:  52.25  
##  Median : 696.0   Median : 143.00  
##  Mean   : 909.2   Mean   : 202.65  
##  3rd Qu.:1056.5   3rd Qu.: 234.25  
##  Max.   :4615.0   Max.   :1049.00
# Means of total and night-time fatalities: full data set vs. the 1982 subset.
meanTotalFatality <- mean(trafficFatalities$fatal)
meanNightFatality <- mean(trafficFatalities$nfatal)
meanTotalFatality82 <- mean(trafficFatalities1982$Fatalities)
meanNightFatality82 <- mean(trafficFatalities1982$NightFatalities)

# paste() already joins its arguments with a single space (sep = " "), so the
# text fragments carry no leading/trailing blanks of their own; padding them
# would print doubled spaces around each number.
print(paste(
  "Mean of Total Fatalities is", meanTotalFatality,
  "and Mean of Total Fatalities for the subset is", meanTotalFatality82
))
## [1] "Mean of Total Fatalities is 928.66369047619 and Mean of Total Fatalities for the subset is 909.208333333333"
print(paste(
  "Mean of Total Night Fatalities is", meanNightFatality,
  "and Mean of Total Night Fatalities for subset is", meanNightFatality82
))
## [1] "Mean of Total Night Fatalities is 182.583333333333 and Mean of Total Night Fatalities for subset is 202.645833333333"

# Medians of the same two attributes, again full data set vs. 1982 subset.
medianTotalFatality <- median(trafficFatalities$fatal)
medianNightFatality <- median(trafficFatalities$nfatal)
medianTotalFatality82 <- median(trafficFatalities1982$Fatalities)
medianNightFatality82 <- median(trafficFatalities1982$NightFatalities)

print(paste(
  "Median of Total Fatalities is", medianTotalFatality,
  "and Median of Total Fatalities for the subset is", medianTotalFatality82
))
## [1] "Median of Total Fatalities is 701 and Median of Total Fatalities for the subset is 696"
print(paste(
  "Median of Total Night Fatalities is", medianNightFatality,
  "and Median of Total Night Fatalities for subset is", medianNightFatality82
))
## [1] "Median of Total Night Fatalities is 135 and Median of Total Night Fatalities for subset is 143"

Assignment 5

For at least 3 distinct values in a column, rename each value so that every occurrence of it in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.

#trafficFatalities1982[trafficFatalities1982 == 'az'] <- 'Arizona' 
#trafficFatalities1982[trafficFatalities1982 == 'ca'] <- 'California' 
#trafficFatalities1982[trafficFatalities1982 == 'co'] <- 'Colorado' 

# Spell out three state abbreviations: every entry of `St` equal to the
# abbreviation is replaced by the full state name.
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St, trafficFatalities1982$St == "az", "Arizona"
)
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St, trafficFatalities1982$St == "ca", "California"
)
trafficFatalities1982$St <- replace(
  trafficFatalities1982$St, trafficFatalities1982$St == "co", "Colorado"
)

head(trafficFatalities1982, n = 10)
##    ColOrder         St   Yr      Mls Fatalities NightFatalities
## 1         1         al 1982 7233.887        839             146
## 8         8    Arizona 1982 6810.157        724             131
## 15       15         ar 1982 7208.500        550             102
## 22       22 California 1982 6858.677       4615             944
## 29       29   Colorado 1982 7742.842        668             140
## 36       36         ct 1982 6440.054        515             158
## 43       43         de 1982 7651.654        122              34
## 50       50         fl 1982 7587.130       2653             587
## 57       57         ga 1982 8623.444       1229             225
## 64       64         id 1982 8033.752        256              47

Assignment 6

Display enough rows to see examples of all of steps 1–5 above.

# First 10 rows of the original, unmodified data set.
head(trafficFatalities, n = 10)
##    ColNum state year unemp   income   emppop drinkage youngdrivers    miles
## 1       1    al 1982  14.4 10544.15 50.69204    19.00     0.211572 7233.887
## 2       2    al 1983  13.7 10732.80 52.14703    19.00     0.210768 7836.348
## 3       3    al 1984  11.1 11108.79 54.16809    19.00     0.211484 8262.990
## 4       4    al 1985   8.9 11332.63 55.27114    19.67     0.211140 8726.917
## 5       5    al 1986   9.8 11661.51 56.51450    21.00     0.213400 8952.854
## 6       6    al 1987   7.8 11944.00 57.50988    21.00     0.215527 9166.302
## 7       7    al 1988   7.2 12368.62 56.83453    21.00     0.218328 9674.323
## 8       8    az 1982   9.9 12309.07 56.89330    19.00     0.209012 6810.157
## 9       9    az 1983   9.1 12693.81 57.55363    19.00     0.203855 6587.495
## 10     10    az 1984   5.0 13265.93 60.37902    19.00     0.209127 6709.970
##    fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820  afatal     pop
## 1    839    146        53          9        99         34 309.438 3942002
## 2    930    154        71          8       108         26 341.834 3960008
## 3    932    165        49          7       103         25 304.872 3988992
## 4    882    146        66          9       100         23 276.742 4021008
## 5   1081    172        82         10       120         23 360.716 4049994
## 6   1110    181        94         11       127         31 368.421 4082999
## 7   1023    139        66          8       105         24 298.322 4101992
## 8    724    131        40          7        81         16 173.668 2896997
## 9    675    112        40          7        83         19 196.890 2977004
## 10   869    149        51          8       118         34 212.361 3071996
##     pop1517  pop1820 milestot
## 1  208999.6 221553.4    28516
## 2  202000.1 219125.5    31032
## 3  197000.0 216724.1    32961
## 4  194999.7 214349.0    35091
## 5  203999.9 212000.0    36259
## 6  204999.8 208998.5    37426
## 7  201000.1 193000.5    39684
## 8  141000.0 156378.7    19729
## 9  138999.9 157521.4    19611
## 10 138000.1 158672.5    20613
# First 10 rows of the 1982 subset with renamed columns and state values.
head(trafficFatalities1982, n = 10)
##    ColOrder         St   Yr      Mls Fatalities NightFatalities
## 1         1         al 1982 7233.887        839             146
## 8         8    Arizona 1982 6810.157        724             131
## 15       15         ar 1982 7208.500        550             102
## 22       22 California 1982 6858.677       4615             944
## 29       29   Colorado 1982 7742.842        668             140
## 36       36         ct 1982 6440.054        515             158
## 43       43         de 1982 7651.654        122              34
## 50       50         fl 1982 7587.130       2653             587
## 57       57         ga 1982 8623.444       1229             225
## 64       64         id 1982 8033.752        256              47

Assignment 7

BONUS – place the original .csv file in a GitHub repository and have R read it from the link.

library("curl")
# Read the CSV through a curl connection opened on its raw GitHub URL.
trafficFatailityfromGit <- read.csv(
  file = curl("https://raw.githubusercontent.com/rathish-ps/RLearning/main/data/TrafficFatalities.csv")
)
head(trafficFatailityfromGit, n = 10)
##    ColNum state year unemp   income   emppop drinkage youngdrivers    miles
## 1       1    al 1982  14.4 10544.15 50.69204    19.00     0.211572 7233.887
## 2       2    al 1983  13.7 10732.80 52.14703    19.00     0.210768 7836.348
## 3       3    al 1984  11.1 11108.79 54.16809    19.00     0.211484 8262.990
## 4       4    al 1985   8.9 11332.63 55.27114    19.67     0.211140 8726.917
## 5       5    al 1986   9.8 11661.51 56.51450    21.00     0.213400 8952.854
## 6       6    al 1987   7.8 11944.00 57.50988    21.00     0.215527 9166.302
## 7       7    al 1988   7.2 12368.62 56.83453    21.00     0.218328 9674.323
## 8       8    az 1982   9.9 12309.07 56.89330    19.00     0.209012 6810.157
## 9       9    az 1983   9.1 12693.81 57.55363    19.00     0.203855 6587.495
## 10     10    az 1984   5.0 13265.93 60.37902    19.00     0.209127 6709.970
##    fatal nfatal fatal1517 nfatal1517 fatal1820 nfatal1820  afatal     pop
## 1    839    146        53          9        99         34 309.438 3942002
## 2    930    154        71          8       108         26 341.834 3960008
## 3    932    165        49          7       103         25 304.872 3988992
## 4    882    146        66          9       100         23 276.742 4021008
## 5   1081    172        82         10       120         23 360.716 4049994
## 6   1110    181        94         11       127         31 368.421 4082999
## 7   1023    139        66          8       105         24 298.322 4101992
## 8    724    131        40          7        81         16 173.668 2896997
## 9    675    112        40          7        83         19 196.890 2977004
## 10   869    149        51          8       118         34 212.361 3071996
##     pop1517  pop1820 milestot
## 1  208999.6 221553.4    28516
## 2  202000.1 219125.5    31032
## 3  197000.0 216724.1    32961
## 4  194999.7 214349.0    35091
## 5  203999.9 212000.0    36259
## 6  204999.8 208998.5    37426
## 7  201000.1 193000.5    39684
## 8  141000.0 156378.7    19729
## 9  138999.9 157521.4    19611
## 10 138000.1 158672.5    20613