R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#QUESTION 1 - Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the Fatalities data set (comma-separated, first row holds the column names).
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)

# Preview the first rows of the data.
head(fatalities)
##   X state year spirits unemp   income   emppop  beertax baptist  mormon
## 1 1    al 1982    1.37  14.4 10544.15 50.69204 1.539379 30.3557 0.32829
## 2 2    al 1983    1.36  13.7 10732.80 52.14703 1.788991 30.3336 0.34341
## 3 3    al 1984    1.32  11.1 11108.79 54.16809 1.714286 30.3115 0.35924
## 4 4    al 1985    1.28   8.9 11332.63 55.27114 1.652542 30.2895 0.37579
## 5 5    al 1986    1.23   9.8 11661.51 56.51450 1.609907 30.2674 0.39311
## 6 6    al 1987    1.18   7.8 11944.00 57.50988 1.560000 30.2453 0.41123
##   drinkage     dry youngdrivers    miles breath jail service fatal nfatal
## 1    19.00 25.0063     0.211572 7233.887     no   no      no   839    146
## 2    19.00 22.9942     0.210768 7836.348     no   no      no   930    154
## 3    19.00 24.0426     0.211484 8262.990     no   no      no   932    165
## 4    19.67 23.6339     0.211140 8726.917     no   no      no   882    146
## 5    21.00 23.4647     0.213400 8952.854     no   no      no  1081    172
## 6    21.00 23.7924     0.215527 9166.302     no   no      no  1110    181
##   sfatal fatal1517 nfatal1517 fatal1820 nfatal1820 fatal2124 nfatal2124  afatal
## 1     99        53          9        99         34       120         32 309.438
## 2     98        71          8       108         26       124         35 341.834
## 3     94        49          7       103         25       118         34 304.872
## 4     98        66          9       100         23       114         45 276.742
## 5    119        82         10       120         23       119         29 360.716
## 6    114        94         11       127         31       138         30 368.421
##       pop  pop1517  pop1820  pop2124 milestot unempus emppopus         gsp
## 1 3942002 208999.6 221553.4 290000.1    28516     9.7     57.8 -0.02212476
## 2 3960008 202000.1 219125.5 290000.2    31032     9.6     57.9  0.04655825
## 3 3988992 197000.0 216724.1 288000.2    32961     7.5     59.5  0.06279784
## 4 4021008 194999.7 214349.0 284000.3    35091     7.2     60.1  0.02748997
## 5 4049994 203999.9 212000.0 263000.3    36259     7.0     60.7  0.03214295
## 6 4082999 204999.8 208998.5 258999.8    37426     6.2     61.5  0.04897637
# Overview of every column in the full data set.
fatalities %>% summary()
##        X             state                year         spirits     
##  Min.   :  1.00   Length:336         Min.   :1982   Min.   :0.790  
##  1st Qu.: 84.75   Class :character   1st Qu.:1983   1st Qu.:1.300  
##  Median :168.50   Mode  :character   Median :1985   Median :1.670  
##  Mean   :168.50                      Mean   :1985   Mean   :1.754  
##  3rd Qu.:252.25                      3rd Qu.:1987   3rd Qu.:2.013  
##  Max.   :336.00                      Max.   :1988   Max.   :4.900  
##      unemp            income          emppop         beertax       
##  Min.   : 2.400   Min.   : 9514   Min.   :42.99   Min.   :0.04331  
##  1st Qu.: 5.475   1st Qu.:12086   1st Qu.:57.69   1st Qu.:0.20885  
##  Median : 7.000   Median :13763   Median :61.36   Median :0.35259  
##  Mean   : 7.347   Mean   :13880   Mean   :60.81   Mean   :0.51326  
##  3rd Qu.: 8.900   3rd Qu.:15175   3rd Qu.:64.41   3rd Qu.:0.65157  
##  Max.   :18.000   Max.   :22193   Max.   :71.27   Max.   :2.72076  
##     baptist            mormon           drinkage          dry          
##  Min.   : 0.0000   Min.   : 0.1000   Min.   :18.00   Min.   : 0.00000  
##  1st Qu.: 0.6268   1st Qu.: 0.2722   1st Qu.:20.00   1st Qu.: 0.00000  
##  Median : 1.7492   Median : 0.3931   Median :21.00   Median : 0.08681  
##  Mean   : 7.1569   Mean   : 2.8019   Mean   :20.46   Mean   : 4.26707  
##  3rd Qu.:13.1271   3rd Qu.: 0.6293   3rd Qu.:21.00   3rd Qu.: 2.42481  
##  Max.   :30.3557   Max.   :65.9165   Max.   :21.00   Max.   :45.79210  
##   youngdrivers         miles          breath              jail          
##  Min.   :0.07314   Min.   : 4576   Length:336         Length:336        
##  1st Qu.:0.17037   1st Qu.: 7183   Class :character   Class :character  
##  Median :0.18539   Median : 7796   Mode  :character   Mode  :character  
##  Mean   :0.18593   Mean   : 7891                                        
##  3rd Qu.:0.20219   3rd Qu.: 8504                                        
##  Max.   :0.28163   Max.   :26148                                        
##    service              fatal            nfatal            sfatal     
##  Length:336         Min.   :  79.0   Min.   :  13.00   Min.   :  8.0  
##  Class :character   1st Qu.: 293.8   1st Qu.:  53.75   1st Qu.: 35.0  
##  Mode  :character   Median : 701.0   Median : 135.00   Median : 81.0  
##                     Mean   : 928.7   Mean   : 182.58   Mean   :109.9  
##                     3rd Qu.:1063.5   3rd Qu.: 212.00   3rd Qu.:131.0  
##                     Max.   :5504.0   Max.   :1049.00   Max.   :603.0  
##    fatal1517        nfatal1517      fatal1820       nfatal1820    
##  Min.   :  3.00   Min.   : 0.00   Min.   :  7.0   Min.   :  0.00  
##  1st Qu.: 25.75   1st Qu.: 4.00   1st Qu.: 38.0   1st Qu.: 11.00  
##  Median : 49.00   Median :10.00   Median : 82.0   Median : 24.00  
##  Mean   : 62.61   Mean   :12.26   Mean   :106.7   Mean   : 33.53  
##  3rd Qu.: 77.00   3rd Qu.:15.25   3rd Qu.:130.2   3rd Qu.: 44.00  
##  Max.   :318.00   Max.   :76.00   Max.   :601.0   Max.   :196.00  
##    fatal2124       nfatal2124         afatal            pop          
##  Min.   : 12.0   Min.   :  1.00   Min.   :  24.6   Min.   :  479000  
##  1st Qu.: 42.0   1st Qu.: 13.00   1st Qu.:  90.5   1st Qu.: 1545251  
##  Median : 97.5   Median : 30.00   Median : 211.6   Median : 3310503  
##  Mean   :126.9   Mean   : 41.38   Mean   : 293.3   Mean   : 4930272  
##  3rd Qu.:150.5   3rd Qu.: 49.00   3rd Qu.: 364.0   3rd Qu.: 5751735  
##  Max.   :770.0   Max.   :249.00   Max.   :2094.9   Max.   :28314028  
##     pop1517           pop1820           pop2124           milestot     
##  Min.   :  21000   Min.   :  21000   Min.   :  30000   Min.   :  3993  
##  1st Qu.:  71750   1st Qu.:  76962   1st Qu.: 103500   1st Qu.: 11692  
##  Median : 163000   Median : 170982   Median : 241000   Median : 28484  
##  Mean   : 230816   Mean   : 249090   Mean   : 336390   Mean   : 37101  
##  3rd Qu.: 270500   3rd Qu.: 308311   3rd Qu.: 413000   3rd Qu.: 44140  
##  Max.   :1172000   Max.   :1321004   Max.   :1892998   Max.   :241575  
##     unempus         emppopus          gsp           
##  Min.   :5.500   Min.   :57.80   Min.   :-0.123641  
##  1st Qu.:6.200   1st Qu.:57.90   1st Qu.: 0.001182  
##  Median :7.200   Median :60.10   Median : 0.032413  
##  Mean   :7.529   Mean   :59.97   Mean   : 0.025313  
##  3rd Qu.:9.600   3rd Qu.:61.50   3rd Qu.: 0.056501  
##  Max.   :9.700   Max.   :62.30   Max.   : 0.142361
# Mean and median of miles, computed separately for each state.
by_state <- fatalities %>% group_by(state)
by_state %>%
  summarise(
    meanmiles = mean(miles),
    medianmiles = median(miles)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 48 x 3
##    state meanmiles medianmiles
##    <chr>     <dbl>       <dbl>
##  1 al        8551.       8727.
##  2 ar        7412.       7254.
##  3 az        7742.       6810.
##  4 ca        7759.       7874.
##  5 co        7985.       8092.
##  6 ct        7247.       6979.
##  7 de        8701.       8625.
##  8 fl        7824.       7747.
##  9 ga        9089.       8988.
## 10 ia        7107.       7061.
## # ... with 38 more rows
# Mean and median of drinkage, computed separately for each state.
by_state <- fatalities %>% group_by(state)
by_state %>%
  summarise(
    meandrinkage = mean(drinkage),
    mediandrinkage = median(drinkage)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 48 x 3
##    state meandrinkage mediandrinkage
##    <chr>        <dbl>          <dbl>
##  1 al            20.0           19.7
##  2 ar            21             21  
##  3 az            20.1           21  
##  4 ca            21             21  
##  5 co            21             21  
##  6 ct            20.2           20.3
##  7 de            20.7           21  
##  8 fl            20             20  
##  9 ga            19.8           19.2
## 10 ia            19.7           19  
## # ... with 38 more rows

#SECOND QUESTION -Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Re-read the source data so this question can be run on its own.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)

# Keep five columns and only the rows whose drinkage is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)

# Show up to the first 100 rows of the subset.
head(fatalitiesSubset, 100)
##     state year   income drinkage    miles
## 5      al 1986 11661.51    21.00 8952.854
## 6      al 1987 11944.00    21.00 9166.302
## 7      al 1988 12368.62    21.00 9674.323
## 11     az 1985 13726.70    21.00 6771.263
## 12     az 1986 14107.33    21.00 8129.008
## 13     az 1987 14241.00    21.00 9370.654
## 14     az 1988 14408.08    21.00 9815.721
## 15     ar 1982 10267.30    21.00 7208.500
## 16     ar 1983 10433.49    21.00 7175.917
## 17     ar 1984 10916.48    21.00 7084.820
## 18     ar 1985 11149.36    21.00 7253.918
## 19     ar 1986 11399.38    21.00 7468.999
## 20     ar 1987 11537.00    21.00 7665.831
## 21     ar 1988 11760.35    21.00 8024.625
## 22     ca 1982 15797.14    21.00 6858.677
## 23     ca 1983 15970.18    21.00 7216.292
## 24     ca 1984 16590.11    21.00 7619.176
## 25     ca 1985 16985.17    21.00 7874.067
## 26     ca 1986 17356.04    21.00 8034.910
## 27     ca 1987 17846.00    21.00 8180.633
## 28     ca 1988 18049.09    21.00 8531.990
## 29     co 1982 15082.34    21.00 7742.842
## 30     co 1983 15131.88    21.00 7656.063
## 31     co 1984 15486.81    21.00 7707.853
## 32     co 1985 15569.92    21.00 8092.209
## 33     co 1986 15616.10    21.00 8131.375
## 34     co 1987 15605.00    21.00 8182.028
## 35     co 1988 15845.04    21.00 8380.769
## 39     ct 1985 19312.50    20.33 6979.215
## 40     ct 1986 20152.73    21.00 7661.745
## 41     ct 1987 21192.00    21.00 8338.534
## 42     ct 1988 22193.46    21.00 8061.235
## 45     de 1984 14925.27    21.00 8368.062
## 46     de 1985 15408.90    21.00 8625.425
## 47     de 1986 15822.50    21.00 9045.817
## 48     de 1987 16407.00    21.00 9450.308
## 49     de 1988 16998.07    21.00 9703.021
## 54     fl 1986 15102.17    21.00 7768.756
## 55     fl 1987 15584.00    21.00 7788.331
## 56     fl 1988 15979.79    21.00 8538.230
## 61     ga 1986 13891.64    20.25 9344.768
## 62     ga 1987 14306.00    21.00 9690.281
## 63     ga 1988 14687.20    21.00 9817.396
## 69     id 1987 11859.00    20.50 8135.269
## 70     id 1988 12189.61    21.00 8102.682
## 71     il 1982 14743.44    21.00 5696.535
## 72     il 1983 14745.41    21.00 5862.868
## 73     il 1984 15390.11    21.00 6067.528
## 74     il 1985 15602.75    21.00 6141.676
## 75     il 1986 15988.65    21.00 6345.777
## 76     il 1987 16417.00    21.00 6540.846
## 77     il 1988 16915.30    21.00 6757.613
## 78     in 1982 12282.82    21.00 7149.917
## 79     in 1983 12364.68    21.00 7277.506
## 80     in 1984 13008.79    21.00 7478.887
## 81     in 1985 13161.02    21.00 7416.253
## 82     in 1986 13582.04    21.00 7714.322
## 83     in 1987 13937.00    21.00 7977.216
## 84     in 1988 14363.81    21.00 9201.576
## 90     ia 1987 14284.00    21.00 7342.257
## 91     ia 1988 14111.65    21.00 7730.063
## 92     ks 1982 14094.27    21.00 7333.069
## 93     ks 1983 13917.43    21.00 7479.611
## 94     ks 1984 14308.79    21.00 7670.888
## 95     ks 1985 14631.36    21.00 7867.333
## 96     ks 1986 14977.30    21.00 8100.053
## 97     ks 1987 15152.00    21.00 8304.131
## 98     ks 1988 15167.47    21.00 8481.355
## 99     ky 1982 11071.60    21.00 6937.466
## 100    ky 1983 10913.99    21.00 7194.143
## 101    ky 1984 11441.76    21.00 7513.703
## 102    ky 1985 11405.72    21.00 7654.335
## 103    ky 1986 11602.68    21.00 7859.621
## 104    ky 1987 12008.00    21.00 8135.244
## 105    ky 1988 12340.71    21.00 8482.436
## 111    la 1987 11515.00    20.50 6859.208
## 112    la 1988 11830.61    21.00 7867.977
## 116    me 1985 12609.11    20.50 7969.934
## 117    me 1986 13292.05    21.00 8550.348
## 118    me 1987 13984.00    21.00 9069.937
## 119    me 1988 14538.98    21.00 9461.399
## 120    md 1982 15198.09    21.00 6768.094
## 121    md 1983 15644.50    21.00 7118.825
## 122    md 1984 16313.19    21.00 7289.488
## 123    md 1985 16921.61    21.00 7590.410
## 124    md 1986 17475.75    21.00 7826.705
## 125    md 1987 18167.00    21.00 8046.975
## 126    md 1988 18755.53    21.00 8112.946
## 130    ma 1985 17271.19    20.50 6818.296
## 131    ma 1986 18145.51    21.00 7027.964
## 132    ma 1987 19050.00    21.00 7225.436
## 133    ma 1988 20034.65    21.00 7358.473
## 134    mi 1982 13247.02    21.00 6712.743
## 135    mi 1983 13606.65    21.00 6721.328
## 136    mi 1984 14317.58    21.00 7007.071
## 137    mi 1985 14830.51    21.00 7416.576
## 138    mi 1986 15278.64    21.00 7829.523
## 139    mi 1987 15418.00    21.00 8228.915
## 140    mi 1988 15930.70    21.00 8430.646
## 146    mn 1987 15910.00    21.00 8282.359

#QUESTION 3 - Create new column names for the new data frame

library(tidyverse)
# Re-read the source data so this question can be run on its own.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)

# Keep five columns and only the rows whose drinkage is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)

# Alternative (disabled): rename a single column by looking it up by name.
# names(fatalitiesSubset)[names(fatalitiesSubset) == "state"] <- "NEW_STATE"

# Rename all columns at once; the order matches the `select` list above.
colnames(fatalitiesSubset) <- c(
  "NEW_STATE", "NEW_YEAR", "NEW_INCOME", "NEW_DRINKAGE", "NEW_MILES"
)

# Show up to the first 100 rows with the new column names.
head(fatalitiesSubset, 100)
##     NEW_STATE NEW_YEAR NEW_INCOME NEW_DRINKAGE NEW_MILES
## 5          al     1986   11661.51        21.00  8952.854
## 6          al     1987   11944.00        21.00  9166.302
## 7          al     1988   12368.62        21.00  9674.323
## 11         az     1985   13726.70        21.00  6771.263
## 12         az     1986   14107.33        21.00  8129.008
## 13         az     1987   14241.00        21.00  9370.654
## 14         az     1988   14408.08        21.00  9815.721
## 15         ar     1982   10267.30        21.00  7208.500
## 16         ar     1983   10433.49        21.00  7175.917
## 17         ar     1984   10916.48        21.00  7084.820
## 18         ar     1985   11149.36        21.00  7253.918
## 19         ar     1986   11399.38        21.00  7468.999
## 20         ar     1987   11537.00        21.00  7665.831
## 21         ar     1988   11760.35        21.00  8024.625
## 22         ca     1982   15797.14        21.00  6858.677
## 23         ca     1983   15970.18        21.00  7216.292
## 24         ca     1984   16590.11        21.00  7619.176
## 25         ca     1985   16985.17        21.00  7874.067
## 26         ca     1986   17356.04        21.00  8034.910
## 27         ca     1987   17846.00        21.00  8180.633
## 28         ca     1988   18049.09        21.00  8531.990
## 29         co     1982   15082.34        21.00  7742.842
## 30         co     1983   15131.88        21.00  7656.063
## 31         co     1984   15486.81        21.00  7707.853
## 32         co     1985   15569.92        21.00  8092.209
## 33         co     1986   15616.10        21.00  8131.375
## 34         co     1987   15605.00        21.00  8182.028
## 35         co     1988   15845.04        21.00  8380.769
## 39         ct     1985   19312.50        20.33  6979.215
## 40         ct     1986   20152.73        21.00  7661.745
## 41         ct     1987   21192.00        21.00  8338.534
## 42         ct     1988   22193.46        21.00  8061.235
## 45         de     1984   14925.27        21.00  8368.062
## 46         de     1985   15408.90        21.00  8625.425
## 47         de     1986   15822.50        21.00  9045.817
## 48         de     1987   16407.00        21.00  9450.308
## 49         de     1988   16998.07        21.00  9703.021
## 54         fl     1986   15102.17        21.00  7768.756
## 55         fl     1987   15584.00        21.00  7788.331
## 56         fl     1988   15979.79        21.00  8538.230
## 61         ga     1986   13891.64        20.25  9344.768
## 62         ga     1987   14306.00        21.00  9690.281
## 63         ga     1988   14687.20        21.00  9817.396
## 69         id     1987   11859.00        20.50  8135.269
## 70         id     1988   12189.61        21.00  8102.682
## 71         il     1982   14743.44        21.00  5696.535
## 72         il     1983   14745.41        21.00  5862.868
## 73         il     1984   15390.11        21.00  6067.528
## 74         il     1985   15602.75        21.00  6141.676
## 75         il     1986   15988.65        21.00  6345.777
## 76         il     1987   16417.00        21.00  6540.846
## 77         il     1988   16915.30        21.00  6757.613
## 78         in     1982   12282.82        21.00  7149.917
## 79         in     1983   12364.68        21.00  7277.506
## 80         in     1984   13008.79        21.00  7478.887
## 81         in     1985   13161.02        21.00  7416.253
## 82         in     1986   13582.04        21.00  7714.322
## 83         in     1987   13937.00        21.00  7977.216
## 84         in     1988   14363.81        21.00  9201.576
## 90         ia     1987   14284.00        21.00  7342.257
## 91         ia     1988   14111.65        21.00  7730.063
## 92         ks     1982   14094.27        21.00  7333.069
## 93         ks     1983   13917.43        21.00  7479.611
## 94         ks     1984   14308.79        21.00  7670.888
## 95         ks     1985   14631.36        21.00  7867.333
## 96         ks     1986   14977.30        21.00  8100.053
## 97         ks     1987   15152.00        21.00  8304.131
## 98         ks     1988   15167.47        21.00  8481.355
## 99         ky     1982   11071.60        21.00  6937.466
## 100        ky     1983   10913.99        21.00  7194.143
## 101        ky     1984   11441.76        21.00  7513.703
## 102        ky     1985   11405.72        21.00  7654.335
## 103        ky     1986   11602.68        21.00  7859.621
## 104        ky     1987   12008.00        21.00  8135.244
## 105        ky     1988   12340.71        21.00  8482.436
## 111        la     1987   11515.00        20.50  6859.208
## 112        la     1988   11830.61        21.00  7867.977
## 116        me     1985   12609.11        20.50  7969.934
## 117        me     1986   13292.05        21.00  8550.348
## 118        me     1987   13984.00        21.00  9069.937
## 119        me     1988   14538.98        21.00  9461.399
## 120        md     1982   15198.09        21.00  6768.094
## 121        md     1983   15644.50        21.00  7118.825
## 122        md     1984   16313.19        21.00  7289.488
## 123        md     1985   16921.61        21.00  7590.410
## 124        md     1986   17475.75        21.00  7826.705
## 125        md     1987   18167.00        21.00  8046.975
## 126        md     1988   18755.53        21.00  8112.946
## 130        ma     1985   17271.19        20.50  6818.296
## 131        ma     1986   18145.51        21.00  7027.964
## 132        ma     1987   19050.00        21.00  7225.436
## 133        ma     1988   20034.65        21.00  7358.473
## 134        mi     1982   13247.02        21.00  6712.743
## 135        mi     1983   13606.65        21.00  6721.328
## 136        mi     1984   14317.58        21.00  7007.071
## 137        mi     1985   14830.51        21.00  7416.576
## 138        mi     1986   15278.64        21.00  7829.523
## 139        mi     1987   15418.00        21.00  8228.915
## 140        mi     1988   15930.70        21.00  8430.646
## 146        mn     1987   15910.00        21.00  8282.359

#QUESTION 4 - Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

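In Question 1 the per-state mean of drinkage was between 19 and 21. With the new subset (only rows where drinkage > 20) the per-state mean and median of miles are in the thousands (roughly 7,000 to 10,000 for the states shown), and they differ from the Question 1 values wherever the filter removed rows: for example, the mean miles for al rises from 8551 to 9264, while ar, which lost no rows, is unchanged.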

library(tidyverse)

# Re-read the source data so this question can be run on its own.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)

# Keep five columns and only the rows whose drinkage is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)

# Alternative (disabled): rename a single column by looking it up by name.
# names(fatalitiesSubset)[names(fatalitiesSubset) == "state"] <- "NEW_STATE"

# Rename all columns at once; the order matches the `select` list above.
colnames(fatalitiesSubset) <- c(
  "NEW_STATE", "NEW_YEAR", "NEW_INCOME", "NEW_DRINKAGE", "NEW_MILES"
)

# Preview the first rows with the new column names.
head(fatalitiesSubset)
##    NEW_STATE NEW_YEAR NEW_INCOME NEW_DRINKAGE NEW_MILES
## 5         al     1986   11661.51           21  8952.854
## 6         al     1987   11944.00           21  9166.302
## 7         al     1988   12368.62           21  9674.323
## 11        az     1985   13726.70           21  6771.263
## 12        az     1986   14107.33           21  8129.008
## 13        az     1987   14241.00           21  9370.654
#summary(fatalitiesSubset)

# Mean and median of NEW_MILES in the renamed subset, per NEW_STATE.
by_new_state <- fatalitiesSubset %>% group_by(NEW_STATE)
by_new_state %>%
  summarise(
    NEW_meanmiles = mean(NEW_MILES),
    NEW_medianmiles = median(NEW_MILES)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 47 x 3
##    NEW_STATE NEW_meanmiles NEW_medianmiles
##    <chr>             <dbl>           <dbl>
##  1 al                9264.           9166.
##  2 ar                7412.           7254.
##  3 az                8522.           8750.
##  4 ca                7759.           7874.
##  5 co                7985.           8092.
##  6 ct                7760.           7861.
##  7 de                9039.           9046.
##  8 fl                8032.           7788.
##  9 ga                9617.           9690.
## 10 ia                7536.           7536.
## # ... with 37 more rows

#QUESTION 5 - Rename values in a column: first display the original values and then the changed ones

# Load the BankWages data set (comma-separated, first row holds the column names).
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
bankwagesdataset <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BankWages.csv",
  sep = ",",
  header = TRUE
)

# Show the original values before anything is changed.
head(bankwagesdataset)
##   X    job education gender minority
## 1 1 manage        15   male       no
## 2 2  admin        16   male       no
## 3 3  admin        12 female       no
## 4 4  admin         8 female       no
## 5 5  admin        15   male       no
## 6 6  admin        15   male       no
# Overwrite the job value in the first four rows, then show the result.
bankwagesdataset$job[seq_len(4)] <- "changed"
head(bankwagesdataset)
##   X     job education gender minority
## 1 1 changed        15   male       no
## 2 2 changed        16   male       no
## 3 3 changed        12 female       no
## 4 4 changed         8 female       no
## 5 5   admin        15   male       no
## 6 6   admin        15   male       no

#QUESTION 6 - Uploaded the CSV to a personal GitHub repository and read it from the raw file URL.

# Read the copy of BankWages.csv from the raw GitHub URL.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
dataSetAtgit <- read.table(
  "https://raw.githubusercontent.com/yathdeep/Week2Assignment/main/BankWages.csv",
  sep = ",",
  header = TRUE
)

# Preview the first rows of the data read from GitHub.
head(dataSetAtgit)
##   X    job education gender minority
## 1 1 manage        15   male       no
## 2 2  admin        16   male       no
## 3 3  admin        12 female       no
## 4 4  admin         8 female       no
## 5 5  admin        15   male       no
## 6 6  admin        15   male       no

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.