This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#QUESTION 1 - Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the AER Fatalities data set from the Rdatasets CSV mirror.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)
# Preview the first six rows.
head(fatalities)
## X state year spirits unemp income emppop beertax baptist mormon
## 1 1 al 1982 1.37 14.4 10544.15 50.69204 1.539379 30.3557 0.32829
## 2 2 al 1983 1.36 13.7 10732.80 52.14703 1.788991 30.3336 0.34341
## 3 3 al 1984 1.32 11.1 11108.79 54.16809 1.714286 30.3115 0.35924
## 4 4 al 1985 1.28 8.9 11332.63 55.27114 1.652542 30.2895 0.37579
## 5 5 al 1986 1.23 9.8 11661.51 56.51450 1.609907 30.2674 0.39311
## 6 6 al 1987 1.18 7.8 11944.00 57.50988 1.560000 30.2453 0.41123
## drinkage dry youngdrivers miles breath jail service fatal nfatal
## 1 19.00 25.0063 0.211572 7233.887 no no no 839 146
## 2 19.00 22.9942 0.210768 7836.348 no no no 930 154
## 3 19.00 24.0426 0.211484 8262.990 no no no 932 165
## 4 19.67 23.6339 0.211140 8726.917 no no no 882 146
## 5 21.00 23.4647 0.213400 8952.854 no no no 1081 172
## 6 21.00 23.7924 0.215527 9166.302 no no no 1110 181
## sfatal fatal1517 nfatal1517 fatal1820 nfatal1820 fatal2124 nfatal2124 afatal
## 1 99 53 9 99 34 120 32 309.438
## 2 98 71 8 108 26 124 35 341.834
## 3 94 49 7 103 25 118 34 304.872
## 4 98 66 9 100 23 114 45 276.742
## 5 119 82 10 120 23 119 29 360.716
## 6 114 94 11 127 31 138 30 368.421
## pop pop1517 pop1820 pop2124 milestot unempus emppopus gsp
## 1 3942002 208999.6 221553.4 290000.1 28516 9.7 57.8 -0.02212476
## 2 3960008 202000.1 219125.5 290000.2 31032 9.6 57.9 0.04655825
## 3 3988992 197000.0 216724.1 288000.2 32961 7.5 59.5 0.06279784
## 4 4021008 194999.7 214349.0 284000.3 35091 7.2 60.1 0.02748997
## 5 4049994 203999.9 212000.0 263000.3 36259 7.0 60.7 0.03214295
## 6 4082999 204999.8 208998.5 258999.8 37426 6.2 61.5 0.04897637
# Overview of every column in the full data set: quartiles and mean for the
# numeric columns, length/class/mode for the character columns.
fatalities %>% summary()
## X state year spirits
## Min. : 1.00 Length:336 Min. :1982 Min. :0.790
## 1st Qu.: 84.75 Class :character 1st Qu.:1983 1st Qu.:1.300
## Median :168.50 Mode :character Median :1985 Median :1.670
## Mean :168.50 Mean :1985 Mean :1.754
## 3rd Qu.:252.25 3rd Qu.:1987 3rd Qu.:2.013
## Max. :336.00 Max. :1988 Max. :4.900
## unemp income emppop beertax
## Min. : 2.400 Min. : 9514 Min. :42.99 Min. :0.04331
## 1st Qu.: 5.475 1st Qu.:12086 1st Qu.:57.69 1st Qu.:0.20885
## Median : 7.000 Median :13763 Median :61.36 Median :0.35259
## Mean : 7.347 Mean :13880 Mean :60.81 Mean :0.51326
## 3rd Qu.: 8.900 3rd Qu.:15175 3rd Qu.:64.41 3rd Qu.:0.65157
## Max. :18.000 Max. :22193 Max. :71.27 Max. :2.72076
## baptist mormon drinkage dry
## Min. : 0.0000 Min. : 0.1000 Min. :18.00 Min. : 0.00000
## 1st Qu.: 0.6268 1st Qu.: 0.2722 1st Qu.:20.00 1st Qu.: 0.00000
## Median : 1.7492 Median : 0.3931 Median :21.00 Median : 0.08681
## Mean : 7.1569 Mean : 2.8019 Mean :20.46 Mean : 4.26707
## 3rd Qu.:13.1271 3rd Qu.: 0.6293 3rd Qu.:21.00 3rd Qu.: 2.42481
## Max. :30.3557 Max. :65.9165 Max. :21.00 Max. :45.79210
## youngdrivers miles breath jail
## Min. :0.07314 Min. : 4576 Length:336 Length:336
## 1st Qu.:0.17037 1st Qu.: 7183 Class :character Class :character
## Median :0.18539 Median : 7796 Mode :character Mode :character
## Mean :0.18593 Mean : 7891
## 3rd Qu.:0.20219 3rd Qu.: 8504
## Max. :0.28163 Max. :26148
## service fatal nfatal sfatal
## Length:336 Min. : 79.0 Min. : 13.00 Min. : 8.0
## Class :character 1st Qu.: 293.8 1st Qu.: 53.75 1st Qu.: 35.0
## Mode :character Median : 701.0 Median : 135.00 Median : 81.0
## Mean : 928.7 Mean : 182.58 Mean :109.9
## 3rd Qu.:1063.5 3rd Qu.: 212.00 3rd Qu.:131.0
## Max. :5504.0 Max. :1049.00 Max. :603.0
## fatal1517 nfatal1517 fatal1820 nfatal1820
## Min. : 3.00 Min. : 0.00 Min. : 7.0 Min. : 0.00
## 1st Qu.: 25.75 1st Qu.: 4.00 1st Qu.: 38.0 1st Qu.: 11.00
## Median : 49.00 Median :10.00 Median : 82.0 Median : 24.00
## Mean : 62.61 Mean :12.26 Mean :106.7 Mean : 33.53
## 3rd Qu.: 77.00 3rd Qu.:15.25 3rd Qu.:130.2 3rd Qu.: 44.00
## Max. :318.00 Max. :76.00 Max. :601.0 Max. :196.00
## fatal2124 nfatal2124 afatal pop
## Min. : 12.0 Min. : 1.00 Min. : 24.6 Min. : 479000
## 1st Qu.: 42.0 1st Qu.: 13.00 1st Qu.: 90.5 1st Qu.: 1545251
## Median : 97.5 Median : 30.00 Median : 211.6 Median : 3310503
## Mean :126.9 Mean : 41.38 Mean : 293.3 Mean : 4930272
## 3rd Qu.:150.5 3rd Qu.: 49.00 3rd Qu.: 364.0 3rd Qu.: 5751735
## Max. :770.0 Max. :249.00 Max. :2094.9 Max. :28314028
## pop1517 pop1820 pop2124 milestot
## Min. : 21000 Min. : 21000 Min. : 30000 Min. : 3993
## 1st Qu.: 71750 1st Qu.: 76962 1st Qu.: 103500 1st Qu.: 11692
## Median : 163000 Median : 170982 Median : 241000 Median : 28484
## Mean : 230816 Mean : 249090 Mean : 336390 Mean : 37101
## 3rd Qu.: 270500 3rd Qu.: 308311 3rd Qu.: 413000 3rd Qu.: 44140
## Max. :1172000 Max. :1321004 Max. :1892998 Max. :241575
## unempus emppopus gsp
## Min. :5.500 Min. :57.80 Min. :-0.123641
## 1st Qu.:6.200 1st Qu.:57.90 1st Qu.: 0.001182
## Median :7.200 Median :60.10 Median : 0.032413
## Mean :7.529 Mean :59.97 Mean : 0.025313
## 3rd Qu.:9.600 3rd Qu.:61.50 3rd Qu.: 0.056501
## Max. :9.700 Max. :62.30 Max. : 0.142361
# Mean and median of `miles` for each state.
by_state <- fatalities %>% group_by(state)
by_state %>%
  summarise(
    meanmiles = mean(miles),
    medianmiles = median(miles)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 48 x 3
## state meanmiles medianmiles
## <chr> <dbl> <dbl>
## 1 al 8551. 8727.
## 2 ar 7412. 7254.
## 3 az 7742. 6810.
## 4 ca 7759. 7874.
## 5 co 7985. 8092.
## 6 ct 7247. 6979.
## 7 de 8701. 8625.
## 8 fl 7824. 7747.
## 9 ga 9089. 8988.
## 10 ia 7107. 7061.
## # ... with 38 more rows
# Mean and median of `drinkage` for each state.
by_state <- fatalities %>% group_by(state)
by_state %>%
  summarise(
    meandrinkage = mean(drinkage),
    mediandrinkage = median(drinkage)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 48 x 3
## state meandrinkage mediandrinkage
## <chr> <dbl> <dbl>
## 1 al 20.0 19.7
## 2 ar 21 21
## 3 az 20.1 21
## 4 ca 21 21
## 5 co 21 21
## 6 ct 20.2 20.3
## 7 de 20.7 21
## 8 fl 20 20
## 9 ga 19.8 19.2
## 10 ia 19.7 19
## # ... with 38 more rows
#SECOND QUESTION -Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Read the Fatalities CSV from the Rdatasets mirror.
# `TRUE` is used instead of `T`, which is a reassignable variable.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)
# Keep five columns, and only the rows whose drinkage value is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)
# Show the first 100 rows of the subset.
head(fatalitiesSubset, 100)
## state year income drinkage miles
## 5 al 1986 11661.51 21.00 8952.854
## 6 al 1987 11944.00 21.00 9166.302
## 7 al 1988 12368.62 21.00 9674.323
## 11 az 1985 13726.70 21.00 6771.263
## 12 az 1986 14107.33 21.00 8129.008
## 13 az 1987 14241.00 21.00 9370.654
## 14 az 1988 14408.08 21.00 9815.721
## 15 ar 1982 10267.30 21.00 7208.500
## 16 ar 1983 10433.49 21.00 7175.917
## 17 ar 1984 10916.48 21.00 7084.820
## 18 ar 1985 11149.36 21.00 7253.918
## 19 ar 1986 11399.38 21.00 7468.999
## 20 ar 1987 11537.00 21.00 7665.831
## 21 ar 1988 11760.35 21.00 8024.625
## 22 ca 1982 15797.14 21.00 6858.677
## 23 ca 1983 15970.18 21.00 7216.292
## 24 ca 1984 16590.11 21.00 7619.176
## 25 ca 1985 16985.17 21.00 7874.067
## 26 ca 1986 17356.04 21.00 8034.910
## 27 ca 1987 17846.00 21.00 8180.633
## 28 ca 1988 18049.09 21.00 8531.990
## 29 co 1982 15082.34 21.00 7742.842
## 30 co 1983 15131.88 21.00 7656.063
## 31 co 1984 15486.81 21.00 7707.853
## 32 co 1985 15569.92 21.00 8092.209
## 33 co 1986 15616.10 21.00 8131.375
## 34 co 1987 15605.00 21.00 8182.028
## 35 co 1988 15845.04 21.00 8380.769
## 39 ct 1985 19312.50 20.33 6979.215
## 40 ct 1986 20152.73 21.00 7661.745
## 41 ct 1987 21192.00 21.00 8338.534
## 42 ct 1988 22193.46 21.00 8061.235
## 45 de 1984 14925.27 21.00 8368.062
## 46 de 1985 15408.90 21.00 8625.425
## 47 de 1986 15822.50 21.00 9045.817
## 48 de 1987 16407.00 21.00 9450.308
## 49 de 1988 16998.07 21.00 9703.021
## 54 fl 1986 15102.17 21.00 7768.756
## 55 fl 1987 15584.00 21.00 7788.331
## 56 fl 1988 15979.79 21.00 8538.230
## 61 ga 1986 13891.64 20.25 9344.768
## 62 ga 1987 14306.00 21.00 9690.281
## 63 ga 1988 14687.20 21.00 9817.396
## 69 id 1987 11859.00 20.50 8135.269
## 70 id 1988 12189.61 21.00 8102.682
## 71 il 1982 14743.44 21.00 5696.535
## 72 il 1983 14745.41 21.00 5862.868
## 73 il 1984 15390.11 21.00 6067.528
## 74 il 1985 15602.75 21.00 6141.676
## 75 il 1986 15988.65 21.00 6345.777
## 76 il 1987 16417.00 21.00 6540.846
## 77 il 1988 16915.30 21.00 6757.613
## 78 in 1982 12282.82 21.00 7149.917
## 79 in 1983 12364.68 21.00 7277.506
## 80 in 1984 13008.79 21.00 7478.887
## 81 in 1985 13161.02 21.00 7416.253
## 82 in 1986 13582.04 21.00 7714.322
## 83 in 1987 13937.00 21.00 7977.216
## 84 in 1988 14363.81 21.00 9201.576
## 90 ia 1987 14284.00 21.00 7342.257
## 91 ia 1988 14111.65 21.00 7730.063
## 92 ks 1982 14094.27 21.00 7333.069
## 93 ks 1983 13917.43 21.00 7479.611
## 94 ks 1984 14308.79 21.00 7670.888
## 95 ks 1985 14631.36 21.00 7867.333
## 96 ks 1986 14977.30 21.00 8100.053
## 97 ks 1987 15152.00 21.00 8304.131
## 98 ks 1988 15167.47 21.00 8481.355
## 99 ky 1982 11071.60 21.00 6937.466
## 100 ky 1983 10913.99 21.00 7194.143
## 101 ky 1984 11441.76 21.00 7513.703
## 102 ky 1985 11405.72 21.00 7654.335
## 103 ky 1986 11602.68 21.00 7859.621
## 104 ky 1987 12008.00 21.00 8135.244
## 105 ky 1988 12340.71 21.00 8482.436
## 111 la 1987 11515.00 20.50 6859.208
## 112 la 1988 11830.61 21.00 7867.977
## 116 me 1985 12609.11 20.50 7969.934
## 117 me 1986 13292.05 21.00 8550.348
## 118 me 1987 13984.00 21.00 9069.937
## 119 me 1988 14538.98 21.00 9461.399
## 120 md 1982 15198.09 21.00 6768.094
## 121 md 1983 15644.50 21.00 7118.825
## 122 md 1984 16313.19 21.00 7289.488
## 123 md 1985 16921.61 21.00 7590.410
## 124 md 1986 17475.75 21.00 7826.705
## 125 md 1987 18167.00 21.00 8046.975
## 126 md 1988 18755.53 21.00 8112.946
## 130 ma 1985 17271.19 20.50 6818.296
## 131 ma 1986 18145.51 21.00 7027.964
## 132 ma 1987 19050.00 21.00 7225.436
## 133 ma 1988 20034.65 21.00 7358.473
## 134 mi 1982 13247.02 21.00 6712.743
## 135 mi 1983 13606.65 21.00 6721.328
## 136 mi 1984 14317.58 21.00 7007.071
## 137 mi 1985 14830.51 21.00 7416.576
## 138 mi 1986 15278.64 21.00 7829.523
## 139 mi 1987 15418.00 21.00 8228.915
## 140 mi 1988 15930.70 21.00 8430.646
## 146 mn 1987 15910.00 21.00 8282.359
#QUESTION 3 - Create new column names for the new data frame
library(tidyverse)
# Read the Fatalities CSV from the Rdatasets mirror.
# `TRUE` is used instead of `T`, which is a reassignable variable.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)
# Keep five columns, and only the rows whose drinkage value is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)
# Alternative that renames a single column by name (kept for reference):
# names(fatalitiesSubset)[names(fatalitiesSubset) == "state"] <- "NEW_STATE"
# Replace all five column names at once; the order matches `select` above.
colnames(fatalitiesSubset) <- c(
  "NEW_STATE", "NEW_YEAR", "NEW_INCOME", "NEW_DRINKAGE", "NEW_MILES"
)
# Show the first 100 rows with the new column names.
head(fatalitiesSubset, 100)
## NEW_STATE NEW_YEAR NEW_INCOME NEW_DRINKAGE NEW_MILES
## 5 al 1986 11661.51 21.00 8952.854
## 6 al 1987 11944.00 21.00 9166.302
## 7 al 1988 12368.62 21.00 9674.323
## 11 az 1985 13726.70 21.00 6771.263
## 12 az 1986 14107.33 21.00 8129.008
## 13 az 1987 14241.00 21.00 9370.654
## 14 az 1988 14408.08 21.00 9815.721
## 15 ar 1982 10267.30 21.00 7208.500
## 16 ar 1983 10433.49 21.00 7175.917
## 17 ar 1984 10916.48 21.00 7084.820
## 18 ar 1985 11149.36 21.00 7253.918
## 19 ar 1986 11399.38 21.00 7468.999
## 20 ar 1987 11537.00 21.00 7665.831
## 21 ar 1988 11760.35 21.00 8024.625
## 22 ca 1982 15797.14 21.00 6858.677
## 23 ca 1983 15970.18 21.00 7216.292
## 24 ca 1984 16590.11 21.00 7619.176
## 25 ca 1985 16985.17 21.00 7874.067
## 26 ca 1986 17356.04 21.00 8034.910
## 27 ca 1987 17846.00 21.00 8180.633
## 28 ca 1988 18049.09 21.00 8531.990
## 29 co 1982 15082.34 21.00 7742.842
## 30 co 1983 15131.88 21.00 7656.063
## 31 co 1984 15486.81 21.00 7707.853
## 32 co 1985 15569.92 21.00 8092.209
## 33 co 1986 15616.10 21.00 8131.375
## 34 co 1987 15605.00 21.00 8182.028
## 35 co 1988 15845.04 21.00 8380.769
## 39 ct 1985 19312.50 20.33 6979.215
## 40 ct 1986 20152.73 21.00 7661.745
## 41 ct 1987 21192.00 21.00 8338.534
## 42 ct 1988 22193.46 21.00 8061.235
## 45 de 1984 14925.27 21.00 8368.062
## 46 de 1985 15408.90 21.00 8625.425
## 47 de 1986 15822.50 21.00 9045.817
## 48 de 1987 16407.00 21.00 9450.308
## 49 de 1988 16998.07 21.00 9703.021
## 54 fl 1986 15102.17 21.00 7768.756
## 55 fl 1987 15584.00 21.00 7788.331
## 56 fl 1988 15979.79 21.00 8538.230
## 61 ga 1986 13891.64 20.25 9344.768
## 62 ga 1987 14306.00 21.00 9690.281
## 63 ga 1988 14687.20 21.00 9817.396
## 69 id 1987 11859.00 20.50 8135.269
## 70 id 1988 12189.61 21.00 8102.682
## 71 il 1982 14743.44 21.00 5696.535
## 72 il 1983 14745.41 21.00 5862.868
## 73 il 1984 15390.11 21.00 6067.528
## 74 il 1985 15602.75 21.00 6141.676
## 75 il 1986 15988.65 21.00 6345.777
## 76 il 1987 16417.00 21.00 6540.846
## 77 il 1988 16915.30 21.00 6757.613
## 78 in 1982 12282.82 21.00 7149.917
## 79 in 1983 12364.68 21.00 7277.506
## 80 in 1984 13008.79 21.00 7478.887
## 81 in 1985 13161.02 21.00 7416.253
## 82 in 1986 13582.04 21.00 7714.322
## 83 in 1987 13937.00 21.00 7977.216
## 84 in 1988 14363.81 21.00 9201.576
## 90 ia 1987 14284.00 21.00 7342.257
## 91 ia 1988 14111.65 21.00 7730.063
## 92 ks 1982 14094.27 21.00 7333.069
## 93 ks 1983 13917.43 21.00 7479.611
## 94 ks 1984 14308.79 21.00 7670.888
## 95 ks 1985 14631.36 21.00 7867.333
## 96 ks 1986 14977.30 21.00 8100.053
## 97 ks 1987 15152.00 21.00 8304.131
## 98 ks 1988 15167.47 21.00 8481.355
## 99 ky 1982 11071.60 21.00 6937.466
## 100 ky 1983 10913.99 21.00 7194.143
## 101 ky 1984 11441.76 21.00 7513.703
## 102 ky 1985 11405.72 21.00 7654.335
## 103 ky 1986 11602.68 21.00 7859.621
## 104 ky 1987 12008.00 21.00 8135.244
## 105 ky 1988 12340.71 21.00 8482.436
## 111 la 1987 11515.00 20.50 6859.208
## 112 la 1988 11830.61 21.00 7867.977
## 116 me 1985 12609.11 20.50 7969.934
## 117 me 1986 13292.05 21.00 8550.348
## 118 me 1987 13984.00 21.00 9069.937
## 119 me 1988 14538.98 21.00 9461.399
## 120 md 1982 15198.09 21.00 6768.094
## 121 md 1983 15644.50 21.00 7118.825
## 122 md 1984 16313.19 21.00 7289.488
## 123 md 1985 16921.61 21.00 7590.410
## 124 md 1986 17475.75 21.00 7826.705
## 125 md 1987 18167.00 21.00 8046.975
## 126 md 1988 18755.53 21.00 8112.946
## 130 ma 1985 17271.19 20.50 6818.296
## 131 ma 1986 18145.51 21.00 7027.964
## 132 ma 1987 19050.00 21.00 7225.436
## 133 ma 1988 20034.65 21.00 7358.473
## 134 mi 1982 13247.02 21.00 6712.743
## 135 mi 1983 13606.65 21.00 6721.328
## 136 mi 1984 14317.58 21.00 7007.071
## 137 mi 1985 14830.51 21.00 7416.576
## 138 mi 1986 15278.64 21.00 7829.523
## 139 mi 1987 15418.00 21.00 8228.915
## 140 mi 1988 15930.70 21.00 8430.646
## 146 mn 1987 15910.00 21.00 8282.359
#QUESTION 4 - Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
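In Question 1 the per-state mean and median of drinkage are between 19 and 21, and the per-state mean and median of miles are roughly 6,800 to 9,100 for the states shown. In the new subset (rows with drinkage > 20) the per-state mean and median of miles are roughly 7,300 to 9,700 for the states shown, and 47 states remain instead of 48. The values differ because of the different subset: states that lost rows to the filter change (for example, al's mean miles rises from 8551 to 9264), while states whose rows all pass the filter (ar, ca, co) keep the same values.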
library(tidyverse)
# Read the Fatalities CSV from the Rdatasets mirror.
# `TRUE` is used instead of `T`, which is a reassignable variable.
fatalities <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Fatalities.csv",
  sep = ",",
  header = TRUE
)
# Keep five columns, and only the rows whose drinkage value is above 20.
fatalitiesSubset <- subset(
  fatalities,
  subset = drinkage > 20,
  select = c(state, year, income, drinkage, miles)
)
# Alternative that renames a single column by name (kept for reference):
# names(fatalitiesSubset)[names(fatalitiesSubset) == "state"] <- "NEW_STATE"
# Replace all five column names at once; the order matches `select` above.
colnames(fatalitiesSubset) <- c(
  "NEW_STATE", "NEW_YEAR", "NEW_INCOME", "NEW_DRINKAGE", "NEW_MILES"
)
# Preview the first six rows of the renamed subset.
head(fatalitiesSubset)
## NEW_STATE NEW_YEAR NEW_INCOME NEW_DRINKAGE NEW_MILES
## 5 al 1986 11661.51 21 8952.854
## 6 al 1987 11944.00 21 9166.302
## 7 al 1988 12368.62 21 9674.323
## 11 az 1985 13726.70 21 6771.263
## 12 az 1986 14107.33 21 8129.008
## 13 az 1987 14241.00 21 9370.654
# NOTE(review): the question asks for a summary() overview of the new data
# frame, but the call below is currently disabled.
# summary(fatalitiesSubset)
# Mean and median of NEW_MILES for each state in the subset.
by_new_state <- fatalitiesSubset %>% group_by(NEW_STATE)
by_new_state %>%
  summarise(
    NEW_meanmiles = mean(NEW_MILES),
    NEW_medianmiles = median(NEW_MILES)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 47 x 3
## NEW_STATE NEW_meanmiles NEW_medianmiles
## <chr> <dbl> <dbl>
## 1 al 9264. 9166.
## 2 ar 7412. 7254.
## 3 az 8522. 8750.
## 4 ca 7759. 7874.
## 5 co 7985. 8092.
## 6 ct 7760. 7861.
## 7 de 9039. 9046.
## 8 fl 8032. 7788.
## 9 ga 9617. 9690.
## 10 ia 7536. 7536.
## # ... with 37 more rows
#QUESTION 5 - Rename values in a column: first display the original values and then the changed ones
# Load the AER BankWages data set from the Rdatasets CSV mirror.
# `TRUE` is used instead of `T`, which is a reassignable variable.
bankwagesdataset <- read.table(
  "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BankWages.csv",
  sep = ",",
  header = TRUE
)
# Show the original values before any are changed.
head(bankwagesdataset)
## X job education gender minority
## 1 1 manage 15 male no
## 2 2 admin 16 male no
## 3 3 admin 12 female no
## 4 4 admin 8 female no
## 5 5 admin 15 male no
## 6 6 admin 15 male no
# Overwrite the job value of the first four rows with "changed", then show the
# first six rows so the changed values appear next to untouched ones.
bankwagesdataset$job[1:4] <- "changed"
head(bankwagesdataset)
## X job education gender minority
## 1 1 changed 15 male no
## 2 2 changed 16 male no
## 3 3 changed 12 female no
## 4 4 changed 8 female no
## 5 5 admin 15 male no
## 6 6 admin 15 male no
#question 6- uploaded csv to personal git and read raw from that URL.
# Read the BankWages CSV from the raw GitHub URL of the personal repository.
# `TRUE` is used instead of `T`, which is a reassignable variable.
dataSetAtgit <- read.table(
  "https://raw.githubusercontent.com/yathdeep/Week2Assignment/main/BankWages.csv",
  sep = ",",
  header = TRUE
)
# Preview the first six rows to confirm the file was read.
head(dataSetAtgit)
## X job education gender minority
## 1 1 manage 15 male no
## 2 2 admin 16 male no
## 3 3 admin 12 female no
## 4 4 admin 8 female no
## 5 5 admin 15 male no
## 6 6 admin 15 male no
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.