## Please select one, download it and perform the following tasks:
# Location of the carData "States" CSV in the Rdatasets GitHub repository.
url <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/carData/States.csv"
# Read the file straight from the web. The first line supplies the column
# names, and text columns stay character vectors instead of becoming factors.
edu_us <- read.csv(
  file = url,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Auto-print the whole data frame.
edu_us
##     X region   pop SATV SATM percent dollars pay
## 1  AL    ESC  4041  470  514       8   3.648  27
## 2  AK    PAC   550  438  476      42   7.887  43
## 3  AZ    MTN  3665  445  497      25   4.231  30
## 4  AR    WSC  2351  470  511       6   3.334  23
## 5  CA    PAC 29760  419  484      45   4.826  39
## 6  CO    MTN  3294  456  513      28   4.809  31
## 7  CN     NE  3287  430  471      74   7.914  43
## 8  DE     SA   666  433  470      58   6.016  35
## 9  DC     SA   607  409  441      68   8.210  39
## 10 FL     SA 12938  418  466      44   5.154  30
## 11 GA     SA  6478  401  443      57   4.860  29
## 12 HI    PAC  1108  404  481      52   5.008  32
## 13 ID    MTN  1007  466  502      17   3.200  25
## 14 IL    ENC 11431  466  528      16   5.062  34
## 15 IN    ENC  5544  408  459      54   5.051  32
## 16 IA    WNC  2777  511  577       5   4.839  28
## 17 KS    WNC  2478  492  548      10   5.009  29
## 18 KY    ESC  3685  473  521      10   4.390  29
## 19 LA    WSC  4220  476  517       9   4.012  26
## 20 ME     NE  1228  423  463      60   5.894  28
## 21 MD     SA  4781  430  478      59   6.184  38
## 22 MA     NE  6016  427  473      72   6.351  36
## 23 MI    ENC  9295  454  514      12   5.257  38
## 24 MN    WNC  4375  477  542      14   5.260  33
## 25 MS    ESC  2573  477  519       4   3.322  24
## 26 MO    WNC  5117  473  522      12   4.415  28
## 27 MT    MTN   799  464  523      20   5.184  26
## 28 NE    WNC  1578  484  546      10   4.381  26
## 29 NV    MTN  1202  434  487      24   4.564  32
## 30 NH     NE  1109  442  486      67   5.504  31
## 31 NJ     MA  7730  418  473      69   9.159  38
## 32 NM    MTN  1515  480  527      12   4.446  26
## 33 NY     MA 17990  412  470      70   8.500  42
## 34 NC     SA  6629  401  440      55   4.802  29
## 35 ND    WNC   639  505  564       6   3.685  23
## 36 OH    ENC 10847  450  499      22   5.639  32
## 37 OK    WSC  3146  478  523       9   3.742  24
## 38 OR    PAC  2842  439  484      49   5.291  32
## 39 PA     MA 11882  420  463      64   6.534  36
## 40 RI     NE  1003  422  461      62   6.989  37
## 41 SC     SA  3487  397  437      54   4.327  28
## 42 SD    WNC   696  506  555       5   3.730  22
## 43 TN    ESC  4877  483  525      12   3.707  28
## 44 TX    WSC 16987  413  461      42   4.238  28
## 45 UT    MTN  1723  492  539       5   2.993  25
## 46 VT     NE   563  431  466      62   5.740  31
## 47 VA     SA  6187  425  470      58   5.360  32
## 48 WA    PAC  4867  437  486      44   5.045  33
## 49 WV     SA  1793  443  490      15   5.046  26
## 50 WI    ENC  4892  476  543      11   5.946  33
## 51 WY    MTN   454  458  519      13   5.255  29
# Number of rows in the full data frame.
nrow(edu_us)
## [1] 51
## 1. Use the summary function to gain an overview of the data set. 
## Then display the mean and median for at least two attributes

# Per-column overview: length/class/mode for the character columns and the
# min, quartiles, median, mean and max for the numeric ones.
summary(edu_us)
##       X                region               pop             SATV      
##  Length:51          Length:51          Min.   :  454   Min.   :397.0  
##  Class :character   Class :character   1st Qu.: 1215   1st Qu.:422.5  
##  Mode  :character   Mode  :character   Median : 3294   Median :443.0  
##                                        Mean   : 4877   Mean   :448.2  
##                                        3rd Qu.: 5780   3rd Qu.:474.5  
##                                        Max.   :29760   Max.   :511.0  
##       SATM          percent         dollars           pay       
##  Min.   :437.0   Min.   : 4.00   Min.   :2.993   Min.   :22.00  
##  1st Qu.:470.0   1st Qu.:11.50   1st Qu.:4.354   1st Qu.:27.50  
##  Median :490.0   Median :25.00   Median :5.045   Median :30.00  
##  Mean   :497.4   Mean   :33.75   Mean   :5.175   Mean   :30.94  
##  3rd Qu.:522.5   3rd Qu.:57.50   3rd Qu.:5.689   3rd Qu.:33.50  
##  Max.   :577.0   Max.   :74.00   Max.   :9.159   Max.   :43.00
# SATV column of the full data set: mean, then median.
m1 <- mean(edu_us[["SATV"]])
m1
## [1] 448.1569
med1 <- median(edu_us[["SATV"]])
med1
## [1] 443
# pop column of the full data set: mean, then median.
m2 <- mean(edu_us[["pop"]])
m2
## [1] 4876.647
med2 <- median(edu_us[["pop"]])
med2
## [1] 3294
## SATV Mean   :448.2
## SATV Median   :443.0
## Pop  Mean   : 4877
## Pop  Median   : 3294

## 2. Create a new data frame with a subset of the columns and rows.  Make sure to rename it.

# Keep the rows whose SATV is above 445 and every column except X and SATM.
# which() leaves out rows where the comparison is NA, and the original row
# names are carried over into the subset.
df2 <- edu_us[
  which(edu_us[["SATV"]] > 445),
  setdiff(names(edu_us), c("X", "SATM")),
  drop = FALSE
]
df2
##    region   pop SATV percent dollars pay
## 1     ESC  4041  470       8   3.648  27
## 4     WSC  2351  470       6   3.334  23
## 6     MTN  3294  456      28   4.809  31
## 13    MTN  1007  466      17   3.200  25
## 14    ENC 11431  466      16   5.062  34
## 16    WNC  2777  511       5   4.839  28
## 17    WNC  2478  492      10   5.009  29
## 18    ESC  3685  473      10   4.390  29
## 19    WSC  4220  476       9   4.012  26
## 23    ENC  9295  454      12   5.257  38
## 24    WNC  4375  477      14   5.260  33
## 25    ESC  2573  477       4   3.322  24
## 26    WNC  5117  473      12   4.415  28
## 27    MTN   799  464      20   5.184  26
## 28    WNC  1578  484      10   4.381  26
## 32    MTN  1515  480      12   4.446  26
## 35    WNC   639  505       6   3.685  23
## 36    ENC 10847  450      22   5.639  32
## 37    WSC  3146  478       9   3.742  24
## 42    WNC   696  506       5   3.730  22
## 43    ESC  4877  483      12   3.707  28
## 45    MTN  1723  492       5   2.993  25
## 50    ENC  4892  476      11   5.946  33
## 51    MTN   454  458      13   5.255  29
# Row count of the filtered subset.
nrow(df2)
## [1] 24
## 3. Create new column names for the new data frame.
# Replacement names, listed in the same order as df2's current columns.
newcolnames <- c(
  "Reg", "PopCol", "SATVCol",
  "PerCol", "DllsCol", "PayCol"
)
colnames(df2) <- newcolnames
df2
##    Reg PopCol SATVCol PerCol DllsCol PayCol
## 1  ESC   4041     470      8   3.648     27
## 4  WSC   2351     470      6   3.334     23
## 6  MTN   3294     456     28   4.809     31
## 13 MTN   1007     466     17   3.200     25
## 14 ENC  11431     466     16   5.062     34
## 16 WNC   2777     511      5   4.839     28
## 17 WNC   2478     492     10   5.009     29
## 18 ESC   3685     473     10   4.390     29
## 19 WSC   4220     476      9   4.012     26
## 23 ENC   9295     454     12   5.257     38
## 24 WNC   4375     477     14   5.260     33
## 25 ESC   2573     477      4   3.322     24
## 26 WNC   5117     473     12   4.415     28
## 27 MTN    799     464     20   5.184     26
## 28 WNC   1578     484     10   4.381     26
## 32 MTN   1515     480     12   4.446     26
## 35 WNC    639     505      6   3.685     23
## 36 ENC  10847     450     22   5.639     32
## 37 WSC   3146     478      9   3.742     24
## 42 WNC    696     506      5   3.730     22
## 43 ESC   4877     483     12   3.707     28
## 45 MTN   1723     492      5   2.993     25
## 50 ENC   4892     476     11   5.946     33
## 51 MTN    454     458     13   5.255     29
## 4. Use the summary function to create an overview of your new data frame.  
## Then print the mean and median for the same two attributes.  Please compare

# Per-column overview of the renamed subset, for comparison with the
# summary of the full data frame.
summary(df2)
##      Reg                PopCol         SATVCol          PerCol     
##  Length:24          Min.   :  454   Min.   :450.0   Min.   : 4.00  
##  Class :character   1st Qu.: 1562   1st Qu.:466.0   1st Qu.: 7.50  
##  Mode  :character   Median : 2962   Median :476.0   Median :10.50  
##                     Mean   : 3659   Mean   :476.5   Mean   :11.50  
##                     3rd Qu.: 4500   3rd Qu.:483.2   3rd Qu.:13.25  
##                     Max.   :11431   Max.   :511.0   Max.   :28.00  
##     DllsCol          PayCol     
##  Min.   :2.993   Min.   :22.00  
##  1st Qu.:3.701   1st Qu.:25.00  
##  Median :4.402   Median :27.50  
##  Mean   :4.386   Mean   :27.88  
##  3rd Qu.:5.093   3rd Qu.:29.50  
##  Max.   :5.946   Max.   :38.00
# SATVCol column of the subset: mean, then median.
m11 <- mean(df2[["SATVCol"]])
m11
## [1] 476.5417
med11 <- median(df2[["SATVCol"]])
med11
## [1] 476
# PopCol column of the subset: mean, then median.
m22 <- mean(df2[["PopCol"]])
m22
## [1] 3658.75
med22 <- median(df2[["PopCol"]])
med22
## [1] 2961.5
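## SATVCol Mean   :476.5
## SATVCol Median   :476.0
## PopCol  Mean   : 3659
## PopCol  Median   : 2962
## Comparison with the full data set: keeping only rows with SATV > 445 raises
## the SATV mean (448.2 -> 476.5) and median (443 -> 476), while the
## population mean (4877 -> 3659) and median (3294 -> 2962) are both lower.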

## 5. For at least 3 values in a column please rename so that every value in that column is renamed.  
## For example, suppose I have 20 values of the letter "e" in one column.  
## Rename those values so that all 20 would show as "excellent". 

# Work on a copy so df2 keeps the original region codes.
df3 <- df2
# Recode three region codes. The replacement is restricted to the Reg column:
# comparing the whole data frame (df3 == "MTN") tests every cell of every
# column and could rewrite matching text that happens to sit outside Reg.
df3$Reg[df3$Reg == "MTN"] <- "MTN_MTN"
df3$Reg[df3$Reg == "ENC"] <- "ENC_ENC"
df3$Reg[df3$Reg == "WNC"] <- "WNC_WNC"
df3
##        Reg PopCol SATVCol PerCol DllsCol PayCol
## 1      ESC   4041     470      8   3.648     27
## 4      WSC   2351     470      6   3.334     23
## 6  MTN_MTN   3294     456     28   4.809     31
## 13 MTN_MTN   1007     466     17   3.200     25
## 14 ENC_ENC  11431     466     16   5.062     34
## 16 WNC_WNC   2777     511      5   4.839     28
## 17 WNC_WNC   2478     492     10   5.009     29
## 18     ESC   3685     473     10   4.390     29
## 19     WSC   4220     476      9   4.012     26
## 23 ENC_ENC   9295     454     12   5.257     38
## 24 WNC_WNC   4375     477     14   5.260     33
## 25     ESC   2573     477      4   3.322     24
## 26 WNC_WNC   5117     473     12   4.415     28
## 27 MTN_MTN    799     464     20   5.184     26
## 28 WNC_WNC   1578     484     10   4.381     26
## 32 MTN_MTN   1515     480     12   4.446     26
## 35 WNC_WNC    639     505      6   3.685     23
## 36 ENC_ENC  10847     450     22   5.639     32
## 37     WSC   3146     478      9   3.742     24
## 42 WNC_WNC    696     506      5   3.730     22
## 43     ESC   4877     483     12   3.707     28
## 45 MTN_MTN   1723     492      5   2.993     25
## 50 ENC_ENC   4892     476     11   5.946     33
## 51 MTN_MTN    454     458     13   5.255     29
## 6. Display enough rows to see examples of all of steps 1-5 above
# Print the final data frame: filtered rows, renamed columns and recoded
# Reg values are all visible here.
df3
##        Reg PopCol SATVCol PerCol DllsCol PayCol
## 1      ESC   4041     470      8   3.648     27
## 4      WSC   2351     470      6   3.334     23
## 6  MTN_MTN   3294     456     28   4.809     31
## 13 MTN_MTN   1007     466     17   3.200     25
## 14 ENC_ENC  11431     466     16   5.062     34
## 16 WNC_WNC   2777     511      5   4.839     28
## 17 WNC_WNC   2478     492     10   5.009     29
## 18     ESC   3685     473     10   4.390     29
## 19     WSC   4220     476      9   4.012     26
## 23 ENC_ENC   9295     454     12   5.257     38
## 24 WNC_WNC   4375     477     14   5.260     33
## 25     ESC   2573     477      4   3.322     24
## 26 WNC_WNC   5117     473     12   4.415     28
## 27 MTN_MTN    799     464     20   5.184     26
## 28 WNC_WNC   1578     484     10   4.381     26
## 32 MTN_MTN   1515     480     12   4.446     26
## 35 WNC_WNC    639     505      6   3.685     23
## 36 ENC_ENC  10847     450     22   5.639     32
## 37     WSC   3146     478      9   3.742     24
## 42 WNC_WNC    696     506      5   3.730     22
## 43     ESC   4877     483     12   3.707     28
## 45 MTN_MTN   1723     492      5   2.993     25
## 50 ENC_ENC   4892     476     11   5.946     33
## 51 MTN_MTN    454     458     13   5.255     29
# Row count of df3.
nrow(df3)
## [1] 24
## BONUS - place the original .csv in a github file and have R read from the link.  
## This will be a very useful skill as you progress in your data science education and career. 

# Raw GitHub link to the CSV used for the bonus step.
url2 <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/carData/States.csv"
# Read from url2, the link defined for this step. The original call read the
# earlier `url` variable, which left url2 unused and made this section depend
# on state set at the top of the script.
edu_us2 <- read.csv(url2, header = TRUE, stringsAsFactors = FALSE)
edu_us2
##     X region   pop SATV SATM percent dollars pay
## 1  AL    ESC  4041  470  514       8   3.648  27
## 2  AK    PAC   550  438  476      42   7.887  43
## 3  AZ    MTN  3665  445  497      25   4.231  30
## 4  AR    WSC  2351  470  511       6   3.334  23
## 5  CA    PAC 29760  419  484      45   4.826  39
## 6  CO    MTN  3294  456  513      28   4.809  31
## 7  CN     NE  3287  430  471      74   7.914  43
## 8  DE     SA   666  433  470      58   6.016  35
## 9  DC     SA   607  409  441      68   8.210  39
## 10 FL     SA 12938  418  466      44   5.154  30
## 11 GA     SA  6478  401  443      57   4.860  29
## 12 HI    PAC  1108  404  481      52   5.008  32
## 13 ID    MTN  1007  466  502      17   3.200  25
## 14 IL    ENC 11431  466  528      16   5.062  34
## 15 IN    ENC  5544  408  459      54   5.051  32
## 16 IA    WNC  2777  511  577       5   4.839  28
## 17 KS    WNC  2478  492  548      10   5.009  29
## 18 KY    ESC  3685  473  521      10   4.390  29
## 19 LA    WSC  4220  476  517       9   4.012  26
## 20 ME     NE  1228  423  463      60   5.894  28
## 21 MD     SA  4781  430  478      59   6.184  38
## 22 MA     NE  6016  427  473      72   6.351  36
## 23 MI    ENC  9295  454  514      12   5.257  38
## 24 MN    WNC  4375  477  542      14   5.260  33
## 25 MS    ESC  2573  477  519       4   3.322  24
## 26 MO    WNC  5117  473  522      12   4.415  28
## 27 MT    MTN   799  464  523      20   5.184  26
## 28 NE    WNC  1578  484  546      10   4.381  26
## 29 NV    MTN  1202  434  487      24   4.564  32
## 30 NH     NE  1109  442  486      67   5.504  31
## 31 NJ     MA  7730  418  473      69   9.159  38
## 32 NM    MTN  1515  480  527      12   4.446  26
## 33 NY     MA 17990  412  470      70   8.500  42
## 34 NC     SA  6629  401  440      55   4.802  29
## 35 ND    WNC   639  505  564       6   3.685  23
## 36 OH    ENC 10847  450  499      22   5.639  32
## 37 OK    WSC  3146  478  523       9   3.742  24
## 38 OR    PAC  2842  439  484      49   5.291  32
## 39 PA     MA 11882  420  463      64   6.534  36
## 40 RI     NE  1003  422  461      62   6.989  37
## 41 SC     SA  3487  397  437      54   4.327  28
## 42 SD    WNC   696  506  555       5   3.730  22
## 43 TN    ESC  4877  483  525      12   3.707  28
## 44 TX    WSC 16987  413  461      42   4.238  28
## 45 UT    MTN  1723  492  539       5   2.993  25
## 46 VT     NE   563  431  466      62   5.740  31
## 47 VA     SA  6187  425  470      58   5.360  32
## 48 WA    PAC  4867  437  486      44   5.045  33
## 49 WV     SA  1793  443  490      15   5.046  26
## 50 WI    ENC  4892  476  543      11   5.946  33
## 51 WY    MTN   454  458  519      13   5.255  29