require(RCurl)
## Loading required package: RCurl
## Loading required package: bitops
# Read the RacialAnimus data set straight from the Rdatasets repository,
# keeping text columns as character vectors rather than factors.
dframe <- read.csv(
  file = "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/Stat2Data/RacialAnimus.csv",
  stringsAsFactors = FALSE
)
# Assign the eight column names explicitly, in file order.
names(dframe) <- c(
  "X", "MediaMarket", "Age65Plus", "BachPlus",
  "Black", "Hispanic", "ObamaKerry", "Animus"
)
# Show the structure (types and first values) of the loaded data frame.
str(dframe)
## 'data.frame':    196 obs. of  8 variables:
##  $ X          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ MediaMarket: chr  "ABILENE (TX) - SWEETWATER (TX)" "ALBANY (GA)" "ALBANY (NY) - SCHENECTADY (NY) - TROY (NY)" "ALBUQUERQUE (NM) - SANTA FE (NM)" ...
##  $ Age65Plus  : num  17.5 14.1 16.7 14.5 14.6 ...
##  $ BachPlus   : num  18 14 28.8 26.2 16.6 ...
##  $ Black      : num  5.86 37.75 5.26 1.9 25.97 ...
##  $ Hispanic   : num  21.49 5.91 3.25 41.21 3.2 ...
##  $ ObamaKerry : num  1.56 3.28 4.03 7.89 -2.42 ...
##  $ Animus     : num  75.3 108 121.8 70.8 138.5 ...

1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Per-column overview of the full data set.
summary(object = dframe)
##        X          MediaMarket          Age65Plus        BachPlus    
##  Min.   :  1.00   Length:196         Min.   : 9.41   Min.   :12.49  
##  1st Qu.: 49.75   Class :character   1st Qu.:13.63   1st Qu.:18.78  
##  Median : 98.50   Mode  :character   Median :15.37   Median :22.80  
##  Mean   : 98.50                      Mean   :15.29   Mean   :23.41  
##  3rd Qu.:147.25                      3rd Qu.:16.80   3rd Qu.:26.76  
##  Max.   :196.00                      Max.   :26.43   Max.   :42.53  
##      Black            Hispanic         ObamaKerry          Animus      
##  Min.   : 0.3061   Min.   : 0.6204   Min.   :-10.975   Min.   : 39.80  
##  1st Qu.: 2.5905   1st Qu.: 2.5224   1st Qu.:  2.448   1st Qu.: 78.87  
##  Median : 5.9980   Median : 4.5926   Median :  4.843   Median : 94.46  
##  Mean   :10.5070   Mean   :10.9089   Mean   :  4.511   Mean   : 99.07  
##  3rd Qu.:16.0445   3rd Qu.:10.9054   3rd Qu.:  7.056   3rd Qu.:113.16  
##  Max.   :47.6634   Max.   :94.1046   Max.   : 18.602   Max.   :239.49
# Mean and median of three numeric attributes of the full data set.
mean(dframe[["Hispanic"]])
## [1] 10.90892
median(dframe[["Hispanic"]])
## [1] 4.592624
mean(dframe[["Age65Plus"]])
## [1] 15.28978
median(dframe[["Age65Plus"]])
## [1] 15.37234
mean(dframe[["Animus"]])
## [1] 99.07268
median(dframe[["Animus"]])
## [1] 94.46389

2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Keep rows 20 through 29 and five of the eight columns, selected by name.
dframe2 <- dframe[20:29, c("X", "Age65Plus", "Black", "Hispanic", "Animus")]
print(dframe2)
##     X Age65Plus      Black   Hispanic    Animus
## 20 20  18.06248  5.3291030  0.7180326 239.49182
## 21 21  12.27619  0.8894419 12.2245569  55.45377
## 22 22  14.34131  5.5439210  7.1421919  79.09814
## 23 23  14.64544  5.9906535  2.4848993 138.39549
## 24 24  17.37742  9.2783384  3.2227662 117.82376
## 25 25  15.16676  1.5442786  1.6856974  79.16496
## 26 26  13.16253  0.4639686  2.6693578  67.91328
## 27 27  15.91208  2.7695124  2.2807610  68.41860
## 28 28  16.12255  8.1596985  2.3234825  69.58967
## 29 29  13.06703 31.3881378  3.4027321  86.76189

3. Create new column names for the new data frame.

# Give the subset its own column names (same column order as before).
names(dframe2) <- c(
  "X", "Above65", "AfricanAmerican", "Latino", "Animosity"
)
print(dframe2)
##     X  Above65 AfricanAmerican     Latino Animosity
## 20 20 18.06248       5.3291030  0.7180326 239.49182
## 21 21 12.27619       0.8894419 12.2245569  55.45377
## 22 22 14.34131       5.5439210  7.1421919  79.09814
## 23 23 14.64544       5.9906535  2.4848993 138.39549
## 24 24 17.37742       9.2783384  3.2227662 117.82376
## 25 25 15.16676       1.5442786  1.6856974  79.16496
## 26 26 13.16253       0.4639686  2.6693578  67.91328
## 27 27 15.91208       2.7695124  2.2807610  68.41860
## 28 28 16.12255       8.1596985  2.3234825  69.58967
## 29 29 13.06703      31.3881378  3.4027321  86.76189

4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

# Per-column overview of the 10-row subset.
summary(object = dframe2)
##        X            Above65      AfricanAmerican      Latino      
##  Min.   :20.00   Min.   :12.28   Min.   : 0.464   Min.   : 0.718  
##  1st Qu.:22.25   1st Qu.:13.46   1st Qu.: 1.851   1st Qu.: 2.291  
##  Median :24.50   Median :14.91   Median : 5.437   Median : 2.577  
##  Mean   :24.50   Mean   :15.01   Mean   : 7.136   Mean   : 3.815  
##  3rd Qu.:26.75   3rd Qu.:16.07   3rd Qu.: 7.617   3rd Qu.: 3.358  
##  Max.   :29.00   Max.   :18.06   Max.   :31.388   Max.   :12.225  
##    Animosity     
##  Min.   : 55.45  
##  1st Qu.: 68.71  
##  Median : 79.13  
##  Mean   :100.21  
##  3rd Qu.:110.06  
##  Max.   :239.49
# Mean of Animosity in the subset.
mean(dframe2[["Animosity"]])
## [1] 100.2111

The mean for Animus was 99.07268; the mean for Animosity is 100.2111.

# Median of Animosity in the subset.
median(dframe2[["Animosity"]])
## [1] 79.13155

The median for Animus was 94.46389; the median for Animosity is 79.13155.

# Mean of Latino in the subset.
mean(dframe2[["Latino"]])
## [1] 3.815448

The mean for Hispanic was 10.90892; the mean for Latino is 3.815448.

# Median of Latino in the subset.
median(dframe2[["Latino"]])
## [1] 2.577129

The median for Hispanic was 4.592624; the median for Latino is 2.577129.

# Mean of Above65 in the subset.
mean(dframe2[["Above65"]])
## [1] 15.01338

The mean for Age65Plus was 15.28978 and the mean for Above65 is 15.01338.

# Median of Above65 in the subset.
median(dframe2[["Above65"]])
## [1] 14.9061

The median for Age65Plus was 15.37234 and the median for Above65 is 14.9061.

The means and medians of the subset all differ from those of the full data set, which is expected because the subset contains only 10 of the 196 rows.

5. For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.

# Overwrite every Above65 value below 15 with 10; other values are kept.
dframe2$Above65 <- replace(dframe2$Above65, dframe2$Above65 < 15, 10)

6. Display enough rows to see examples of all of steps 1-5 above.

# Show the whole subset, including the renamed columns and recoded values.
print(dframe2)
##     X  Above65 AfricanAmerican     Latino Animosity
## 20 20 18.06248       5.3291030  0.7180326 239.49182
## 21 21 10.00000       0.8894419 12.2245569  55.45377
## 22 22 10.00000       5.5439210  7.1421919  79.09814
## 23 23 10.00000       5.9906535  2.4848993 138.39549
## 24 24 17.37742       9.2783384  3.2227662 117.82376
## 25 25 15.16676       1.5442786  1.6856974  79.16496
## 26 26 10.00000       0.4639686  2.6693578  67.91328
## 27 27 15.91208       2.7695124  2.2807610  68.41860
## 28 28 16.12255       8.1596985  2.3234825  69.58967
## 29 29 10.00000      31.3881378  3.4027321  86.76189

7. BONUS – place the original .csv in a github file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.

# Read the copy of the CSV stored in a personal GitHub repository
# (the URL is pinned to a specific commit), keeping strings as character.
dframe1 <- read.csv(
  file = "https://raw.githubusercontent.com/Luz917/rhw2bonus/47ab458f1ead467162760873af37cce1f0913bd2/RacialAnimus.csv.txt",
  stringsAsFactors = FALSE
)
# Assign the eight column names explicitly, in file order.
names(dframe1) <- c(
  "X", "MediaMarket", "Age65Plus", "BachPlus",
  "Black", "Hispanic", "ObamaKerry", "Animus"
)
# Show the structure (types and first values) of the loaded data frame.
str(dframe1)
## 'data.frame':    196 obs. of  8 variables:
##  $ X          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ MediaMarket: chr  "ABILENE (TX) - SWEETWATER (TX)" "ALBANY (GA)" "ALBANY (NY) - SCHENECTADY (NY) - TROY (NY)" "ALBUQUERQUE (NM) - SANTA FE (NM)" ...
##  $ Age65Plus  : num  17.5 14.1 16.7 14.5 14.6 ...
##  $ BachPlus   : num  18 14 28.8 26.2 16.6 ...
##  $ Black      : num  5.86 37.75 5.26 1.9 25.97 ...
##  $ Hispanic   : num  21.49 5.91 3.25 41.21 3.2 ...
##  $ ObamaKerry : num  1.56 3.28 4.03 7.89 -2.42 ...
##  $ Animus     : num  75.3 108 121.8 70.8 138.5 ...
# Per-column overview of the data read from the GitHub copy.
summary(object = dframe1)
##        X          MediaMarket          Age65Plus        BachPlus    
##  Min.   :  1.00   Length:196         Min.   : 9.41   Min.   :12.49  
##  1st Qu.: 49.75   Class :character   1st Qu.:13.63   1st Qu.:18.78  
##  Median : 98.50   Mode  :character   Median :15.37   Median :22.80  
##  Mean   : 98.50                      Mean   :15.29   Mean   :23.41  
##  3rd Qu.:147.25                      3rd Qu.:16.80   3rd Qu.:26.76  
##  Max.   :196.00                      Max.   :26.43   Max.   :42.53  
##      Black            Hispanic         ObamaKerry          Animus      
##  Min.   : 0.3061   Min.   : 0.6204   Min.   :-10.975   Min.   : 39.80  
##  1st Qu.: 2.5905   1st Qu.: 2.5224   1st Qu.:  2.448   1st Qu.: 78.87  
##  Median : 5.9980   Median : 4.5926   Median :  4.843   Median : 94.46  
##  Mean   :10.5070   Mean   :10.9089   Mean   :  4.511   Mean   : 99.07  
##  3rd Qu.:16.0445   3rd Qu.:10.9054   3rd Qu.:  7.056   3rd Qu.:113.16  
##  Max.   :47.6634   Max.   :94.1046   Max.   : 18.602   Max.   :239.49