library(RCurl)
## Warning: package 'RCurl' was built under R version 3.5.2
## Loading required package: bitops
#downloading file and placing it in preferred folder
download.file(url='https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/carData/Anscombe.csv', destfile = 'c:/Users/Javern/Documents/Data Science MS/Anscombe.csv', method = 'auto')

#Reading from downloaded file
Anscombe <- read.csv(file = 'c:/Users/Javern/Documents/Data Science MS/Anscombe.csv', header = TRUE, sep = ",", stringsAsFactors = FALSE)

colnames(Anscombe) <- c("City",  "Education", "Income", "Young", "Urban")

#Summary of original Anscombe dataset
summaryA <- summary(Anscombe)
summaryA
##      City             Education         Income         Young      
##  Length:51          Min.   :112.0   Min.   :2081   Min.   :326.2  
##  Class :character   1st Qu.:165.0   1st Qu.:2786   1st Qu.:342.1  
##  Mode  :character   Median :192.0   Median :3257   Median :354.1  
##                     Mean   :196.3   Mean   :3225   Mean   :358.9  
##                     3rd Qu.:228.5   3rd Qu.:3612   3rd Qu.:369.1  
##                     Max.   :372.0   Max.   :4425   Max.   :439.7  
##      Urban       
##  Min.   : 322.0  
##  1st Qu.: 552.5  
##  Median : 664.0  
##  Mean   : 664.5  
##  3rd Qu.: 790.5  
##  Max.   :1000.0
#Calculate mean and median of Anscombe dataset for atleast 2 columns
apply(Anscombe[,2:5], 2, mean, na.rm=TRUE)
## Education    Income     Young     Urban 
##  196.3137 3225.2941  358.8863  664.5098
apply(Anscombe[,2:5], 2, median, na.rm=TRUE)
## Education    Income     Young     Urban 
##     192.0    3257.0     354.1     664.0
#Subset of Anscombe dataset within a dataframe called Anscombedf
Anscombe2 <- data.frame("City" = Anscombe$City, "Income" = Anscombe$Income, "Education Expenditure" = Anscombe$Education, "Young" = Anscombe$Young, "Urban" = Anscombe$Urban, stringsAsFactors = FALSE)

#Subset showing income of various U.S cities greater than 3200
Anscombedf <- subset(Anscombe2, Anscombe$Income > 3200)
Anscombedf
##    City Income Education.Expenditure Young Urban
## 2    NH   3259                   169 345.9   564
## 4    MA   3835                   168 335.3   846
## 5    RI   3549                   180 327.1   871
## 6    CT   4256                   193 341.0   774
## 7    NY   4151                   261 326.2   856
## 8    NJ   3954                   214 333.5   889
## 9    PA   3419                   201 326.2   715
## 10   OH   3509                   172 354.5   753
## 11   IN   3412                   194 359.3   649
## 12   IL   3981                   189 348.9   830
## 13   MI   3675                   233 369.2   738
## 14   WI   3363                   209 360.7   659
## 15   MN   3341                   262 365.4   664
## 16   IO   3265                   234 343.8   572
## 17   MO   3257                   177 336.1   701
## 20   NE   3239                   148 349.9   615
## 21   KA   3303                   196 339.9   661
## 22   DE   3795                   248 375.9   722
## 23   MD   3742                   247 364.1   766
## 24   DC   4425                   246 352.1  1000
## 42   CO   3340                   192 358.1   785
## 46   NV   3957                   225 385.1   809
## 47   WA   3688                   215 341.3   726
## 48   OR   3317                   233 332.7   671
## 49   CA   3968                   273 348.4   909
## 50   AK   4146                   372 439.7   484
## 51   HI   3513                   212 382.9   831
#Summary of new data frame Anscombedf
summaryB <- summary(Anscombedf)
summaryB
##      City               Income     Education.Expenditure     Young      
##  Length:27          Min.   :3239   Min.   :148.0         Min.   :326.2  
##  Class :character   1st Qu.:3340   1st Qu.:190.5         1st Qu.:338.0  
##  Mode  :character   Median :3549   Median :212.0         Median :348.9  
##                     Mean   :3654   Mean   :217.1         Mean   :353.5  
##                     3rd Qu.:3956   3rd Qu.:240.0         3rd Qu.:362.4  
##                     Max.   :4425   Max.   :372.0         Max.   :439.7  
##      Urban       
##  Min.   : 484.0  
##  1st Qu.: 662.5  
##  Median : 738.0  
##  Mean   : 743.0  
##  3rd Qu.: 830.5  
##  Max.   :1000.0
#calculate mean and meadian for each column of Anscombedf for atleast 2 columns
apply(Anscombedf[,2:5], 2, mean, na.rm=TRUE)
##                Income Education.Expenditure                 Young 
##             3654.0370              217.1481              353.4519 
##                 Urban 
##              742.9630
apply(Anscombedf[,2:5], 2, median, na.rm=TRUE)
##                Income Education.Expenditure                 Young 
##                3549.0                 212.0                 348.9 
##                 Urban 
##                 738.0
#compare both summaries
identical(summaryA, summaryB)
## [1] FALSE
#For at least 3 values in a column please rename so that every value in that column is renamed. 
#Ranges of 150, 250, 350 and 500
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure < 150] <- 150
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 150 & Anscombedf$Education.Expenditure < 250] <- 250
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 250 & Anscombedf$Education.Expenditure < 350] <- 350
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 350] <- 500

#Displays current Anscombe data frame after changes made
Anscombedf
##    City Income Education.Expenditure Young Urban
## 2    NH   3259                   250 345.9   564
## 4    MA   3835                   250 335.3   846
## 5    RI   3549                   250 327.1   871
## 6    CT   4256                   250 341.0   774
## 7    NY   4151                   350 326.2   856
## 8    NJ   3954                   250 333.5   889
## 9    PA   3419                   250 326.2   715
## 10   OH   3509                   250 354.5   753
## 11   IN   3412                   250 359.3   649
## 12   IL   3981                   250 348.9   830
## 13   MI   3675                   250 369.2   738
## 14   WI   3363                   250 360.7   659
## 15   MN   3341                   350 365.4   664
## 16   IO   3265                   250 343.8   572
## 17   MO   3257                   250 336.1   701
## 20   NE   3239                   150 349.9   615
## 21   KA   3303                   250 339.9   661
## 22   DE   3795                   250 375.9   722
## 23   MD   3742                   250 364.1   766
## 24   DC   4425                   250 352.1  1000
## 42   CO   3340                   250 358.1   785
## 46   NV   3957                   250 385.1   809
## 47   WA   3688                   250 341.3   726
## 48   OR   3317                   250 332.7   671
## 49   CA   3968                   350 348.4   909
## 50   AK   4146                   500 439.7   484
## 51   HI   3513                   250 382.9   831
#BONUS - place the original .csv in a github file and have R read from the link.

read.csv(url("https://raw.githubusercontent.com/javernw/Week1-SQLWinterBridgeHomework/master/Anscombe.csv"))
##     X education income young urban
## 1  ME       189   2824 350.7   508
## 2  NH       169   3259 345.9   564
## 3  VT       230   3072 348.5   322
## 4  MA       168   3835 335.3   846
## 5  RI       180   3549 327.1   871
## 6  CT       193   4256 341.0   774
## 7  NY       261   4151 326.2   856
## 8  NJ       214   3954 333.5   889
## 9  PA       201   3419 326.2   715
## 10 OH       172   3509 354.5   753
## 11 IN       194   3412 359.3   649
## 12 IL       189   3981 348.9   830
## 13 MI       233   3675 369.2   738
## 14 WI       209   3363 360.7   659
## 15 MN       262   3341 365.4   664
## 16 IO       234   3265 343.8   572
## 17 MO       177   3257 336.1   701
## 18 ND       177   2730 369.1   443
## 19 SD       187   2876 368.7   446
## 20 NE       148   3239 349.9   615
## 21 KA       196   3303 339.9   661
## 22 DE       248   3795 375.9   722
## 23 MD       247   3742 364.1   766
## 24 DC       246   4425 352.1  1000
## 25 VA       180   3068 353.0   631
## 26 WV       149   2470 328.8   390
## 27 NC       155   2664 354.1   450
## 28 SC       149   2380 376.7   476
## 29 GA       156   2781 370.6   603
## 30 FL       191   3191 336.0   805
## 31 KY       140   2645 349.3   523
## 32 TN       137   2579 342.8   588
## 33 AL       112   2337 362.2   584
## 34 MS       130   2081 385.2   445
## 35 AR       134   2322 351.9   500
## 36 LA       162   2634 389.6   661
## 37 OK       135   2880 329.8   680
## 38 TX       155   3029 369.4   797
## 39 MT       238   2942 368.9   534
## 40 ID       170   2668 367.7   541
## 41 WY       238   3190 365.6   605
## 42 CO       192   3340 358.1   785
## 43 NM       227   2651 421.5   698
## 44 AZ       207   3027 387.5   796
## 45 UT       201   2790 412.4   804
## 46 NV       225   3957 385.1   809
## 47 WA       215   3688 341.3   726
## 48 OR       233   3317 332.7   671
## 49 CA       273   3968 348.4   909
## 50 AK       372   4146 439.7   484
## 51 HI       212   3513 382.9   831