library(RCurl)
## Warning: package 'RCurl' was built under R version 3.5.2
## Loading required package: bitops
#downloading file and placing it in preferred folder
download.file(url='https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/carData/Anscombe.csv', destfile = 'c:/Users/Javern/Documents/Data Science MS/Anscombe.csv', method = 'auto')
#Reading from downloaded file
Anscombe <- read.csv(file = 'c:/Users/Javern/Documents/Data Science MS/Anscombe.csv', header = TRUE, sep = ",", stringsAsFactors = FALSE)
colnames(Anscombe) <- c("City", "Education", "Income", "Young", "Urban")
#Summary of original Anscombe dataset
summaryA <- summary(Anscombe)
summaryA
## City Education Income Young
## Length:51 Min. :112.0 Min. :2081 Min. :326.2
## Class :character 1st Qu.:165.0 1st Qu.:2786 1st Qu.:342.1
## Mode :character Median :192.0 Median :3257 Median :354.1
## Mean :196.3 Mean :3225 Mean :358.9
## 3rd Qu.:228.5 3rd Qu.:3612 3rd Qu.:369.1
## Max. :372.0 Max. :4425 Max. :439.7
## Urban
## Min. : 322.0
## 1st Qu.: 552.5
## Median : 664.0
## Mean : 664.5
## 3rd Qu.: 790.5
## Max. :1000.0
#Calculate mean and median of Anscombe dataset for atleast 2 columns
apply(Anscombe[,2:5], 2, mean, na.rm=TRUE)
## Education Income Young Urban
## 196.3137 3225.2941 358.8863 664.5098
apply(Anscombe[,2:5], 2, median, na.rm=TRUE)
## Education Income Young Urban
## 192.0 3257.0 354.1 664.0
#Subset of Anscombe dataset within a dataframe called Anscombedf
Anscombe2 <- data.frame("City" = Anscombe$City, "Income" = Anscombe$Income, "Education Expenditure" = Anscombe$Education, "Young" = Anscombe$Young, "Urban" = Anscombe$Urban, stringsAsFactors = FALSE)
#Subset showing income of various U.S cities greater than 3200
Anscombedf <- subset(Anscombe2, Anscombe$Income > 3200)
Anscombedf
## City Income Education.Expenditure Young Urban
## 2 NH 3259 169 345.9 564
## 4 MA 3835 168 335.3 846
## 5 RI 3549 180 327.1 871
## 6 CT 4256 193 341.0 774
## 7 NY 4151 261 326.2 856
## 8 NJ 3954 214 333.5 889
## 9 PA 3419 201 326.2 715
## 10 OH 3509 172 354.5 753
## 11 IN 3412 194 359.3 649
## 12 IL 3981 189 348.9 830
## 13 MI 3675 233 369.2 738
## 14 WI 3363 209 360.7 659
## 15 MN 3341 262 365.4 664
## 16 IO 3265 234 343.8 572
## 17 MO 3257 177 336.1 701
## 20 NE 3239 148 349.9 615
## 21 KA 3303 196 339.9 661
## 22 DE 3795 248 375.9 722
## 23 MD 3742 247 364.1 766
## 24 DC 4425 246 352.1 1000
## 42 CO 3340 192 358.1 785
## 46 NV 3957 225 385.1 809
## 47 WA 3688 215 341.3 726
## 48 OR 3317 233 332.7 671
## 49 CA 3968 273 348.4 909
## 50 AK 4146 372 439.7 484
## 51 HI 3513 212 382.9 831
#Summary of new data frame Anscombedf
summaryB <- summary(Anscombedf)
summaryB
## City Income Education.Expenditure Young
## Length:27 Min. :3239 Min. :148.0 Min. :326.2
## Class :character 1st Qu.:3340 1st Qu.:190.5 1st Qu.:338.0
## Mode :character Median :3549 Median :212.0 Median :348.9
## Mean :3654 Mean :217.1 Mean :353.5
## 3rd Qu.:3956 3rd Qu.:240.0 3rd Qu.:362.4
## Max. :4425 Max. :372.0 Max. :439.7
## Urban
## Min. : 484.0
## 1st Qu.: 662.5
## Median : 738.0
## Mean : 743.0
## 3rd Qu.: 830.5
## Max. :1000.0
#calculate mean and meadian for each column of Anscombedf for atleast 2 columns
apply(Anscombedf[,2:5], 2, mean, na.rm=TRUE)
## Income Education.Expenditure Young
## 3654.0370 217.1481 353.4519
## Urban
## 742.9630
apply(Anscombedf[,2:5], 2, median, na.rm=TRUE)
## Income Education.Expenditure Young
## 3549.0 212.0 348.9
## Urban
## 738.0
#compare both summaries
identical(summaryA, summaryB)
## [1] FALSE
#For at least 3 values in a column please rename so that every value in that column is renamed.
#Ranges of 150, 250, 350 and 500
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure < 150] <- 150
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 150 & Anscombedf$Education.Expenditure < 250] <- 250
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 250 & Anscombedf$Education.Expenditure < 350] <- 350
Anscombedf$Education.Expenditure[Anscombedf$Education.Expenditure > 350] <- 500
#Displays current Anscombe data frame after changes made
Anscombedf
## City Income Education.Expenditure Young Urban
## 2 NH 3259 250 345.9 564
## 4 MA 3835 250 335.3 846
## 5 RI 3549 250 327.1 871
## 6 CT 4256 250 341.0 774
## 7 NY 4151 350 326.2 856
## 8 NJ 3954 250 333.5 889
## 9 PA 3419 250 326.2 715
## 10 OH 3509 250 354.5 753
## 11 IN 3412 250 359.3 649
## 12 IL 3981 250 348.9 830
## 13 MI 3675 250 369.2 738
## 14 WI 3363 250 360.7 659
## 15 MN 3341 350 365.4 664
## 16 IO 3265 250 343.8 572
## 17 MO 3257 250 336.1 701
## 20 NE 3239 150 349.9 615
## 21 KA 3303 250 339.9 661
## 22 DE 3795 250 375.9 722
## 23 MD 3742 250 364.1 766
## 24 DC 4425 250 352.1 1000
## 42 CO 3340 250 358.1 785
## 46 NV 3957 250 385.1 809
## 47 WA 3688 250 341.3 726
## 48 OR 3317 250 332.7 671
## 49 CA 3968 350 348.4 909
## 50 AK 4146 500 439.7 484
## 51 HI 3513 250 382.9 831
#BONUS - place the original .csv in a github file and have R read from the link.
read.csv(url("https://raw.githubusercontent.com/javernw/Week1-SQLWinterBridgeHomework/master/Anscombe.csv"))
## X education income young urban
## 1 ME 189 2824 350.7 508
## 2 NH 169 3259 345.9 564
## 3 VT 230 3072 348.5 322
## 4 MA 168 3835 335.3 846
## 5 RI 180 3549 327.1 871
## 6 CT 193 4256 341.0 774
## 7 NY 261 4151 326.2 856
## 8 NJ 214 3954 333.5 889
## 9 PA 201 3419 326.2 715
## 10 OH 172 3509 354.5 753
## 11 IN 194 3412 359.3 649
## 12 IL 189 3981 348.9 830
## 13 MI 233 3675 369.2 738
## 14 WI 209 3363 360.7 659
## 15 MN 262 3341 365.4 664
## 16 IO 234 3265 343.8 572
## 17 MO 177 3257 336.1 701
## 18 ND 177 2730 369.1 443
## 19 SD 187 2876 368.7 446
## 20 NE 148 3239 349.9 615
## 21 KA 196 3303 339.9 661
## 22 DE 248 3795 375.9 722
## 23 MD 247 3742 364.1 766
## 24 DC 246 4425 352.1 1000
## 25 VA 180 3068 353.0 631
## 26 WV 149 2470 328.8 390
## 27 NC 155 2664 354.1 450
## 28 SC 149 2380 376.7 476
## 29 GA 156 2781 370.6 603
## 30 FL 191 3191 336.0 805
## 31 KY 140 2645 349.3 523
## 32 TN 137 2579 342.8 588
## 33 AL 112 2337 362.2 584
## 34 MS 130 2081 385.2 445
## 35 AR 134 2322 351.9 500
## 36 LA 162 2634 389.6 661
## 37 OK 135 2880 329.8 680
## 38 TX 155 3029 369.4 797
## 39 MT 238 2942 368.9 534
## 40 ID 170 2668 367.7 541
## 41 WY 238 3190 365.6 605
## 42 CO 192 3340 358.1 785
## 43 NM 227 2651 421.5 698
## 44 AZ 207 3027 387.5 796
## 45 UT 201 2790 412.4 804
## 46 NV 225 3957 385.1 809
## 47 WA 215 3688 341.3 726
## 48 OR 233 3317 332.7 671
## 49 CA 273 3968 348.4 909
## 50 AK 372 4146 439.7 484
## 51 HI 212 3513 382.9 831