# Raw GitHub URL of the nuts dataset.
path <- "https://raw.githubusercontent.com/davidblumenstiel/nutsdataset/master/nuts"

# Read the remote CSV into a data frame and print a per-column summary.
nuts <- read.csv(file = path)
print(summary(object = nuts))
## X cones ntrees dbh
## Min. : 1.00 Min. : 0.00 Min. : 3.00 Min. :0.1200
## 1st Qu.:13.75 1st Qu.: 3.75 1st Qu.:10.75 1st Qu.:0.2150
## Median :26.50 Median :11.00 Median :13.00 Median :0.2700
## Mean :26.50 Mean :17.92 Mean :18.08 Mean :0.2787
## 3rd Qu.:39.25 3rd Qu.:22.75 3rd Qu.:22.00 3rd Qu.:0.3425
## Max. :52.00 Max. :91.00 Max. :74.00 Max. :0.7900
## height cover sntrees sheight
## Min. : 6.38 Min. :54.40 Min. :-1.0872 Min. :-3.0314219
## 1st Qu.:16.08 1st Qu.:85.10 1st Qu.:-0.5284 1st Qu.:-0.6499515
## Median :19.33 Median :90.00 Median :-0.3661 Median : 0.1496034
## Mean :18.72 Mean :87.08 Mean : 0.0000 Mean :-0.0000002
## 3rd Qu.:21.57 3rd Qu.:92.05 3rd Qu.: 0.2829 3rd Qu.: 0.7010628
## Max. :25.50 Max. :94.80 Max. : 4.0327 Max. : 1.6651962
## scover
## Min. :-3.9872
## 1st Qu.:-0.2419
## Median : 0.3559
## Mean : 0.0000
## 3rd Qu.: 0.6060
## Max. : 0.9415
# Print the mean and median of the `cones` and `height` columns of the full
# dataset. paste0() is used because every label already ends in a space;
# paste() would add its default " " separator on top of that and emit a
# doubled space between the label and the value.
print(paste0("cones mean: ", mean(nuts[["cones"]])))
## [1] "cones mean: 17.9230769230769"
print(paste0("cones median: ", median(nuts[["cones"]])))
## [1] "cones median: 11"
print(paste0("height mean: ", mean(nuts[["height"]])))
## [1] "height mean: 18.7209616440993"
print(paste0("height median: ", median(nuts[["height"]])))
## [1] "height median: 19.3299999237061"
# Keep the rows whose X value is at most 30 and the contiguous run of columns
# from X through cover. which() discards rows where the comparison is NA, and
# drop = FALSE guarantees the result stays a data frame.
newnuts <- nuts[
  which(nuts[["X"]] <= 30),
  match("X", names(nuts)):match("cover", names(nuts)),
  drop = FALSE
]
newnuts
## X cones ntrees dbh height cover
## 1 1 61 32 0.23 20.42 91.3
## 2 2 4 4 0.27 15.20 61.5
## 3 3 15 34 0.17 15.97 91.4
## 4 4 9 22 0.23 22.42 92.0
## 5 5 42 22 0.18 19.45 93.2
## 6 6 4 21 0.23 23.07 93.5
## 7 7 12 19 0.22 21.06 88.5
## 8 8 27 15 0.26 18.82 88.0
## 9 9 0 12 0.23 19.16 89.8
## 10 10 4 9 0.12 6.38 73.3
## 11 11 91 5 0.79 25.50 94.8
## 12 12 20 12 0.20 12.02 94.2
## 13 13 5 15 0.19 9.06 76.8
## 14 14 14 42 0.15 8.82 77.2
## 15 15 35 74 0.15 17.91 91.3
## 16 16 11 23 0.15 15.93 92.2
## 17 17 47 67 0.14 13.79 91.8
## 18 18 17 33 0.17 14.60 88.6
## 19 19 16 12 0.34 13.99 92.4
## 20 20 0 7 0.40 16.16 85.2
## 21 21 44 14 0.37 20.88 92.9
## 22 22 18 23 0.23 15.54 91.5
## 23 23 9 13 0.27 16.98 90.7
## 24 24 16 7 0.32 19.20 89.0
## 25 25 60 11 0.26 20.03 93.5
## 26 26 3 7 0.29 15.87 91.9
## 27 27 5 10 0.35 20.87 90.7
## 28 28 5 11 0.31 21.55 90.4
## 29 29 2 3 0.42 20.37 69.9
## 30 30 32 11 0.33 18.27 92.6
# Relabel the six retained columns; names are assigned by position.
names(newnuts) <- c(
  "Tree", "NumCones", "NumTrees",
  "dbh", "Height", "Shade"
)
newnuts
## Tree NumCones NumTrees dbh Height Shade
## 1 1 61 32 0.23 20.42 91.3
## 2 2 4 4 0.27 15.20 61.5
## 3 3 15 34 0.17 15.97 91.4
## 4 4 9 22 0.23 22.42 92.0
## 5 5 42 22 0.18 19.45 93.2
## 6 6 4 21 0.23 23.07 93.5
## 7 7 12 19 0.22 21.06 88.5
## 8 8 27 15 0.26 18.82 88.0
## 9 9 0 12 0.23 19.16 89.8
## 10 10 4 9 0.12 6.38 73.3
## 11 11 91 5 0.79 25.50 94.8
## 12 12 20 12 0.20 12.02 94.2
## 13 13 5 15 0.19 9.06 76.8
## 14 14 14 42 0.15 8.82 77.2
## 15 15 35 74 0.15 17.91 91.3
## 16 16 11 23 0.15 15.93 92.2
## 17 17 47 67 0.14 13.79 91.8
## 18 18 17 33 0.17 14.60 88.6
## 19 19 16 12 0.34 13.99 92.4
## 20 20 0 7 0.40 16.16 85.2
## 21 21 44 14 0.37 20.88 92.9
## 22 22 18 23 0.23 15.54 91.5
## 23 23 9 13 0.27 16.98 90.7
## 24 24 16 7 0.32 19.20 89.0
## 25 25 60 11 0.26 20.03 93.5
## 26 26 3 7 0.29 15.87 91.9
## 27 27 5 10 0.35 20.87 90.7
## 28 28 5 11 0.31 21.55 90.4
## 29 29 2 3 0.42 20.37 69.9
## 30 30 32 11 0.33 18.27 92.6
# Per-column summary of the renamed subset.
print(summary(newnuts))
## Tree NumCones NumTrees dbh
## Min. : 1.00 Min. : 0.00 Min. : 3.00 Min. :0.1200
## 1st Qu.: 8.25 1st Qu.: 5.00 1st Qu.:10.25 1st Qu.:0.1825
## Median :15.50 Median :14.50 Median :13.50 Median :0.2300
## Mean :15.50 Mean :20.93 Mean :19.67 Mean :0.2657
## 3rd Qu.:22.75 3rd Qu.:30.75 3rd Qu.:22.75 3rd Qu.:0.3175
## Max. :30.00 Max. :91.00 Max. :74.00 Max. :0.7900
## Height Shade
## Min. : 6.38 Min. :61.50
## 1st Qu.:15.29 1st Qu.:88.53
## Median :18.09 Median :91.30
## Mean :17.31 Mean :88.00
## 3rd Qu.:20.41 3rd Qu.:92.35
## Max. :25.50 Max. :94.80
# Print the mean and median of the `NumCones` and `Height` columns of the
# subset. paste0() is used because every label already ends in a space;
# paste() would add its default " " separator as well and emit a doubled
# space between the label and the value.
print(paste0("NumCones mean: ", mean(newnuts[["NumCones"]])))
## [1] "NumCones mean: 20.9333333333333"
print(paste0("NumCones median: ", median(newnuts[["NumCones"]])))
## [1] "NumCones median: 14.5"
print(paste0("Height mean: ", mean(newnuts[["Height"]])))
## [1] "Height mean: 17.3096667289734"
print(paste0("Height median: ", median(newnuts[["Height"]])))
## [1] "Height median: 18.0900001525879"
# Report how the subset's statistics differ from the full dataset's
# (subset value minus full-dataset value). paste0() is used because every
# label already ends in a space; paste() would add its default " " separator
# as well and emit a doubled space between the label and the value.
print("difference from old dataset: ")
## [1] "difference from old dataset: "
print(paste0(
  "NumCones mean change: ",
  mean(newnuts[["NumCones"]]) - mean(nuts[["cones"]])
))
## [1] "NumCones mean change: 3.01025641025641"
print(paste0(
  "NumCones median change: ",
  median(newnuts[["NumCones"]]) - median(nuts[["cones"]])
))
## [1] "NumCones median change: 3.5"
print(paste0(
  "Height mean change: ",
  mean(newnuts[["Height"]]) - mean(nuts[["height"]])
))
## [1] "Height mean change: -1.41129491512593"
print(paste0(
  "Height median change: ",
  median(newnuts[["Height"]]) - median(nuts[["height"]])
))
## [1] "Height median change: -1.2399997711182"
# Swap every NumCones value equal to 4 for a text label. Assigning a string
# into the column coerces the whole column to character, so NumCones is no
# longer numeric after this step.
newnuts[["NumCones"]] <- replace(
  newnuts[["NumCones"]],
  which(newnuts[["NumCones"]] == 4),
  "FOUR CONES!"
)
print(newnuts)
## Tree NumCones NumTrees dbh Height Shade
## 1 1 61 32 0.23 20.42 91.3
## 2 2 FOUR CONES! 4 0.27 15.20 61.5
## 3 3 15 34 0.17 15.97 91.4
## 4 4 9 22 0.23 22.42 92.0
## 5 5 42 22 0.18 19.45 93.2
## 6 6 FOUR CONES! 21 0.23 23.07 93.5
## 7 7 12 19 0.22 21.06 88.5
## 8 8 27 15 0.26 18.82 88.0
## 9 9 0 12 0.23 19.16 89.8
## 10 10 FOUR CONES! 9 0.12 6.38 73.3
## 11 11 91 5 0.79 25.50 94.8
## 12 12 20 12 0.20 12.02 94.2
## 13 13 5 15 0.19 9.06 76.8
## 14 14 14 42 0.15 8.82 77.2
## 15 15 35 74 0.15 17.91 91.3
## 16 16 11 23 0.15 15.93 92.2
## 17 17 47 67 0.14 13.79 91.8
## 18 18 17 33 0.17 14.60 88.6
## 19 19 16 12 0.34 13.99 92.4
## 20 20 0 7 0.40 16.16 85.2
## 21 21 44 14 0.37 20.88 92.9
## 22 22 18 23 0.23 15.54 91.5
## 23 23 9 13 0.27 16.98 90.7
## 24 24 16 7 0.32 19.20 89.0
## 25 25 60 11 0.26 20.03 93.5
## 26 26 3 7 0.29 15.87 91.9
## 27 27 5 10 0.35 20.87 90.7
## 28 28 5 11 0.31 21.55 90.4
## 29 29 2 3 0.42 20.37 69.9
## 30 30 32 11 0.33 18.27 92.6
# Display enough rows to see examples of all of steps 1-5 above.
# BONUS – place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.