#Student Id - 2598414
# Setup ----
# Pin the CRAN mirror so any installation below uses a known repository.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install a package only when it is missing, and do so BEFORE attaching it.
# Calling install.packages() unconditionally re-downloads on every run, and
# for a package that is already loaded it is either skipped with a warning or
# (on Windows) cannot overwrite the locked DLL.
# NOTE(review): knitr is installed but never attached in this script;
# presumably it is only needed to render the report - confirm.
required_packages <- c("ggplot2", "knitr", "dplyr")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach each analysis package exactly once. Attaching dplyr masks
# stats::filter()/lag() and base::intersect()/setdiff()/setequal()/union().
library(ggplot2)
library(dplyr)

# Read the dataset from the current working directory.
Dataone <- read.csv("whdataset10.csv")
# Q1 ----
# Print the PlantHeight column of Dataone as a one-column data frame.
Dataone |>
  select(PlantHeight)
## PlantHeight
## 1 93.7
## 2 100.3
## 3 83.8
## 4 87.2
## 5 90.8
## 6 100.7
## 7 87.6
## 8 84.3
## 9 83.4
## 10 97.0
## 11 96.2
## 12 107.8
## 13 85.0
## 14 83.0
## 15 88.6
## 16 93.6
## 17 98.5
## 18 90.0
## 19 76.1
## 20 132.6
## 21 91.7
## 22 93.9
## 23 78.3
## 24 91.3
## 25 95.7
## 26 75.2
## 27 85.7
## 28 95.0
## 29 75.8
## 30 87.8
## 31 89.5
## 32 94.0
## 33 95.7
## 34 127.1
## 35 121.2
## 36 98.3
## 37 124.5
## 38 90.0
## 39 87.8
## 40 94.9
## 41 119.7
## 42 91.6
## 43 87.4
## 44 108.0
## 45 91.7
## 46 80.8
## 47 82.9
## 48 88.6
## 49 106.6
## 50 104.1
## 51 129.0
## 52 103.4
## 53 130.6
## 54 100.6
## 55 93.1
## 56 95.8
## 57 93.8
## 58 91.1
## 59 102.4
## 60 96.8
## 61 111.8
## 62 79.6
## 63 85.2
## 64 127.2
## 65 81.7
## 66 82.2
## 67 104.7
## 68 85.2
## 69 105.0
## 70 99.9
## 71 76.9
## 72 109.7
## 73 83.9
## 74 88.2
## 75 77.8
## 76 105.7
## 77 77.7
## 78 105.0
## 79 120.4
## 80 110.3
## 81 87.7
## 82 88.3
## 83 100.9
## 84 100.2
## 85 83.0
## 86 91.9
## 87 133.3
## 88 85.2
## 89 101.8
## 90 89.4
## 91 134.2
## 92 113.8
## 93 120.1
## 94 93.4
## 95 116.9
## 96 117.1
## 97 109.3
## 98 98.9
## 99 95.7
## 100 110.4
## 101 105.6
## 102 85.7
## 103 112.1
## 104 116.1
## 105 93.5
## 106 96.4
## 107 130.9
## 108 106.6
## 109 101.0
## 110 95.3
## 111 96.8
## 112 91.4
## 113 88.2
## 114 102.3
## 115 117.3
## 116 96.0
## 117 106.9
## 118 135.2
## 119 91.1
## 120 97.4
## 121 96.4
## 122 96.2
## 123 78.7
## 124 86.2
## 125 135.0
## 126 92.1
## 127 105.3
## 128 102.6
## 129 82.2
## 130 99.4
## 131 104.5
## 132 92.0
## 133 125.2
## 134 135.2
## 135 93.0
## 136 106.5
## 137 117.9
## 138 117.6
## 139 89.6
## 140 90.6
## 141 107.8
## 142 95.4
## 143 100.1
## 144 99.4
## 145 93.9
## 146 91.5
## 147 111.9
## 148 113.3
## 149 93.8
## 150 97.1
## 151 99.8
## 152 96.3
## 153 100.1
## 154 123.3
## 155 90.1
## 156 70.2
## 157 103.5
## 158 123.1
## 159 101.1
## 160 80.3
## 161 89.5
## 162 86.8
## 163 89.3
## 164 80.0
## 165 98.1
## 166 120.5
## 167 106.7
## 168 92.7
## 169 132.2
## 170 95.3
## 171 94.8
## 172 79.2
## 173 87.9
## 174 115.4
## 175 76.7
## 176 111.7
## 177 82.6
## 178 94.6
## 179 98.0
## 180 86.0
## 181 112.6
## 182 78.6
## 183 114.1
## 184 97.8
## 185 96.9
## 186 99.6
## 187 113.9
## 188 79.7
## 189 94.9
## 190 92.0
## 191 86.6
## 192 96.6
## 193 86.5
## 194 80.4
## 195 86.5
## 196 88.8
## 197 71.0
## 198 113.9
## 199 100.4
## 200 95.9
## 201 99.0
## 202 79.0
## 203 98.2
## 204 104.8
## 205 110.2
## 206 87.3
## 207 85.7
## 208 76.7
## 209 87.3
## 210 101.1
## 211 92.1
## 212 111.6
## 213 101.0
## 214 78.1
## 215 134.2
## 216 84.5
## 217 129.8
## 218 77.9
## 219 109.8
## 220 103.9
## 221 99.7
## 222 114.2
## 223 102.3
## 224 89.7
## 225 104.0
## 226 116.6
## 227 95.4
## 228 112.0
## 229 90.9
## 230 106.6
## 231 93.2
## 232 76.0
## 233 98.4
## 234 82.2
## 235 99.1
## 236 116.0
## 237 89.7
## 238 78.3
## 239 130.0
## 240 115.3
## 241 95.5
## 242 86.6
## 243 103.7
## 244 129.8
## 245 100.0
## 246 87.3
## 247 84.1
## 248 86.5
## 249 119.4
## 250 95.1
## 251 81.8
## 252 97.5
## 253 106.9
## 254 84.8
## 255 133.0
## 256 89.1
## 257 105.6
## 258 108.9
## 259 86.4
## 260 90.3
## 261 84.7
## 262 98.2
## 263 97.8
## 264 126.7
## 265 115.3
## 266 98.7
## 267 83.3
## 268 88.7
## 269 122.2
## 270 113.9
## 271 104.9
## 272 101.4
## 273 96.8
## 274 86.8
## 275 95.7
## 276 81.2
## 277 87.6
## 278 99.4
## 279 100.9
## 280 104.0
## 281 94.8
## 282 92.8
## 283 101.2
## 284 97.7
## 285 112.5
## 286 92.0
## 287 114.4
## 288 76.5
## 289 75.1
## 290 77.5
## 291 87.3
## 292 105.0
## 293 85.6
## 294 74.6
## 295 75.9
## 296 91.8
## 297 101.2
## 298 102.8
## 299 94.9
## 300 90.0
# Keep the PlantHeight column on its own, then print a one-row summary whose
# single column, meanheight, holds the mean of PlantHeight.
Dfph <- Dataone |>
  select(PlantHeight)
Dfph |>
  summarise(meanheight = mean(PlantHeight))
## meanheight
## 1 98.05267
# Q2 ----
# Keep only the two columns used to compare plant height between periods.
P1 <- Dataone |>
  select(Period, PlantHeight)

# Rows belonging to each period, stored as separate data frames.
P2 <- P1 |>
  filter(Period == "Pre-1970")
P3 <- P1 |>
  filter(Period == "Post-1970")

library(ggplot2)

# Boxplot of PlantHeight for each Period, with outline colour mapped to Period.
P4 <- ggplot(P1, aes(x = Period, y = PlantHeight)) +
  geom_boxplot(aes(colour = Period))

# Print the plot with axis labels and a title added; P4 itself is stored
# without them.
P4 +
  labs(
    x = "Period of Trial",
    y = "Plant Height (cm)",
    title = "Figure 1: Pre-1970 plant height is observed to be taller than post-1970 plant height"
  )
# Q3 ----
# Keep only the grain-weight and SNP_15 genotype columns.
P5 <- Dataone |>
  select(ThousandGrainWeight, SNP_15)

# Rows for each SNP_15 genotype, stored as separate data frames.
P6 <- P5 |>
  filter(SNP_15 == "GG")
P7 <- P5 |>
  filter(SNP_15 == "CC")

# Point plot of ThousandGrainWeight against SNP_15, with both point shape and
# colour mapped to the genotype.
# NOTE(review): the title string ends with a trailing space; it is kept as-is.
ggplot(P5, aes(x = SNP_15, y = ThousandGrainWeight)) +
  geom_point(aes(shape = SNP_15, colour = SNP_15)) +
  labs(
    x = "SNP15 Genotype",
    y = "Thousand Grain Weight (grams)",
    title = "Figure 2: The more common CC genotype is associated with heavier grains "
  )
# Q4 ----
# Thousand-grain weights for each SNP_15 genotype, extracted as plain vectors
# (ONR holds the "GG" rows, PNR holds the "CC" rows).
ONR <- P5$ThousandGrainWeight[P5$SNP_15 == "GG"]
PNR <- P5$ThousandGrainWeight[P5$SNP_15 == "CC"]

# Two-sample t-test of ONR against PNR with the variances treated as equal.
t.test(x = ONR, y = PNR, var.equal = TRUE)
##
## Two Sample t-test
##
## data: ONR and PNR
## t = -4.8839, df = 298, p-value = 1.7e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.256755 -1.811532
## sample estimates:
## mean of x mean of y
## 32.15616 35.19031
# The output above is the two-sample t-test with equal variances assumed.
# Welch two-sample t-test of ONR against PNR: var.equal = FALSE (the t.test
# default) means the two group variances are not pooled.
t.test(x = ONR, y = PNR, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: ONR and PNR
## t = -4.6068, df = 111.29, p-value = 1.095e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.339210 -1.729078
## sample estimates:
## mean of x mean of y
## 32.15616 35.19031
# The output above is the Welch two-sample t-test (equal variances not assumed).