#Student Id - 2598414

# Setup ----

# Pin a CRAN mirror so that any installation below runs non-interactively.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install only the packages that are missing. Calling install.packages() for a
# package that is already attached is refused ("is in use and will not be
# installed"), and re-installing one whose DLL is loaded can fail with a
# file-lock error on Windows, so the check must happen before library().
required_packages <- c("ggplot2", "knitr", "dplyr")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Attach the packages used by the analysis (each exactly once).
library(ggplot2)
library(dplyr)

# Load the data set; the path is relative to the current working directory.
Dataone <- read.csv("whdataset10.csv")

#Q1

# Print the PlantHeight column of Dataone as a one-column data frame.
Dataone %>% select(PlantHeight)
##     PlantHeight
## 1          93.7
## 2         100.3
## 3          83.8
## 4          87.2
## 5          90.8
## 6         100.7
## 7          87.6
## 8          84.3
## 9          83.4
## 10         97.0
## 11         96.2
## 12        107.8
## 13         85.0
## 14         83.0
## 15         88.6
## 16         93.6
## 17         98.5
## 18         90.0
## 19         76.1
## 20        132.6
## 21         91.7
## 22         93.9
## 23         78.3
## 24         91.3
## 25         95.7
## 26         75.2
## 27         85.7
## 28         95.0
## 29         75.8
## 30         87.8
## 31         89.5
## 32         94.0
## 33         95.7
## 34        127.1
## 35        121.2
## 36         98.3
## 37        124.5
## 38         90.0
## 39         87.8
## 40         94.9
## 41        119.7
## 42         91.6
## 43         87.4
## 44        108.0
## 45         91.7
## 46         80.8
## 47         82.9
## 48         88.6
## 49        106.6
## 50        104.1
## 51        129.0
## 52        103.4
## 53        130.6
## 54        100.6
## 55         93.1
## 56         95.8
## 57         93.8
## 58         91.1
## 59        102.4
## 60         96.8
## 61        111.8
## 62         79.6
## 63         85.2
## 64        127.2
## 65         81.7
## 66         82.2
## 67        104.7
## 68         85.2
## 69        105.0
## 70         99.9
## 71         76.9
## 72        109.7
## 73         83.9
## 74         88.2
## 75         77.8
## 76        105.7
## 77         77.7
## 78        105.0
## 79        120.4
## 80        110.3
## 81         87.7
## 82         88.3
## 83        100.9
## 84        100.2
## 85         83.0
## 86         91.9
## 87        133.3
## 88         85.2
## 89        101.8
## 90         89.4
## 91        134.2
## 92        113.8
## 93        120.1
## 94         93.4
## 95        116.9
## 96        117.1
## 97        109.3
## 98         98.9
## 99         95.7
## 100       110.4
## 101       105.6
## 102        85.7
## 103       112.1
## 104       116.1
## 105        93.5
## 106        96.4
## 107       130.9
## 108       106.6
## 109       101.0
## 110        95.3
## 111        96.8
## 112        91.4
## 113        88.2
## 114       102.3
## 115       117.3
## 116        96.0
## 117       106.9
## 118       135.2
## 119        91.1
## 120        97.4
## 121        96.4
## 122        96.2
## 123        78.7
## 124        86.2
## 125       135.0
## 126        92.1
## 127       105.3
## 128       102.6
## 129        82.2
## 130        99.4
## 131       104.5
## 132        92.0
## 133       125.2
## 134       135.2
## 135        93.0
## 136       106.5
## 137       117.9
## 138       117.6
## 139        89.6
## 140        90.6
## 141       107.8
## 142        95.4
## 143       100.1
## 144        99.4
## 145        93.9
## 146        91.5
## 147       111.9
## 148       113.3
## 149        93.8
## 150        97.1
## 151        99.8
## 152        96.3
## 153       100.1
## 154       123.3
## 155        90.1
## 156        70.2
## 157       103.5
## 158       123.1
## 159       101.1
## 160        80.3
## 161        89.5
## 162        86.8
## 163        89.3
## 164        80.0
## 165        98.1
## 166       120.5
## 167       106.7
## 168        92.7
## 169       132.2
## 170        95.3
## 171        94.8
## 172        79.2
## 173        87.9
## 174       115.4
## 175        76.7
## 176       111.7
## 177        82.6
## 178        94.6
## 179        98.0
## 180        86.0
## 181       112.6
## 182        78.6
## 183       114.1
## 184        97.8
## 185        96.9
## 186        99.6
## 187       113.9
## 188        79.7
## 189        94.9
## 190        92.0
## 191        86.6
## 192        96.6
## 193        86.5
## 194        80.4
## 195        86.5
## 196        88.8
## 197        71.0
## 198       113.9
## 199       100.4
## 200        95.9
## 201        99.0
## 202        79.0
## 203        98.2
## 204       104.8
## 205       110.2
## 206        87.3
## 207        85.7
## 208        76.7
## 209        87.3
## 210       101.1
## 211        92.1
## 212       111.6
## 213       101.0
## 214        78.1
## 215       134.2
## 216        84.5
## 217       129.8
## 218        77.9
## 219       109.8
## 220       103.9
## 221        99.7
## 222       114.2
## 223       102.3
## 224        89.7
## 225       104.0
## 226       116.6
## 227        95.4
## 228       112.0
## 229        90.9
## 230       106.6
## 231        93.2
## 232        76.0
## 233        98.4
## 234        82.2
## 235        99.1
## 236       116.0
## 237        89.7
## 238        78.3
## 239       130.0
## 240       115.3
## 241        95.5
## 242        86.6
## 243       103.7
## 244       129.8
## 245       100.0
## 246        87.3
## 247        84.1
## 248        86.5
## 249       119.4
## 250        95.1
## 251        81.8
## 252        97.5
## 253       106.9
## 254        84.8
## 255       133.0
## 256        89.1
## 257       105.6
## 258       108.9
## 259        86.4
## 260        90.3
## 261        84.7
## 262        98.2
## 263        97.8
## 264       126.7
## 265       115.3
## 266        98.7
## 267        83.3
## 268        88.7
## 269       122.2
## 270       113.9
## 271       104.9
## 272       101.4
## 273        96.8
## 274        86.8
## 275        95.7
## 276        81.2
## 277        87.6
## 278        99.4
## 279       100.9
## 280       104.0
## 281        94.8
## 282        92.8
## 283       101.2
## 284        97.7
## 285       112.5
## 286        92.0
## 287       114.4
## 288        76.5
## 289        75.1
## 290        77.5
## 291        87.3
## 292       105.0
## 293        85.6
## 294        74.6
## 295        75.9
## 296        91.8
## 297       101.2
## 298       102.8
## 299        94.9
## 300        90.0
# Keep only the PlantHeight column, then print its mean as a one-row data
# frame whose single column is named `meanheight`.
Dfph <- Dataone %>% select(PlantHeight)
Dfph %>% summarise(meanheight = mean(PlantHeight))
##   meanheight
## 1   98.05267

#Q2

# Restrict the data to the two columns needed for the period comparison.
P1 <- Dataone %>% select(Period, PlantHeight)

# Per-period subsets (not referenced again in the visible part of this script).
P2 <- P1 %>% filter(Period == "Pre-1970")
P3 <- P1 %>% filter(Period == "Post-1970")

library(ggplot2)

# Base boxplot: one box of plant height per period, outline coloured by period.
P4 <- ggplot(P1, aes(x = Period, y = PlantHeight)) +
  geom_boxplot(aes(colour = Period))

# Print the plot with axis labels and the figure caption as its title.
P4 +
  labs(
    x = "Period of Trial",
    y = "Plant Height (cm)",
    title = "Figure 1: Pre-1970 plant height is observed to be taller than post-1970 plant height"
  )

#Q3

# Restrict the data to grain weight and the SNP_15 genotype column.
P5 <- Dataone %>% select(ThousandGrainWeight, SNP_15)

# Per-genotype subsets (not referenced again in the visible part of this
# script).
P6 <- P5 %>% filter(SNP_15 == "GG")
P7 <- P5 %>% filter(SNP_15 == "CC")

# Point plot of grain weight against genotype; both the point shape and the
# point colour are mapped to SNP_15.
ggplot(P5, aes(x = SNP_15, y = ThousandGrainWeight)) +
  geom_point(aes(shape = SNP_15, colour = SNP_15)) +
  labs(
    x = "SNP15 Genotype",
    y = "Thousand Grain Weight (grams)",
    title = "Figure 2: The more common CC genotype is associated with heavier grains "
  )

#Q4

# Thousand grain weights split by SNP_15 genotype: ONR holds the "GG" rows,
# PNR holds the "CC" rows.
ONR <- P5$ThousandGrainWeight[P5$SNP_15 == "GG"]
PNR <- P5$ThousandGrainWeight[P5$SNP_15 == "CC"]

# Two-sample t-test with var.equal = TRUE (pooled variance).
t.test(x = ONR, y = PNR, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  ONR and PNR
## t = -4.8839, df = 298, p-value = 1.7e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.256755 -1.811532
## sample estimates:
## mean of x mean of y 
##  32.15616  35.19031
# The output above is the two-sample t-test with equal variances assumed.
# Repeat the comparison without assuming equal variances (Welch's t-test).
t.test(x = ONR, y = PNR, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  ONR and PNR
## t = -4.6068, df = 111.29, p-value = 1.095e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.339210 -1.729078
## sample estimates:
## mean of x mean of y 
##  32.15616  35.19031
# The output above is the Welch two-sample t-test (equal variances not assumed).