library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
### Orchid Code ###
## Before reading the data into RStudio ----
# Check the Col names -- modification
Orchid <- read.csv("Orchid.csv")
names(Orchid)
## [1] "plant" "plot.No" "plant.No" "date"
## [5] "time" "flower.No" "condition" "spur.length"
## [9] "nectar.level" "sugar.percent" "scent" "pollinia.L"
## [13] "pollinia.R" "latitude" "longitude"
head(Orchid)
## plant plot.No plant.No date time flower.No condition spur.length
## 1 1 1 1 09-Jul 12:29 1 3 36
## 2 1 1 1 09-Jul 12:29 2 3 40
## 3 1 1 1 09-Jul 12:29 3 3 37
## 4 1 1 1 09-Jul 12:29 4 3 NA
## 5 1 1 1 09-Jul 12:29 5 3 NA
## 6 2 1 2 09-Jul 12:34 1 3 44
## nectar.level sugar.percent scent pollinia.L pollinia.R latitude
## 1 2 28 2 P P 7.143
## 2 12 28 2 P P 7.143
## 3 4 28 2 P P 7.143
## 4 NA NA 2 P P 7.143
## 5 NA NA 2 P P 7.143
## 6 7 27 2 P P 7.141
## longitude
## 1 38.457
## 2 38.457
## 3 38.457
## 4 38.457
## 5 38.457
## 6 38.450
## Question1 ## ----
## How does the sugar concentration compare between individual flowers on a single plant?
## Another way to put the question is: how variable is the sugar concentration between flowers in the same plant?
# Since Q1 we are focusing on sugar concentration, using 'Index.Q1' marking the non-NA data
# the index of data that sugar concentration is not NA
Index.Q1 <-!is.na(Orchid$sugar.percent)
# the dataset used for Q1
# it is a subdataset with full record of sugar concentration
Orchid.Q1 <- Orchid[Index.Q1,]
# the number of unique plants that has sugar concentration data
# the total number of plants is 172
length(unique(Orchid.Q1$plant))
## [1] 168
# use a loop to record the number of flowers for each plant
# n.flower <- NULL
# for(i in 1:172){
# n.flower <- c(n.flower,length(unique(Orchid.Q1$flower.No[Orchid.Q1$plant==i])))
# # if the number of flower is 0, print the plant number
# if(length(unique(Orchid.Q1$flower.No[Orchid.Q1$plant==i]))==0) print(i)
# }
# # 8, 31, 111, 164
# table(n.flower)
Plant.Index <- unique(Orchid.Q1$plant)
SD.vec <- NULL
RANGE.vec <- NULL
N.flower.vec <- NULL
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
Plant.Sugar <- Orchid.Q1$sugar.percent[INDEX]
SD.vec <- c(SD.vec,sd(Plant.Sugar))
RANGE.vec <- c(RANGE.vec,diff(range(Plant.Sugar)))
N.flower.vec <- c(N.flower.vec,max(Orchid.Q1$flower.No[INDEX]))
}
SD.vec[48] <- 0
# Histogram of 168 Sd and Range:
# hist(SD.vec,ylim=c(0,110),main="Histogram of Standard Deviation of \n Percentage
# Sugar Content in Each Plant",xlab="Standard Deviation",breaks=seq(0,10.5,by=1.5))
# hist(RANGE.vec,ylim=c(0,100),main="Histogram of the Ranges of the \n Percentage Sugar Content in Each Plant",
# xlab="Range",breaks=seq(0,20,by=2))
#
# plot(SD.vec~N.flower.vec,xlab="Number of Flowers",ylab="Standard Deviation",
# main="Scatterplot of Standard Deviations in Each Plant \n Against the Number of Flowers Sampled from Each Plant")
# plot(RANGE.vec~N.flower.vec,xlab="Number of Flowers",ylab="Range",
# main="Scatterplot of Range in Each Plant \n Against the Number of Flowers Sampled from Each Plant")
df.Q1 <- data.frame(SD=SD.vec,N.flower=N.flower.vec,Range=RANGE.vec)
# Q1 Histograms ------
ggplot(df.Q1,aes(df.Q1$SD)) + geom_histogram(binwidth = 1.5,fill=I("turquoise2"),col=I("lightblue3"),alpha=0.5) +
labs(x="Standard Deviation",title="Histogram of Standard Deviation of Percentage
Sugar Content in Each Plant") + theme(plot.title = element_text(hjust = 0.5))

ggplot(df.Q1,aes(df.Q1$Range)) + geom_histogram(binwidth =2,fill=I("plum2"),col=I("khaki1"),alpha=0.5) +
labs(x="Range",title="Histogram of the Ranges of the Percentage Sugar Content in Each Plant") + theme(plot.title = element_text(hjust = 0.5))

# Q1 Scatter plots ----
ggplot(df.Q1,aes(x=N.flower,y=SD)) + geom_point(alpha=0.5,size=3,col="chartreuse4") +
labs(x="Number of Flowers",y="Standard Deviation",title="Scatterplot of Standard Deviations in Each Plant \n Against the Number of Flowers Sampled from Each Plant")+
theme(plot.title = element_text(hjust = 0.5))

# not very much variation for the most part
ggplot(df.Q1,aes(x=N.flower,y=Range)) + geom_point(alpha=0.5,size=3,col="purple") +
labs(x="Number of Flowers",y="Range",title="Scatterplot of Range in Each Plant Against the Number of Flowers Sampled from Each Plant")

# The spread is small for many of the plants
## Question2 ## ----
## How does the sugar concentration compare between individual plants?
Day.vec <- as.numeric(substr(Orchid.Q1$date,1,2)) # for later, Q5
Day.num.vec <- NULL
Mean.Sugar.vec <- NULL
Mean.Condition.vec <- NULL
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
Plant.Sugar <- Orchid.Q1$sugar.percent[INDEX]
Condition <- Orchid.Q1$condition[INDEX]
Mean.Sugar.vec <- c(Mean.Sugar.vec,mean(Plant.Sugar))
Mean.Condition.vec <- c(Mean.Condition.vec,mean(Condition))
Day.num.vec <- c(Day.num.vec, as.numeric(unique(Day.vec[INDEX])[1]))
}
df.Q2 <- data.frame(MeanSugar=Mean.Sugar.vec,MeanCondition=Mean.Condition.vec)
# Q2 histogram ----
#hist(Mean.Sugar.vec,xlab="Average Sugar Concentration",main = "Histogram of the Averages of the \n Percentage Sugar Content in Each Plant")
ggplot(df.Q2,aes(x=df.Q2$MeanSugar)) + geom_histogram(binwidth = 4, fill="goldenrod1",col=I("darkorange2"),alpha=0.5) +
labs(x="Average Sugar Concentration",title="Histogram of the Averages of the Percentage Sugar Content in Each Plant") +
theme(plot.title = element_text(hjust = 0.5))

# Q2 boxplot ----
# boxplot(Mean.Sugar.vec,main="Boxplot of the Average Sugar Concentrations")
ggplot(df.Q2,aes(x="",y=df.Q2$MeanSugar)) + geom_boxplot(width=0.35) +
labs(x="",y="Average Sugar Concentration",title="Boxplot of the Average Sugar Concentrations") +
theme(plot.title = element_text(hjust = 0.5))

# Q2 scatterplot ----
#plot(Mean.Condition.vec,Mean.Sugar.vec,xlab="Condition",ylab="Sugar Concentration (%)",main="Scatterplot of Average Sugar Concentration in the Plant \n Against Average Condition of the Flowers on the Plant")
ggplot(df.Q2,aes(x=df.Q2$MeanCondition,y=df.Q2$MeanSugar)) + geom_point(alpha=0.5,size=3,col="firebrick1") +
labs(x="Condition",y="Sugar Concentration (%)",title="Scatterplot of Average Sugar Concentration in the Plant Against Average Condition of the Flowers on the Plant") + theme(plot.title = element_text(hjust = 0.5))

## Question3 ## ----
## Does the sugar concentration change within a twenty-four-hour period?
HourMin <- as.POSIXct(Orchid.Q1$time, format="%H:%M")
df.Q3 <- data.frame(time = HourMin,sugar.percent=Orchid.Q1$sugar.percent)
# Q3 Scatterplot 1 ----
#plot(sugar.percent~time,data=Orchid.Q1,xlab="Time of Day",ylab="Percent Sugar Content",main="Variation in Sugar Concentration \n in the Flowers Over the Day")
ggplot(df.Q3,aes(x=time,y=sugar.percent)) + geom_point(alpha=0.25,size=3,col="limegreen") +
labs(x="Time of Day",y="Percent Sugar Content",title ="Variation in Sugar Concentration in the Flowers Over the Day") + theme(plot.title = element_text(hjust = 0.5))

# Q3 Scatterplot 2 ----
Hour.fraction <- integer(length(HourMin))
Hour.fraction[minute(HourMin)>30] <- 1
Hour.integer <- hour(HourMin)
Hour <- Hour.fraction + Hour.integer
Mean.Half.Hour <- aggregate(Orchid.Q1$sugar.percent,list(Hour),mean)
ggplot(Mean.Half.Hour,aes(x=Group.1,y=x)) + geom_point(size=5,alpha=0.5,shape=18,col="maroon1") +
labs(x="Time of Day",y="Percent Sugar Concentration",title="Average Variation in Sugar Concentration in the Flowers Over the Day by Half Hour Periods")+
theme(plot.title = element_text(hjust = 0.5))

## Question 4 ## ----
## Is there a difference in the sugar concentration between plants that flowered early
## in the season compared to plants that folowered late in the season?
Date <- as.Date(paste("17",Orchid.Q1$date,sep="-"),"%y-%d-%b")
Date1 <- format(Date, format="%m-%d")
Uniquedate.Mean.Sugar <- NULL
Uniquedate.Date.vec <- NULL
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
# Unique.date <- unique(Orchid.Q1$date[INDEX])
Unique.date <- unique(Date1[INDEX])
if(length(Unique.date)==1){
Uniquedate.Mean.Sugar <- c(Uniquedate.Mean.Sugar, mean(Orchid.Q1$sugar.percent[INDEX]))
Uniquedate.Date.vec <- c(Uniquedate.Date.vec,as.character(Unique.date))
}
}
# Q4 Scatterplot ----
df.Q4 <- data.frame(Date = Uniquedate.Date.vec, Mean.Sugar=Uniquedate.Mean.Sugar)
ggplot(data=df.Q4, aes(Date,Mean.Sugar)) + geom_point(size=3,col=I("deepskyblue"),alpha=0.5)+
labs(x="Date",y="Sugar Concentration",title="Scatterplot of the Average Sugar Concentration per Plant by Date of Measurement")+
theme(plot.title = element_text(hjust = 0.5))

## Question 5 ## ----
## Is there a difference between the sugar concentration in the early opened flowers and lately
## opened flowers on an individual plant, i.e. bottom versus top of the plant?
# My process of the dataset may have some glitch!
# Q5 -- Prat I -----
Bi.Mean.Sugar <- NULL
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
Unique.date <- unique(Date[INDEX])
Sugar.plant <- Orchid.Q1$sugar.percent[INDEX]
Date.plant <- Date[INDEX]
if(length(unique(Date.plant))==2){
Mean.Sugar1 <- mean(Sugar.plant[Date.plant==Unique.date[1]])
Mean.Sugar2 <- mean(Sugar.plant[Date.plant==Unique.date[2]])
Bi.Mean.Sugar <- rbind(Bi.Mean.Sugar,c(Mean.Sugar1,Mean.Sugar2))
}
}
Diff.diff.time <- Bi.Mean.Sugar[,1]-Bi.Mean.Sugar[,2]
Q5.table0 <- rbind(cbind(apply(Bi.Mean.Sugar,2,mean),apply(Bi.Mean.Sugar,2,sd)),
c(mean(Diff.diff.time),sd(Diff.diff.time)))
Q5.table <- cbind(dim(Bi.Mean.Sugar)[1],Q5.table0,Q5.table0[,2]/sqrt(dim(Bi.Mean.Sugar)[1]))
row.names(Q5.table) <- c("Early date","Later date","Difference")
colnames(Q5.table) <- c("N","Mean","StDev","SE Mean")
round(Q5.table,3)
## N Mean StDev SE Mean
## Early date 39 24.916 2.635 0.422
## Later date 39 22.534 5.549 0.889
## Difference 39 2.383 5.434 0.870
# Q5 Boxplot I ----
# boxplot(Diff.diff.time)
df.Q5.1 <- data.frame(Diff=Diff.diff.time)
ggplot(df.Q5.1,aes(x="",y=Diff)) + geom_boxplot(width=0.35) +
labs(x="",y="Difference",title="Boxplot of Differences") +theme(plot.title = element_text(hjust = 0.5))

t.test(Bi.Mean.Sugar[,1],Bi.Mean.Sugar[,2],paired = T)
##
## Paired t-test
##
## data: Bi.Mean.Sugar[, 1] and Bi.Mean.Sugar[, 2]
## t = 2.7383, df = 38, p-value = 0.009348
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.6211556 4.1440154
## sample estimates:
## mean of the differences
## 2.382585
# Q5 -- Part II ----
Bottom.vec <- NULL
Top.vec <- NULL
count = 0
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
Sugar.plant <- Orchid.Q1$sugar.percent[INDEX]
n <- length(Sugar.plant)
if(length(unique(Day.vec[INDEX]))==1 & length(Sugar.plant)>=6){
Top.vec <- c(Top.vec,mean(Sugar.plant[1:3]))
Bottom.vec <- c(Bottom.vec,mean(Sugar.plant[(n-2):n]))
count=count+1
}
}
Q5.Mat <- cbind(count, rbind(c(mean(Top.vec),sd(Top.vec)),c(mean(Bottom.vec),sd(Bottom.vec)),c(mean(Top.vec-Bottom.vec),sd(Top.vec-Bottom.vec))))
Q5.Mat <- cbind(Q5.Mat,Q5.Mat[,3]/sqrt(Q5.Mat[,1]))
rownames(Q5.Mat) <- c("Bottom flowers","Top flowers", "Difference")
colnames(Q5.Mat) <- c("N","Mean","StDev","SE Mean")
Q5.Mat
## N Mean StDev SE Mean
## Bottom flowers 67 24.2462687 5.374162 0.6565584
## Top flowers 67 23.6019900 2.944629 0.3597436
## Difference 67 0.6442786 4.562685 0.5574207
# Q5 Boxplot II ----
df.Q5.2 <- data.frame(Diff=Top.vec-Bottom.vec)
ggplot(df.Q5.2,aes(x="",y=Diff)) + geom_boxplot(width=0.35) +
labs(x="",y="Difference",title="Boxplot of Differences") +
theme(plot.title = element_text(hjust = 0.5))

t.test(Top.vec,Bottom.vec,paired = T)
##
## Paired t-test
##
## data: Top.vec and Bottom.vec
## t = 1.1558, df = 66, p-value = 0.2519
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4686477 1.7572049
## sample estimates:
## mean of the differences
## 0.6442786
# Question 6 ## ----
## Is there a difference in the sugar concentration between plants with a large
## inflourescene and a plant with a small inflourescence?
flower.no.total <- NULL
for(j in Plant.Index){
INDEX <- Orchid.Q1$plant==j
Plant.Flower.vec <- Orchid.Q1$flower.No[INDEX]
flower.no.total <- c(flower.no.total,Plant.Flower.vec[length(Plant.Flower.vec)])
}
# Q6 Scatterplot ----
df.Q6 <- data.frame(Mean.Sugar=Mean.Sugar.vec,Flower.Total=flower.no.total,Date=Day.num.vec) # Mean.Sugar.vec is from Q2.
ggplot(df.Q6,aes(Flower.Total,Mean.Sugar)) + geom_point(alpha=0.7,size=3,col="gold2") +
labs(x="Number of Flowers",y="Sugar Concentration",title="Scatterplot of the Average Sugar Concentration per Plant by the Number of Flowers on the Plant") +
theme(plot.title = element_text(hjust = 0.5))

Mod.Q6 <- lm(Mean.Sugar~Flower.Total,data=df.Q6)
summary(Mod.Q6)
##
## Call:
## lm(formula = Mean.Sugar ~ Flower.Total, data = df.Q6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.0069 -1.8828 0.3507 2.6062 11.4803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.33589 0.75162 31.05 <2e-16 ***
## Flower.Total 0.05699 0.08504 0.67 0.504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.114 on 166 degrees of freedom
## Multiple R-squared: 0.002698, Adjusted R-squared: -0.00331
## F-statistic: 0.4491 on 1 and 166 DF, p-value: 0.5037
anova(Mod.Q6)
## Analysis of Variance Table
##
## Response: Mean.Sugar
## Df Sum Sq Mean Sq F value Pr(>F)
## Flower.Total 1 7.6 7.5994 0.4491 0.5037
## Residuals 166 2809.1 16.9220
Mod1.Q6 <- lm(Mean.Sugar~Flower.Total+Date,data=df.Q6)
summary(Mod1.Q6)
##
## Call:
## lm(formula = Mean.Sugar ~ Flower.Total + Date, data = df.Q6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8914 -2.1183 0.1565 2.7022 11.8499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.57044 1.41641 18.053 <2e-16 ***
## Flower.Total 0.04591 0.08463 0.543 0.5882
## Date -0.15445 0.08322 -1.856 0.0652 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.084 on 165 degrees of freedom
## Multiple R-squared: 0.02309, Adjusted R-squared: 0.01125
## F-statistic: 1.95 on 2 and 165 DF, p-value: 0.1455
anova(Mod1.Q6)
## Analysis of Variance Table
##
## Response: Mean.Sugar
## Df Sum Sq Mean Sq F value Pr(>F)
## Flower.Total 1 7.60 7.599 0.4557 0.50059
## Date 1 57.45 57.447 3.4448 0.06523 .
## Residuals 165 2751.60 16.676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Question 7 ## ----
## How does the scent fluctuate within a twenty-four-hour period?
Orchid.Q7 <- Orchid[complete.cases(Orchid[,c("time","scent")]),]
Orchid.Q7$time <- as.POSIXct(Orchid.Q7$time, format="%H:%M")
# Q7 - Scatterplot
ggplot(Orchid.Q7,aes(time,scent)) + geom_point(alpha=0.2,size=3,col="slateblue1") +
labs(x="Time of Day",y="Strenth of Scent", title="Strength of the Scent of Flowers Varying by Time of Day")
