Before beginning to use R, you will need to download it to your computer. R can be downloaded from the CRAN website. You will then choose the download that is best for your system.
R studio tends to be more user friendly than R, so after you have downloaded R, download Rstudio from the Rstudio website. R must be installed before you can install R studio!
There are MANY packages available in R that allow you to use functions that other people have created
If you are working on a Windows OS, any backslash (‘\’) in a Windows file path will need to be replaced by forward slash (‘/’)
NOTE: R is case sensitive, so ‘file’ and ‘File’, for example, are not the same
data1=read.csv(file="filename.csv", header=TRUE, sep=",")
data2=read.table(file="filename.txt", header=TRUE, sep=",", na.strings="NA")
qplot(x=xvar,y=yvar,data=dataset-name)
The name of your data set would replace dataset-name
If you have multiple groups (e.g. treatment groups) in your data set and wish to distinguish between these in your plot, you can add another argument to the line of code:
qplot(x=,y=,data=, colour=groupingvariable)
library(ggplot2) # Load necessary package - make sure this package has been installed onto your system first
data(Orange) #load in the data on Orange trees
head(Orange) # Look at the first 6 rows of data to get a better understanding of the data
## Tree age circumference
## 1 1 118 30
## 2 1 484 58
## 3 1 664 87
## 4 1 1004 115
## 5 1 1231 120
## 6 1 1372 142
plot1=qplot(x=age,y=circumference, data=Orange) #plot AGE on the x-axis and CIRCUMFERENCE on the y-axis
plot1 #Print the plot that was stored as plot1 in the line of code above
The data in the Orange data set is actually collected over time, and we have multiple measurements for each tree. (You may be collecting measurements on mice over time, vs trees for example, or may want to distinguish between different populations, replicates, etc.)
To make a distinction between different groups in the data set (different trees in this example), we can color the plot based on the group. This is done by adding the colour option.
plot2=qplot(x=age,y=circumference,colour=Tree, data=Orange) #Add color to the basic scatterplot by using colour=
plot2 #Print the plot
As noted above, this data was collected over time. Therefore, it may make sense to connect the points for each unique tree to see it’s change in size as it increases in age. (This type of plot is helpful for visualizing change over time in a variety of situations, like change in tumor size in individual mice over time)
plot3=qplot(x=age,y=circumference,data=Orange,colour=Tree,geom="line") # create lines (instead of points)
plot3 #print the plot
If your data is not collected overtime and is not grouped (so distinguishing between color is not needed), you may be interested in viewing a trend among your data. To do so, you would add the geom=c(“point”,“smooth”) code, as done below. This will provide you with a smoothed trendline and also 95% confidence bands.
plot4=qplot(x=age,y=circumference,data=Orange,geom=c("point","smooth")) #Add a trend line
plot4
If you are interested, instead, in fitting a best-fit line through your data, you would need to add geom_smooth(method=lm) to the line of code, as shown below.
plot5=qplot(x=age,y=circumference,data=Orange)+geom_smooth(method=lm) #Add a line of best fit
plot5
If you would then like to know the equation of your line of best fit, you can run the linear model function.
lm(circumference~age,data=Orange)
##
## Call:
## lm(formula = circumference ~ age, data = Orange)
##
## Coefficients:
## (Intercept) age
## 17.400 0.107
plot6=qplot(x=age,y=circumference,data=Orange)+geom_abline(intercept=100,slope=0,colour="red",size=1.5) #Add a red horizontal line at Y=100
plot6
NOTE: This example uses a different data set than the previous examples
Suppose you want to distinguish not only between treatment groups, but are also interested in distinguishing between the different doses within each treatment group, for example. Or perhaps you are looking at different populations, which you want to distinguish from one another, but also have technical replicates within each population that you are interested in distinguishing between. Then it may be helpful to color-coordinate one variable while having symbols to distinguish between the other variable.
data(ToothGrowth) # Load in the data set
tooth=ToothGrowth #store the data as 'tooth'
head(tooth) #view the first 6 rows of data to see what it looks like
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
shape_color=ggplot(data=ToothGrowth, mapping=aes(x=supp,y=len,colour=supp,shape=as.factor(dose)))+geom_jitter()
shape_color #Print the plot
To view all of the plots at once, perhaps for comparison purposes, you can create a lattice of the plots. Below, 6 of the 7 plots from above are plotted together.
library(grid)
pushViewport(viewport(layout=grid.layout(2,3)))
print(plot1,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(plot4,vp=viewport(layout.pos.row=1,layout.pos.col=2))
print(plot3,vp=viewport(layout.pos.row=2,layout.pos.col=1))
print(plot2,vp=viewport(layout.pos.row=2,layout.pos.col=2))
print(plot5,vp=viewport(layout.pos.row=1,layout.pos.col=3))
print(plot6,vp=viewport(layout.pos.row=2,layout.pos.col=3))
The first example uses a data set in which there are 6 different types of insect repellent. We are interested in comparing the number of insect bites based on the type of repellent used.
library(data.table) #load in the necessary libraries
library(ggplot2)
data(InsectSprays) #read in the Insect Spray data set
head(InsectSprays) #view the top 6 rows of data to see what the variables are
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
insect=InsectSprays #assign the data values to an object called 'insect'
#find the mean, standard deviation, and standard error of each group (type of insect repellent) in the 'insect' data set
insect_table=data.table(insect)
insect_summary=insect_table[,sapply(.SD,function(x) list(mean=round(mean(x),3),sd=round(sd(x),3),se=round(sqrt(var(x))/length(x),3))),by=spray]
insect_summary=setnames(insect_summary,c("Spray","Mean","Std.Dev","Std.Err")) #Make the names of the data set more informative
#Create a bar chart to compare the different types of insect repellent
bar_insectspray=ggplot(insect_summary,aes(x=Spray,y=Mean,fill=Spray))+geom_bar(stat="identity")+xlab("Insect Spray")+ylab("Avg Number of Insect Bites")+ ggtitle("Comparing Insect Repellents")+geom_errorbar(aes(ymin=Mean-Std.Err,ymax=Mean+Std.Err),width=.2)+theme(plot.title=element_text(face="bold"))+guides(fill=FALSE)
bar_insectspray #print the bar graph created above
# Create a boxplot of the insect data set to compare the different groups
ggplot(InsectSprays,aes(x=spray,y=count,fill=spray))+geom_boxplot()+guides(fill=FALSE)
The darker line inside each box indicates the median number of insect bites for each type of Insect Spray. If you are interested in also including the mean in a box plot, you can do so by adding the stat_summary portion below.
ggplot(InsectSprays,aes(x=spray,y=count,fill=spray))+geom_boxplot()+guides(fill=FALSE) + stat_summary(fun.y=mean,geom="point",shape=1,size=4)
You can make the axis labels more informative by adding an xlab and ylab label to your plot. You are also able to add a main title if desired. All three labels have been added below.
box_insectspray3=ggplot(InsectSprays,aes(x=spray,y=count,fill=spray))+ geom_boxplot()+ guides(fill=FALSE) + stat_summary(fun.y=mean,geom="point",shape=1,size=4)+xlab("Insect Spray")+ylab("Number of Insect Bites") + ggtitle("Comparing Insect Repellents")+theme(plot.title=element_text(face="bold"))
box_insectspray3 #Print the graph created above
To view all of your data points, overlaid on the box plot, add the geom_jitter() option
box_insectspray4=ggplot(InsectSprays,aes(x=spray,y=count,fill=spray))+ geom_boxplot()+ guides(fill=FALSE) + stat_summary(fun.y=mean,geom="point",shape=1,size=4)+xlab("Insect Spray")+ylab("Number of Insect Bites") + ggtitle("Comparing Insect Repellents")+theme(plot.title=element_text(face="bold")) + geom_jitter()
box_insectspray4 #Print the box graph with all individual points plotted
library(grid)
pushViewport(viewport(layout=grid.layout(1,2)))
print(bar_insectspray,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(box_insectspray3,vp=viewport(layout.pos.row=1,layout.pos.col=2))
If we put them on the same scale, the resulting side-by-side plot would be:
pushViewport(viewport(layout=grid.layout(1,2)))
print(bar_insectspray2,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(box_insectspray3,vp=viewport(layout.pos.row=1,layout.pos.col=2))
plant=PlantGrowth #storing the plant growth data
head(plant) #view the top 6 rows of data
## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
#Find the mean, standard deviation, and standard error of the mean for each group -- this is done so that we are able to create the bar chart with error bars
library(data.table) # Load in necessary package
plant_table=data.table(plant) # create a data table of the plant data set
plant_summary=plant_table[,sapply(.SD,function(x) list(mean=round(mean(x),3),sd=round(sd(x),3),se=round(sqrt(var(x))/length(x),3))),by=group]
plant_summary=setnames(plant_summary,c("TrmtGroup","Mean","Std.Dev","Std.Err")) #Give data set more informative names
#Create a bar chart to compare the different treatment groups
bar_plant=ggplot(plant_summary,aes(x=TrmtGroup,y=Mean,fill=TrmtGroup))+geom_bar(stat="identity")+guides(fill=FALSE)+xlab("Treatment Group")+ylab("Average Size")+geom_errorbar(aes(ymin=Mean-Std.Err,ymax=Mean+Std.Err),width=.2)
bar_plant # print the graph that was just created
# Create a box plot on the same information as above
box_plant=ggplot(plant,aes(x=group,y=weight,fill=group))+geom_boxplot()+guides(fill=FALSE) + stat_summary(fun.y=mean,geom="point",shape=3,size=4)+xlab("Treatment Group")+ylab("Weight") + ggtitle("Comparing Treatment Groups")+theme(plot.title=element_text(face="bold"))
box_plant #print the graph that we have just created
For comparison, I have plotted the boxplot and bar charts side-by-side
Note that in the bar chart it appears as though the bars are all very similar in height, making it very difficult to tell if there are indeed any differences between the treatment groups
The box plot shows the distribution of the data (vs. only the mean), enabling you to see that aside from the 2 outliers, trees in treatment group 1 tend to have a much lower weight than the trees in treatment group 2
As I will explain later, the box plots for treatments 1 and 2 are not overlapping. Therefore, we can conclude that there is a statistically significant difference between these two treatment groups without even performing any test of significance
library(grid)
pushViewport(viewport(layout=grid.layout(1,2)))
print(bar_plant,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(box_plant,vp=viewport(layout.pos.row=1,layout.pos.col=2))
As done in Example 1, I have put the two graphs on the same scale to allow for easier comparison
library(grid)
pushViewport(viewport(layout=grid.layout(1,2)))
print(bar_plant2,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(box_plant,vp=viewport(layout.pos.row=1,layout.pos.col=2))
This information was taken from Comparing Box Plots. More examples and information on comparing boxplots can be found by following the link to the website.
For a brief explanation of how to read box plots, view this website
To create a basic ROC curve in R, you will need to load the pROC package. We will use the aSAH data set from the pROC package in the ROC example.
In the ‘aSAH’ data set, the response variable (the binary indication of whether the sample is truly cancer/non-cancer) are stored as the variable outcome.
library(pROC)
data(aSAH)
#To create an ROC curve, you must first create an ROC object in R using the response/case of the samples and also the predicted value of each observation
roc_obj=roc(aSAH$outcome,aSAH$s100b)
#Now, plot the ROC curve
plot(x=(1-roc_obj$specificities),y=roc_obj$sensitivities,type="l",lwd=2,
xlab="1-Specificity",ylab="Sensitivity",main="ROC Curve")
abline(0,1,col='grey',lty=2,lwd=2)
grid()
#The area under the curve is stored in the roc_obj object, so print this object to view the AUC
roc_obj
##
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b)
##
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 0.731
If you are interested in comparing the area under two different ROC curves, create two ROC objects and then use the following code:
roc.test(roc_obj1, roc_obj2)
library(COUNT) #load the library necessary for the data set (make sure you have installed the COUNT package before trying to load the library)
data(azdrg112) #load the data set
hospital=azdrg112 #store the information in a data frame called 'hospital' so that the name is easier to remember
head(hospital) #Print the 1st 6 rows to see what the data looks like
## los gender type1 age75
## 1 53 0 1 0
## 2 30 0 1 0
## 3 28 0 1 1
## 4 22 0 1 0
## 5 25 0 1 0
## 6 9 1 1 0
hospital_table=table(azdrg112$gender,azdrg112$type1) #Create a contingency table of counts for comparing GENDER and TYPE OF ADMISSION
row.names(hospital_table)=c("Elective","Emergency") # Give the rows more descriptive names
colnames(hospital_table)=c("Female","Male") # Give the columns more descriptive names
hospital_table #print the table
##
## Female Male
## Elective 201 453
## Emergency 434 710
chisq.test(x=azdrg112$gender,y=azdrg112$type1)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: azdrg112$gender and azdrg112$type1
## X-squared = 9.138, df = 1, p-value = 0.002503
Suppose you are looking to compare the accuracy of two binary diagnostic tests. You have the gold standard results (1=presence of disease, 0=absence), the results of one diagnostic test(1=disease,0=absence), and the results of a second diagnostic test (1=disease,0=cancer).
The data set Paired1 in the DTComPair package in R contains the information that we will use in this example. d is the gold standard and y1 and y2 are the two diagnostic tests
library(DTComPair)
data(Paired1)
paired_table=table(Test1=Paired1$y1,Test2=Paired1$y2,Standard=Paired1$d) #Put your gold standard last when creating this table.
paired_table #view the table just created - note that there is actually one table for the gold-standard diagnosis of 0=absent and one table for the gold-standard diagnosis of 1=disease
## , , Standard = 0
##
## Test2
## Test1 0 1
## 0 155 22
## 1 53 31
##
## , , Standard = 1
##
## Test2
## Test1 0 1
## 0 32 22
## 1 78 319
To compare the sensitivity of the two tests, you will run a McNemar test on the table for the gold-standard diagnosis of 1=Disease
To compare the specificity of the two tests, you will run a McNemar test on the table for the gold-standard diagnosis of 0=Absence of Disease
mcnemar.test(paired_table[,,2]) #Comparing sensitivity of Test 1 and Test 2 to determine if the sensitivities are significantly different
##
## McNemar's Chi-squared test with continuity correction
##
## data: paired_table[, , 2]
## McNemar's chi-squared = 30.25, df = 1, p-value = 3.798e-08
mcnemar.test(paired_table[,,1]) #Comparing specificity of Test1 and Test2 to determine if the specifcities are significantly different
##
## McNemar's Chi-squared test with continuity correction
##
## data: paired_table[, , 1]
## McNemar's chi-squared = 12, df = 1, p-value = 0.000532
Use for small samples (e.g. the total number of observations is less than 20) and if the assumptions for the chi-square test are not met (e.g. The expected count of some cells in your contingency table are less than 5)
For the example below, I have just created a table with counts in each of the cells. You would want to replace the counts in the matrix with the counts from your 2x2 contingency table.
R Code :
sample=matrix(c(7,2,5,8),nrow=2,ncol=2)
fisher.test(sample)
##
## Fisher's Exact Test for Count Data
##
## data: sample
## p-value = 0.09907
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.6243 70.8945
## sample estimates:
## odds ratio
## 5.146
Assumptions: Dependent/response variable is normally distributed or the sample size in each group is large (over 30)
Assumptions: All of the assumptions mentioned above AND the observations are independent
R Code:
# Check Assumptions - you do not need to look at both histogram and qqplot. One or the other will be fine.
#histogram to check normality
hist(dataset$responseVariable) #look to see if the plot looks bell-shaped
#qqplot to check normality
qqnorm(dataset$responseVariable) # look for any significant departures from the line
qqline(dataset$responseVariable)
t.test(a,b,data=NAME-OF-DATA-SET)
a and b are numeric observations on two independent groups that you are comparing (the data of interest for the two groups are in two separate columns)
t.test(a,b,paired=TRUE,data=NAME-OF-DATA-SET)
a and b are the two paired groups that you are comparing (both are numeric)
library(PairedData) # make sure you have installed the PairedData package before using the library() statement to read in the package
data(Anorexia) #Load in the data set
weights=Anorexia #store the data as 'weights'
#Check t-test assumptions
#histogram to check normality
hist(weights$Prior) #look to see if the plot looks bell-shaped - the prior readings look very good, despite the smaller sample size of 17
hist(weights$Post) # Not normally distributed, but will assume the assumptions are met for this example
#qqplot to check normality
qqnorm(weights$Prior) # look for any significant departures from the line - No significant departures exist here, so assumption met for Prior variable
qqline(weights$Prior)
qqnorm(weights$Post) #Here, significant departures from the line do exist, but for the sake of this example, we will assume that the assumptions for the t-test are met
qqline(weights$Post)
t.test(weights$Prior,weights$Post,paired=TRUE) # Here we are comparing the prior weight to the post-treatment weight, indicating that the data is paired
##
## Paired t-test
##
## data: weights$Prior and weights$Post
## t = -4.185, df = 16, p-value = 0.0007003
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.945 -3.585
## sample estimates:
## mean of the differences
## -7.265
t.test(weights$Prior,weights$Post,paired=TRUE,alt="less")
##
## Paired t-test
##
## data: weights$Prior and weights$Post
## t = -4.185, df = 16, p-value = 0.0003501
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -4.234
## sample estimates:
## mean of the differences
## -7.265
If the assumptions for the independent t-test are NOT met, use the Mann Whitney U-Test * Use for INDEPENDENT (unpaired) samples
wilcox.test(response~grouping-variable,data=NAME-OF-DATA-SET)
Use this format for your code if your data of interest for your two groups is in one column and another column indicates the group (e.g. One column contains the signal values from an array and another column in the data set contains only 1’s or 2’s, based on whether the value in the first column is from group 1 or group 2)
# I have created a sample data set to use for this example - you will not be creating a data set, as you will be using data from your own experiments
fluorescence=data.frame(matrix(c(
1.392676457, 1,
1.134213708, 1,
1.212874511, 1,
1.425741846, 1,
1.216919192, 1,
0.971547408, 1,
1.129713097, 1,
1.614557152, 1,
1.024572862, 1,
1.596483278, 1,
0.728298752, 0,
0.795764338, 0,
0.493569149, 0,
0.786838171, 0,
1.248626706, 0,
0.84200978, 0,
0.348761555, 0,
0.953396245, 0,
0.807893464, 0,
0.67913391, 0),byrow=TRUE,ncol=2))
colnames(fluorescence)=c("Expression","Case") #Give column names more descriptive titles
#Now, run a Mann-Whitney U-test
wilcox.test(Expression~Case,data=fluorescence)
##
## Wilcoxon rank sum test
##
## data: Expression by Case
## W = 6, p-value = 0.0003248
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(y1,y2,data=NAME-OF-DATA-SET)
# The 10 cancer samples are first, then the 10 non-cancer samples
fluorescence2=data.frame(matrix(c(1.392676457,1.134213708,1.212874511,1.425741846,1.216919192,0.971547408,1.129713097,1.614557152,1.024572862,1.596483278,0.728298752, 0.795764338,0.493569149,0.786838171,1.248626706,0.84200978,0.348761555,0.953396245,0.807893464,0.67913391),byrow=FALSE,ncol=2))
colnames(fluorescence2)=c("Cancer","Non") # Cancer and non-cancer samples are now in separate columns
fluorescence2 #printed data set to show you the difference in formatting
## Cancer Non
## 1 1.3927 0.7283
## 2 1.1342 0.7958
## 3 1.2129 0.4936
## 4 1.4257 0.7868
## 5 1.2169 1.2486
## 6 0.9715 0.8420
## 7 1.1297 0.3488
## 8 1.6146 0.9534
## 9 1.0246 0.8079
## 10 1.5965 0.6791
#Now, run the Mann-Whitney U-test on this data
wilcox.test(fluorescence2$Cancer,fluorescence2$Non)
##
## Wilcoxon rank sum test
##
## data: fluorescence2$Cancer and fluorescence2$Non
## W = 94, p-value = 0.0003248
## alternative hypothesis: true location shift is not equal to 0
If the assumptions for the PAIRED t-test are not met, use the Signed-Rank Test
Use for PAIRED samples
The approach is exactly the same as that for the Mann-Whitney U-test, so there will be no example for this test
wilcox.test(y1,y2,paired=TRUE,data=NAME-OF-DATA-SET)
y1 and y2 are the two paired groups that you are comparing (both are numeric)
wilcox.test(response~grouping-variable,paired=TRUE,data=NAME-OF-DATA-SET)
If you are comparing the means of more than two groups, use ANOVA instead of multiple t-tests
Assumptions:
Homogeneity of variance (variance in each group is approximately the same as the variance in all other groups)
Normality of data within groups
library(data.table) #load in the necessary libraries
data(PlantGrowth) #read in the data set
plant=PlantGrowth #assign the data values to an object called 'plant'
#Check ANOVA assumptions
bartlett.test(weight~group, data=plant) #Use this to check for homogeneity of variance between your groups - a small p-value indicates that this assumption is NOT met
##
## Bartlett test of homogeneity of variances
##
## data: weight by group
## Bartlett's K-squared = 2.879, df = 2, p-value = 0.2371
qqnorm(plant$weight) #Use this to check normality
qqline(plant$weight) #Significant departures from the line suggest violations of normality
The Bartlett test of homogeneity of variances resulted in a p-value of 0.2371, indicating that the variances between the treatment groups are not significantly different. Therefore, the HOV assumption is met
The QQ-plot showed no significant departures from normality, so the normality assumption is also met
R Code for ANOVA:
aov(response~groupingvariable,data=DATASETNAME)
anova_test=aov(weight~group,data=plant) # Store the results from ANOVA as 'anova_test'
summary(anova_test) # Have R print out the F-statistic and the p-value for this test
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 3.77 1.883 4.85 0.016 *
## Residuals 27 10.49 0.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.t.test(response,groupingvariable,p.adjust.method="bonf") #This is the general form
pairwise.t.test(plant$weight,plant$group,p.adjust.method="bonf")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: plant$weight and plant$group
##
## ctrl trt1
## trt1 0.583 -
## trt2 0.263 0.013
##
## P value adjustment method: bonferroni
Use this when the ANOVA assumptions are NOT met
library(data.table) #load in the necessary libraries
library(ggplot2)
data(InsectSprays) #read in the Insect Spray data set
head(InsectSprays) #view the top 6 rows of data to see what the variables are
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
insect=InsectSprays #assign the data values to an object called 'insect'
#Check ANOVA assumptions
bartlett.test(count~spray, data=insect) #Use this to check for homogeneity of variance between your groups - a small p-value indicates that this assumption is NOT met
##
## Bartlett test of homogeneity of variances
##
## data: count by spray
## Bartlett's K-squared = 25.96, df = 5, p-value = 9.085e-05
qqnorm(insect$count) #Use this to check normality
qqline(insect$count) #Significant departures from the line suggest violations of normality
# We have already loaded in the data set for checking the assumptions, so here we will only perform the Kruskal-Wallis test
kruskal.test(count~spray,data=insect)
##
## Kruskal-Wallis rank sum test
##
## data: count by spray
## Kruskal-Wallis chi-squared = 54.69, df = 5, p-value = 1.511e-10
pairwise.wilcox.test(insect$count,insect$spray,p.adjust.method="bonf")
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: insect$count and insect$spray
##
## A B C D E
## B 1.00000 - - - -
## C 0.00058 0.00058 - - -
## D 0.00117 0.00104 0.03977 - -
## E 0.00051 0.00051 0.78860 1.00000 -
## F 1.00000 1.00000 0.00052 0.00105 0.00052
##
## P value adjustment method: bonferroni
http://cran.r-project.org/doc/contrib/Torfs+Brauer-Short-R-Intro.pdf
http://www.r-tutor.com/r-introduction
http://www.dummies.com/how-to/content/what-a-boxplot-can-tell-you-about-a-statistical-da.html
http://www.r-bloggers.com/creating-scatter-plots-using-ggplot2/
http://maths.nayland.school.nz/Year_11/AS1.10_Multivar_data/11_Comparing_Boxplots.htm
http://cran.r-project.org/web/packages/pROC/pROC.pdf http://cran.r-project.org/web/packages/DTComPair/DTComPair.pdf
http://cran.r-project.org/web/packages/COUNT/COUNT.pdf
Here are a few helpful sites on how to create heatmaps with R:
6. Comments
If you want to write comments or notes in your code to remind yourself or others of what a section of code does, begin the line with the pound sign (#) and then type your note
Any line beginning with # will be ignored
# This line of code would be ignored when you run your R scriptBut this line of code would be read