The purpose of this RPubs document is to record and annotate the key R functions used throughout the Quantitative Methods in Geography course (GEOG 5023).
Basic function:
Related functions:
# Load the lab 1 CO2 observations into the data frame `co2`.
# header = TRUE tells read.csv() that the first row of the file holds the
# field names rather than data. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
# NOTE(review): this absolute Windows path only resolves on the author's
# machine; point it at your own copy of co2_LAB1.csv before running.
co2 <- read.csv("C:/CU BOULDER/Coursework/Y2S2/GEOG 5023 - Quant Methods Geo/Labs/lab1/co2_LAB1.csv",
  header = TRUE) #import csv file with field names
# List the fields (column names) of the data frame.
colnames(co2)
## [1] "year" "month" "time" "co2" "site"
# Show the first 10 rows across every field.
head(co2, 10L)
## year month time co2 site
## 1 1969 2 1969 324.4 0
## 2 1969 3 1969 325.6 0
## 3 1969 4 1969 326.7 0
## 4 1969 5 1969 327.3 0
## 5 1969 6 1970 326.8 0
## 6 1969 7 1970 325.9 0
## 7 1969 8 1970 323.7 0
## 8 1969 9 1970 322.4 0
## 9 1969 10 1970 321.8 0
## 10 1969 11 1970 322.9 0
# Show the first 10 values of a single field.
head(co2[["site"]], 10L)
## [1] 0 0 0 0 0 0 0 0 0 0
# Summary statistics for every field in the dataset.
summary(co2)
## year month time co2 site
## Min. :1969 Min. : 1.00 Min. :1969 Min. :322 Min. :0.0
## 1st Qu.:1978 1st Qu.: 4.00 1st Qu.:1979 1st Qu.:337 1st Qu.:0.0
## Median :1988 Median : 7.00 Median :1989 Median :351 Median :0.5
## Mean :1988 Mean : 6.51 Mean :1989 Mean :352 Mean :0.5
## 3rd Qu.:1998 3rd Qu.: 9.75 3rd Qu.:1998 3rd Qu.:366 3rd Qu.:1.0
## Max. :2007 Max. :12.00 Max. :2008 Max. :389 Max. :1.0
# Summary statistics for the co2 field only.
summary(co2[["co2"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 321 337 351 352 366 389
# Sample variance of the co2 field.
var(co2[["co2"]])
## [1] 304.1
# Histogram of CO2 using 5-unit bins that span 300 to 400.
hist(
  co2[["co2"]],
  breaks = seq(from = 300, to = 400, by = 5),
  main = "Histogram of CO2",
  xlab = "CO2",
  col = "green"
)
# Boxplot of the same field.
boxplot(co2[["co2"]], col = "green", ylab = "CO2", main = "Boxplot of CO2")
# Scatterplot of CO2 against the time field; pch = 2 selects the plotting symbol.
plot(
  x = co2[["time"]],
  y = co2[["co2"]],
  main = "Scatterplot of CO2 over Time",
  xlab = "Obs.",
  ylab = "CO2",
  col = "green",
  pch = 2
)
# Graphical normality check: sample quantiles against normal quantiles,
# plus the reference line the points would follow if the data were normal.
qqnorm(co2[["co2"]])
qqline(co2[["co2"]])
# Formal normality check on the CO2 values with the Shapiro-Wilk test.
# Reading the recorded output: the p-value (1.202e-14) is far below the
# conventional 0.05 level, so the null hypothesis of normality is rejected.
shapiro.test(co2$co2) #shapiro-wilk normality test, null is that data are normal
##
## Shapiro-Wilk normality test
##
## data: co2$co2
## W = 0.963, p-value = 1.202e-14
# Kolmogorov-Smirnov test of the CO2 values against a normal distribution.
# "pnorm" with no further arguments means the STANDARD normal N(0, 1). The CO2
# values recorded in this file run from about 321 to 389, so every observation
# sits in the extreme upper tail of N(0, 1); that is why the original call
# ks.test(co2$co2, "pnorm") reported D = 1. Passing the sample mean and sd
# compares the data with a normal distribution of matching location and scale.
# Caveat: the one-sample KS test assumes the reference parameters are specified
# in advance; estimating them from the same data makes the reported p-value
# approximate (conservative). Treat it as a rough check alongside Shapiro-Wilk.
ks.test(co2$co2, "pnorm", mean = mean(co2$co2), sd = sd(co2$co2)) #kolmogorov-smirnov test, null is that data are normal
# NOTE(review): the output below was recorded from the original call
# ks.test(co2$co2, "pnorm") (i.e. against N(0, 1)); regenerate it for the
# corrected call above.
## Warning: ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: co2$co2
## D = 1, p-value < 2.2e-16
## alternative hypothesis: two-sided
# Two-sample t-test comparing mean CO2 between the two site groups (0 and 1).
# var.equal = TRUE requests the pooled-variance (classic Student) form of the
# test; TRUE is spelled out because T is an ordinary variable that can be
# reassigned. Reading the recorded output: p = 0.4056, so the null hypothesis
# of equal group means is not rejected at the 0.05 level.
t.test(co2$co2 ~ co2$site, var.equal = TRUE) #difference in means test, null is that means for the two groups are equal
##
## Two Sample t-test
##
## data: co2$co2 by co2$site
## t = -0.832, df = 932, p-value = 0.4056
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.19 1.29
## sample estimates:
## mean in group 0 mean in group 1
## 351.4 352.3
# F test comparing the variance of CO2 between the two site groups.
# Reading the recorded output: the variance ratio is 1.022 with p = 0.8163 and
# a 95% interval (0.8519, 1.2255) that contains 1, so the null hypothesis of
# equal variances is not rejected.
var.test(co2$co2 ~ co2$site) #difference in variances test, null is that variances for the two groups are equal
##
## F test to compare two variances
##
## data: co2$co2 by co2$site
## F = 1.022, num df = 466, denom df = 466, p-value = 0.8163
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8519 1.2255
## sample estimates:
## ratio of variances
## 1.022
# Non-parametric comparison of CO2 between the two site groups.
# As the recorded output states, the alternative is that the location shift
# between the groups is not 0; with p = 0.393 the null hypothesis of no shift
# is not rejected.
wilcox.test(co2$co2 ~ co2$site) #wilcoxon rank sum test, null is that the location shift between the two groups is 0
##
## Wilcoxon rank sum test with continuity correction
##
## data: co2$co2 by co2$site
## W = 105523, p-value = 0.393
## alternative hypothesis: true location shift is not equal to 0
# Pearson chi-squared test of independence on a 2 x 2 table of counts.
# matrix(..., 2) fills a two-row matrix column by column, giving
#   row 1: 101, 1385
#   row 2: 141, 10266
# correct = FALSE turns off the continuity correction that chisq.test() would
# otherwise apply to a 2 x 2 table; FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
chisq.test(matrix(c(101, 141, 1385, 10266), 2), correct = FALSE) #chisquare test, null is that categories are independent
##
## Pearson's Chi-squared test
##
## data: matrix(c(101, 141, 1385, 10266), 2)
## X-squared = 193.2, df = 1, p-value < 2.2e-16