# Read in csv files
library(readr)
#CSV_File <- read_csv("F:/R_Training/Ecoli_Data_set(Auckland).csv")
#CSV_File <- read_csv("C:/Users/Jorn/Documents/R_training/Ecoli_Data_set(HBRC).csv")
# Load the HBRC E. coli export from its fixed location on the training machine.
CSV_File <- read_csv(
  file = "C:\\Users\\Jorn\\Documents\\R_training\\Ecoli_Data_set(HBRC).csv"
)
# Console output from the import, kept for reference: the column types that
# readr guessed for this file.
## Parsed with column specification:
## cols(
## Agency = col_character(),
## SiteName = col_character(),
## Name = col_character(),
## NumItems = col_integer(),
## TSType = col_character(),
## DataType = col_character(),
## Interpolation = col_character(),
## ItemNumber = col_integer(),
## ItemName = col_character(),
## ItemFormat = col_logical(),
## Units = col_character(),
## Format = col_character(),
## DateFormat = col_character(),
## NumItems2 = col_integer(),
## T = col_character(),
## I1 = col_double(),
## I2 = col_character()
## )
# Inspect the full import in the data viewer.
View(CSV_File)
# Keep only the five columns used by the rest of the script:
# site, measurement name, timestamp text (T), measured value (I1) and units.
CSV_File_Subset1 <- CSV_File[c("SiteName", "Name", "T", "I1", "Units")]
View(CSV_File_Subset1)
# Row-filter examples on the reduced table. Each result is only shown in the
# data viewer; nothing is stored.
# Example 1: every measurement recorded at one site.
View(subset(CSV_File_Subset1, SiteName == "Wairoa River at Railway Br."))
# Example 2: only the E. coli measurements at that site.
View(subset(CSV_File_Subset1, SiteName == "Wairoa River at Railway Br." & Name == "E. Coli"))
# Example 3: E. coli at that site above 1000.
View(subset(CSV_File_Subset1, SiteName == "Wairoa River at Railway Br." & Name == "E. Coli" & I1 > 1000))
# Example 4: E. coli at that site that is either low (< 50) or high (> 1000).
# `&` binds tighter than `|`, so the OR must be parenthesised; without the
# parentheses every row with I1 > 1000 is returned, whatever its site or
# measurement name.
View(subset(
  CSV_File_Subset1,
  SiteName == "Wairoa River at Railway Br." & Name == "E. Coli" &
    (I1 < 50 | I1 > 1000)
))
# Example 5: E. coli at that site strictly between 150 and 800.
View(subset(CSV_File_Subset1, SiteName == "Wairoa River at Railway Br." & Name == "E. Coli" & I1 > 150 & I1 < 800))
# Distinct values of a single column: the site names.
View(unique(CSV_File_Subset1$SiteName))
# Distinct values of two columns pooled into one character vector
# (site names followed by measurement names).
View(unique(c(CSV_File_Subset1$SiteName, CSV_File_Subset1$Name)))
# Build the data set used for the summary statistics: Dissolved Reactive
# Phosphorus readings at the Wairoa River railway bridge site.
# which() drops rows where the condition is NA, as subset() does.
Statsdataset <- CSV_File_Subset1[
  which(
    CSV_File_Subset1$SiteName == "Wairoa River at Railway Br." &
      CSV_File_Subset1$Name == "Dissolved Reactive Phosphorus"
  ),
]
View(Statsdataset)
# Smallest measured Dissolved Reactive Phosphorus value
min(Statsdataset[["I1"]])
## [1] 0.004
# Largest measured Dissolved Reactive Phosphorus value
max(Statsdataset[["I1"]])
## [1] 0.043
# Arithmetic mean of the measured values
mean(Statsdataset[["I1"]])
## [1] 0.01074048
# Median of the measured values
median(Statsdataset[["I1"]])
## [1] 0.0085
# Five-number summary plus mean in one call
summary(Statsdataset[["I1"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00400 0.00415 0.00850 0.01074 0.01432 0.04300
# Box plot for one measurement at one site, using the DRP statistics data set.
library(ggplot2) # geom_boxplot() takes several arguments to customise appearance
ggplot(data = Statsdataset, mapping = aes(x = Name, y = I1)) +
  geom_boxplot(
    # box outline and translucent fill
    colour = "blue",
    fill = "blue",
    alpha = 0.2,
    # draw a notch around the median
    notch = TRUE,
    notchwidth = 0.8,
    # make outliers stand out
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 3
  )

# All E. coli measurements, across every site.
# which() drops rows where the condition is NA, as subset() does.
Statsdataset2 <- CSV_File_Subset1[which(CSV_File_Subset1$Name == "E. Coli"), ]
View(Statsdataset2)
# One box per site, ordered left to right by each site's mean I1 value
# (reorder() uses the mean by default). The legend is hidden because the
# x axis already names the sites.
ggplot(
  data = Statsdataset2,
  mapping = aes(x = reorder(SiteName, I1), y = I1, fill = SiteName)
) +
  geom_boxplot() +
  labs(x = "SiteName") +
  theme(legend.position = "none")

# Prepare the E. coli data for the final graph by giving the columns
# human-readable headings.
library(plyr) # provides rename() taking an old-name = new-name vector
Statsdataset3 <- plyr::rename(
  Statsdataset2,
  replace = c(
    SiteName = "Site Name",
    Name = "Measurement",
    T = "Time",
    I1 = "E. Coli (cfu/100ml)"
  )
)
View(Statsdataset3)
# Convert the sample timestamps from day/month/year text to Date.
#
# NOTE(review): console output previously pasted here showed symptoms of a
# format mismatch. as.POSIXlt() on the raw text, with no format given, read
# the day as the year (years 1-31 AD), and as.Date(..., "%d/%m/%y") placed
# every sample in 2020 (min 2020-01-08, max 2020-12-22). That is what happens
# when a four-digit year is read with the two-digit "%y". Confirm against the
# CSV; the code below handles both two- and four-digit years.
if (!inherits(Statsdataset3$Time, "Date")) { # guard so a re-run is harmless
  time_text <- as.character(Statsdataset3$Time)
  # Which values carry a four-digit year (d/m/yyyy, anything after is ignored)?
  four_digit_year <- grepl(
    "^[[:space:]]*[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}",
    time_text
  )
  # Two-digit years keep the original "%y" handling ...
  parsed_dates <- as.Date(time_text, format = "%d/%m/%y")
  # ... four-digit years must be read with "%Y".
  parsed_dates[four_digit_year] <- as.Date(
    time_text[four_digit_year],
    format = "%d/%m/%Y"
  )
  Statsdataset3$Time <- parsed_dates
}
View(Statsdataset3$Time)
# Calendar year of each sample. POSIXlt stores years as an offset from 1900,
# and is built from the parsed dates rather than from the raw text.
xlt <- as.POSIXlt(Statsdataset3$Time)
xlt$year + 1900
# Date range covered by the samples; unparseable (NA) dates are ignored.
min(Statsdataset3$Time, na.rm = TRUE)
max(Statsdataset3$Time, na.rm = TRUE)
# Box plot of E. coli per site with the individual measurements jittered on
# top. Written with ggplot() because qplot() is deprecated as of
# ggplot2 3.4.0; the layers and aesthetics match the former qplot() call.
ggplot(
  Statsdataset3,
  aes(x = `Site Name`, y = `E. Coli (cfu/100ml)`, fill = `Site Name`)
) +
  geom_boxplot() +
  geom_jitter() +
  ggtitle("E. Coli Measurements")
