# Run the helper script and build the data set used by the rest of this
# script. NOTE(review): getRiceData() is presumably defined in getRiceData.R;
# it is not visible here.
source("getRiceData.R")
data <- getRiceData()
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Per-column summary statistics.
summary(data)
## RecordID PAR WindSpeed_mph WindDir
## Min. :43816 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.:45866 1st Qu.: 0.000 1st Qu.: 2.467 1st Qu.: 37.31
## Median :47915 Median : 0.046 Median : 4.090 Median :137.30
## Mean :47915 Mean : 241.984 Mean : 5.446 Mean :146.20
## 3rd Qu.:49965 3rd Qu.: 337.900 3rd Qu.: 7.292 3rd Qu.:249.95
## Max. :52014 Max. :1957.000 Max. :30.650 Max. :360.00
##
## RelHumidity BP_HG H2O_TempC SpCond_mScm
## Min. :15.37 Min. :29.11 Min. :-0.140 Min. :0.0110
## 1st Qu.:42.25 1st Qu.:29.87 1st Qu.: 3.930 1st Qu.:0.1430
## Median :56.40 Median :30.01 Median : 5.450 Median :0.1650
## Mean :58.37 Mean :30.02 Mean : 5.529 Mean :0.1611
## 3rd Qu.:76.59 3rd Qu.:30.21 3rd Qu.: 7.410 3rd Qu.:0.1760
## Max. :93.00 Max. :30.58 Max. :13.300 Max. :0.2110
## NA's :1 NA's :1
## Salinity_ppt PH Turbidity_ntu Chla_ugl
## Min. :0.0000 Min. :6.43 Min. : 6.20 Min. : 1.3
## 1st Qu.:0.0700 1st Qu.:7.50 1st Qu.: 15.50 1st Qu.: 3.7
## Median :0.0800 Median :7.58 Median : 21.80 Median : 6.7
## Mean :0.0759 Mean :7.60 Mean : 24.54 Mean :137.3
## 3rd Qu.:0.0800 3rd Qu.:7.69 3rd Qu.: 30.30 3rd Qu.:302.6
## Max. :0.1000 Max. :9.00 Max. :187.70 Max. :330.1
## NA's :1 NA's :1 NA's :1 NA's :1
## BGAPC_CML ODO_sat ODO_mgl Depth_m
## Min. : 188 Min. : 87.5 Min. :10.34 Min. :3.705
## 1st Qu.: 971 1st Qu.: 99.2 1st Qu.:12.34 1st Qu.:4.451
## Median : 1369 Median :101.8 Median :12.88 Median :4.684
## Mean :153571 Mean :102.0 Mean :12.88 Mean :4.677
## 3rd Qu.:345211 3rd Qu.:104.1 3rd Qu.:13.34 3rd Qu.:4.913
## Max. :345471 Max. :120.8 Max. :14.99 Max. :5.454
## NA's :1 NA's :1 NA's :1
## Date Airtemp Rain
## Min. :2014-01-01 00:00:00 Min. :-15.6950 Min. :0.00000
## 1st Qu.:2014-01-22 08:22:30 1st Qu.: -0.2528 1st Qu.:0.00000
## Median :2014-02-12 16:45:00 Median : 3.0222 Median :0.00000
## Mean :2014-02-12 16:45:00 Mean : 3.7751 Mean :0.02137
## 3rd Qu.:2014-03-06 01:07:30 3rd Qu.: 8.0056 3rd Qu.:0.00000
## Max. :2014-03-27 09:30:00 Max. : 23.8167 Max. :8.81380
##
## WindSpeed
## Min. : 0.000
## 1st Qu.: 3.970
## Median : 6.582
## Mean : 8.764
## 3rd Qu.:11.735
## Max. :49.326
##
# Column names of the rice data set.
names(data)
## [1] "RecordID" "PAR" "WindSpeed_mph" "WindDir"
## [5] "RelHumidity" "BP_HG" "H2O_TempC" "SpCond_mScm"
## [9] "Salinity_ppt" "PH" "Turbidity_ntu" "Chla_ugl"
## [13] "BGAPC_CML" "ODO_sat" "ODO_mgl" "Depth_m"
## [17] "Date" "Airtemp" "Rain" "WindSpeed"
# Length of time between the earliest and the latest observation.
max(data$Date) - min(data$Date)
## Time difference of 85.39583 days
# Rows that contain at least one missing value.
data[!complete.cases(data), ]
## RecordID PAR WindSpeed_mph WindDir RelHumidity BP_HG H2O_TempC
## 1589 45404 1077 3.487 152.2 48.65 29.93 NA
## SpCond_mScm Salinity_ppt PH Turbidity_ntu Chla_ugl BGAPC_CML ODO_sat
## 1589 NA NA NA NA NA NA NA
## ODO_mgl Depth_m Date Airtemp Rain WindSpeed
## 1589 NA 4.658868 2014-01-17 13:00:00 6.088889 0 5.611769
# Number of missing values in each column. sum() over the logical is.na()
# vector counts the TRUE entries directly, and vapply() guarantees a named
# integer vector regardless of the input.
na_count <- vapply(data, function(column) sum(is.na(column)), integer(1))
# One-column data frame whose row names are the column names of `data`.
na_count <- data.frame(na_count)
na_count
## na_count
## RecordID 0
## PAR 0
## WindSpeed_mph 0
## WindDir 0
## RelHumidity 0
## BP_HG 0
## H2O_TempC 1
## SpCond_mScm 1
## Salinity_ppt 1
## PH 1
## Turbidity_ntu 1
## Chla_ugl 1
## BGAPC_CML 1
## ODO_sat 1
## ODO_mgl 1
## Depth_m 0
## Date 0
## Airtemp 0
## Rain 0
## WindSpeed 0
# Row position(s) of the lowest recorded pH; missing readings are ignored
# when computing the minimum.
with(data, which(PH == min(PH, na.rm = TRUE)))
## [1] 4322
library(tidyverse)
## -- Attaching packages ------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1 v readr 1.3.1
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 0.8.3 v stringr 1.4.0
## v dplyr 0.8.3 v forcats 0.4.0
## -- Conflicts --------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
# pH readings with their record ID and timestamp, sorted both ways:
# `t` is ordered by descending PH, `p` by ascending PH.
t <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(desc(PH))
p <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(PH)
# Ten lowest pH readings. head() returns fewer rows instead of padding with
# NA rows when the table is shorter than ten.
head(p, 10)
## PH RecordID Date
## 1 6.43 48137 2014-02-15 00:15:00
## 2 6.48 45204 2014-01-15 11:00:00
## 3 6.52 45159 2014-01-14 23:45:00
## 4 6.59 48136 2014-02-15 00:00:00
## 5 6.60 48824 2014-02-22 04:00:00
## 6 6.62 48086 2014-02-14 11:30:00
## 7 6.81 45549 2014-01-19 01:15:00
## 8 6.84 45354 2014-01-17 00:30:00
## 9 6.86 48185 2014-02-15 12:15:00
## 10 6.87 45059 2014-01-13 22:45:00
library(tidyverse)
# Ten highest pH readings, smallest of them first. `p` (rows ordered by
# ascending PH) and `t` were already built earlier in this script and nothing
# has modified them since, so they are reused rather than rebuilt.
# Rows with a missing PH are dropped before taking the tail, so the result no
# longer depends on hard-coded row numbers that break when the row count or
# the number of missing readings changes.
tail(p[!is.na(p$PH), ], 10)
## PH RecordID Date
## 8189 8.52 50917 2014-03-15 23:15:00
## 8190 8.52 50919 2014-03-15 23:45:00
## 8191 8.55 50907 2014-03-15 20:45:00
## 8192 8.64 50914 2014-03-15 22:30:00
## 8193 8.68 50908 2014-03-15 21:00:00
## 8194 8.85 50909 2014-03-15 21:15:00
## 8195 8.92 50910 2014-03-15 21:30:00
## 8196 8.98 50911 2014-03-15 21:45:00
## 8197 8.99 50912 2014-03-15 22:00:00
## 8198 9.00 50913 2014-03-15 22:15:00
library(tidyverse)
# Keep only the columns needed for the air-temperature question.
sm_data <- data %>%
  select(Date, RecordID, Airtemp)
# Saturday observations, warmest first.
hitemp_data <- sm_data %>%
  filter(weekdays(Date) %in% "Saturday") %>%
  arrange(desc(Airtemp))
# Five warmest Saturday readings.
hitemp_data[1:5, ]
## Date RecordID Airtemp
## 1 2014-03-22 16:30:00 51562 22.67222
## 2 2014-03-22 17:15:00 51565 22.10556
## 3 2014-03-22 17:30:00 51566 22.05000
## 4 2014-03-22 17:45:00 51567 21.88889
## 5 2014-03-22 16:15:00 51561 21.85556
# Tag every observation with its day of the week, stored as an ordered
# factor whose levels run Monday through Sunday.
days <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)
data$Weekday <- factor(weekdays(data$Date), levels = days, ordered = TRUE)
# Highest wind speed observed on each weekday.
data %>%
  group_by(Weekday) %>%
  summarize(Windspeed = max(WindSpeed))
## # A tibble: 7 x 2
## Weekday Windspeed
## <ord> <dbl>
## 1 Monday 36.7
## 2 Tuesday 34.5
## 3 Wednesday 38.4
## 4 Thursday 35.2
## 5 Friday 49.3
## 6 Saturday 35.8
## 7 Sunday 34.0
library(ggplot2)
# Histogram of water temperature (bin width 0.5), one facet row per weekday,
# with bars filled by weekday.
ggplot(data, aes(x = H2O_TempC, fill = Weekday)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(Weekday ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Run the beetle helper script and build the beetle data set.
# NOTE(review): the file name is spelled "getBeetledata.R" (lower-case "d")
# while the function is getBeetleData(); confirm the file name on
# case-sensitive file systems.
source("getBeetledata.R")
beetle_data <- getBeetleData()
# Per-column summary statistics.
summary(beetle_data)
## Species Clade Pop PC1
## Length:363 Length:363 Length:363 Min. :-5.717
## Class :character Class :character Class :character 1st Qu.:-1.862
## Mode :character Mode :character Mode :character Median : 1.317
## Mean : 0.000
## 3rd Qu.: 2.084
## Max. : 3.492
## PC2
## Min. :-2.2754
## 1st Qu.:-1.3277
## Median :-0.5726
## Mean : 0.0000
## 3rd Qu.: 0.5671
## Max. : 8.4950
# Column names of the beetle data set.
names(beetle_data)
## [1] "Species" "Clade" "Pop" "PC1" "PC2"
# Convert the three character columns to factors so they can drive discrete
# aesthetics (shape, colour) in plots.
beetle_data$Species <- factor(beetle_data$Species)
beetle_data$Clade <- factor(beetle_data$Clade)
beetle_data$Pop <- factor(beetle_data$Pop)
# PC1 and PC2 are deliberately left numeric: they are continuous scores, and
# turning them into factors would give a scatter plot discrete axes with one
# level per distinct value.
# Reference chart of the 25 point shapes: one point per shape code, with x
# cycling through 1..5 and y running from 1 to 25.
df <- data.frame(x = rep(1:5, times = 5), y = 1:25, shape = 1:25)
# scale_shape_identity() makes the numeric `shape` column be used directly as
# the shape code.
p <- ggplot(df, aes(x, y)) +
  geom_point(
    aes(shape = shape),
    size = 5,
    fill = "darkgreen",
    color = "blue"
  ) +
  scale_shape_identity()
p

library(ggplot2)
# Scatter plot of the second principal component against the first, with
# point shape mapped to Species and point colour mapped to Clade.
p <- ggplot(
  beetle_data,
  aes(x = PC1, y = PC2, shape = Species, color = Clade)
) +
  geom_point(size = 2) +
  xlab("First Principal Component") +
  ylab("Second Principal Component") +
  ggtitle("Comparing Beetles Based on Genetic Markers and Geography")
p
