# Run the helper script (presumably where getRiceData() is defined), then
# build the `data` object that the rest of this analysis works on.
source("getRiceData.R")
data <- getRiceData()
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Per-column summary statistics (quartiles, mean, NA counts) for the rice data.
summary(data)
##     RecordID          PAR           WindSpeed_mph       WindDir      
##  Min.   :43816   Min.   :   0.000   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.:45866   1st Qu.:   0.000   1st Qu.: 2.467   1st Qu.: 37.31  
##  Median :47915   Median :   0.046   Median : 4.090   Median :137.30  
##  Mean   :47915   Mean   : 241.984   Mean   : 5.446   Mean   :146.20  
##  3rd Qu.:49965   3rd Qu.: 337.900   3rd Qu.: 7.292   3rd Qu.:249.95  
##  Max.   :52014   Max.   :1957.000   Max.   :30.650   Max.   :360.00  
##                                                                      
##   RelHumidity        BP_HG         H2O_TempC       SpCond_mScm    
##  Min.   :15.37   Min.   :29.11   Min.   :-0.140   Min.   :0.0110  
##  1st Qu.:42.25   1st Qu.:29.87   1st Qu.: 3.930   1st Qu.:0.1430  
##  Median :56.40   Median :30.01   Median : 5.450   Median :0.1650  
##  Mean   :58.37   Mean   :30.02   Mean   : 5.529   Mean   :0.1611  
##  3rd Qu.:76.59   3rd Qu.:30.21   3rd Qu.: 7.410   3rd Qu.:0.1760  
##  Max.   :93.00   Max.   :30.58   Max.   :13.300   Max.   :0.2110  
##                                  NA's   :1        NA's   :1       
##   Salinity_ppt          PH       Turbidity_ntu       Chla_ugl    
##  Min.   :0.0000   Min.   :6.43   Min.   :  6.20   Min.   :  1.3  
##  1st Qu.:0.0700   1st Qu.:7.50   1st Qu.: 15.50   1st Qu.:  3.7  
##  Median :0.0800   Median :7.58   Median : 21.80   Median :  6.7  
##  Mean   :0.0759   Mean   :7.60   Mean   : 24.54   Mean   :137.3  
##  3rd Qu.:0.0800   3rd Qu.:7.69   3rd Qu.: 30.30   3rd Qu.:302.6  
##  Max.   :0.1000   Max.   :9.00   Max.   :187.70   Max.   :330.1  
##  NA's   :1        NA's   :1      NA's   :1        NA's   :1      
##    BGAPC_CML         ODO_sat         ODO_mgl         Depth_m     
##  Min.   :   188   Min.   : 87.5   Min.   :10.34   Min.   :3.705  
##  1st Qu.:   971   1st Qu.: 99.2   1st Qu.:12.34   1st Qu.:4.451  
##  Median :  1369   Median :101.8   Median :12.88   Median :4.684  
##  Mean   :153571   Mean   :102.0   Mean   :12.88   Mean   :4.677  
##  3rd Qu.:345211   3rd Qu.:104.1   3rd Qu.:13.34   3rd Qu.:4.913  
##  Max.   :345471   Max.   :120.8   Max.   :14.99   Max.   :5.454  
##  NA's   :1        NA's   :1       NA's   :1                      
##       Date                        Airtemp              Rain        
##  Min.   :2014-01-01 00:00:00   Min.   :-15.6950   Min.   :0.00000  
##  1st Qu.:2014-01-22 08:22:30   1st Qu.: -0.2528   1st Qu.:0.00000  
##  Median :2014-02-12 16:45:00   Median :  3.0222   Median :0.00000  
##  Mean   :2014-02-12 16:45:00   Mean   :  3.7751   Mean   :0.02137  
##  3rd Qu.:2014-03-06 01:07:30   3rd Qu.:  8.0056   3rd Qu.:0.00000  
##  Max.   :2014-03-27 09:30:00   Max.   : 23.8167   Max.   :8.81380  
##                                                                    
##    WindSpeed     
##  Min.   : 0.000  
##  1st Qu.: 3.970  
##  Median : 6.582  
##  Mean   : 8.764  
##  3rd Qu.:11.735  
##  Max.   :49.326  
## 
# List the column names of the rice data.
names(data)
##  [1] "RecordID"      "PAR"           "WindSpeed_mph" "WindDir"      
##  [5] "RelHumidity"   "BP_HG"         "H2O_TempC"     "SpCond_mScm"  
##  [9] "Salinity_ppt"  "PH"            "Turbidity_ntu" "Chla_ugl"     
## [13] "BGAPC_CML"     "ODO_sat"       "ODO_mgl"       "Depth_m"      
## [17] "Date"          "Airtemp"       "Rain"          "WindSpeed"
# Column names again; for a data frame colnames() returns the same vector.
colnames(data)
##  [1] "RecordID"      "PAR"           "WindSpeed_mph" "WindDir"      
##  [5] "RelHumidity"   "BP_HG"         "H2O_TempC"     "SpCond_mScm"  
##  [9] "Salinity_ppt"  "PH"            "Turbidity_ntu" "Chla_ugl"     
## [13] "BGAPC_CML"     "ODO_sat"       "ODO_mgl"       "Depth_m"      
## [17] "Date"          "Airtemp"       "Rain"          "WindSpeed"
# Length of the observation window: latest timestamp minus earliest.
with(data, max(Date) - min(Date))
## Time difference of 85.39583 days
# Show every row that has at least one missing value in any column.
data[rowSums(is.na(data)) > 0, ]
##      RecordID  PAR WindSpeed_mph WindDir RelHumidity BP_HG H2O_TempC
## 1589    45404 1077         3.487   152.2       48.65 29.93        NA
##      SpCond_mScm Salinity_ppt PH Turbidity_ntu Chla_ugl BGAPC_CML ODO_sat
## 1589          NA           NA NA            NA       NA        NA      NA
##      ODO_mgl  Depth_m                Date  Airtemp Rain WindSpeed
## 1589      NA 4.658868 2014-01-17 13:00:00 6.088889    0  5.611769
# Count the missing values in every column of `data`.
# sum(is.na(.)) counts the NAs directly, and vapply() guarantees exactly one
# integer per column regardless of the input (sapply() does not).
na_count <- vapply(data, function(column) sum(is.na(column)), integer(1))
# Turn the named vector into a one-column data frame; the column names of
# `data` become the row names.
na_count <- data.frame(na_count)
na_count
##               na_count
## RecordID             0
## PAR                  0
## WindSpeed_mph        0
## WindDir              0
## RelHumidity          0
## BP_HG                0
## H2O_TempC            1
## SpCond_mScm          1
## Salinity_ppt         1
## PH                   1
## Turbidity_ntu        1
## Chla_ugl             1
## BGAPC_CML            1
## ODO_sat              1
## ODO_mgl              1
## Depth_m              0
## Date                 0
## Airtemp              0
## Rain                 0
## WindSpeed            0
# Row position(s) holding the lowest PH reading; NAs are ignored when taking
# the minimum.
with(data, which(PH == min(PH, na.rm = TRUE)))
## [1] 4322
library(tidyverse)
## -- Attaching packages ------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1     v readr   1.3.1
## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   0.8.3     v stringr 1.4.0
## v ggplot2 3.2.1     v forcats 0.4.0
## -- Conflicts --------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
# PH readings with their record id and timestamp, highest PH first.
t <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(desc(PH))

# The same three columns, lowest PH first.
p <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(PH)
# Print the ten lowest readings.
p[seq_len(10), ]
##      PH RecordID                Date
## 1  6.43    48137 2014-02-15 00:15:00
## 2  6.48    45204 2014-01-15 11:00:00
## 3  6.52    45159 2014-01-14 23:45:00
## 4  6.59    48136 2014-02-15 00:00:00
## 5  6.60    48824 2014-02-22 04:00:00
## 6  6.62    48086 2014-02-14 11:30:00
## 7  6.81    45549 2014-01-19 01:15:00
## 8  6.84    45354 2014-01-17 00:30:00
## 9  6.86    48185 2014-02-15 12:15:00
## 10 6.87    45059 2014-01-13 22:45:00
library(tidyverse)
# PH readings with their record id and timestamp, highest PH first.
t <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(desc(PH))

# The same three columns, lowest PH first; arrange() places NA rows last.
p <- data %>%
  select(PH, RecordID, Date) %>%
  arrange(PH)
# Print the ten highest readings. Dropping the missing PH rows and taking the
# tail avoids hard-coding row positions, which would silently break whenever
# the number of rows or the number of NAs changes.
tail(p[!is.na(p$PH), ], 10)
##        PH RecordID                Date
## 8189 8.52    50917 2014-03-15 23:15:00
## 8190 8.52    50919 2014-03-15 23:45:00
## 8191 8.55    50907 2014-03-15 20:45:00
## 8192 8.64    50914 2014-03-15 22:30:00
## 8193 8.68    50908 2014-03-15 21:00:00
## 8194 8.85    50909 2014-03-15 21:15:00
## 8195 8.92    50910 2014-03-15 21:30:00
## 8196 8.98    50911 2014-03-15 21:45:00
## 8197 8.99    50912 2014-03-15 22:00:00
## 8198 9.00    50913 2014-03-15 22:15:00
library(tidyverse)
# Keep only the columns needed for the air-temperature question.
sm_data <- data %>%
  select(Date, RecordID, Airtemp)
# Restrict to Saturday readings and sort them from warmest to coldest.
hitemp_data <- sm_data %>%
  filter(weekdays(Date) %in% "Saturday") %>%
  arrange(desc(Airtemp))
# Print the five warmest Saturday readings.
hitemp_data[1:5, ]
##                  Date RecordID  Airtemp
## 1 2014-03-22 16:30:00    51562 22.67222
## 2 2014-03-22 17:15:00    51565 22.10556
## 3 2014-03-22 17:30:00    51566 22.05000
## 4 2014-03-22 17:45:00    51567 21.88889
## 5 2014-03-22 16:15:00    51561 21.85556
# Weekday names in calendar order, used as the factor levels below.
days <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
# Label every reading with its weekday as an ordered factor so that grouped
# output follows Monday..Sunday rather than alphabetical order.
data$Weekday <- factor(weekdays(data$Date), levels = days, ordered = TRUE)
# Highest wind speed recorded on each weekday.
data %>%
  group_by(Weekday) %>%
  summarize(Windspeed = max(WindSpeed))
## # A tibble: 7 x 2
##   Weekday   Windspeed
##   <ord>         <dbl>
## 1 Monday         36.7
## 2 Tuesday        34.5
## 3 Wednesday      38.4
## 4 Thursday       35.2
## 5 Friday         49.3
## 6 Saturday       35.8
## 7 Sunday         34.0
library(ggplot2)
# Histogram of water temperature in 0.5-wide bins, one panel row per weekday,
# with bars filled by weekday.
ggplot(data, aes(H2O_TempC, fill = Weekday)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(Weekday ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Run the helper script (presumably where getBeetleData() is defined), load
# the beetle data set, and print a per-column summary.
source("getBeetledata.R")
beetle_data <- getBeetleData()
summary(beetle_data)
##    Species             Clade               Pop                 PC1        
##  Length:363         Length:363         Length:363         Min.   :-5.717  
##  Class :character   Class :character   Class :character   1st Qu.:-1.862  
##  Mode  :character   Mode  :character   Mode  :character   Median : 1.317  
##                                                           Mean   : 0.000  
##                                                           3rd Qu.: 2.084  
##                                                           Max.   : 3.492  
##       PC2         
##  Min.   :-2.2754  
##  1st Qu.:-1.3277  
##  Median :-0.5726  
##  Mean   : 0.0000  
##  3rd Qu.: 0.5671  
##  Max.   : 8.4950
# List the column names of the beetle data.
names(beetle_data)
## [1] "Species" "Clade"   "Pop"     "PC1"     "PC2"
# Store the categorical label columns as factors.
beetle_data$Species <- factor(beetle_data$Species)
beetle_data$Clade <- factor(beetle_data$Clade)
beetle_data$Pop <- factor(beetle_data$Pop)
# PC1 and PC2 are continuous numeric scores, so they are deliberately left
# numeric. Converting them to factors would make every distinct value its own
# level and force discrete axes when they are plotted against each other.
# Reference chart of plotting symbols 1..25: x cycles through 1..5 (written
# out explicitly instead of relying on recycling) and y runs 1..25.
df <- data.frame(x = rep(1:5, times = 5), y = 1:25, shape = 1:25)
# scale_shape_identity() makes ggplot use the numeric `shape` column as the
# literal symbol code instead of mapping it through a shape scale.
p <- ggplot(df, aes(x, y)) +
  geom_point(
    aes(shape = shape),
    size = 5,
    fill = "darkgreen",
    color = "blue"
  ) +
  scale_shape_identity()
p

library(ggplot2)

# Scatter plot of the two principal components, with point shape mapped to
# Species and point colour mapped to Clade.
p <- ggplot(
  beetle_data,
  aes(x = PC1, y = PC2, shape = Species, color = Clade)
) +
  geom_point(size = 2) +
  labs(
    x = "First Principal Component",
    y = "Second Principal Component",
    title = "Comparing Beetles Based on Genetic Markers and Geography"
  )
p