library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(readr)
# Folder that holds the course CSV files (machine-specific absolute path).
dataFdr <- "D:\\D Drive\\Certificate Course\\data"
filename <- "combined.csv"
# file.path() joins the pieces with a separator R accepts on every platform,
# instead of hand-pasting Windows-only backslashes.
dataFile <- file.path(dataFdr, filename)
# Read the CSV into a tibble; readr guesses the column types.
survey <- read_csv(dataFile)
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load legal_weed_age_GSS2016_ch1.csv from the data folder.
filename <- "legal_weed_age_GSS2016_ch1.csv"
dataFile <- file.path(dataFdr, filename)
## dataFile
mar <- read.csv(dataFile)

# Clean the table:
#   grass   - treat the "DK" and "IAP" codes as missing, then drop the
#             now-unused factor levels
#   age     - map the top-coded "89 OR OLDER" to "89" and convert to numeric
#             (any other non-numeric entry becomes NA)
#   age_cat - bin age into the four labelled bands
mar_cleaned <- mar %>%
  mutate(grass = factor(grass)) %>%
  mutate(grass = if_else(grass %in% c("DK", "IAP"), NA, grass)) %>%
  mutate(grass = droplevels(grass)) %>%
  mutate(age = recode(age, "89 OR OLDER" = "89")) %>%
  mutate(age = as.numeric(age)) %>%
  mutate(
    age_cat = cut(
      age,
      breaks = c(-Inf, 30, 60, 75, Inf),
      labels = c("<30", "30-59", "60-74", "75+"),
      # Left-closed bins so the labels are truthful: 30 belongs to "30-59",
      # 60 to "60-74" and 75 to "75+". cut() defaults to right-closed bins,
      # which would put those ages one band too low.
      right = FALSE
    )
  )

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(mar_cleaned)
}

# NOTE: the recorded output that follows was captured with the previous
# right-closed bins; the age_cat counts shift for ages 30, 60 and 75.
summary(mar_cleaned)
## grass age age_cat
## LEGAL :1126 Min. :18.00 <30 : 535
## NOT LEGAL: 717 1st Qu.:34.00 30-59:1516
## NA's :1024 Median :49.00 60-74: 564
## Mean :49.16 75+ : 242
## 3rd Qu.:62.00 NA's : 10
## Max. :89.00
## NA's :10
# Re-read combined.csv into `survey` (the same file that is loaded near the
# top of the script). file.path() replaces the hand-pasted "\\" separator.
filename <- "combined.csv"
dataFile <- file.path(dataFdr, filename)
survey <- read_csv(dataFile)
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the survey table.
names(survey)
## [1] "record_id" "month" "day" "year"
## [5] "plot_id" "species_id" "sex" "hindfoot_length"
## [9] "weight" "genus" "species" "taxa"
## [13] "plot_type"

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(survey)
}

# Keep only the rows that have no missing value in any column.
survey_cln <- drop_na(survey)
# Scatter plot of hindfoot length against weight; the low alpha makes
# heavily overplotted regions stand out
ggplot(data = survey_cln, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(alpha = 0.1)

# Same scatter plot with a fitted smooth trend drawn on top
ggplot(data = survey_cln, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Contour plot of the two-dimensional density
ggplot(data = survey_cln, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_density2d()
# Number of records per year and genus. count(year, genus) returns the same
# rows as group_by() + count() but ungrouped, so no grouping leaks into
# later steps.
data_yr <- survey_cln %>%
  count(year, genus)
# View(data_yr)

# Time series plot.
# NOTE: there is no group aesthetic here, so the rows of every genus are
# joined into one single line.
ggplot(data_yr, aes(x = year, y = n)) +
  geom_line()

# Time series plot with one line per genus
ggplot(data_yr, aes(x = year, y = n, group = genus)) +
  geom_line()

# Time series plot with one line per genus, each in its own colour
ggplot(data_yr, aes(x = year, y = n, colour = genus)) +
  geom_line()

# One panel per genus, wrapped into rows and columns, sharing axis scales
ggplot(data_yr, aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(~genus)

# One panel per genus, each panel with its own axis scales
ggplot(data_yr, aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(~genus, scales = "free")
# Number of records per year, genus and sex. count() with the columns named
# directly returns an ungrouped table, so no grouping leaks into later steps.
data_yr <- survey_cln %>%
  count(year, genus, sex)

# One panel per genus, lines coloured by sex
ggplot(data_yr, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~genus)

# Grid of panels: one row per genus, one column per sex, coloured by sex
ggplot(data_yr, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ sex)

# Same genus-by-sex grid without the colour mapping
ggplot(data_yr, aes(x = year, y = n)) +
  geom_line() +
  facet_grid(genus ~ sex)

# Time series grid with a title and axis labels
ggplot(data_yr, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ sex) +
  labs(
    title = "Observed genera over time",
    x = "Year of observation",
    y = "Number of Animals"
  )

# Same labelled grid with smaller, rotated axis and strip text so the
# labels fit into the small panels
ggplot(data_yr, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ sex) +
  labs(
    title = "Observed genera over time",
    x = "Year of observation",
    y = "Number of Animals"
  ) +
  theme(
    axis.text.x = element_text(size = 7, angle = 90),
    axis.text.y = element_text(size = 7),
    strip.text = element_text(size = 7, angle = 45)
  )
# Histogram of weight with a title and axis labels (default binning)
ggplot(data = survey_cln, mapping = aes(x = weight)) +
  geom_histogram() +
  labs(
    title = "Weight Distribution of Animals",
    x = "Weight",
    y = "Frequency"
  ) +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with the bars filled by sex, drawn with 100 bins
ggplot(data = survey_cln, mapping = aes(x = weight, fill = sex)) +
  geom_histogram(bins = 100) +
  labs(
    title = "Weight Distribution of Animals",
    x = "Weight",
    y = "Frequency"
  ) +
  theme_bw()
# Histogram of weight filled by sex, one panel per sex.
# `binwidth` is the argument geom_histogram() understands; the previous
# `binsize` was ignored with a warning, so the default 30 bins were drawn.
ggplot(survey_cln, aes(x = weight, fill = sex)) +
  geom_histogram(binwidth = 20) +
  labs(
    title = "Weight Distribution of Animals",
    x = "Weight",
    y = "Frequency"
  ) +
  theme_bw() +
  facet_wrap(~sex)
# Frequency polygon of weight, coloured by sex, one panel per sex
ggplot(data = survey_cln, mapping = aes(x = weight, colour = sex)) +
  geom_freqpoly() +
  labs(
    title = "Distribution of animal by weight",
    x = "Weight",
    y = "Frequency"
  ) +
  theme_bw() +
  facet_wrap(~sex)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bar chart of the number of records per genus, one fill colour per genus.
# The x axis shows `genus`, so it is labelled "Genus" (it used to read
# "Species", which did not match the mapped column or the title).
ggplot(survey_cln, aes(x = genus, fill = genus)) +
  geom_bar() +
  labs(
    title = "Distribution of Animals by genera",
    x = "Genus",
    y = "Frequency"
  ) +
  # Rotate and shrink the genus names so they do not overlap.
  theme(axis.text.x = element_text(size = 7, angle = 90))
# Drop the rows whose `grass` value is missing.
mar_cleaned <- mar_cleaned %>%
  drop_na(grass)
# names(mar_cleaned)

# Jittered scatter plot of knowledge_stat against knowledge_prog on a fixed
# 0-5 by 0-5 canvas, with reference lines at 2.5 splitting it into quadrants.
# NOTE(review): `data` is not assigned anywhere in the visible script (the
# bare name resolves to the utils::data() function) and neither loaded table
# has knowledge_stat / knowledge_prog columns. Point this call at the
# intended participants data frame - TODO confirm.
ggplot(data, aes(x = knowledge_stat, y = knowledge_prog)) +
  geom_point(
    position = position_jitter(height = 0.1, width = 0.1),
    shape = 21, alpha = 0.5, size = 3, color = "red"
  ) +
  lims(x = c(0, 5), y = c(0, 5)) +
  theme_classic() +
  coord_fixed() +
  geom_vline(xintercept = 2.5) +
  geom_hline(yintercept = 2.5) +
  labs(
    x = "Knowledge in Statistics",
    y = "Knowledge in Programming",
    title = "Participants Distribution"
  )