# Task 1: Set the working directory and read the data file ----

# Point R at the folder that holds the practice files so the relative file
# name below resolves.
# NOTE(review): this absolute path is machine-specific; anyone else running
# the script has to edit it before the read below can succeed.
setwd("D:/Class Materials & Work/ED_Psych 521- Data Management and Visualiztion/R Stat Package/R_Lesson 6- Data Visualization_GGplot2/Practice 8")

# Read the dataset file; the first row of the CSV holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
CleanedData <- read.csv("Tarid_MergedData.Reduced_cleaned.csv", header = TRUE)

#Activating the package
library(ggrepel)
library(ggpubr)
library(tidyverse)

# Attach readable labels to the two 0/1 indicator columns, storing each
# labelled factor in a new column of CleanedData.
# NOTE(review): nothing in this step writes the labelled data out to a .csv
# file.
CleanedData[["Gender"]] <- factor(
  CleanedData[["female"]],
  levels = c(0, 1),
  labels = c("Male", "Female")
)
CleanedData[["SchoolType.Labeled"]] <- factor(
  CleanedData[["SchoolType"]],
  levels = c(0, 1),
  labels = c("Public", "Private")
)

# Print the column names of the data frame.
colnames(CleanedData)
##  [1] "IDstudent"          "ITSEX"              "BSBS17A"           
##  [4] "BSBS17B"            "BSBS17C"            "BSBS17D"           
##  [7] "BSBS17E"            "BSBS17F"            "BSBS17G"           
## [10] "ScienceScore"       "PUBPRIV"            "female"            
## [13] "SchoolType"         "BSBS17B.R"          "BSBS17D.R"         
## [16] "ScienceAttitude"    "Gender"             "SchoolType.Labeled"
#[1] "IDstudent"          "ITSEX"              "BSBS17A"            "BSBS17B"           
#[5] "BSBS17C"            "BSBS17D"            "BSBS17E"            "BSBS17F"           
#[9] "BSBS17G"            "ScienceScore"       "PUBPRIV"            "female"            
#[13] "SchoolType"        "BSBS17B.R"          "BSBS17D.R"          "ScienceAttitude"   
#[17] "Gender"            "SchoolType.Labeled"

# Task 2: Bar chart for SchoolType ----

# Bar chart of student counts per school type.
# The bar colours are mapped through the fill aesthetic and a named manual
# scale instead of a fixed two-element `fill` vector: a constant vector has to
# match the number of bars drawn, so it fails as soon as a third bar appears
# (for example one for missing school types). The legend is hidden because the
# x axis already labels each bar.
ggplot(CleanedData, aes(x = SchoolType.Labeled, fill = SchoolType.Labeled)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(values = c(Public = "seagreen2", Private = "salmon")) +
  ggtitle("Bar chart for SchoolType") +
  theme(
    plot.title = element_text(
      face = "bold", size = 14, hjust = 0.5, color = "darkred"
    )
  )

# Task 3: Grouped bar chart by both Gender and SchoolType ----

# Side-by-side (dodged) bars: one cluster per gender, one bar per school type.
ggplot(
  data = CleanedData,
  mapping = aes(x = Gender, fill = SchoolType.Labeled)
) +
  geom_bar(position = "dodge", alpha = 0.5) +
  scale_fill_manual(values = c("salmon", "seagreen2")) +
  labs(title = "grouped bar chart GenderxSchType") +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 14,
      hjust = 0.5,
      colour = "darkred"
    )
  )

# Task 4: Stacked bar chart by both Gender and SchoolType ----

# Stacked bars: one bar per gender, split by school type.
ggplot(
  data = CleanedData,
  mapping = aes(x = Gender, fill = SchoolType.Labeled)
) +
  geom_bar(position = "stack", alpha = 0.4) +
  scale_fill_manual(values = c("salmon", "seagreen2"))

# Task 5: Pie chart for Gender and SchoolType ----

# Count the students in every Gender x SchoolType combination.
# `.groups = "drop"` returns an ungrouped table; without it summarise() keeps
# the result grouped by Gender and prints a message saying so.
CleanedData.Aggregated.Gender.SchoolType <- CleanedData %>%
  group_by(Gender, SchoolType.Labeled) %>%
  summarise(count.both = n(), .groups = "drop")

# One pie per school type, sliced by gender. A pie is a stacked bar drawn in
# polar coordinates with the counts mapped to the angle.
ggplot(
  CleanedData.Aggregated.Gender.SchoolType,
  aes(x = "", y = count.both, fill = Gender)
) +
  geom_bar(stat = "identity", position = "stack", color = "white") +
  # Stack the labels the same way as the bars and centre each one inside its
  # slice; with the default identity position they sit at the raw count
  # instead of on the slice they describe.
  geom_text_repel(
    aes(label = paste(Gender, count.both)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar("y", start = 0) +
  facet_wrap(~SchoolType.Labeled) +
  scale_fill_manual(values = c("salmon", "seagreen2")) +
  ylab("SchoolType") +
  xlab("Student Gender") +
  ggtitle("Pie chart of Gender by SchoolType")

# Task 6: Histogram with normal curve for ScienceAttitude ----

# Density-scaled histogram of ScienceAttitude, overlaid with a normal curve
# that uses the sample mean and standard deviation, plus a dashed vertical
# line at the sample mean. Missing values are ignored when computing the
# mean and sd (`na.rm = TRUE`; `T` is avoided because it can be reassigned).
ggplot(CleanedData, aes(x = ScienceAttitude)) +
  # after_stat(density) replaces the older `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.1, colour = "seagreen2", fill = "salmon", alpha = 0.4
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(CleanedData$ScienceAttitude, na.rm = TRUE),
      sd = sd(CleanedData$ScienceAttitude, na.rm = TRUE)
    ),
    colour = "indianred2", size = 1
  ) +
  # The mean is a single constant, so it is passed outside aes(); inside
  # aes() the same line would be drawn once for every row of the data.
  geom_vline(
    xintercept = mean(CleanedData$ScienceAttitude, na.rm = TRUE),
    linetype = "dashed", size = 1
  )
## Warning: Removed 24 rows containing non-finite values (stat_bin).

# Task 7: Histograms for ScienceScore by gender group (two plots in the same panel) ----

# Density-scaled histograms of ScienceScore, one facet per gender, each with
# a kernel density estimate and a normal reference curve.
# The earlier par(mfrow = ...) call is dropped: par() only affects base
# graphics, so it did nothing for this ggplot2 figure while leaving the
# base-graphics layout changed. facet_wrap() is what puts both plots in one
# figure.
# NOTE(review): the normal curve uses the mean and sd of ALL students, so the
# same curve is drawn in both gender facets — confirm that is intended.
ggplot(CleanedData, aes(x = ScienceScore)) +
  # after_stat(density) replaces the older `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 15, colour = "seagreen2", fill = "salmon", alpha = 0.4
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(CleanedData$ScienceScore, na.rm = TRUE),
      sd = sd(CleanedData$ScienceScore, na.rm = TRUE)
    ),
    colour = "indianred2", size = 1
  ) +
  facet_wrap(~Gender) +
  geom_density(color = "lightslateblue", size = 1)

# Task 8: Scatter plot for ScienceScore and ScienceAttitude ----

# Scatter plot of ScienceScore against ScienceAttitude with two fitted
# trend lines (linear model and loess), distinguished by colour in a legend
# titled "Method".
# - `se = FALSE` is spelled out because `F` can be reassigned.
# - `formula = y ~ x` is the formula geom_smooth() already picks here; stating
#   it explicitly silences the "using formula" message.
# - The colour values are named so each method keeps its colour regardless of
#   the order in which the legend keys are sorted.
ggplot(CleanedData, aes(x = ScienceAttitude, y = ScienceScore)) +
  geom_point(colour = "salmon") +
  geom_smooth(aes(colour = "lm"), method = "lm", formula = y ~ x, se = FALSE) +
  geom_smooth(
    aes(colour = "loess"),
    method = "loess", formula = y ~ x, se = FALSE
  ) +
  labs(colour = "Method") +
  scale_color_manual(values = c(lm = "darkred", loess = "darkgreen"))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).

# Task 9: Boxplot for ScienceAttitude ----

# Single boxplot of ScienceAttitude; the empty string on x gives one
# unlabelled box.
ggplot(
  data = CleanedData,
  mapping = aes(x = "", y = ScienceAttitude)
) +
  geom_boxplot(fill = "seagreen2", colour = "salmon")
## Warning: Removed 24 rows containing non-finite values (stat_boxplot).

# Task 10: Boxplot for ScienceAttitude by both Gender and SchoolType ----

# Boxplots of ScienceAttitude per school type, with one facet per gender.
ggplot(
  data = CleanedData,
  mapping = aes(x = SchoolType.Labeled, y = ScienceAttitude)
) +
  geom_boxplot(fill = "seagreen2", colour = "salmon") +
  facet_wrap(vars(Gender))
## Warning: Removed 24 rows containing non-finite values (stat_boxplot).