# Task 1: Specify working directory

# Point the session at the Practice 7 folder so the relative file name used
# below resolves.
setwd(
  "D:/Class Materials & Work/ED_Psych 521- Data Management and Visualiztion/R Stat Package/R_Lesson 5- Data Visualization_Base/Practice 7"
)

library(tidyverse)

# Load the cleaned, merged dataset; the first row of the file is read as the
# column names.
CleanedData <- read.csv(
  file = "Tarid_MergedData.Reduced_cleaned.csv",
  header = TRUE
)

# Task 2: Bar chart for SchoolType

# Recode the 0/1 SchoolType column as a labelled factor
# (0 -> "Public", 1 -> "Private").
SchoolType <- factor(
  CleanedData$SchoolType,
  levels = c(0, 1),
  labels = c("Public", "Private")
)

# Count the cases in each school type and draw one bar per type.
school_type_counts <- table(SchoolType)
barplot(
  school_type_counts,
  main = "Bar Chart of SchoolType",
  xlab = "SchoolType",
  ylab = "Count",
  col = c("rosybrown1", "seagreen2")
)

# Task 3: Grouped bar chart for gender and school type

# Recode the 0/1 female column as a labelled factor
# (0 -> "male", 1 -> "female").
female <- factor(
  CleanedData$female,
  levels = c(0, 1),
  labels = c("male", "female")
)

# Cross-tabulate gender (rows) by school type (columns), then draw the gender
# bars side by side within each school type.
gender_by_school <- table(female, SchoolType)
barplot(
  gender_by_school,
  beside = TRUE,
  main = "Distribution of SchoolType BY Gender",
  xlab = "SchoolType",
  col = c("rosybrown1", "seagreen2"),
  legend.text = rownames(gender_by_school)
)

# Task 4: Stacked bar chart for gender and school type

# Same gender-by-school-type table as the grouped chart, but with the gender
# counts stacked into a single bar per school type.
gender_by_school <- table(female, SchoolType)
barplot(
  gender_by_school,
  beside = FALSE,
  main = "Distribution of SchoolType BY Gender",
  xlab = "SchoolType",
  col = c("rosybrown1", "seagreen2"),
  legend.text = rownames(gender_by_school)
)

# Task 5: Pie charts for Gender and SchoolType

# Gender and school type are already available as the labelled factors
# `female` and `SchoolType`.

# Write both pies to one PDF file, stacked in a 2-row, 1-column layout.
pdf("Task5_Pie.gender.SchType.pdf")
par(mfrow = c(2, 1))

# Gender frequencies. table() leaves out missing values, so the percentages are
# taken over the cases actually counted (prop.table) rather than a hard-coded
# sample size; that keeps each label in step with the size of its slice.
female_counts <- table(female)
female.labeled <- names(female_counts)
female.pct <- paste(
  female.labeled,
  round(100 * prop.table(female_counts), 1),
  "%"
)

pie(
  female_counts,
  labels = female.pct,
  main = "Pie Chart of Gender",
  col = c("rosybrown1", "seagreen2")
)

# School-type frequencies, labelled the same way.
school_type_counts <- table(SchoolType)
SchoolType.labeled <- names(school_type_counts)
SchoolType.pct <- paste(
  SchoolType.labeled,
  round(100 * prop.table(school_type_counts), 1),
  "%"
)

pie(
  school_type_counts,
  labels = SchoolType.pct,
  main = "Pie Chart of SchoolType",
  col = c("peachpuff", "skyblue1")
)

# Close the PDF device so the file is finalized on disk and later plots are not
# appended to it. The 2x1 layout belongs to this device and goes away with it.
invisible(dev.off())

# Task 6: Histogram with normal curve for ScienceAttitude

# Pull the attitude scores out of the data frame.
ScienceAttitude <- CleanedData$ScienceAttitude

# Mean and SD of the observed (non-missing) scores; these define the normal
# curve laid over the histogram.
attitude_mean <- mean(ScienceAttitude, na.rm = TRUE)
attitude_sd <- sd(ScienceAttitude, na.rm = TRUE)

# Draw the histogram on the density scale so the bars and the normal density
# share one y-axis, then add the curve on top.
hist(
  ScienceAttitude,
  breaks = 8,
  probability = TRUE,
  main = "Histogram of ScienceAttitude with Normal Curve",
  xlab = "SciAttR",
  col = "seagreen2"
)
curve(
  dnorm(x, mean = attitude_mean, sd = attitude_sd),
  add = TRUE,
  col = "salmon",
  lwd = 2
)

# Task 7: Histograms for ScienceScore by the two gender groups (two plots in the same panel)

# Pull the science scores out of the data frame.
ScienceScore <- CleanedData$ScienceScore

# Stack the two histograms in a 2-row, 1-column panel. par() returns the
# previous setting, which is kept so the layout can be put back afterwards.
previous_par <- par(mfrow = c(2, 1))

# One density-scaled histogram per gender. The overlaid normal curve is built
# from the mean and SD of that gender's own scores, so it describes the bars it
# is drawn over rather than the pooled sample.
for (gender in c("male", "female")) {
  group_scores <- ScienceScore[which(female == gender)]
  group_mean <- mean(group_scores, na.rm = TRUE)
  group_sd <- sd(group_scores, na.rm = TRUE)

  hist(
    group_scores,
    breaks = 8,
    probability = TRUE,
    main = paste("Histogram of ScienceScore for", gender, "students"),
    xlab = "SciScore",
    col = "seagreen2"
  )
  curve(
    dnorm(x, mean = group_mean, sd = group_sd),
    add = TRUE,
    col = "salmon",
    lwd = 2
  )
}

# Restore the previous layout so later plots get the full plotting area.
par(previous_par)

# Task 8: Scatterplot for ScienceScore and ScienceAttitude

# ScienceScore and ScienceAttitude are already available as vectors.

# Scatterplot with attitude on the x-axis and score on the y-axis, drawn with
# solid salmon points.
plot(
  x = ScienceAttitude,
  y = ScienceScore,
  main = "ScatterPlot SciAttR vs SciScore",
  xlab = "ScienceAttitude",
  ylab = "ScienceScore",
  col = "salmon",
  pch = 19
)

# Task 9: Boxplot for ScienceAttitude

# ScienceAttitude is already available as a vector.

# Single boxplot of all attitude scores.
boxplot(
  ScienceAttitude,
  main = "Boxplot of ScienceAttitude",
  xlab = "SciAttR",
  col = "lightblue"
)

# Task 10: Boxplot for ScienceAttitude by both Gender and SchoolType

# ScienceAttitude, female and SchoolType are already available.

# One box per gender-by-school-type combination, without notches; the two fill
# colours are recycled across the boxes.
boxplot(
  ScienceAttitude ~ female * SchoolType,
  notch = FALSE,
  main = "Boxplot of SciAttR by Gender and SchoolType",
  xlab = "ScienceAttR",
  col = c("lightblue", "darkgreen")
)