# Read hsb2.csv from the course data directory into `hsb` and show it.
# NOTE(review): setwd() ties the script to one machine. It is kept because
# other code may rely on the working directory; consider a project-relative
# path instead.
setwd("D:/StatisticalComputing2024/data")
hsb <- read.csv("hsb2.csv")
print(hsb)
# Codebook for the integer-coded categorical columns:
#   female                      0 = Male; 1 = Female
#   race   (ethnic background)  1 = African American; 2 = Asian;
#                               3 = Hispanic; 4 = White
#   ses    (socio-economic)     1 = Low; 2 = Middle; 3 = High
#   schtyp (school type)        1 = Public; 2 = Private
#   prog   (program type)       1 = General; 2 = Academic; 3 = Vocational
#
# Add a labelled factor next to each coded column; the numeric originals
# are left untouched.
hsb$gender <- factor(hsb$female, levels = 0:1, labels = c("Male", "Female"))
hsb$schtyp2 <- factor(hsb$schtyp, levels = 1:2, labels = c("Public", "Private"))
hsb$race2 <- factor(
  hsb$race,
  levels = 1:4,
  labels = c("African-American", "Asian", "Hispanic", "White")
)
hsb$ses2 <- factor(hsb$ses, levels = 1:3, labels = c("Low", "Middle", "High"))
# Label order follows the codebook above: 2 = Academic, 3 = Vocational.
hsb$prog2 <- factor(
  hsb$prog,
  levels = 1:3,
  labels = c("General", "Academic", "Vocational")
)
print(hsb)
names(hsb)

# Decoding: build `hsbdecoded`, a copy of `hsb` in which the integer-coded
# categorical columns are replaced in place by labelled factors. `hsb`
# itself is not modified. Inside transform() the columns are referenced by
# bare name, so no `hsb$` prefix is needed.
hsbdecoded <- transform(
  hsb,
  female = factor(female, levels = 0:1, labels = c("Male", "Female")),
  schtyp = factor(schtyp, levels = 1:2, labels = c("Public", "Private")),
  race = factor(
    race,
    levels = 1:4,
    labels = c("African-American", "Asian", "Hispanic", "White")
  ),
  ses = factor(ses, levels = 1:3, labels = c("Low", "Middle", "High")),
  prog = factor(
    prog,
    levels = 1:3,
    labels = c("General", "Academic", "Vocational")
  )
)
print(hsbdecoded)
# Compare math and science scores: F test for equality of variances, then
# a two-sample t test that pools the variances (var.equal = TRUE).
var.test(hsb$math, hsb$science)
t.test(hsb$math, hsb$science, var.equal = TRUE)
# Add a numeric code column for each labelled factor.
# as.numeric() on a factor returns the level position (1, 2, ...), so
# `gendernum` is 1/2 rather than the 0/1 coding of `female`.
# NOTE(review): the input was previously `hsbencoded`, which is never
# defined in the visible script; `hsbdecoded` carries the same factor
# columns and is assumed to be the intended input -- confirm.
hsbdecoded <- transform(
  hsbdecoded,
  gendernum = as.numeric(gender),
  schtypnum = as.numeric(schtyp2),
  racenum = as.numeric(race2),
  sesnum = as.numeric(ses2),
  prognum = as.numeric(prog2)
)
print(hsbdecoded)
# Encoding with dplyr ----
library(magrittr)
library(dplyr)

# Columns to coerce to factor.
cols <- c("gender", "schtyp2", "race2", "ses2", "prog2")

# across() + all_of() is the supported replacement for the deprecated
# mutate_each_() / funs() pair; all_of() raises an error if any name in
# `cols` is missing from `hsb` instead of skipping it silently.
hsb <- hsb %>%
  mutate(across(all_of(cols), factor))
str(hsb)
# Per-row total and mean over columns 7 to 11.
# NOTE(review): presumably these five columns are the test scores (read,
# write, math, science, ...) -- confirm against names(hsb).
# rowSums()/rowMeans() give the same result as apply(..., 1, sum/mean)
# on numeric columns without the row-by-row function calls.
score_cols <- 7:11
hsb$score <- rowSums(hsb[, score_cols])
hsb$mean <- rowMeans(hsb[, score_cols])
print(hsb)
# Total score by race: grouped by the numeric code, by the labelled
# factor, and by the labelled factor with one colour per group.
boxplot(score ~ race, data = hsb)
boxplot(score ~ race2, data = hsb)
boxplot(
  score ~ race2,
  data = hsb,
  col = c("pink", "blue", "orange", "purple")
)
# Encoding using the caret package ----
# install.packages("caret", dependencies = TRUE)
library(caret)

# Define the one-hot encoder over every column of hsb.
dummy <- dummyVars(" ~ .", data = hsb)

# Perform one-hot encoding on the data frame.
final_hsb <- data.frame(predict(dummy, newdata = hsb))

# View the final data frame.
print(final_hsb)
# Install dplyr only when it is missing. An unconditional install would
# re-download the package on every run and would try to replace a package
# that library(dplyr) has already attached earlier in this script.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", dependencies = TRUE)
}
# Frequency of each race category.
tab <- table(hsb$race2)
tab

# Create pie chart.
pie(tab, main = "Pie Chart Example")

library(plotrix)

# Example counts; not used by the charts in this section.
counts <- c(10, 20, 30, 40)
# Take the slice labels from the table itself so they always match the
# order and spelling of the factor levels being plotted.
labels <- names(tab)

pie3D(tab, labels = labels, main = "3D Pie Chart Example")
# Open the help page for cor.test().
?cor.test

# Pearson correlation tests for read vs. write and math vs. science,
# followed by the correlation matrix of columns 7 to 11.
cor.test(hsb$read, hsb$write, method = "pearson")
cor.test(hsb$math, hsb$science, method = "pearson")
cor(hsb[, 7:11])
# Spread and centre of the total score.
sd(hsb$score)
mean(hsb$score)

# One-sample t statistic for H0: mean score = mu0, computed by hand as
# sqrt(n) * (mean - mu0) / sd; it should equal the statistic reported by
# t.test() below.
# NOTE(review): the previous expression `sqrt(200)(261.9-265)` called a
# number as a function and had no division by the standard deviation; the
# full statistic is assumed to be what was intended -- confirm.
mu0 <- 265
sqrt(length(hsb$score)) * (mean(hsb$score) - mu0) / sd(hsb$score)

# Parametric and rank-based tests of the same hypothesis.
t.test(hsb$score, mu = mu0)
wilcox.test(hsb$score, mu = mu0)
# Math vs. science scores.
# F test for equality of variances.
var.test(hsb$math, hsb$science)
# Two-sample t test with the default unequal-variance (Welch) form.
t.test(hsb$math, hsb$science)
# Two-sample t test with pooled variance.
t.test(hsb$math, hsb$science, var.equal = TRUE)
# Rank-based alternative.
wilcox.test(hsb$math, hsb$science)

# Open the help page for wilcox.test().
?wilcox.test
# One-way comparison of total score across race groups, fitted as a
# linear model and summarised with an ANOVA table.
fit1 <- lm(score ~ race2, data = hsb)
anova(fit1)

# The same model fitted through aov().
fit2 <- aov(score ~ race2, data = hsb)
anova(fit2)

# Rank-based alternative (Kruskal-Wallis), using the formula interface
# with `data =` rather than `hsb$` on both sides of the formula.
kruskal.test(score ~ race2, data = hsb)