# Implementation
# Show the working directory so the relative CSV path below can be checked.
getwd()
## [1] "C:/Users/Shalini/Downloads/Data Sets/Data Sets/KmeansClustering"
# Read the universities table. stringsAsFactors is set explicitly because the
# read.csv() default changed in R 4.0.0 (TRUE -> FALSE); with TRUE the Univ
# column is a factor on every R version, matching the summary output recorded
# below.
mydata <- read.csv("universities.csv", stringsAsFactors = TRUE)
# `data` is the working name used by the rest of the script; `mydata` keeps
# the as-read table under a separate name.
data <- mydata
# Per-column summary (level counts for Univ, quartiles for numeric columns).
summary(data)
## Univ SAT Top10 Accept
## Brown : 1 Min. :1005 Min. : 28.00 Min. :14.0
## CalTech : 1 1st Qu.:1240 1st Qu.: 74.00 1st Qu.:24.0
## CMU : 1 Median :1285 Median : 81.00 Median :36.0
## Columbia : 1 Mean :1266 Mean : 76.48 Mean :39.2
## Cornell : 1 3rd Qu.:1340 3rd Qu.: 90.00 3rd Qu.:50.0
## Dartmouth: 1 Max. :1415 Max. :100.00 Max. :90.0
## (Other) :19
## SFRatio Expenses GradRate
## Min. : 6.00 Min. : 8704 Min. :67.00
## 1st Qu.:11.00 1st Qu.:15140 1st Qu.:81.00
## Median :12.00 Median :27553 Median :90.00
## Mean :12.72 Mean :27388 Mean :86.72
## 3rd Qu.:14.00 3rd Qu.:34870 3rd Qu.:94.00
## Max. :25.00 Max. :63575 Max. :97.00
##
# Column names of the loaded data frame.
names(data)
## [1] "Univ" "SAT" "Top10" "Accept" "SFRatio" "Expenses"
## [7] "GradRate"
# attach(data) stays commented out; columns are referenced explicitly instead.
# Standard deviation of the Expenses column.
sd(data[["Expenses"]])
## [1] 14424.88
# Variance of the GradRate column.
var(data[["GradRate"]])
## [1] 82.04333
# Exploratory plots.
# Keep only the numeric columns for the multi-column plots: Univ is a label
# column, so drawing it would either plot arbitrary factor codes or fail
# outright when the column is character.
numeric_data <- data[vapply(data, is.numeric, logical(1))]

# Side-by-side boxplots of every numeric column.
# NOTE(review): Expenses is on a much larger scale than the other columns
# (see the summary output above), so the remaining boxes look compressed.
boxplot(numeric_data)

# Distribution of SAT scores.
hist(data$SAT)

# Distribution of the student/faculty ratio.
hist(data$SFRatio)

# Scatterplot matrix of all numeric columns.
plot(numeric_data)

# Normal Q-Q plot of GradRate with its reference line.
qqnorm(data$GradRate)
qqline(data$GradRate)

# Flatten the per-column summary table into a long data frame
# (columns Var1, Var2, Freq) and print it.
summary_table <- summary(data)
data.frame(summary_table)
## Var1 Var2 Freq
## 1 Univ Brown : 1
## 2 Univ CalTech : 1
## 3 Univ CMU : 1
## 4 Univ Columbia : 1
## 5 Univ Cornell : 1
## 6 Univ Dartmouth: 1
## 7 Univ (Other) :19
## 8 SAT Min. :1005
## 9 SAT 1st Qu.:1240
## 10 SAT Median :1285
## 11 SAT Mean :1266
## 12 SAT 3rd Qu.:1340
## 13 SAT Max. :1415
## 14 SAT <NA>
## 15 Top10 Min. : 28.00
## 16 Top10 1st Qu.: 74.00
## 17 Top10 Median : 81.00
## 18 Top10 Mean : 76.48
## 19 Top10 3rd Qu.: 90.00
## 20 Top10 Max. :100.00
## 21 Top10 <NA>
## 22 Accept Min. :14.0
## 23 Accept 1st Qu.:24.0
## 24 Accept Median :36.0
## 25 Accept Mean :39.2
## 26 Accept 3rd Qu.:50.0
## 27 Accept Max. :90.0
## 28 Accept <NA>
## 29 SFRatio Min. : 6.00
## 30 SFRatio 1st Qu.:11.00
## 31 SFRatio Median :12.00
## 32 SFRatio Mean :12.72
## 33 SFRatio 3rd Qu.:14.00
## 34 SFRatio Max. :25.00
## 35 SFRatio <NA>
## 36 Expenses Min. : 8704
## 37 Expenses 1st Qu.:15140
## 38 Expenses Median :27553
## 39 Expenses Mean :27388
## 40 Expenses 3rd Qu.:34870
## 41 Expenses Max. :63575
## 42 Expenses <NA>
## 43 GradRate Min. :67.00
## 44 GradRate 1st Qu.:81.00
## 45 GradRate Median :90.00
## 46 GradRate Mean :86.72
## 47 GradRate 3rd Qu.:94.00
## 48 GradRate Max. :97.00
## 49 GradRate <NA>
# Simple linear regression of SAT on Expenses.
# The columns are named in the formula and the data frame is passed through
# `data =` (rather than writing data$SAT ~ data$Expenses) so that
# predict(model1, newdata = ...) evaluates the formula in `newdata` and the
# coefficient is labelled `Expenses`. The fitted values are unchanged.
model1 <- lm(SAT ~ Expenses, data = data)
summary(model1)
##
## Call:
## lm(formula = SAT ~ Expenses, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -154.23 -40.76 19.44 45.62 91.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.106e+03 3.027e+01 36.542 < 2e-16 ***
## Expenses 5.852e-03 9.822e-04 5.958 4.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 69.41 on 23 degrees of freedom
## Multiple R-squared: 0.6068, Adjusted R-squared: 0.5897
## F-statistic: 35.49 on 1 and 23 DF, p-value: 4.489e-06