Basic coding

Implement

# Show the working directory; the relative path "universities.csv" below is
# resolved against it.
getwd()
## [1] "C:/Users/Shalini/Downloads/Data Sets/Data Sets/KmeansClustering"
# Read text columns as factors explicitly. R >= 4.0 reads them as character by
# default, which would change how Univ is summarised (per-level counts below)
# and how it behaves in the plots further down.
mydata <- read.csv("universities.csv", stringsAsFactors = TRUE)
# `data` is the name every later statement uses; `mydata` keeps the raw import.
data <- mydata
# Per-column summary: level counts for Univ, five-number summary and mean for
# the numeric columns.
summary(data)
##         Univ         SAT           Top10            Accept    
##  Brown    : 1   Min.   :1005   Min.   : 28.00   Min.   :14.0  
##  CalTech  : 1   1st Qu.:1240   1st Qu.: 74.00   1st Qu.:24.0  
##  CMU      : 1   Median :1285   Median : 81.00   Median :36.0  
##  Columbia : 1   Mean   :1266   Mean   : 76.48   Mean   :39.2  
##  Cornell  : 1   3rd Qu.:1340   3rd Qu.: 90.00   3rd Qu.:50.0  
##  Dartmouth: 1   Max.   :1415   Max.   :100.00   Max.   :90.0  
##  (Other)  :19                                                 
##     SFRatio         Expenses        GradRate    
##  Min.   : 6.00   Min.   : 8704   Min.   :67.00  
##  1st Qu.:11.00   1st Qu.:15140   1st Qu.:81.00  
##  Median :12.00   Median :27553   Median :90.00  
##  Mean   :12.72   Mean   :27388   Mean   :86.72  
##  3rd Qu.:14.00   3rd Qu.:34870   3rd Qu.:94.00  
##  Max.   :25.00   Max.   :63575   Max.   :97.00  
## 
# Column names of the data frame.
names(data)
## [1] "Univ"     "SAT"      "Top10"    "Accept"   "SFRatio"  "Expenses"
## [7] "GradRate"
# attach(data) stays disabled; columns are referenced explicitly instead.
# Spread of two columns: standard deviation of Expenses, variance of GradRate.
sd(data[["Expenses"]])
## [1] 14424.88
var(data[["GradRate"]])
## [1] 82.04333
# Box plots of the numeric columns only. Univ is an identifier, not a
# measurement: as a factor it would be drawn as meaningless level codes, and
# as a character column it makes boxplot() fail.
boxplot(data[vapply(data, is.numeric, logical(1))])

# Histogram of the SAT column (default breaks; the default title is built from
# the argument expression, so it reads "Histogram of data$SAT").
hist(data$SAT)

# Histogram of the SFRatio column.
hist(data$SFRatio)

# plot() on the whole seven-column data frame draws a scatterplot matrix of
# every pair of columns.
plot(data)

# Normal Q-Q plot of GradRate, with the qqline() reference line added on top.
qqnorm(data$GradRate)
qqline(data$GradRate)

# Convert the printed summary table into a long data frame: one row per
# (column, statistic) cell, with the formatted text in Freq.
summary_table <- summary(data)
data.frame(summary_table)
##    Var1        Var2             Freq
## 1              Univ   Brown    : 1  
## 2              Univ   CalTech  : 1  
## 3              Univ   CMU      : 1  
## 4              Univ   Columbia : 1  
## 5              Univ   Cornell  : 1  
## 6              Univ   Dartmouth: 1  
## 7              Univ   (Other)  :19  
## 8               SAT   Min.   :1005  
## 9               SAT   1st Qu.:1240  
## 10              SAT   Median :1285  
## 11              SAT   Mean   :1266  
## 12              SAT   3rd Qu.:1340  
## 13              SAT   Max.   :1415  
## 14              SAT             <NA>
## 15            Top10 Min.   : 28.00  
## 16            Top10 1st Qu.: 74.00  
## 17            Top10 Median : 81.00  
## 18            Top10 Mean   : 76.48  
## 19            Top10 3rd Qu.: 90.00  
## 20            Top10 Max.   :100.00  
## 21            Top10             <NA>
## 22           Accept   Min.   :14.0  
## 23           Accept   1st Qu.:24.0  
## 24           Accept   Median :36.0  
## 25           Accept   Mean   :39.2  
## 26           Accept   3rd Qu.:50.0  
## 27           Accept   Max.   :90.0  
## 28           Accept             <NA>
## 29          SFRatio  Min.   : 6.00  
## 30          SFRatio  1st Qu.:11.00  
## 31          SFRatio  Median :12.00  
## 32          SFRatio  Mean   :12.72  
## 33          SFRatio  3rd Qu.:14.00  
## 34          SFRatio  Max.   :25.00  
## 35          SFRatio             <NA>
## 36         Expenses  Min.   : 8704  
## 37         Expenses  1st Qu.:15140  
## 38         Expenses  Median :27553  
## 39         Expenses  Mean   :27388  
## 40         Expenses  3rd Qu.:34870  
## 41         Expenses  Max.   :63575  
## 42         Expenses             <NA>
## 43         GradRate  Min.   :67.00  
## 44         GradRate  1st Qu.:81.00  
## 45         GradRate  Median :90.00  
## 46         GradRate  Mean   :86.72  
## 47         GradRate  3rd Qu.:94.00  
## 48         GradRate  Max.   :97.00  
## 49         GradRate             <NA>
# Simple linear regression of SAT on Expenses. The formula names the columns
# and the data frame is passed via `data =`, so the fitted model can be reused
# with predict(model1, newdata = ...) and the coefficient is labelled
# "Expenses" rather than "data$Expenses". The estimates are unchanged.
model1 <- lm(SAT ~ Expenses, data = data)
summary(model1)
## 
## Call:
## lm(formula = SAT ~ Expenses, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -154.23  -40.76   19.44   45.62   91.99 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.106e+03  3.027e+01  36.542  < 2e-16 ***
## Expenses    5.852e-03  9.822e-04   5.958 4.49e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.41 on 23 degrees of freedom
## Multiple R-squared:  0.6068, Adjusted R-squared:  0.5897 
## F-statistic: 35.49 on 1 and 23 DF,  p-value: 4.489e-06