READING THE DATA
chicago_public_schools_test = read.csv(file = "Chicago_Public_Schools_Test.csv")
head(chicago_public_schools_test)
chicago_public_schools_train = read.csv(file = "Chicago_Public_Schools_Train.csv")
head(chicago_public_schools_train)
EXTRACTING VALUES
# Pull the training-set columns used in later sections into standalone vectors.
safety_score_train = chicago_public_schools_train$Safety.Score
family_involvement_score_train = chicago_public_schools_train$Family.Involvement.Score
environment_score_train = chicago_public_schools_train$Environment.Score
instruction_score_train = chicago_public_schools_train$Instruction.Score
leaders_score_train = chicago_public_schools_train$Leaders.Score
teachers_score_train = chicago_public_schools_train$Teachers.Score
parent_engagement_score_train = chicago_public_schools_train$Parent.Engagement.Score
parent_environment_score_train = chicago_public_schools_train$Parent.Environment.Score
# The attendance columns hold text such as "95.50%" (summary() tabulates them
# instead of reporting quartiles), so strip the percent sign and convert to
# numeric; entries that are not numbers become NA.
avg_student_attendance_train = as.numeric(sub("%", "", as.character(chicago_public_schools_train$Average.Student.Attendance), fixed = TRUE))
rate_of_misconduct_train = chicago_public_schools_train$Rate.of.Misconducts
avg_teacher_attendance_train = as.numeric(sub("%", "", as.character(chicago_public_schools_train$Average.Teacher.Attendance), fixed = TRUE))
ISAT_exceeding_math_train = chicago_public_schools_train$ISAT.Exceeding.Math
ISAT_exceeding_reading_train = chicago_public_schools_train$ISAT.Exceeding.Reading
college_enrollment_train = chicago_public_schools_train$College.Enrollment
latitude_train = chicago_public_schools_train$Latitude
longitude_train = chicago_public_schools_train$Longitude
comm_area_number_train = chicago_public_schools_train$Community.Area.Number
ward_train = chicago_public_schools_train$Ward
police_district_train = chicago_public_schools_train$Police.District
BASIC MATH
mean_safety_score = mean(safety_score_train)
mean_safety_score
[1] 51.73
sd_safety_score = sd(safety_score_train)
sd_safety_score
[1] 20.88199
var_safety_score = var(safety_score_train)
var_safety_score
[1] 436.0577
# sqrt() needs an argument; the square root of the variance recovers the standard deviation.
sqrt(var_safety_score)
[1] 20.88199
SIGNAL TO NOISE RATIO
# Signal-to-noise ratio = mean / standard deviation, using the variables defined under BASIC MATH.
snr_safety_score = mean_safety_score / sd_safety_score
snr_safety_score
[1] 2.477254
mean_environment_score = mean(environment_score_train)
mean_environment_score
[1] 51.11
sd_environment_score = sd(environment_score_train)
sd_environment_score
[1] 17.30703
snr_environment_score = mean_environment_score / sd_environment_score
snr_environment_score
[1] 2.953136
MAXIMUM, MINIMUM, AND RANGE
max_safety_score = max(safety_score_train)
max_safety_score
[1] 99
min_safety_score = min(safety_score_train)
min_safety_score
[1] 6
range_safety_score = max_safety_score - min_safety_score
range_safety_score
[1] 93
FINDING UPPER AND LOWER THRESHOLD FOR FINDING OUTLIERS
upper_threshold = mean_safety_score + 3*sd_safety_score
upper_threshold
[1] 114.376
lower_threshold = mean_safety_score - 3*sd_safety_score
lower_threshold
[1] -10.91598
environment_upper_threshold = mean_environment_score + 3*sd_environment_score
environment_upper_threshold
[1] 103.0311
environment_lower_threshold = mean_environment_score - 3*sd_environment_score
environment_lower_threshold
[1] -0.8110852
SUMMARY FUNCTION SPECIFIC (Can do for individual or whole data. Both shown below.)
summary(safety_score_train)
Min. 1st Qu. Median Mean 3rd Qu. Max.
6.00 35.75 50.00 51.73 61.75 99.00
SUMMARY FUNCTION GENERAL
summary(chicago_public_schools_train)
School.ID
Min. :609772
1st Qu.:609878
Median :610008
Mean :610033
3rd Qu.:610178
Max. :610541
Name.of.School
Abraham Lincoln Elementary School : 1
Adam Clayton Powell Paideia Community Academy Elementary School: 1
Agustin Lara Elementary Academy : 1
Albany Park Multicultural Academy : 1
Alessandro Volta Elementary School : 1
Alexander Graham Elementary School : 1
(Other) :94
Elementary_Middle_or_High.School Street.Address City
ES:96 10041 S Union Ave : 1 Chicago:100
MS: 4 10115 S Prairie Ave : 1
10538 S Langley Ave : 1
10810 S Avenue H : 1
10845 S Union Ave : 1
(Other) :92
NA's : 3
State ZIP.Code Network.Manager Safety.Icon
IL :98 Min. :60607 Pershing Elementary Network :11 Average :40
NA's: 2 1st Qu.:60618 Midway Elementary Network :10 Strong :21
Median :60625 O'Hare Elementary Network : 9 Very Strong:10
Mean :60629 Fullerton Elementary Network: 8 Very Weak : 3
3rd Qu.:60637 Skyway Elementary Network : 8 Weak :26
Max. :60660 (Other) :47
NA's : 7
Safety.Score Family.Involvement.Icon Family.Involvement.Score
Min. : 6.00 Average :39 Min. :16.00
1st Qu.:35.75 Strong :21 1st Qu.:38.00
Median :50.00 Very Strong: 9 Median :50.00
Mean :51.73 Very Weak : 2 Mean :51.37
3rd Qu.:61.75 Weak :29 3rd Qu.:62.25
Max. :99.00 Max. :99.00
Environment.Icon Environment.Score Instruction.Icon Instruction.Score
Average :45 Min. : 2.00 Average :44 Min. : 1.00
Strong :25 1st Qu.:40.00 Strong :23 1st Qu.:39.75
Very Strong: 6 Median :50.00 Very Strong: 8 Median :50.00
Very Weak : 2 Mean :51.11 Very Weak : 2 Mean :52.08
Weak :22 3rd Qu.:63.50 Weak :23 3rd Qu.:63.00
Max. :99.00 Max. :99.00
Leaders.Icon Leaders.Score Teachers.Icon Teachers.Score
Average :37 Min. :20.00 Average :51 Min. : 8.00
Strong :28 1st Qu.:38.75 Strong :19 1st Qu.:41.00
Very Strong: 8 Median :51.00 Very Strong: 7 Median :51.00
Weak :27 Mean :52.07 Very Weak : 6 Mean :51.08
3rd Qu.:63.25 Weak :17 3rd Qu.:61.00
Max. :97.00 Max. :99.00
Parent.Engagement.Icon Parent.Engagement.Score Parent.Environment.Icon
Average:61 Min. :43.0 Average:61
Strong :18 1st Qu.:47.0 Strong :19
Weak :21 Median :50.0 Weak :20
Mean :50.3
3rd Qu.:52.0
Max. :68.0
Parent.Environment.Score Average.Student.Attendance Rate.of.Misconducts
Min. :38.00 95.50% : 8 Min. : 0.00
1st Qu.:47.00 95.10% : 6 1st Qu.: 4.30
Median :50.00 95.60% : 5 Median : 10.20
Mean :50.04 92.50% : 4 Mean : 17.74
3rd Qu.:53.00 95.90% : 4 3rd Qu.: 25.00
Max. :65.00 93.40% : 3 Max. :100.50
(Other):70
Average.Teacher.Attendance ISAT.Exceeding.Math ISAT.Exceeding.Reading
95.90% : 6 Min. : 3.20 Min. : 1.30
96.90% : 6 1st Qu.:10.00 1st Qu.: 7.50
96.00% : 5 Median :16.55 Median :11.60
96.10% : 5 Mean :21.83 Mean :16.66
96.50% : 5 3rd Qu.:25.68 3rd Qu.:19.80
94.70% : 4 Max. :92.80 Max. :92.30
(Other):69
College.Enrollment Latitude Longitude Community.Area.Number
Min. : 192.0 Min. :41.69 Min. :-87.83 Min. : 1.00
1st Qu.: 344.0 1st Qu.:41.76 1st Qu.:-87.71 1st Qu.:22.75
Median : 515.5 Median :41.84 Median :-87.68 Median :38.50
Mean : 564.1 Mean :41.84 Mean :-87.68 Mean :40.09
3rd Qu.: 717.2 3rd Qu.:41.91 3rd Qu.:-87.64 3rd Qu.:61.00
Max. :1560.0 Max. :42.01 Max. :-87.53 Max. :77.00
Community.Area.Name Ward Police.District
WEST GARFIELD PARK: 5 Min. : 1.00 Min. : 1.00
WEST TOWN : 5 1st Qu.:12.75 1st Qu.: 7.00
BRIGHTON PARK : 4 Median :20.50 Median :10.00
ALBANY PARK : 3 Mean :22.31 Mean :11.61
ASHBURN : 3 3rd Qu.:30.25 3rd Qu.:17.00
AUBURN GRESHAM : 3 Max. :50.00 Max. :25.00
(Other) :77
Location
(41.68733881, -87.70325179): 1
(41.68810403, -87.53600985): 1
(41.69093337, -87.65870614): 1
(41.69305404, -87.6808619) : 1
(41.69651565, -87.63993763): 1
(41.69719792, -87.6972638) : 1
(Other) :94
BASIC PLOTTING
plot(safety_score_train)
Z VALUE (When X is a given value to test. In this example I chose 120)
x = 120
Zscore_safety_score = (x - mean_safety_score) / sd_safety_score
Zscore_safety_score
[1] 3.269324
FREQUENCY TABLE, BAR PLOT, AND HISTOGRAM
safety_score_frequency_table = table(safety_score_train)
safety_score_frequency_table
safety_score_train
6 11 19 20 22 23 25 26 27 28 29 31 32 33 35 36 37 38 41 42 43 44 45 46 48 49 50
1 1 1 1 1 2 2 1 1 3 2 2 3 1 3 1 1 2 1 2 2 4 2 3 3 3 2
51 52 53 54 55 56 57 58 59 60 61 64 65 66 67 68 70 73 74 75 76 77 78 83 86 87 92
2 2 2 3 1 2 1 3 2 3 3 2 1 1 1 1 1 1 1 2 1 1 2 1 1 2 1
99
5
safety_score_frequency_barplot = barplot(safety_score_frequency_table)
safety_score_frequency_barplot
[,1]
[1,] 0.7
[2,] 1.9
[3,] 3.1
[4,] 4.3
[5,] 5.5
[6,] 6.7
[7,] 7.9
[8,] 9.1
[9,] 10.3
[10,] 11.5
[11,] 12.7
[12,] 13.9
[13,] 15.1
[14,] 16.3
[15,] 17.5
[16,] 18.7
[17,] 19.9
[18,] 21.1
[19,] 22.3
[20,] 23.5
[21,] 24.7
[22,] 25.9
[23,] 27.1
[24,] 28.3
[25,] 29.5
[26,] 30.7
[27,] 31.9
[28,] 33.1
[29,] 34.3
[30,] 35.5
[31,] 36.7
[32,] 37.9
[33,] 39.1
[34,] 40.3
[35,] 41.5
[36,] 42.7
[37,] 43.9
[38,] 45.1
[39,] 46.3
[40,] 47.5
[41,] 48.7
[42,] 49.9
[43,] 51.1
[44,] 52.3
[45,] 53.5
[46,] 54.7
[47,] 55.9
[48,] 57.1
[49,] 58.3
[50,] 59.5
[51,] 60.7
[52,] 61.9
[53,] 63.1
[54,] 64.3
[55,] 65.5
safety_score_histogram = hist(safety_score_train)
safety_score_histogram
$breaks
[1] 0 10 20 30 40 50 60 70 80 90 100
$counts
[1] 1 3 12 13 22 21 10 8 4 6
$density
[1] 0.001 0.003 0.012 0.013 0.022 0.021 0.010 0.008 0.004 0.006
$mids
[1] 5 15 25 35 45 55 65 75 85 95
$xname
[1] "safety_score_train"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
PLOT SAFETY SCORE VS. FAMILY INVOLVEMENT
plot(safety_score_train, family_involvement_score_train)
scatter.smooth(safety_score_train, family_involvement_score_train)
CORRELATION (Between safety score and family involvement)
cor(safety_score_train,family_involvement_score_train)
[1] 0.7144638
CORRELATION FOR ALL VARIABLES (the correlation between ISAT reading and ISAT math is the highest)
# Select the numeric columns by name rather than by position so the call keeps
# working if the CSV column order changes.
numeric_columns = c("Safety.Score", "Family.Involvement.Score", "Environment.Score",
                    "Instruction.Score", "Leaders.Score", "Teachers.Score",
                    "Parent.Engagement.Score", "Parent.Environment.Score",
                    "Rate.of.Misconducts", "ISAT.Exceeding.Math", "ISAT.Exceeding.Reading",
                    "College.Enrollment", "Latitude", "Longitude",
                    "Community.Area.Number", "Ward", "Police.District")
cor(chicago_public_schools_train[, numeric_columns])
Safety.Score Family.Involvement.Score Environment.Score
Safety.Score 1.000000000 0.71446375 0.6331058
Family.Involvement.Score 0.714463752 1.00000000 0.5347450
Environment.Score 0.633105824 0.53474498 1.0000000
Instruction.Score 0.476736067 0.42281132 0.8021495
Leaders.Score 0.450972507 0.68753901 0.3111567
Teachers.Score 0.483625772 0.72024487 0.3857252
Parent.Engagement.Score 0.592748544 0.54837537 0.4628990
Parent.Environment.Score 0.142646323 0.09603933 0.2465991
Rate.of.Misconducts -0.495669900 -0.43192336 -0.3371369
ISAT.Exceeding.Math 0.712572291 0.61804691 0.4467668
ISAT.Exceeding.Reading 0.726116154 0.63542520 0.4461274
College.Enrollment 0.009967808 0.07970977 -0.1561447
Latitude 0.356703198 0.24808717 0.2408997
Longitude -0.268029157 -0.23382757 -0.1810229
Community.Area.Number -0.333390485 -0.22394833 -0.2203719
Ward 0.233385546 0.21651341 0.1368001
Police.District 0.341978820 0.31440629 0.2099816
Instruction.Score Leaders.Score Teachers.Score
Safety.Score 0.47673607 0.450972507 0.48362577
Family.Involvement.Score 0.42281132 0.687539014 0.72024487
Environment.Score 0.80214954 0.311156740 0.38572521
Instruction.Score 1.00000000 0.275612091 0.36928230
Leaders.Score 0.27561209 1.000000000 0.84169946
Teachers.Score 0.36928230 0.841699464 1.00000000
Parent.Engagement.Score 0.35134311 0.367960680 0.36580246
Parent.Environment.Score 0.17761525 0.186121281 0.17695283
Rate.of.Misconducts -0.31390321 -0.292251074 -0.33635458
ISAT.Exceeding.Math 0.42562395 0.354367646 0.40654925
ISAT.Exceeding.Reading 0.42144369 0.347355779 0.38259880
College.Enrollment -0.13517904 0.065591651 0.08089638
Latitude 0.08053262 0.051394903 0.10137811
Longitude -0.11812292 -0.117790806 -0.17495661
Community.Area.Number -0.09394514 0.003562534 -0.06463487
Ward 0.05829859 -0.046955755 -0.02924497
Police.District 0.14048120 0.084473084 0.13792510
Parent.Engagement.Score Parent.Environment.Score
Safety.Score 0.59274854 0.142646323
Family.Involvement.Score 0.54837537 0.096039333
Environment.Score 0.46289904 0.246599115
Instruction.Score 0.35134311 0.177615250
Leaders.Score 0.36796068 0.186121281
Teachers.Score 0.36580246 0.176952832
Parent.Engagement.Score 1.00000000 0.416902232
Parent.Environment.Score 0.41690223 1.000000000
Rate.of.Misconducts -0.25821245 -0.045287557
ISAT.Exceeding.Math 0.52707099 -0.033321734
ISAT.Exceeding.Reading 0.57619786 0.002284523
College.Enrollment -0.09338198 -0.041730969
Latitude 0.14905577 0.229623945
Longitude -0.29037015 -0.135261846
Community.Area.Number -0.15761104 -0.177620170
Ward 0.21690821 0.195830879
Police.District 0.29428698 0.285486204
Rate.of.Misconducts ISAT.Exceeding.Math
Safety.Score -0.49566990 0.71257229
Family.Involvement.Score -0.43192336 0.61804691
Environment.Score -0.33713693 0.44676682
Instruction.Score -0.31390321 0.42562395
Leaders.Score -0.29225107 0.35436765
Teachers.Score -0.33635458 0.40654925
Parent.Engagement.Score -0.25821245 0.52707099
Parent.Environment.Score -0.04528756 -0.03332173
Rate.of.Misconducts 1.00000000 -0.40693328
ISAT.Exceeding.Math -0.40693328 1.00000000
ISAT.Exceeding.Reading -0.35054403 0.94774090
College.Enrollment -0.10993665 -0.06453246
Latitude -0.23734628 0.18993892
Longitude 0.16531982 -0.17766164
Community.Area.Number 0.19383470 -0.20739282
Ward -0.16714201 0.21848234
Police.District -0.26605091 0.26992417
ISAT.Exceeding.Reading College.Enrollment Latitude
Safety.Score 0.726116154 0.009967808 0.35670320
Family.Involvement.Score 0.635425201 0.079709769 0.24808717
Environment.Score 0.446127381 -0.156144721 0.24089974
Instruction.Score 0.421443690 -0.135179042 0.08053262
Leaders.Score 0.347355779 0.065591651 0.05139490
Teachers.Score 0.382598805 0.080896380 0.10137811
Parent.Engagement.Score 0.576197862 -0.093381983 0.14905577
Parent.Environment.Score 0.002284523 -0.041730969 0.22962394
Rate.of.Misconducts -0.350544029 -0.109936648 -0.23734628
ISAT.Exceeding.Math 0.947740900 -0.064532464 0.18993892
ISAT.Exceeding.Reading 1.000000000 -0.091278184 0.14306352
College.Enrollment -0.091278184 1.000000000 0.10148654
Latitude 0.143063516 0.101486543 1.00000000
Longitude -0.124845355 -0.202645192 -0.55146576
Community.Area.Number -0.192982956 0.046068177 -0.82238553
Ward 0.213716652 0.090032088 0.63631360
Police.District 0.261136018 0.066708299 0.55592915
Longitude Community.Area.Number Ward
Safety.Score -0.2680292 -0.333390485 0.23338555
Family.Involvement.Score -0.2338276 -0.223948325 0.21651341
Environment.Score -0.1810229 -0.220371883 0.13680006
Instruction.Score -0.1181229 -0.093945137 0.05829859
Leaders.Score -0.1177908 0.003562534 -0.04695575
Teachers.Score -0.1749566 -0.064634873 -0.02924497
Parent.Engagement.Score -0.2903701 -0.157611037 0.21690821
Parent.Environment.Score -0.1352618 -0.177620170 0.19583088
Rate.of.Misconducts 0.1653198 0.193834695 -0.16714201
ISAT.Exceeding.Math -0.1776616 -0.207392816 0.21848234
ISAT.Exceeding.Reading -0.1248454 -0.192982956 0.21371665
College.Enrollment -0.2026452 0.046068177 0.09003209
Latitude -0.5514658 -0.822385531 0.63631360
Longitude 1.0000000 0.292572616 -0.46982444
Community.Area.Number 0.2925726 1.000000000 -0.51125531
Ward -0.4698244 -0.511255309 1.00000000
Police.District -0.5188634 -0.437746322 0.71629262
Police.District
Safety.Score 0.34197882
Family.Involvement.Score 0.31440629
Environment.Score 0.20998162
Instruction.Score 0.14048120
Leaders.Score 0.08447308
Teachers.Score 0.13792510
Parent.Engagement.Score 0.29428698
Parent.Environment.Score 0.28548620
Rate.of.Misconducts -0.26605091
ISAT.Exceeding.Math 0.26992417
ISAT.Exceeding.Reading 0.26113602
College.Enrollment 0.06670830
Latitude 0.55592915
Longitude -0.51886345
Community.Area.Number -0.43774632
Ward 0.71629262
Police.District 1.00000000
SIMPLE LINEAR REGRESSION (Between safety_score and family_involvement_score) safety score predicted = 11.59217 + 0.78135*family involvement score
linear_reg = lm(safety_score_train ~ family_involvement_score_train)
summary(linear_reg)
Call:
lm(formula = safety_score_train ~ family_involvement_score_train)
Residuals:
Min 1Q Median 3Q Max
-32.627 -8.840 -0.925 8.741 61.623
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.59217 4.23345 2.738 0.00734 **
family_involvement_score_train 0.78135 0.07729 10.109 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.68 on 98 degrees of freedom
Multiple R-squared: 0.5105, Adjusted R-squared: 0.5055
F-statistic: 102.2 on 1 and 98 DF, p-value: < 2.2e-16
PREDICTED VALUES USING SIMPLE LINEAR REGRESSION (Assume fam involvement = 90)
safety_score_predicted_LR = 11.59217 + 0.78135*90
safety_score_predicted_LR
[1] 81.91367
PLOTTING WITH A TREND LINE
# linear_reg models safety score (y) as a function of family involvement (x),
# so family involvement goes on the x-axis for the fitted line to match the points.
plot(family_involvement_score_train, safety_score_train)
abline(linear_reg, col="blue", lwd=2)
MULTIPLE LINEAR REGRESSION safety score = 0.47290 + 0.57574*family involvement score + 0.42421*environment score
multiple_linear_regression= lm(safety_score_train ~ family_involvement_score_train + environment_score_train)
summary(multiple_linear_regression)
Call:
lm(formula = safety_score_train ~ family_involvement_score_train +
environment_score_train)
Residuals:
Min 1Q Median 3Q Max
-27.318 -7.387 -0.214 9.256 51.530
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.47290 4.54278 0.104 0.917
family_involvement_score_train 0.57574 0.08324 6.917 4.96e-10 ***
environment_score_train 0.42421 0.09184 4.619 1.18e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 13.36 on 97 degrees of freedom
Multiple R-squared: 0.5987, Adjusted R-squared: 0.5905
F-statistic: 72.36 on 2 and 97 DF, p-value: < 2.2e-16
problem_9_regression_1 = lm(ISAT_exceeding_math_train ~ safety_score_train + parent_environment_score_train)
summary(problem_9_regression_1)
Call:
lm(formula = ISAT_exceeding_math_train ~ safety_score_train +
parent_environment_score_train)
Residuals:
Min 1Q Median 3Q Max
-23.032 -7.241 -1.196 5.276 47.839
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 18.40039 14.27842 1.289 0.2006
safety_score_train 0.60722 0.05855 10.371 <2e-16 ***
parent_environment_score_train -0.55923 0.28658 -1.951 0.0539 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 12.04 on 97 degrees of freedom
Multiple R-squared: 0.5264, Adjusted R-squared: 0.5166
F-statistic: 53.9 on 2 and 97 DF, p-value: < 2.2e-16
# Predicted ISAT math values from problem_9_regression_1. Each slope must be
# multiplied by its predictor; adding the bare coefficients only gives the
# prediction for the single case where both predictors equal 1.
score_predict_final = coef(problem_9_regression_1)[1] +
  coef(problem_9_regression_1)[2] * safety_score_train +
  coef(problem_9_regression_1)[3] * parent_environment_score_train
problem_9_regression_2 = lm(ISAT_exceeding_math_train ~ rate_of_misconduct_train + teachers_score_train)
summary(problem_9_regression_2)
Call:
lm(formula = ISAT_exceeding_math_train ~ rate_of_misconduct_train +
teachers_score_train)
Residuals:
Min 1Q Median 3Q Max
-22.319 -8.709 -2.169 3.127 58.882
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.83466 5.33293 2.219 0.02881 *
rate_of_misconduct_train -0.25621 0.07865 -3.258 0.00155 **
teachers_score_train 0.28462 0.08754 3.251 0.00158 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.18 on 97 degrees of freedom
Multiple R-squared: 0.2476, Adjusted R-squared: 0.2321
F-statistic: 15.96 on 2 and 97 DF, p-value: 1.018e-06
PREDICTING VALUES USING MULTIPLE LINEAR REGRESSION (Assume fam involvement = 90 and environment = 80)
safety_score_predicted_MLR = 0.47290 + 0.57574*90 + 0.42421*80
safety_score_predicted_MLR
[1] 86.2263
LINEAR MODEL VS. ACTUAL DATA
# Put the predictor (family involvement) on x and the response (safety score)
# on y so the line from linear_reg (safety ~ family involvement) matches the data.
plot(family_involvement_score_train, safety_score_train)
abline(linear_reg, col="blue", lwd=2)
QUADRATIC MODEL
family_involvement_score_train2 = family_involvement_score_train^2
quad_model = lm(safety_score_train ~ family_involvement_score_train +family_involvement_score_train2)
summary(quad_model)
Call:
lm(formula = safety_score_train ~ family_involvement_score_train +
family_involvement_score_train2)
Residuals:
Min 1Q Median 3Q Max
-32.158 -9.109 -0.833 7.946 60.800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 23.018349 9.959782 2.311 0.0229 *
family_involvement_score_train 0.331606 0.363352 0.913 0.3637
family_involvement_score_train2 0.003893 0.003073 1.267 0.2083
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.64 on 97 degrees of freedom
Multiple R-squared: 0.5184, Adjusted R-squared: 0.5085
F-statistic: 52.21 on 2 and 97 DF, p-value: 4.067e-16
PREDICT USING QUADRATIC MODEL (When x=1)
safety_score_predicted_QM = 23.018349 + 0.331606*1 + 0.003893*1^2
safety_score_predicted_QM
[1] 23.35385
PLOTTING QUADRATIC MODEL VS. LINEAR
# Fitted values of the quadratic model on the training data. predict() for lm
# objects takes `newdata`, not `data`; a `data=` argument is silently ignored,
# so it is omitted here and the same fitted values are returned.
safety_score_predicted_QM = predict(quad_model)
# Draw the observed points once, then overlay the quadratic fit (red) and the
# straight-line fit (blue) on the same axes; a second plot() call would
# replace the first instead of adding to it.
plot(family_involvement_score_train, safety_score_train, pch=16)
points(family_involvement_score_train, safety_score_predicted_QM, col="red", pch=16)
abline(linear_reg, col="blue", lwd=2)
CUBIC MODEL
family_involvement_score_train3 = family_involvement_score_train^3
cubic_model = lm(safety_score_train ~ family_involvement_score_train + family_involvement_score_train2 + family_involvement_score_train3)
summary(cubic_model)
Call:
lm(formula = safety_score_train ~ family_involvement_score_train +
family_involvement_score_train2 + family_involvement_score_train3)
Residuals:
Min 1Q Median 3Q Max
-31.819 -9.375 -0.920 7.808 61.020
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.855e+01 2.370e+01 1.205 0.231
family_involvement_score_train -2.293e-02 1.424e+00 -0.016 0.987
family_involvement_score_train2 1.061e-02 2.628e-02 0.404 0.687
family_involvement_score_train3 -3.822e-05 1.484e-04 -0.258 0.797
Residual standard error: 14.71 on 96 degrees of freedom
Multiple R-squared: 0.5188, Adjusted R-squared: 0.5037
F-statistic: 34.49 on 3 and 96 DF, p-value: 3.247e-15
PREDICT USING CUBIC MODEL
# Fitted values of the cubic model on the training data. predict() for lm
# objects takes `newdata`, not `data`; a `data=` argument is silently ignored,
# so it is omitted here and the same fitted values are returned.
safety_score_predicted_CM = predict(cubic_model)
PLOT CUBIC MODEL VS. LINEAR
# Draw the observed points once, then overlay the cubic fit (green) and the
# straight-line fit (blue) on the same axes; a second plot() call would
# replace the first instead of adding to it.
plot(family_involvement_score_train, safety_score_train, pch=16)
points(family_involvement_score_train, safety_score_predicted_CM, col="green", pch=16)
abline(linear_reg, col="blue", lwd=2)
score_predict_final = 18.40039 + 0.60722* safety_score_train +