# A tibble: 100,729 × 79
X Y OBJECTID NCESSCH NMCNTY SURVYEAR
<dbl> <dbl> <dbl> <chr> <chr> <chr>
1 -149. 61.6 1 020051000480 Matanuska-Susitna Borough 2017-2018
2 -157. 71.3 2 020061000470 North Slope Borough 2017-2018
3 -151. 60.5 3 020039000448 Kenai Peninsula Borough 2017-2018
4 -151. 60.6 4 020039000463 Kenai Peninsula Borough 2017-2018
5 -151. 60.6 5 020039000513 Kenai Peninsula Borough 2017-2018
6 -133. 56.1 6 020070000526 Prince of Wales-Hyder Census Area 2017-2018
7 -135. 57.5 7 020073000477 Hoonah-Angoon Census Area 2017-2018
8 -149. 63.9 8 020077000447 Denali Borough 2017-2018
9 -166. 54.1 9 020000700004 Aleutians East Borough 2017-2018
10 -163. 54.9 10 020000700007 Aleutians East Borough 2017-2018
STABR LEAID ST_LEAID LEA_NAME
<chr> <chr> <chr> <chr>
1 AK 0200510 AK-33 Matanuska-Susitna Borough School District
2 AK 0200610 AK-36 North Slope Borough School District
3 AK 0200390 AK-24 Kenai Peninsula Borough School District
4 AK 0200390 AK-24 Kenai Peninsula Borough School District
5 AK 0200390 AK-24 Kenai Peninsula Borough School District
6 AK 0200700 AK-44 Southeast Island School District
7 AK 0200730 AK-09 Chatham School District
8 AK 0200770 AK-02 Denali Borough School District
9 AK 0200007 AK-56 Aleutians East Borough School District
10 AK 0200007 AK-56 Aleutians East Borough School District
SCH_NAME LSTREET1 LSTREET2
<chr> <chr> <chr>
1 John Shaw Elementary 3750 E Paradise Ln <NA>
2 Kiita Learning Community 5246 Karluk St <NA>
3 Soldotna Montessori Charter School 158 E Park Ave <NA>
4 Kaleidoscope School of Arts & Science 549 N Forest Dr <NA>
5 Marathon School 405 Marathon Rd <NA>
6 Whale Pass School 126 Bayview Rd <NA>
7 Chatham Correspondence 500 Big Dog Salmon Way <NA>
8 Denali PEAK 1 Suntrana St <NA>
9 Akutan School 202 Volcano Dr <NA>
10 False Pass School 300 Valley Rd <NA>
LSTREET3 LCITY LSTATE LZIP LZIP4 PHONE GSLO GSHI
<lgl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 NA Wasilla AK 99654 <NA> (907)352-0500 PK 05
2 NA Utqiagvik AK 99723 <NA> (907)852-9677 09 12
3 NA Soldotna AK 99669 <NA> (907)260-9221 KG 06
4 NA Kenai AK 99611 <NA> (907)283-0804 KG 05
5 NA Kenai AK 99611 <NA> (907)335-3343 07 12
6 NA Whale Pass AK 99950 <NA> (907)846-5320 PK 12
7 NA Angoon AK 99820 <NA> (907)788-3302 KG 12
8 NA Healy AK 99743 <NA> (907)683-2278 PK 12
9 NA Akutan AK 99553 <NA> (907)698-2205 PK 12
10 NA False Pass AK 99583 <NA> (907)548-2224 PK 12
VIRTUAL TOTFRL FRELCH REDLCH PK KG G01 G02 G03 G04
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Not a virtual school 183 158 25 30 81 63 80 62 58
2 Not a virtual school 27 27 0 NA NA NA NA NA NA
3 Not a virtual school 43 23 20 NA 23 23 27 22 25
4 Not a virtual school 69 50 19 NA 40 43 42 46 46
5 Not a virtual school -9 -9 -9 NA NA NA NA NA NA
6 Not a virtual school 17 17 0 0 0 3 1 2 2
7 Not a virtual school 3 -1 -1 NA 2 2 1 1 1
8 Not a virtual school 3 -1 -1 42 40 44 56 59 61
9 Not a virtual school 3 -1 -1 0 4 0 3 1 1
10 Not a virtual school -9 -9 -9 0 0 0 1 1 0
G05 G06 G07 G08 G09 G10 G11 G12 G13 TOTAL MEMBER AM
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl> <dbl> <dbl>
1 73 NA NA NA NA NA NA NA NA 447 447 50
2 NA NA NA NA 0 3 7 20 NA 30 30 27
3 28 19 NA NA NA NA NA NA NA 167 167 8
4 43 NA NA NA NA NA NA NA NA 260 260 16
5 NA NA 0 1 1 2 1 0 NA 5 5 0
6 2 1 5 1 0 0 0 1 NA 18 18 0
7 0 0 0 0 1 2 0 1 NA 11 11 2
8 59 54 55 74 47 51 48 47 NA 737 737 53
9 0 2 0 0 0 0 1 1 NA 13 13 11
10 1 0 1 1 1 0 0 0 NA 6 6 4
HI BL WH HP TR FTE LATCOD LONCOD ULOCALE
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 12 5 351 2 23 24.9 61.6 -149. 41-Rural: Fringe
2 0 0 0 1 2 3 71.3 -157. 33-Town: Remote
3 5 0 136 0 15 10.4 60.5 -151. 33-Town: Remote
4 14 3 168 0 56 16.8 60.6 -151. 33-Town: Remote
5 0 1 3 1 0 0.670 60.6 -151. 33-Town: Remote
6 1 0 13 0 4 1.90 56.1 -133. 43-Rural: Remote
7 0 5 4 0 0 0 57.5 -135. 43-Rural: Remote
8 76 39 443 8 97 5.79 63.9 -149. 43-Rural: Remote
9 0 0 1 0 1 1.96 54.1 -166. 43-Rural: Remote
10 0 0 2 0 0 1.39 54.9 -163. 43-Rural: Remote
STUTERATIO STITLEI AMALM AMALF ASALM ASALF HIALM HIALF BLALM BLALF
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 18.0 Yes 33 17 1 3 10 2 3 2
2 10 Not Applicable 16 11 0 0 0 0 0 0
3 16.1 Not Applicable 4 4 0 3 2 3 0 0
4 15.5 Not Applicable 10 6 1 2 6 8 3 0
5 7.46 Yes 0 0 0 0 0 0 0 1
6 9.47 Yes 0 0 0 0 1 0 0 0
7 NA Not Applicable 1 1 0 0 0 0 3 2
8 127. Not Applicable 21 32 13 8 33 43 20 19
9 6.63 Yes 6 5 0 0 0 0 0 0
10 4.32 Yes 2 2 0 0 0 0 0 0
WHALM WHALF HPALM HPALF TRALM TRALF TOTMENROL TOTFENROL STATUS UG AE
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
1 193 158 0 2 11 12 251 196 1 NA NA
2 0 0 1 0 1 1 18 12 1 NA NA
3 58 78 0 0 7 8 71 96 1 NA NA
4 82 86 0 0 26 30 128 132 1 NA NA
5 1 2 0 1 0 0 1 4 1 NA NA
6 5 8 0 0 4 0 10 8 1 NA NA
7 1 3 0 0 0 0 5 6 1 NA NA
8 221 222 4 4 48 49 360 377 1 NA NA
9 0 1 0 0 1 0 7 6 1 NA NA
10 1 1 0 0 0 0 3 3 1 NA NA
SCHOOL_TYPE_TEXT SY_STATUS_TEXT SCHOOL_LEVEL AS
<chr> <chr> <chr> <dbl>
1 Regular school Currently operational Elementary 4
2 Alternative/other school Currently operational High 0
3 Regular school Currently operational Elementary 3
4 Regular school Currently operational Elementary 3
5 Alternative/other school Currently operational High 0
6 Regular school Currently operational Other 0
7 Regular school Currently operational Other 0
8 Regular school Currently operational Other 21
9 Regular school Currently operational Other 0
10 Regular school Currently operational Other 0
CHARTER_TEXT MAGNET_TEXT
<chr> <chr>
1 No No
2 No No
3 Yes No
4 Yes No
5 No No
6 No No
7 No No
8 No No
9 No No
10 No No
# ℹ 100,719 more rows
I kept the columns needed for later data analysis and visualization, renamed them to make clear what each column contains, and filtered out out-of-range values.
# Keep only the rows whose `Full Time Enrollment Rate` is below 85 and whose
# `Student to Teacher Ratio` is below 30. dplyr::filter() also drops rows for
# which either condition evaluates to NA.
Public_School_clean <- Public_School_clean %>%
  filter(
    `Full Time Enrollment Rate` < 85,
    `Student to Teacher Ratio` < 30
  )

# Preview the first 10 rows, printing every column regardless of console width.
Public_School_clean %>%
  print(n = 10, width = Inf)
# A tibble: 88,423 × 6
`School Name` States `Full Time Enrollment Rate`
<chr> <chr> <dbl>
1 John Shaw Elementary AK 24.9
2 Kiita Learning Community AK 3
3 Soldotna Montessori Charter School AK 10.4
4 Kaleidoscope School of Arts & Science AK 16.8
5 Marathon School AK 0.670
6 Whale Pass School AK 1.90
7 Akutan School AK 1.96
8 False Pass School AK 1.39
9 King Cove School AK 12
10 Sand Point School AK 12.9
`Student to Teacher Ratio` `School Types` `School Level`
<dbl> <chr> <chr>
1 18.0 Regular school Elementary
2 10 Alternative/other school High
3 16.1 Regular school Elementary
4 15.5 Regular school Elementary
5 7.46 Alternative/other school High
6 9.47 Regular school Other
7 6.63 Regular school Other
8 4.32 Regular school Other
9 7.83 Regular school Other
10 9.27 Regular school Other
# ℹ 88,413 more rows
3.2 Clean the “Marriage Law Vote” data set
I moved the division information from the “District” column into a new column and removed the rows that did not contain any information.
4.1.1 ‘Full Time Enrollment Rate’ and ‘Student to Teacher Ratio’
I use the “geom_point()” function to visualize the relationship between “Full Time Enrollment Rate” and “Student to Teacher Ratio” as these variables are numerical.
To make the graphic more reader-friendly, I used
the “col=” argument to make the graphic more colorful and easier to understand;
the “facet_wrap()” argument to make the graphic include the third variable, “School Types,” and divide a large amount of information from the two numerical variables into smaller pieces;
the “alpha=” argument to make each point more transparent, which makes overlapping points easier to see;
the “scale” argument to adjust the axis range;
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information.
# Scatter plot of `Full Time Enrollment Rate` (y) against
# `Student to Teacher Ratio` (x), coloured and shaped by `School Types`,
# with one facet per school type.
Public_School_clean %>%
  ggplot(aes(
    x = `Student to Teacher Ratio`,
    y = `Full Time Enrollment Rate`,
    shape = `School Types`,
    col = `School Types`
  )) +
  # na.rm is a layer parameter, not an aesthetic, so it is set on the geom
  # rather than inside aes().
  geom_point(alpha = .5, na.rm = TRUE) +
  # Pin both axes to the observed data range; na.rm keeps range() from
  # returning NA limits if a missing value is present.
  scale_x_continuous(
    limits = range(Public_School_clean$`Student to Teacher Ratio`, na.rm = TRUE)
  ) +
  scale_y_continuous(
    limits = range(Public_School_clean$`Full Time Enrollment Rate`, na.rm = TRUE)
  ) +
  facet_wrap(vars(`School Types`), scales = "free") +
  ggthemes::theme_few() +
  labs(title = "'Full Time Enrollment Rate' And 'Student to Teacher Ratio'") +
  # Centre the title and move the legend below the panels.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
4.1.2 Percentage of schools in various states
I created a bar chart to present the information about the percentage of schools for each state. This is because this graph type is suitable for visualizing categorical and numerical variables.
To make the graphic more reader-friendly, I used
the “fill=” argument to make the graphic more colorful;
the “scale” argument to adjust the label and name of the “y” axis;
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information.
# Bar chart of each state's share of all schools in Public_School_clean.
Public_School_clean %>%
  # Count the schools per state, then convert the counts to proportions of
  # the overall total.
  group_by(States) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  mutate(perc = Count / sum(Count)) %>%
  ggplot(aes(x = States, y = perc, fill = States)) +
  geom_col() +
  # `labels` is spelled out in full (no partial argument matching); the axis
  # shows the share of schools, formatted as a percentage.
  scale_y_continuous(
    labels = scales::percent,
    name = "Percentage of Schools"
  ) +
  ggthemes::theme_few() +
  labs(title = "Percentage Of School In Various States") +
  # Centre the title, rotate the state labels so they do not overlap, and
  # move the legend below the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90),
    legend.position = "bottom"
  )
4.1.3 Percentage and total number for various school levels
I created a bar chart to show the information about the number and percentage of different school levels. This is because this graph type is suitable for visualizing categorical and numerical variables.
To make the graphic more reader-friendly, I used
the “fill=” argument to make the graphic more colorful;
the “scale” argument to adjust the range, label, and name of the “y” axis;
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information;
the “geom_text()” function to label the total number of various school levels.
# Bar chart of the share of schools at each `School Level`, with the raw
# count printed above every bar.
Public_School_clean %>%
  # Count the schools per level, then convert the counts to proportions of
  # the overall total.
  group_by(`School Level`) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  mutate(perc = Count / sum(Count)) %>%
  ggplot(aes(x = `School Level`, y = perc, fill = `School Level`)) +
  geom_col() +
  # Fixed 0-100% axis; `labels` is spelled out in full and the limits are
  # given as a literal vector instead of via range().
  scale_y_continuous(
    limits = c(0, 1),
    labels = scales::percent,
    n.breaks = 10,
    name = "Percentage of School Level"
  ) +
  # Annotate each bar with its school count, nudged just above the bar top.
  geom_text(aes(label = Count), size = 3, vjust = -.5) +
  ggthemes::theme_few() +
  labs(title = "Total Number And Percentage For School Level ") +
  # Centre the title, rotate the x labels, and move the legend below the plot.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90),
    legend.position = "bottom"
  )
4.1.4 Full-time enrollment rate for various school levels
I created a bar chart to show the full-time enrollment rate at different school levels. This is because this graph type is suitable for visualizing categorical and numerical variables.
To make the graphic more reader-friendly, I used
the “fill=” argument to make the graphic more colorful;
the “scale” argument to adjust the range of the “y” axis;
the position = "dodge" argument to make the bars side by side (which makes the third variable, “School Types,” more clear to show in the graphic);
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information.
# Dodged bar chart of `Full Time Enrollment Rate` by `School Level`, with one
# bar colour per `School Types` value.
# NOTE(review): the data is passed to geom_col() without aggregation, so one
# bar is drawn per row and bars sharing a level/type combination are drawn on
# top of each other. Consider summarising first (e.g. the mean per group) --
# confirm which statistic is intended.
Public_School_clean %>%
  ggplot(aes(
    x = `School Level`,
    y = `Full Time Enrollment Rate`,
    fill = `School Types`
  )) +
  geom_col(position = "dodge") +
  # Fixed 0-100 axis, given as a literal vector instead of via range().
  scale_y_continuous(limits = c(0, 100)) +
  ggthemes::theme_few() +
  labs(title = "Full Time Enrollment Rate For Various School Level") +
  # Centre the title, move the legend below the plot, and rotate the x labels.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90)
  )
4.2 Visualize the “Marriage Law Vote” data set
The following analysis mainly concerns the people who were eligible to vote and their responses.
# A tibble: 450 × 5
Division District `Eligible Participants_total_number`
<chr> <chr> <dbl>
1 New South Wales Divisions Banks 105254
2 New South Wales Divisions Banks 105254
3 New South Wales Divisions Banks 105254
4 New South Wales Divisions Barton 109371
5 New South Wales Divisions Barton 109371
6 New South Wales Divisions Barton 109371
7 New South Wales Divisions Bennelong 106375
8 New South Wales Divisions Bennelong 106375
9 New South Wales Divisions Bennelong 106375
10 New South Wales Divisions Berowra 105090
Status Values
<fct> <dbl>
1 Yes 84079
2 No 247
3 No Res. 20928
4 Yes 85137
5 No 226
6 No Res. 24008
7 Yes 86158
8 No 244
9 No Res. 19973
10 Yes 88840
# ℹ 440 more rows
4.2.1 Percentage distribution of response status based on different divisions in Australia
I use the “geom_col()” function to create a bar chart to visualize the percentage distribution of response status, as this graph type is suitable for one categorical variable and one numerical variable.
To make the graphic more reader-friendly, I used
the “fill=” argument to make the graphic more colorful;
the “facet_wrap()” function to make the graphic include the third variable, “Division”;
the “scale” argument to adjust the axis range, label, and name;
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information;
the “scale_fill_okabeito()” to make the color scale more color-blind friendly.
# A tibble: 24 × 6
# Groups: Division [8]
Division Status sum_total sum_status prop labels
<chr> <fct> <dbl> <dbl> <dbl> <chr>
1 "Australian Capital Territory " Yes 288108 236979 0.823 82%
2 "Australian Capital Territory " No 288108 534 0.00185 0%
3 "Australian Capital Territory " No Res. 288108 50595 0.176 18%
4 "New South Wales " Yes 5187681 4111200 0.792 79%
5 "New South Wales " No 5187681 11036 0.00213 0%
6 "New South Wales " No Res. 5187681 1065445 0.205 21%
7 "Northern Territory " Yes 138101 80376 0.582 58%
8 "Northern Territory " No 138101 229 0.00166 0%
9 "Northern Territory " No Res. 138101 57496 0.416 42%
10 "Queensland " Yes 3150873 2448075 0.777 78%
# ℹ 14 more rows
# Faceted bar chart of the proportion (`prop`) of each response `Status`,
# with one panel per `Division` and the pre-formatted `labels` column
# printed above every bar.
law_vote_Eligible_1 %>%
  ggplot(aes(x = Status, y = prop, fill = Status)) +
  # A single opaque bar layer: stacking a second, translucent copy of the
  # same bars on top of it would not change what is rendered.
  geom_col() +
  scale_fill_okabeito(name = "Response Status") +
  facet_wrap(vars(Division)) +
  # Axis titles are set on the scales; labs() is only needed for the title.
  scale_x_discrete(name = "Response Status") +
  scale_y_continuous(
    limits = c(0, 1),
    name = "Percentage",
    labels = scales::percent
  ) +
  # Print the percentage label just above each bar.
  geom_text(aes(label = labels), size = 3, vjust = -.5) +
  ggthemes::theme_few() +
  labs(
    title = " Percentage Distribution Of Response Status- Eligible Participants"
  ) +
  # Centre the title and move the legend below the panels.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
4.2.2 Total number and percentage for response status
I created a bar chart to show the total number and percentage for response status. This is because this graph type is suitable for visualizing categorical and numerical variables.
To make the graphic more reader-friendly, I used
the “fill=” argument to make the graphic more colorful;
the “scale” argument to adjust the range, label, and name of the axis;
the “theme()” argument to center the title and change the legend’s position from the right to the bottom of the graphic;
the “ggthemes::theme_few()” theme to clean up the background, which makes the graphic neater and might help the reader focus more on the useful information;
the “geom_text()” function to label the total number for each response status.