This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#ANALİZDE KULLANILACAK PAKETLERİN YÜKLENMESİ VE KÜTÜPHANEYE ALMAK
library(ggplot2)
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ stringr 1.5.2
## ✔ forcats 1.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
You can also embed plots, for example:
## Rows: 395 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (17): school, sex, address, famsize, Pstatus, Mjob, Fjob, reason, guardi...
## dbl (16): age, Medu, Fedu, traveltime, studytime, failures, famrel, freetime...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the data set: glimpse() prints the row/column counts plus each
# column's type and first few values.
df %>%
  glimpse()
## Rows: 395
## Columns: 33
## $ school <chr> "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP",…
## $ sex <chr> "F", "F", "F", "F", "F", "M", "M", "F", "M", "M", "F", "F",…
## $ age <dbl> 18, 17, 15, 15, 16, 16, 16, 17, 15, 15, 15, 15, 15, 15, 15,…
## $ address <chr> "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U",…
## $ famsize <chr> "GT3", "GT3", "LE3", "GT3", "GT3", "LE3", "LE3", "GT3", "LE…
## $ Pstatus <chr> "A", "T", "T", "T", "T", "T", "T", "A", "A", "T", "T", "T",…
## $ Medu <dbl> 4, 1, 1, 4, 3, 4, 2, 4, 3, 3, 4, 2, 4, 4, 2, 4, 4, 3, 3, 4,…
## $ Fedu <dbl> 4, 1, 1, 2, 3, 3, 2, 4, 2, 4, 4, 1, 4, 3, 2, 4, 4, 3, 2, 3,…
## $ Mjob <chr> "at_home", "at_home", "at_home", "health", "other", "servic…
## $ Fjob <chr> "teacher", "other", "other", "services", "other", "other", …
## $ reason <chr> "course", "course", "other", "home", "home", "reputation", …
## $ guardian <chr> "mother", "father", "mother", "mother", "father", "mother",…
## $ traveltime <dbl> 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 2, 1, 1, 1, 3, 1, 1,…
## $ studytime <dbl> 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 3, 1, 3, 2, 1, 1,…
## $ failures <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,…
## $ schoolsup <chr> "yes", "no", "yes", "no", "no", "no", "no", "yes", "no", "n…
## $ famsup <chr> "no", "yes", "no", "yes", "yes", "yes", "no", "yes", "yes",…
## $ paid <chr> "no", "no", "yes", "yes", "yes", "yes", "no", "no", "yes", …
## $ activities <chr> "no", "no", "no", "yes", "no", "yes", "no", "no", "no", "ye…
## $ nursery <chr> "yes", "no", "yes", "yes", "yes", "yes", "yes", "yes", "yes…
## $ higher <chr> "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "ye…
## $ internet <chr> "no", "yes", "yes", "yes", "no", "yes", "yes", "no", "yes",…
## $ romantic <chr> "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no"…
## $ famrel <dbl> 4, 5, 4, 3, 4, 5, 4, 4, 4, 5, 3, 5, 4, 5, 4, 4, 3, 5, 5, 3,…
## $ freetime <dbl> 3, 3, 3, 2, 3, 4, 4, 1, 2, 5, 3, 2, 3, 4, 5, 4, 2, 3, 5, 1,…
## $ goout <dbl> 4, 3, 2, 2, 2, 2, 4, 4, 2, 1, 3, 2, 3, 3, 2, 4, 3, 2, 5, 3,…
## $ Dalc <dbl> 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,…
## $ Walc <dbl> 1, 1, 3, 1, 2, 2, 1, 1, 1, 1, 2, 1, 3, 2, 1, 2, 2, 1, 4, 3,…
## $ health <dbl> 3, 3, 3, 5, 5, 5, 3, 1, 1, 5, 2, 4, 5, 3, 3, 2, 2, 4, 5, 5,…
## $ absences <dbl> 6, 4, 10, 2, 4, 10, 0, 6, 0, 0, 0, 4, 2, 2, 0, 4, 6, 4, 16,…
## $ G1 <dbl> 5, 5, 7, 15, 6, 15, 12, 6, 16, 14, 10, 10, 14, 10, 14, 14, …
## $ G2 <dbl> 6, 5, 8, 14, 10, 15, 12, 5, 18, 15, 8, 12, 14, 10, 16, 14, …
## $ G3 <dbl> 6, 6, 10, 15, 10, 15, 11, 6, 19, 15, 9, 12, 14, 11, 16, 14,…
# EXERCISE 1: DATA WRANGLING ----
# Keep only the columns used in the analysis below: school, age, study time,
# the three grade columns (G1, G2, G3), absences and sex.
df_sel <- select(
  df,
  school, age, studytime,
  G1, G2, G3,
  absences, sex
)
# Keep the male students aged 19 or older from the full data set (all
# columns) and sort them from oldest to youngest.
# NOTE(review): the original comment said "older than 19", but the condition
# is `age >= 19`, so 19-year-olds are included -- confirm which is intended.
df_fil <- df %>%
  filter(sex == "M" & age >= 19) %>%
  arrange(desc(age))
# Add two derived columns to the selected data:
#   average_g - weighted grade average, rounded to 2 decimals. G1 and G2 are
#               averaged first and that result is averaged with G3, so G3
#               carries weight 0.5 and G1/G2 carry 0.25 each.
#   pass      - "pass" when average_g is at least 10, otherwise "fail".
df_mut <- mutate(
  df_sel,
  average_g = round(((G1 + G2) / 2 + G3) / 2, 2),
  pass = if_else(condition = average_g >= 10, true = "pass", false = "fail")
)
# Summarise students per school and sex:
#   mean_avg        - mean of the weighted grade average (2 decimals)
#   median_absences - median number of absences
#   pass_rate_pct   - share of students with pass == "pass", in percent
#                     (1 decimal)
# Rows are ordered from the highest to the lowest mean_avg.
summary_by_school_sex <- df_mut %>%
  group_by(school, sex) %>%
  summarise(
    mean_avg = round(mean(average_g, na.rm = TRUE), 2),
    median_absences = median(absences, na.rm = TRUE),
    pass_rate_pct = round(mean(pass == "pass", na.rm = TRUE) * 100, 1),
    # Drop the grouping explicitly: without `.groups` the result stays grouped
    # by `school` and summarise() emits a "grouped output" message.
    .groups = "drop"
  ) %>%
  arrange(desc(mean_avg))
## `summarise()` has grouped output by 'school'. You can override using the
## `.groups` argument.
# EXERCISE 2: DATA VISUALISATION ----
# Histogram of the students' weighted grade averages, with a dashed red
# vertical line marking the overall mean.
ggplot(df_mut, aes(x = average_g)) +
  geom_histogram(bins = 20, fill = "#4E79A7", color = "white", alpha = 0.8) +
  # The mean is a single constant, so it is passed outside aes(). Mapping it
  # inside aes() recycles the value across every data row and draws one
  # overlapping line per row.
  geom_vline(
    xintercept = mean(df_mut$average_g, na.rm = TRUE),
    color = "red", linetype = "dashed", linewidth = 2
  ) +
  labs(
    title = "Ogrencilerin Not Ortalamalarının Dağılımı",
    x = "Agirliklandirilmis Not Ortalamasi",
    y = "Ogrenci Sayısı"
  ) +
  theme_minimal(base_size = 15)
# Author's interpretation: the grade distribution is close to a normal
# distribution; scores pile up around the middle of the plot and thin out
# towards both ends.
# Scatter plot of study time (x) against the weighted grade average (y),
# with points coloured by sex.
ggplot(df_mut, aes(x = studytime, y = average_g)) +
  geom_point(aes(color = sex), size = 2, alpha = 0.7) +
  scale_color_manual(
    values = c("F" = "red", "M" = "green"),
    name = "Cinsiyet",
    # Labels are named by level so each one is matched to its own value
    # instead of relying on the positional order of the levels.
    labels = c("F" = "Kız", "M" = "Erkek")
  ) +
  labs(
    title = "Ders Calısma Suresi ile Ortalama Not Arasındaki İliski",
    x = "Ders Calısma Suresi (1=az, 4=cok)",
    y = "Ortalama Not"
  ) +
  theme_minimal(base_size = 13)
# Author's interpretation: at the lowest study time (1 hour) male students
# are seen to get higher grades while female students have lower grades.
# Most male students cluster at the lowest study time (1 hour), whereas
# female students cluster at study times of 2 and 3 hours.
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.