Odev_1

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)
library(dplyr)
# Read the student maths workbook into `data`, then print a compact
# column-by-column preview of it.
data <- read_excel(
  path = "C:/Users/Administrator/Desktop/Kubra_Hoca/Odev_1/student-mat.xlsx"
)
data %>%
  glimpse()

## Rows: 395
## Columns: 33
## $ school     <chr> "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP",…
## $ sex        <chr> "F", "F", "F", "F", "F", "M", "M", "F", "M", "M", "F", "F",…
## $ age        <dbl> 18, 17, 15, 15, 16, 16, 16, 17, 15, 15, 15, 15, 15, 15, 15,…
## $ address    <chr> "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U",…
## $ famsize    <chr> "GT3", "GT3", "LE3", "GT3", "GT3", "LE3", "LE3", "GT3", "LE…
## $ Pstatus    <chr> "A", "T", "T", "T", "T", "T", "T", "A", "A", "T", "T", "T",…
## $ Medu       <dbl> 4, 1, 1, 4, 3, 4, 2, 4, 3, 3, 4, 2, 4, 4, 2, 4, 4, 3, 3, 4,…
## $ Fedu       <dbl> 4, 1, 1, 2, 3, 3, 2, 4, 2, 4, 4, 1, 4, 3, 2, 4, 4, 3, 2, 3,…
## $ Mjob       <chr> "at_home", "at_home", "at_home", "health", "other", "servic…
## $ Fjob       <chr> "teacher", "other", "other", "services", "other", "other", …
## $ reason     <chr> "course", "course", "other", "home", "home", "reputation", …
## $ guardian   <chr> "mother", "father", "mother", "mother", "father", "mother",…
## $ traveltime <dbl> 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 2, 1, 1, 1, 3, 1, 1,…
## $ studytime  <dbl> 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 3, 1, 3, 2, 1, 1,…
## $ failures   <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,…
## $ schoolsup  <chr> "yes", "no", "yes", "no", "no", "no", "no", "yes", "no", "n…
## $ famsup     <chr> "no", "yes", "no", "yes", "yes", "yes", "no", "yes", "yes",…
## $ paid       <chr> "no", "no", "yes", "yes", "yes", "yes", "no", "no", "yes", …
## $ activities <chr> "no", "no", "no", "yes", "no", "yes", "no", "no", "no", "ye…
## $ nursery    <chr> "yes", "no", "yes", "yes", "yes", "yes", "yes", "yes", "yes…
## $ higher     <chr> "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "ye…
## $ internet   <chr> "no", "yes", "yes", "yes", "no", "yes", "yes", "no", "yes",…
## $ romantic   <chr> "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no"…
## $ famrel     <dbl> 4, 5, 4, 3, 4, 5, 4, 4, 4, 5, 3, 5, 4, 5, 4, 4, 3, 5, 5, 3,…
## $ freetime   <dbl> 3, 3, 3, 2, 3, 4, 4, 1, 2, 5, 3, 2, 3, 4, 5, 4, 2, 3, 5, 1,…
## $ goout      <dbl> 4, 3, 2, 2, 2, 2, 4, 4, 2, 1, 3, 2, 3, 3, 2, 4, 3, 2, 5, 3,…
## $ Dalc       <dbl> 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,…
## $ Walc       <dbl> 1, 1, 3, 1, 2, 2, 1, 1, 1, 1, 2, 1, 3, 2, 1, 2, 2, 1, 4, 3,…
## $ health     <dbl> 3, 3, 3, 5, 5, 5, 3, 1, 1, 5, 2, 4, 5, 3, 3, 2, 2, 4, 5, 5,…
## $ absences   <dbl> 6, 4, 10, 2, 4, 10, 0, 6, 0, 0, 0, 4, 2, 2, 0, 4, 6, 4, 16,…
## $ G1         <dbl> 5, 5, 7, 15, 6, 15, 12, 6, 16, 14, 10, 10, 14, 10, 14, 14, …
## $ G2         <dbl> 6, 5, 8, 14, 10, 15, 12, 5, 18, 15, 8, 12, 14, 10, 16, 14, …
## $ G3         <dbl> 6, 6, 10, 15, 10, 15, 11, 6, 19, 15, 9, 12, 14, 11, 16, 14,…

# Build data_1: keep only the columns school, sex, age, studytime,
# failures, absences and G3 from the full data set.
data_1 <- select(
  data,
  school, sex, age, studytime, failures, absences, G3
)

# Build data_2: keep students aged 15 to 19 (inclusive) who have fewer than
# 30 absences.
# The bound on absences is strict (`<`, not `<=`) so that the filter matches
# the stated rule "less than 30"; a student with exactly 30 absences is
# excluded.

data_2 <- data_1 %>%
  filter(age >= 15, age <= 19, absences < 30)

# Build data_3: add a pass/fail column derived from the final grade G3.
# Grades below 10 are labelled "Kaldı" (failed); grades of 10 or more are
# labelled "Gecti" (passed).
data_3 <- mutate(
  data_2,
  final_not = if_else(G3 < 10, "Kaldı", "Gecti")
)

# Mean number of absences for each level of famsup (family support),
# computed on the full, unfiltered `data`, then printed.
data_ozet <- summarise(
  group_by(data, famsup),
  ortalama_devamsizlik = mean(absences)
)
data_ozet

## # A tibble: 2 × 2
##   famsup ortalama_devamsizlik
##   <chr>                 <dbl>
## 1 no                     5.46
## 2 yes                    5.86

# Sort the full data set by number of absences, highest first.
devamsizlik_sirali <- arrange(data, desc(absences))

library(ggplot2)

# Scatter plot of the final grade (G3) against the mother's education
# level (Medu). Points are coloured by Medu level; a black linear-model
# smoother with its confidence band is drawn on top.
ggplot(data, aes(Medu, G3, colour = as.factor(Medu))) +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  ggtitle("Annenin Eğitim Düzeyine Göre Öğrencilerin Başarı Dağılımı") +
  xlab("Annenin Eğitim Düzeyi") +
  ylab("Final Notu (G3)") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Grafikte, öğrencilerin annelerinin eğitim düzeyi (Medu) ile final notları (G3) arasındaki ilişki gösterilmiştir. Grafik incelendiğinde, annenin eğitim düzeyi yükseldikçe (4 en yüksek) öğrencilerin başarı düzeylerinin de genel olarak arttığı görülmektedir. Yükseköğretim mezunu annelere sahip öğrencilerin not dağılımlarının, daha düşük eğitim düzeyine sahip annelerin çocuklarına göre daha yüksek olduğu dikkat çekmektedir.

# Bar chart of the MEAN final grade (G3) for students with and without a
# romantic relationship.
# stat_summary(fun = mean) reduces each group to its average before the bar
# is drawn. A plain geom_col() would stack every student's G3 and show the
# group SUM, which mostly reflects how many students are in each group
# rather than how well they performed.
ggplot(data, aes(x = romantic, y = G3, fill = romantic)) +
  stat_summary(fun = mean, geom = "col") +
  scale_x_discrete(labels = c("no" = "İlişki Yok", "yes" = "İlişki Var")) +
  labs(
    title = "Romantik İlişki Durumuna Göre Başarı Dağılımı",
    x = "",
    # The axis now says "Ortalama" (mean) so the label matches what is drawn.
    y = "Ortalama Final Notu (G3)"
  ) +
  theme_minimal()

Bu sütun grafiği, öğrencilerin romantik ilişki durumlarına (romantic değişkeni) göre ortalama final notlarını (G3) göstermektedir. Grafik incelendiğinde, romantik ilişkisi olmayan öğrencilerin ortalama başarı düzeylerinin, ilişkisi olan öğrencilere göre daha yüksek olduğu görülmektedir.

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Odev_1

Tugba Erdogan Bozkurt

2025-10-09