# 1. Load the data (adjust the file path if yours is different).
# The file is read relative to the current working directory and its first
# line is treated as the header row.
SOCR_HeightWeight <- utils::read.csv(
  file = "SOCR-HeightWeight.csv",
  header = TRUE
)

# 2. Preview the structure of the data: column names, types and the first
# few values of each column.
utils::str(object = SOCR_HeightWeight)
## 'data.frame':    25000 obs. of  3 variables:
##  $ Index         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Height.Inches.: num  65.8 71.5 69.4 68.2 67.8 ...
##  $ Weight.Pounds.: num  113 136 153 142 144 ...
# Show the first six rows of the data set.
utils::head(x = SOCR_HeightWeight, n = 6L)
##   Index Height.Inches. Weight.Pounds.
## 1     1       65.78331       112.9925
## 2     2       71.51521       136.4873
## 3     3       69.39874       153.0269
## 4     4       68.21660       142.3354
## 5     5       67.78781       144.2971
## 6     6       68.69784       123.3024
# 3. Column names - these are what you will see in the report.
colnames(SOCR_HeightWeight)
## [1] "Index"          "Height.Inches." "Weight.Pounds."
# 4. Convert to numeric (in case the values were read in as text).
# The source columns are called "Height.Inches." and "Weight.Pounds.", so they
# are read by their full names with `[[`. Reading them as `$Height` / `$Weight`
# only works through partial name matching of `$`, which stops resolving as
# soon as another column shares the same prefix (and is disabled on tibbles).
# The results are stored in new, shorter-named columns `Height` and `Weight`,
# which the correlation and regression steps below use.
SOCR_HeightWeight$Height <- as.numeric(SOCR_HeightWeight[["Height.Inches."]])
SOCR_HeightWeight$Weight <- as.numeric(SOCR_HeightWeight[["Weight.Pounds."]])

# 5. Drop every row that has a missing value in any column.
SOCR_HeightWeight <- stats::na.omit(object = SOCR_HeightWeight)

# 6. Height-weight correlation.
# Pearson's product-moment test (the default method, spelled out here); the
# result is stored and then shown by evaluating the object at top level.
corr_res <- cor.test(
  x = SOCR_HeightWeight$Height,
  y = SOCR_HeightWeight$Weight,
  method = "pearson"
)
corr_res
## 
##  Pearson's product-moment correlation
## 
## data:  SOCR_HeightWeight$Height and SOCR_HeightWeight$Weight
## t = 91.981, df = 24998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4935390 0.5120626
## sample estimates:
##       cor 
## 0.5028585
# 7. Linear regression: weight ~ height.
# Fits weight as a linear function of height and shows the coefficient table,
# residual summary and R-squared.
model <- lm(formula = Weight ~ Height, data = SOCR_HeightWeight)
summary(object = model)
## 
## Call:
## lm(formula = Weight ~ Height, data = SOCR_HeightWeight)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.302  -6.711  -0.052   6.814  39.093 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -82.57574    2.28022  -36.21   <2e-16 ***
## Height        3.08348    0.03352   91.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.08 on 24998 degrees of freedom
## Multiple R-squared:  0.2529, Adjusted R-squared:  0.2528 
## F-statistic:  8461 on 1 and 24998 DF,  p-value: < 2.2e-16