R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

### Q1

#### a

# setwd("C:/Users/avivg/OneDrive/Documents/R/Statistical Analysis Course/Ex1")

# df <- read.csv("q1.csv")

# head(df)

# Ages and salaries of the 59 managers, entered by hand (parallel vectors)
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)

SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)

# One row per observation, columns AGE and SAL
df <- data.frame(AGE = AGE, SAL = SAL)
head(df)
##   AGE SAL
## 1  53 145
## 2  43 621
## 3  33 262
## 4  45 208
## 5  46 362
## 6  55 424
# Age column: mean, then median
age <- df[["AGE"]]
print(mean(age))
## [1] 51.54237
print(median(age))
## [1] 50

# Salary column: first values, mean, then median
salary <- df[["SAL"]]
print(head(salary))
## [1] 145 621 262 208 362 424
print(mean(salary))
## [1] 404.1695
print(median(salary))
## [1] 350

b

# 90th percentile of salary: the cut-off for the top 10 % of earners
quant9 <- quantile(salary, 0.9)
quant9
##   90% 
## 738.8
plot(df)
# Draw the computed cut-off instead of a hand-typed approximation
abline(h = quant9, col = "red")

# Label every age with its salary so the printout shows both values
names(age) <- salary
# Filter on the numeric salaries. names() are character strings, so comparing
# them with a number is a lexicographic comparison ("1103" sorts before "712.9")
# and would silently drop the highest earner.
age_10 <- age[salary > quant9]
print("Ages of top 10 %:")
## [1] "Ages of top 10 %:"
print(age_10)
##  750 1103  862  800  808  802 
##   69   57   47   74   40   56

c

# Sample standard deviation of the salaries
sd(salary)
## [1] 220.5335
# Absolute distance of each salary from the mean salary, computed once
salary_dev <- abs(mean(salary) - salary)
# Count of managers whose salary is strictly less than ONE SD away from the mean
print(sum(salary_dev < sd(salary)))
## [1] 44
# Count of managers whose salary is strictly less than TWO SDs away from the mean
print(sum(salary_dev < 2 * sd(salary)))
## [1] 57

d

# Age histograms: counts first, then densities (freq = FALSE)
hist(age)

hist(age, freq = FALSE)

# Salary histograms with breaks = 10: counts first, then densities
hist(salary, breaks = 10)

hist(salary, breaks = 10, freq = FALSE)

e

# Scatter plot of the two data-frame columns (age against salary)
plot(df)
title(main = "Age VS Salary")

# Correlation between age and salary: weak positive relationship
cor(x = df[["AGE"]], y = df[["SAL"]])
## [1] 0.1275554

Q2

a

# Load the football data (this replaces the Q1 data frame) and preview it
df <- read.csv(file = "firstgoalclean.csv")
head(df)
##   Div     Date    HomeTeam   AwayTeam FTHG FTAG FTR HTHG HTAG HTR       Referee
## 1  E0 15/08/09 Aston Villa      Wigan    0    2   A    0    1   A M Clattenburg
## 2  E0 15/08/09   Blackburn   Man City    0    2   A    0    1   A        M Dean
## 3  E0 15/08/09      Bolton Sunderland    0    1   A    0    1   A    A Marriner
## 4  E0 15/08/09     Chelsea       Hull    2    1   H    1    1   D       A Wiley
## 5  E0 15/08/09     Everton    Arsenal    1    6   A    0    3   A      M Halsey
## 6  E0 15/08/09  Portsmouth     Fulham    0    1   A    0    1   A    M Atkinson
##   HS AS HST AST HF AF HC AC HY AY HR AR HTFGM ATFGM FGM
## 1 11 14   5   7 15 14  4  6  2  2  0  0     0    31  31
## 2 17  8   9   5 12  9  5  4  2  1  0  0     0     3   3
## 3 11 20   3  13 16 10  4  7  2  1  0  0     0     5   5
## 4 26  7  12   3 13 15 12  4  1  2  0  0    37    28  28
## 5  8 15   5   9 11 13  4  9  0  0  0  0    90    26  26
## 6 16  9   4   3 11 18  6  4  3  2  0  0     0    14  14
# Number of rows that were read
nrow(df)
## [1] 348
# Histogram of the FGM column with breaks = 50
# (FGM is presumably the first-goal minute; confirm against the data source)
hist(df$FGM, breaks = 50)

b

# b: histogram of the HF column
hist(df$HF) # author's reading of the plot: approximately normal distribution

c

# c: standard deviation and mean of FGM, then mean and standard deviation of HF
sd(df$FGM)
## [1] 21.96523
mean(df$FGM)
## [1] 29.85345
# HF gets its own mean so that both columns are summarised by a mean/SD pair
mean(df$HF)
## [1] 11.67241
sd(df$HF)
## [1] 3.5547

d

# d: observations lying within half a standard deviation of the mean
fgm_cutoff <- sd(df$FGM) / 2
filterFGM <- abs(df$FGM - mean(df$FGM)) <= fgm_cutoff
length(df$FGM[filterFGM])
## [1] 120
hf_cutoff <- sd(df$HF) / 2
filterHF <- abs(df$HF - mean(df$HF)) <= hf_cutoff
length(df$HF[filterHF])
## [1] 154
# Standard scores (z-scores) of the selected observations;
# head() prints only the first six of them
fgm_z <- (df$FGM - mean(df$FGM)) / sd(df$FGM)
head(fgm_z[filterFGM])
## [1]  0.05219849 -0.08438102 -0.17543402 -0.49411953 -0.35754002  0.18877799
hf_z <- (df$HF - mean(df$HF)) / sd(df$HF)
head(hf_z[filterHF])
## [1]  0.09215579  0.37347346 -0.18916188 -0.18916188  0.37347346 -0.18916188

e

# e: 40th and 70th percentiles of FGM
quantile(df$FGM, probs = 0.4)
##  40% 
## 19.8
quantile(df$FGM, probs = 0.7)
## 70% 
##  38

Q5

a

# Age and salary vectors, entered by hand (parallel vectors of length 59)
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)

SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)

# Linear transform of the salary (80 % of SAL minus a fixed 10);
# each value is labelled with the original salary it came from
Sal_after_tax <- setNames(0.8 * SAL - 10, SAL)
# Mean, median and standard deviation before and after the transform
mean(SAL)
## [1] 404.1695
mean(Sal_after_tax)
## [1] 313.3356
median(SAL)
## [1] 350
median(Sal_after_tax)
## [1] 270
sd(SAL)
## [1] 220.5335
sd(Sal_after_tax)
## [1] 176.4268
# Histogram of the transformed salaries with breaks = 20
hist(Sal_after_tax, breaks = 20)

#plot the data frame (ages and salaries)

# cor(df$AGE, df$SAL) #/low positive correlation