library(DBI)
library(odbc)
odbcListDrivers()
## name attribute
## 1 SQL Server APILevel
## 2 SQL Server ConnectFunctions
## 3 SQL Server CPTimeout
## 4 SQL Server DriverODBCVer
## 5 SQL Server FileUsage
## 6 SQL Server SQLLevel
## 7 SQL Server UsageCount
## 8 Microsoft Access Driver (*.mdb, *.accdb) UsageCount
## 9 Microsoft Access Driver (*.mdb, *.accdb) APILevel
## 10 Microsoft Access Driver (*.mdb, *.accdb) ConnectFunctions
## 11 Microsoft Access Driver (*.mdb, *.accdb) DriverODBCVer
## 12 Microsoft Access Driver (*.mdb, *.accdb) FileUsage
## 13 Microsoft Access Driver (*.mdb, *.accdb) FileExtns
## 14 Microsoft Access Driver (*.mdb, *.accdb) SQLLevel
## 15 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) UsageCount
## 16 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) APILevel
## 17 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) ConnectFunctions
## 18 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) DriverODBCVer
## 19 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) FileUsage
## 20 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) FileExtns
## 21 Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb) SQLLevel
## 22 Microsoft Access Text Driver (*.txt, *.csv) UsageCount
## 23 Microsoft Access Text Driver (*.txt, *.csv) APILevel
## 24 Microsoft Access Text Driver (*.txt, *.csv) ConnectFunctions
## 25 Microsoft Access Text Driver (*.txt, *.csv) DriverODBCVer
## 26 Microsoft Access Text Driver (*.txt, *.csv) FileUsage
## 27 Microsoft Access Text Driver (*.txt, *.csv) FileExtns
## 28 Microsoft Access Text Driver (*.txt, *.csv) SQLLevel
## 29 MySQL ODBC 8.0 ANSI Driver UsageCount
## 30 MySQL ODBC 8.0 Unicode Driver UsageCount
## value
## 1 2
## 2 YYY
## 3 60
## 4 03.50
## 5 0
## 6 1
## 7 1
## 8 3
## 9 1
## 10 YYN
## 11 02.50
## 12 2
## 13 *.mdb,*.accdb
## 14 0
## 15 3
## 16 1
## 17 YYN
## 18 02.50
## 19 2
## 20 *.xls,*.xlsx, *.xlsb
## 21 0
## 22 3
## 23 1
## 24 YYN
## 25 02.50
## 26 2
## 27 *.txt, *.csv
## 28 0
## 29 1
## 30 1
library(DBI)
con2 = DBI::dbConnect(odbc(),
Driver = "MySQL ODBC 8.0 ANSI Driver",
Server = "127.0.0.1",
UID = "root",
PWD = "Amanda31/",
Port = 3306,
Database = "kopi")
data<- dbGetQuery(con2, "SELECT Sales, Profit FROM factTable")
Analisis ini dilakukan untuk mengetahui hubungan antara Sales dan Profit pada Coffee Chain Dataset. Berdasarkan eksplorasi data dan visualisasi menggunakan scatter plot
head(data) # lihat 6 data awal
## Sales Profit
## 1 219 94
## 2 190 68
## 3 234 101
## 4 100 30
## 5 134 54
## 6 180 53
str(data) # struktur data
## 'data.frame': 4248 obs. of 2 variables:
## $ Sales : int 219 190 234 100 134 180 341 150 140 130 ...
## $ Profit: int 94 68 101 30 54 53 99 0 33 17 ...
summary(data) # statistik deskriptif
## Sales Profit
## Min. : 17 Min. :-638.0
## 1st Qu.:100 1st Qu.: 17.0
## Median :138 Median : 40.0
## Mean :193 Mean : 61.1
## 3rd Qu.:230 3rd Qu.: 92.0
## Max. :912 Max. : 778.0
dim(data) # jumlah baris & kolom
## [1] 4248 2
library(ggplot2)
# Histogram (distribusi Sales)
ggplot(data, aes(x = Sales)) +
geom_histogram(fill = "blue", color = "black", bins = 30)
# Boxplot (cek outlier Profit)
ggplot(data, aes(y = Profit)) +
geom_boxplot(fill="red")
# Scatter plot (hubungan Sales vs Profit)
ggplot(data, aes(x = Sales, y = Profit)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red") +
labs(title = "Hubungan Sales dan Profit",
x = "Sales",
y = "Profit")
## `geom_smooth()` using formula = 'y ~ x'
cor(data$Sales, data$Profit)
## [1] 0.7973309
model1 <- lm(Profit ~ Sales, data = data)
summary(model1)
##
## Call:
## lm(formula = Profit ~ Sales, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -606.28 -9.15 11.77 28.31 466.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.456004 1.527850 -27.79 <2e-16 ***
## Sales 0.536582 0.006233 86.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 61.39 on 4246 degrees of freedom
## Multiple R-squared: 0.6357, Adjusted R-squared: 0.6357
## F-statistic: 7410 on 1 and 4246 DF, p-value: < 2.2e-16
Berdasarkan hasil visualisasi menggunakan scatter plot dapat disimpulkan bahwasanya terdapat hubungan positif antara variabel Sales dengan Profit . Hal ini dapat dilihat dari pola titik-titik yang cenderung meningkat, yang artinya peningkatan nilai Sales akan diikuti dengan peningkatan nilai Profit.
Berdasarkan hasil uji korelasi menunjukkan bahwa adanya hubungan ositif antara variabel sales dengan variabel profit. yang berarti semakin tinggi nilai Sales semakin tinggi pula profit yang didapatkan.
Berdasarkan hasil analisis regresi diperoleh bahwa variabel Sales memiliki pengaruh terhadap profit. Nilai koefisien regresi yang positif menunjukkan bahwa setiap peningkatan pada Sales akan diikuti peningkatan Profit.