# Global knitr chunk option: echo each chunk's source in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
# NOTE(review): a hard-coded, machine-specific working directory makes this
# script non-portable; consider a project-relative path instead. Kept as-is so
# the relative CSV path passed to read_csv() still resolves.
setwd("~/Desktop/mydata")
# library() stops with an error if readr is not installed, whereas require()
# only warns and returns FALSE, letting the script fail later at read_csv().
library(readr)

Column Names and Dimensions of the Data

# 1a. Load the data
# Attach readr, then parse the CSV (resolved against the current working
# directory) into `data`.
library(readr)
data <- read_csv(file = "social_media_users.csv")
## Rows: 10000 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Platform, Owner, Primary Usage, Country, Verified Account
## dbl  (1): Daily Time Spent (min)
## date (1): Date Joined
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View data: auto-print `data`; it is a tibble, so only a preview of the rows
# and the columns that fit the console width is shown.
data
## # A tibble: 10,000 × 7
##    Platform             Owner     `Primary Usage` Country Daily Time Spent (mi…¹
##    <chr>                <chr>     <chr>           <chr>                    <dbl>
##  1 WhatsApp             Meta      Messaging       Switze…                  114. 
##  2 WeChat               Tencent   Messaging and … Madaga…                   49.6
##  3 Snapchat             Snap Inc. Multimedia mes… Pitcai…                   29.0
##  4 Instagram            Meta      Photo and vide… Timor-…                  295. 
##  5 Threads              Meta      Text-based soc… Bermuda                   71.8
##  6 X (formerly Twitter) X Corp.   Microblogging   Falkla…                   92.8
##  7 Instagram            Meta      Photo and vide… Maurit…                  228. 
##  8 TikTok               ByteDance Short-form vid… Uganda                    44.9
##  9 Quora                Quora In… Q&A knowledge … Norway                   140. 
## 10 X (formerly Twitter) X Corp.   Microblogging   Anguil…                  235. 
## # ℹ 9,990 more rows
## # ℹ abbreviated name: ¹​`Daily Time Spent (min)`
## # ℹ 2 more variables: `Verified Account` <chr>, `Date Joined` <date>
# List the column names of the data set
colnames(data)
## [1] "Platform"               "Owner"                  "Primary Usage"         
## [4] "Country"                "Daily Time Spent (min)" "Verified Account"      
## [7] "Date Joined"
# Report the size of the data set as (number of rows, number of columns)
c(nrow(data), ncol(data))
## [1] 10000     7

Basic Summary of Dataset

# Drop every row that contains at least one missing value, then print
# per-column summary statistics for the rows that remain.
data <- stats::na.omit(data)
summary(object = data)
##    Platform            Owner           Primary Usage        Country         
##  Length:10000       Length:10000       Length:10000       Length:10000      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Daily Time Spent (min) Verified Account    Date Joined        
##  Min.   :  5.02         Length:10000       Min.   :2015-05-04  
##  1st Qu.: 78.92         Class :character   1st Qu.:2017-10-25  
##  Median :152.74         Mode  :character   Median :2020-04-22  
##  Mean   :152.21                            Mean   :2020-04-27  
##  3rd Qu.:225.64                            3rd Qu.:2022-10-20  
##  Max.   :300.00                            Max.   :2025-05-03

Earliest and Latest Date Joined

# Convert to Date and store the result in a new `DateJoined` column
data$DateJoined <- as.Date(data$`Date Joined`)

# Find earliest: the first row holding the minimum date
earliest_row <- data[which.min(data$DateJoined), c("Platform", "DateJoined")]

# which.min() returns only the first match, so collect every distinct platform
# that shares the earliest date; otherwise tied platforms are silently dropped.
# which() also discards NA comparisons.
earliest_platforms <- unique(
  data$Platform[which(data$DateJoined == earliest_row$DateJoined)]
)

# Print as a sentence (identical to the single-platform wording when no tie)
print(paste(
  "The earliest date joined was", earliest_row$DateJoined,
  "by platform:", paste(earliest_platforms, collapse = ", ")
))
## [1] "The earliest date joined was 2015-05-04 by platform: Quora"
# Find latest: the first row holding the maximum date
latest_row <- data[which.max(data$DateJoined), c("Platform", "DateJoined")]

# which.max() returns only the first match, so collect every distinct platform
# that shares the latest date; otherwise tied platforms are silently dropped.
# which() also discards NA comparisons.
latest_platforms <- unique(
  data$Platform[which(data$DateJoined == latest_row$DateJoined)]
)

# Print as a sentence (identical to the single-platform wording when no tie)
print(paste(
  "The latest date joined was", latest_row$DateJoined,
  "by platform:", paste(latest_platforms, collapse = ", ")
))
## [1] "The latest date joined was 2025-05-03 by platform: Instagram"

Platforms with the Most and Least Daily Time Spent (in Minutes)

# Find platform with highest daily time: the first row holding the maximum
max_row <- data[which.max(data$`Daily Time Spent (min)`),
                c("Platform", "Daily Time Spent (min)")]

# which.max() returns only the first match, so collect every distinct platform
# whose record equals the maximum; otherwise tied platforms are silently
# dropped. Exact `==` is safe here because the reference value is itself an
# element of the compared column.
max_minutes <- max_row$`Daily Time Spent (min)`
max_platforms <- unique(
  data$Platform[which(data$`Daily Time Spent (min)` == max_minutes)]
)

# Print as a sentence (identical to the single-platform wording when no tie)
print(paste(
  "The platform with the highest daily time spent is",
  paste(max_platforms, collapse = ", "),
  "with", max_minutes, "minutes"
))
## [1] "The platform with the highest daily time spent is Telegram with 300 minutes"
# Find platform with lowest daily time: the first row holding the minimum
min_row <- data[which.min(data$`Daily Time Spent (min)`),
                c("Platform", "Daily Time Spent (min)")]

# which.min() returns only the first match, so collect every distinct platform
# whose record equals the minimum; otherwise tied platforms are silently
# dropped. Exact `==` is safe here because the reference value is itself an
# element of the compared column.
min_minutes <- min_row$`Daily Time Spent (min)`
min_platforms <- unique(
  data$Platform[which(data$`Daily Time Spent (min)` == min_minutes)]
)

# Print as a sentence (identical to the single-platform wording when no tie)
print(paste(
  "The platform with the lowest daily time spent is",
  paste(min_platforms, collapse = ", "),
  "with", min_minutes, "minutes"
))
## [1] "The platform with the lowest daily time spent is Snapchat with 5.02 minutes"