library(readr)
# Load the survey responses into `data`; readr guesses the column types and
# prints the specification it settled on.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
data <- read_csv(
  file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv"
)
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only when someone is at the console.
# `View()` needs a GUI, so it is skipped in non-interactive runs
# (Rscript, knitting, CI).
if (interactive()) {
  View(data)
}
# List every column name in the data set
names(data)
##  [1] "Age"                                                 
##  [2] "City"                                                
##  [3] "Current Status"                                      
##  [4] "Do you own multiple profiles on Instagram?"          
##  [5] "Gender"                                              
##  [6] "Highest Education"                                   
##  [7] "Location (City Airport Code)"                        
##  [8] "Phone OS"                                            
##  [9] "State"                                               
## [10] "Zone"                                                
## [11] "How many followers do you have on Instagram?"        
## [12] "How many posts do you have on Instagram?"            
## [13] "Latitude"                                            
## [14] "Longitude"                                           
## [15] "Time Spent on Facebook in last week (in minutes)"    
## [16] "Time Spent on Facebook in last weekend (in minutes)" 
## [17] "Time Spent on Instagram in last week (in minutes)"   
## [18] "Time Spent on Instagram in last weekend (in minutes)"
## [19] "Time Spent on WhatsApp in last week (in minutes)"    
## [20] "Time Spent on WhatsApp in last weekend (in minutes)" 
## [21] "Total Facebook Usage"                                
## [22] "Total Instagram Usage"                               
## [23] "Total Social Media Usage"                            
## [24] "Total Week Usage"                                    
## [25] "Total Weekend Usage"                                 
## [26] "Total WhatsApp Usage"                                
## [27] "How many subscriber do you have on youtube"          
## [28] "Income from YouTube (rs,month)"                      
## [29] "Internet Speed (Mbps)"                               
## [30] "Total YouTube Usage (minutes)"                       
## [31] "Weekly YouTube Usage (minutes)"                      
## [32] "Profession/Activity"                                 
## [33] "Likes on Instagram"                                  
## [34] "Shares on Instagram"                                 
## [35] "Hobby"                                               
## [36] "Verified Account on instagram"                       
## [37] "Date of Birth"                                       
## [38] "Time Spent on Twitter per Week (minutes)"            
## [39] "Total Time Spent on Twitter (minutes)"               
## [40] "Groups Joined on Instagram"
# Compact overview of the structure: one line per column with type and
# leading values
utils::str(object = data)
## spc_tbl_ [1,628 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age                                                 : num [1:1628] 24 39 22 26 50 25 52 45 25 27 ...
##  $ City                                                : chr [1:1628] "Delhi" "Delhi" "Mumbai" "Bengaluru" ...
##  $ Current Status                                      : chr [1:1628] "Working professional" "Working professional" "Working professional" "Sabbatical" ...
##  $ Do you own multiple profiles on Instagram?          : chr [1:1628] "No" "No" "No" "Yes" ...
##  $ Gender                                              : chr [1:1628] "Female" "Female" "Male" "Female" ...
##  $ Highest Education                                   : chr [1:1628] "Graduation" "Post graduation" "Graduation" "Graduation" ...
##  $ Location (City Airport Code)                        : chr [1:1628] "DEL" "DEL" "BOM" "BLR" ...
##  $ Phone OS                                            : chr [1:1628] "iOs" "iOs" "Android" "Android" ...
##  $ State                                               : chr [1:1628] "Delhi" "Delhi" "Maharashtra" "Karnataka" ...
##  $ Zone                                                : chr [1:1628] "Northern" "Northern" "Western" "Southern" ...
##  $ How many followers do you have on Instagram?        : num [1:1628] 456 0 400 485 0 ...
##  $ How many posts do you have on Instagram?            : num [1:1628] 20 0 6 16 0 220 0 0 340 37 ...
##  $ Latitude                                            : num [1:1628] 28.7 28.7 19 13 28.7 ...
##  $ Longitude                                           : num [1:1628] 77.2 77.2 72.8 77.6 77.2 ...
##  $ Time Spent on Facebook in last week (in minutes)    : num [1:1628] 0 6000 500 1500 1500 1000 300 983 1160 480 ...
##  $ Time Spent on Facebook in last weekend (in minutes) : num [1:1628] 0 2160 2000 1500 1500 1200 900 873 870 840 ...
##  $ Time Spent on Instagram in last week (in minutes)   : num [1:1628] 770 0 1000 2000 0 3000 0 0 1240 720 ...
##  $ Time Spent on Instagram in last weekend (in minutes): num [1:1628] 400 0 1000 2000 0 840 215 0 340 300 ...
##  $ Time Spent on WhatsApp in last week (in minutes)    : num [1:1628] 900 5000 7000 1680 2400 2100 1800 583 1760 3000 ...
##  $ Time Spent on WhatsApp in last weekend (in minutes) : num [1:1628] 120 2000 2000 1680 1300 600 1500 834 450 600 ...
##  $ Total Facebook Usage                                : num [1:1628] 0 8160 2500 3000 3000 ...
##  $ Total Instagram Usage                               : num [1:1628] 1170 0 2000 4000 0 3840 215 0 1580 1020 ...
##  $ Total Social Media Usage                            : num [1:1628] 2190 15160 13500 10360 6700 ...
##  $ Total Week Usage                                    : num [1:1628] 1670 11000 8500 5180 3900 ...
##  $ Total Weekend Usage                                 : num [1:1628] 520 4160 5000 5180 2800 ...
##  $ Total WhatsApp Usage                                : num [1:1628] 1020 7000 9000 3360 3700 ...
##  $ How many subscriber do you have on youtube          : num [1:1628] 33356 25394 34603 13645 49876 ...
##  $ Income from YouTube (rs,month)                      : num [1:1628] 88447 64764 4387 99695 81297 ...
##  $ Internet Speed (Mbps)                               : num [1:1628] 46.6 83.5 50.5 99.6 22.1 ...
##  $ Total YouTube Usage (minutes)                       : num [1:1628] 272543 220056 2629 154271 178485 ...
##  $ Weekly YouTube Usage (minutes)                      : num [1:1628] 305 2090 528 1545 2836 ...
##  $ Profession/Activity                                 : chr [1:1628] "Photographer" "Singer" "Content Creator" "Blogger" ...
##  $ Likes on Instagram                                  : chr [1:1628] "679k" "400k" "447k" "443k" ...
##  $ Shares on Instagram                                 : chr [1:1628] "16K" "28K" "86K" "39K" ...
##  $ Hobby                                               : chr [1:1628] "Reading" "Reading" "Playing Musical Instruments" "Cooking/Baking" ...
##  $ Verified Account on instagram                       : chr [1:1628] "Yes" "Yes" "No" "No" ...
##  $ Date of Birth                                       : chr [1:1628] "8/31/1982" "12/17/1992" "4/16/2002" "12/10/1965" ...
##  $ Time Spent on Twitter per Week (minutes)            : num [1:1628] 356 1774 1848 199 1210 ...
##  $ Total Time Spent on Twitter (minutes)               : num [1:1628] 2492 12418 12936 1393 8470 ...
##  $ Groups Joined on Instagram                          : num [1:1628] 4 5 2 4 3 9 2 3 2 6 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_double(),
##   ..   City = col_character(),
##   ..   `Current Status` = col_character(),
##   ..   `Do you own multiple profiles on Instagram?` = col_character(),
##   ..   Gender = col_character(),
##   ..   `Highest Education` = col_character(),
##   ..   `Location (City Airport Code)` = col_character(),
##   ..   `Phone OS` = col_character(),
##   ..   State = col_character(),
##   ..   Zone = col_character(),
##   ..   `How many followers do you have on Instagram?` = col_number(),
##   ..   `How many posts do you have on Instagram?` = col_number(),
##   ..   Latitude = col_double(),
##   ..   Longitude = col_double(),
##   ..   `Time Spent on Facebook in last week (in minutes)` = col_number(),
##   ..   `Time Spent on Facebook in last weekend (in minutes)` = col_number(),
##   ..   `Time Spent on Instagram in last week (in minutes)` = col_number(),
##   ..   `Time Spent on Instagram in last weekend (in minutes)` = col_number(),
##   ..   `Time Spent on WhatsApp in last week (in minutes)` = col_number(),
##   ..   `Time Spent on WhatsApp in last weekend (in minutes)` = col_number(),
##   ..   `Total Facebook Usage` = col_number(),
##   ..   `Total Instagram Usage` = col_number(),
##   ..   `Total Social Media Usage` = col_number(),
##   ..   `Total Week Usage` = col_number(),
##   ..   `Total Weekend Usage` = col_number(),
##   ..   `Total WhatsApp Usage` = col_number(),
##   ..   `How many subscriber do you have on youtube` = col_double(),
##   ..   `Income from YouTube (rs,month)` = col_double(),
##   ..   `Internet Speed (Mbps)` = col_double(),
##   ..   `Total YouTube Usage (minutes)` = col_double(),
##   ..   `Weekly YouTube Usage (minutes)` = col_double(),
##   ..   `Profession/Activity` = col_character(),
##   ..   `Likes on Instagram` = col_character(),
##   ..   `Shares on Instagram` = col_character(),
##   ..   Hobby = col_character(),
##   ..   `Verified Account on instagram` = col_character(),
##   ..   `Date of Birth` = col_character(),
##   ..   `Time Spent on Twitter per Week (minutes)` = col_double(),
##   ..   `Total Time Spent on Twitter (minutes)` = col_double(),
##   ..   `Groups Joined on Instagram` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns
summary(object = data)
##       Age            City           Current Status    
##  Min.   :13.00   Length:1628        Length:1628       
##  1st Qu.:22.00   Class :character   Class :character  
##  Median :24.00   Mode  :character   Mode  :character  
##  Mean   :26.86                                        
##  3rd Qu.:27.00                                        
##  Max.   :74.00                                        
##  Do you own multiple profiles on Instagram?    Gender         
##  Length:1628                                Length:1628       
##  Class :character                           Class :character  
##  Mode  :character                           Mode  :character  
##                                                               
##                                                               
##                                                               
##  Highest Education  Location (City Airport Code)   Phone OS        
##  Length:1628        Length:1628                  Length:1628       
##  Class :character   Class :character             Class :character  
##  Mode  :character   Mode  :character             Mode  :character  
##                                                                    
##                                                                    
##                                                                    
##     State               Zone          
##  Length:1628        Length:1628       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
##  How many followers do you have on Instagram?
##  Min.   :     0.0                            
##  1st Qu.:   183.0                            
##  Median :   370.0                            
##  Mean   :   868.1                            
##  3rd Qu.:   657.0                            
##  Max.   :116000.0                            
##  How many posts do you have on Instagram?    Latitude        Longitude    
##  Min.   :   0.00                          Min.   : 8.486   Min.   :69.67  
##  1st Qu.:  10.00                          1st Qu.:18.988   1st Qu.:72.84  
##  Median :  43.50                          Median :22.563   Median :77.23  
##  Mean   :  99.08                          Mean   :22.760   Mean   :77.89  
##  3rd Qu.: 111.25                          3rd Qu.:28.652   3rd Qu.:78.46  
##  Max.   :2858.00                          Max.   :32.736   Max.   :94.91  
##  Time Spent on Facebook in last week (in minutes)
##  Min.   :   0.0                                  
##  1st Qu.:   2.0                                  
##  Median :  63.0                                  
##  Mean   : 175.2                                  
##  3rd Qu.: 240.0                                  
##  Max.   :6000.0                                  
##  Time Spent on Facebook in last weekend (in minutes)
##  Min.   :   0.00                                    
##  1st Qu.:   0.00                                    
##  Median :  30.00                                    
##  Mean   :  75.69                                    
##  3rd Qu.:  89.00                                    
##  Max.   :2160.00                                    
##  Time Spent on Instagram in last week (in minutes)
##  Min.   :   0.0                                   
##  1st Qu.: 120.0                                   
##  Median : 357.0                                   
##  Mean   : 505.2                                   
##  3rd Qu.: 675.0                                   
##  Max.   :6000.0                                   
##  Time Spent on Instagram in last weekend (in minutes)
##  Min.   :   0.0                                      
##  1st Qu.:  48.0                                      
##  Median : 135.0                                      
##  Mean   : 215.0                                      
##  3rd Qu.: 281.5                                      
##  Max.   :2560.0                                      
##  Time Spent on WhatsApp in last week (in minutes)
##  Min.   :   4.0                                  
##  1st Qu.: 300.0                                  
##  Median : 600.0                                  
##  Mean   : 854.9                                  
##  3rd Qu.:1009.0                                  
##  Max.   :7000.0                                  
##  Time Spent on WhatsApp in last weekend (in minutes) Total Facebook Usage
##  Min.   :   0.0                                      Min.   :   0.0      
##  1st Qu.: 100.0                                      1st Qu.:  10.0      
##  Median : 200.0                                      Median : 101.5      
##  Mean   : 294.9                                      Mean   : 250.9      
##  3rd Qu.: 360.0                                      3rd Qu.: 334.2      
##  Max.   :2800.0                                      Max.   :8160.0      
##  Total Instagram Usage Total Social Media Usage Total Week Usage
##  Min.   :   0.0        Min.   :   12            Min.   :    8   
##  1st Qu.: 190.8        1st Qu.:  970            1st Qu.:  670   
##  Median : 522.5        Median : 1658            Median : 1170   
##  Mean   : 720.2        Mean   : 2121            Mean   : 1535   
##  3rd Qu.: 970.0        3rd Qu.: 2670            3rd Qu.: 1895   
##  Max.   :8240.0        Max.   :15780            Max.   :12734   
##  Total Weekend Usage Total WhatsApp Usage
##  Min.   :   0.0      Min.   :   9        
##  1st Qu.: 243.0      1st Qu.: 450        
##  Median : 425.5      Median : 812        
##  Mean   : 585.6      Mean   :1150        
##  3rd Qu.: 709.0      3rd Qu.:1400        
##  Max.   :5180.0      Max.   :9000        
##  How many subscriber do you have on youtube Income from YouTube (rs,month)
##  Min.   :   33                              Min.   :   11                 
##  1st Qu.:12783                              1st Qu.:23870                 
##  Median :24629                              Median :47898                 
##  Mean   :24795                              Mean   :49166                 
##  3rd Qu.:36702                              3rd Qu.:74162                 
##  Max.   :49939                              Max.   :99991                 
##  Internet Speed (Mbps) Total YouTube Usage (minutes)
##  Min.   : 2.03         Min.   :   224               
##  1st Qu.:24.82         1st Qu.: 78476               
##  Median :50.47         Median :153087               
##  Mean   :50.34         Mean   :150846               
##  3rd Qu.:75.83         3rd Qu.:225062               
##  Max.   :99.93         Max.   :299562               
##  Weekly YouTube Usage (minutes) Profession/Activity Likes on Instagram
##  Min.   :   2                   Length:1628         Length:1628       
##  1st Qu.: 782                   Class :character    Class :character  
##  Median :1456                   Mode  :character    Mode  :character  
##  Mean   :1480                                                         
##  3rd Qu.:2226                                                         
##  Max.   :2998                                                         
##  Shares on Instagram    Hobby           Verified Account on instagram
##  Length:1628         Length:1628        Length:1628                  
##  Class :character    Class :character   Class :character             
##  Mode  :character    Mode  :character   Mode  :character             
##                                                                      
##                                                                      
##                                                                      
##  Date of Birth      Time Spent on Twitter per Week (minutes)
##  Length:1628        Min.   :   0                            
##  Class :character   1st Qu.: 561                            
##  Mode  :character   Median :1168                            
##                     Mean   :1179                            
##                     3rd Qu.:1790                            
##                     Max.   :2399                            
##  Total Time Spent on Twitter (minutes) Groups Joined on Instagram
##  Min.   :    0                         Min.   : 0.00             
##  1st Qu.: 3927                         1st Qu.: 2.00             
##  Median : 8180                         Median : 5.00             
##  Mean   : 8255                         Mean   : 4.96             
##  3rd Qu.:12532                         3rd Qu.: 8.00             
##  Max.   :16793                         Max.   :10.00
# Peek at the first six rows
head(x = data, n = 6L)
## # A tibble: 6 × 40
##     Age City  `Current Status` Do you own multiple …¹ Gender `Highest Education`
##   <dbl> <chr> <chr>            <chr>                  <chr>  <chr>              
## 1    24 Delhi Working profess… No                     Female Graduation         
## 2    39 Delhi Working profess… No                     Female Post graduation    
## 3    22 Mumb… Working profess… No                     Male   Graduation         
## 4    26 Beng… Sabbatical       Yes                    Female Graduation         
## 5    50 Delhi Working profess… No                     Male   Graduation         
## 6    25 Vish… Working profess… Yes                    Female Post graduation    
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 34 more variables: `Location (City Airport Code)` <chr>, `Phone OS` <chr>,
## #   State <chr>, Zone <chr>,
## #   `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, `Time Spent on Facebook in last week (in minutes)` <dbl>,
## #   `Time Spent on Facebook in last weekend (in minutes)` <dbl>, …
# Create a matrix with the first 3 rows and 3 columns of the data.
# `matrix()` must not be used here: it treats the tibble as a list of columns
# and builds a list-matrix whose every cell holds a whole column.
# `as.matrix()` converts cell by cell instead; because the selected columns
# mix numeric and character data, every cell is coerced to character.
matrix_data <- as.matrix(data[1:3, 1:3])

# Print the matrix
matrix_data
##      Age  City     Current Status        
## [1,] "24" "Delhi"  "Working professional"
## [2,] "39" "Delhi"  "Working professional"
## [3,] "22" "Mumbai" "Working professional"
# Age: arithmetic mean
mean(data[["Age"]])
## [1] 26.85811
# Age: median
median(data[["Age"]])
## [1] 24
# Age: oldest respondent
max(data[["Age"]])
## [1] 74
# Age: youngest respondent
min(data[["Age"]])
## [1] 13
# Count of rows whose Gender is "Male"
sum("Male" == data[["Gender"]])
## [1] 813
# Count of rows whose Gender is "Female"
sum("Female" == data[["Gender"]])
## [1] 813
# Instagram followers: mean
mean(data[["How many followers do you have on Instagram?"]])
## [1] 868.1474
# Instagram followers: median
median(data[["How many followers do you have on Instagram?"]])
## [1] 370
# Instagram followers: largest value
max(data[["How many followers do you have on Instagram?"]])
## [1] 116000
# Instagram followers: smallest value
min(data[["How many followers do you have on Instagram?"]])
## [1] 0
# Instagram posts: mean
mean(data[["How many posts do you have on Instagram?"]])
## [1] 99.07985
# Instagram posts: median
median(data[["How many posts do you have on Instagram?"]])
## [1] 43.5
# Instagram posts: largest value
max(data[["How many posts do you have on Instagram?"]])
## [1] 2858
# Instagram posts: smallest value
min(data[["How many posts do you have on Instagram?"]])
## [1] 0
# Minutes spent on Facebook last week, summed over all respondents
sum(data[["Time Spent on Facebook in last week (in minutes)"]])
## [1] 285275
# Minutes spent on Instagram last week, summed over all respondents
sum(data[["Time Spent on Instagram in last week (in minutes)"]])
## [1] 822407
# Minutes spent on WhatsApp last week, summed over all respondents
sum(data[["Time Spent on WhatsApp in last week (in minutes)"]])
## [1] 1391726
# Total social media usage: mean
mean(data[["Total Social Media Usage"]])
## [1] 2120.885
# Total social media usage: median
median(data[["Total Social Media Usage"]])
## [1] 1658.5
# Total social media usage: largest value
max(data[["Total Social Media Usage"]])
## [1] 15780
#@**************************************************************************************************************

# Rows where Gender is "Female" (`%in%` never yields NA, so rows with a
# missing Gender are dropped just as `subset()` would drop them)
females <- data[data[["Gender"]] %in% "Female", ]
females
## # A tibble: 813 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    26 Bengaluru      Sabbatical           Yes                          Female
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    45 Delhi          Working professional No                           Female
##  7    21 Delhi          Working professional No                           Female
##  8    26 Delhi          Working professional No                           Female
##  9    25 Mumbai         Sabbatical           No                           Female
## 10    22 Kolkata        Student              No                           Female
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Rows where Gender is "Male" (`%in%` never yields NA, so rows with a
# missing Gender are dropped just as `subset()` would drop them)
males <- data[data[["Gender"]] %in% "Male", ]
males
## # A tibble: 813 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    22 Mumbai    Working professional No                                Male  
##  2    50 Delhi     Working professional No                                Male  
##  3    52 Jaipur    Working professional No                                Male  
##  4    25 Bengaluru Student              No                                Male  
##  5    27 Delhi     Student              Yes                               Male  
##  6    27 Bengaluru Working professional No                                Male  
##  7    22 Delhi     Sabbatical           Yes                               Male  
##  8    26 Agra      Working professional No                                Male  
##  9    25 Ahmedabad Student              No                                Male  
## 10    18 Jaipur    Student              No                                Male  
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Rows whose Current Status is "Student"; rows with a missing status are
# excluded
students <- data[data[["Current Status"]] %in% "Student", ]
students
## # A tibble: 637 × 40
##      Age City      `Current Status` Do you own multiple profiles on Ins…¹ Gender
##    <dbl> <chr>     <chr>            <chr>                                 <chr> 
##  1    25 Bengaluru Student          No                                    Male  
##  2    27 Delhi     Student          Yes                                   Male  
##  3    25 Ahmedabad Student          No                                    Male  
##  4    18 Jaipur    Student          No                                    Male  
##  5    22 Kolkata   Student          No                                    Female
##  6    26 Kolkata   Student          No                                    Female
##  7    23 Delhi     Student          No                                    Female
##  8    17 Mumbai    Student          Yes                                   Male  
##  9    23 Kolkata   Student          Yes                                   Female
## 10    22 Ahmedabad Student          No                                    Male  
## # ℹ 627 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Rows whose Current Status is "Working professional"; rows with a missing
# status are excluded
working_professionals <- data[
  data[["Current Status"]] %in% "Working professional",
]
working_professionals
## # A tibble: 796 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    22 Mumbai         Working professional No                           Male  
##  4    50 Delhi          Working professional No                           Male  
##  5    25 Vishakhapatnam Working professional Yes                          Female
##  6    52 Jaipur         Working professional No                           Male  
##  7    27 Bengaluru      Working professional No                           Male  
##  8    45 Delhi          Working professional No                           Female
##  9    21 Delhi          Working professional No                           Female
## 10    26 Agra           Working professional No                           Male  
## # ℹ 786 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents located in the Northern zone
northern <- data[data[["Zone"]] %in% "Northern", ]
northern
## # A tibble: 542 × 40
##      Age City   `Current Status`     Do you own multiple profiles on In…¹ Gender
##    <dbl> <chr>  <chr>                <chr>                                <chr> 
##  1    24 Delhi  Working professional No                                   Female
##  2    39 Delhi  Working professional No                                   Female
##  3    50 Delhi  Working professional No                                   Male  
##  4    52 Jaipur Working professional No                                   Male  
##  5    27 Delhi  Student              Yes                                  Male  
##  6    45 Delhi  Working professional No                                   Female
##  7    22 Delhi  Sabbatical           Yes                                  Male  
##  8    21 Delhi  Working professional No                                   Female
##  9    26 Agra   Working professional No                                   Male  
## 10    26 Delhi  Working professional No                                   Female
## # ℹ 532 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents located in the Southern zone
southern <- data[data[["Zone"]] %in% "Southern", ]
southern
## # A tibble: 211 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    26 Bengaluru      Sabbatical           Yes                          Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    25 Bengaluru      Student              No                           Male  
##  4    27 Bengaluru      Working professional No                           Male  
##  5    32 Bengaluru      Working professional No                           Male  
##  6    27 Chennai        Working professional No                           Male  
##  7    23 Chennai        Student              No                           Female
##  8    22 Chennai        Student              Yes                          Female
##  9    32 Bengaluru      Working professional No                           Female
## 10    21 Chennai        Student              No                           Female
## # ℹ 201 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents located in the Eastern zone
eastern <- data[data[["Zone"]] %in% "Eastern", ]
eastern
## # A tibble: 271 × 40
##      Age City        `Current Status`     Do you own multiple profiles …¹ Gender
##    <dbl> <chr>       <chr>                <chr>                           <chr> 
##  1    45 Durgapur    Sabbatical           No                              Female
##  2    24 Cooch-behar Working professional No                              Male  
##  3    22 Kolkata     Student              No                              Female
##  4    26 Kolkata     Student              No                              Female
##  5    50 Kolkata     Working professional No                              Female
##  6    23 Kolkata     Student              Yes                             Female
##  7    25 Kolkata     Working professional No                              Male  
##  8    45 Bagdogra    Working professional Yes                             Female
##  9    25 Kolkata     Student              No                              Male  
## 10    45 Kolkata     Working professional No                              Female
## # ℹ 261 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents located in the Western zone
western <- data[data[["Zone"]] %in% "Western", ]
western
## # A tibble: 543 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    22 Mumbai    Working professional No                                Male  
##  2    25 Ahmedabad Student              No                                Male  
##  3    25 Mumbai    Sabbatical           No                                Female
##  4    25 Ahmedabad Self Employed        Yes                               Male  
##  5    17 Mumbai    Student              Yes                               Male  
##  6    22 Ahmedabad Student              No                                Male  
##  7    24 Pune      Working professional No                                Female
##  8    21 Mumbai    Sabbatical           Yes                               Female
##  9    51 Ahmedabad Working professional No                                Male  
## 10    22 Mumbai    Sabbatical           No                                Male  
## # ℹ 533 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents whose phone OS is "Android"
android <- data[which(data[["Phone OS"]] == "Android"), ]
android
## # A tibble: 1,115 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    22 Mumbai         Working professional No                           Male  
##  2    26 Bengaluru      Sabbatical           Yes                          Female
##  3    25 Vishakhapatnam Working professional Yes                          Female
##  4    52 Jaipur         Working professional No                           Male  
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    25 Bengaluru      Student              No                           Male  
##  7    27 Delhi          Student              Yes                          Male  
##  8    27 Bengaluru      Working professional No                           Male  
##  9    21 Delhi          Working professional No                           Female
## 10    26 Agra           Working professional No                           Male  
## # ℹ 1,105 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents whose phone OS is recorded as "iOs"
ios <- data[which(data[["Phone OS"]] == "iOs"), ]
ios
## # A tibble: 508 × 40
##      Age City     `Current Status`     Do you own multiple profiles on …¹ Gender
##    <dbl> <chr>    <chr>                <chr>                              <chr> 
##  1    24 Delhi    Working professional No                                 Female
##  2    39 Delhi    Working professional No                                 Female
##  3    50 Delhi    Working professional No                                 Male  
##  4    45 Delhi    Working professional No                                 Female
##  5    22 Delhi    Sabbatical           Yes                                Male  
##  6    18 Jaipur   Student              No                                 Male  
##  7    22 Chennai  Student              Yes                                Female
##  8    23 Guwahati Student              No                                 Female
##  9    45 Bagdogra Working professional Yes                                Female
## 10    28 Kolkata  Sabbatical           No                                 Female
## # ℹ 498 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents whose highest education is "Graduation"
graduation <- data[which(data[["Highest Education"]] == "Graduation"), ]
graduation
## # A tibble: 950 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    24 Delhi     Working professional No                                Female
##  2    22 Mumbai    Working professional No                                Male  
##  3    26 Bengaluru Sabbatical           Yes                               Female
##  4    50 Delhi     Working professional No                                Male  
##  5    45 Durgapur  Sabbatical           No                                Female
##  6    25 Bengaluru Student              No                                Male  
##  7    27 Delhi     Student              Yes                               Male  
##  8    27 Bengaluru Working professional No                                Male  
##  9    45 Delhi     Working professional No                                Female
## 10    21 Delhi     Working professional No                                Female
## # ℹ 940 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents whose highest education is "Post graduation"
post_graduation <- data[
  which(data[["Highest Education"]] == "Post graduation"),
]
post_graduation
## # A tibble: 541 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    39 Delhi          Working professional No                           Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    52 Jaipur         Working professional No                           Male  
##  4    22 Delhi          Sabbatical           Yes                          Male  
##  5    26 Delhi          Working professional No                           Female
##  6    25 Mumbai         Sabbatical           No                           Female
##  7    22 Kolkata        Student              No                           Female
##  8    26 Kolkata        Student              No                           Female
##  9    27 Chennai        Working professional No                           Male  
## 10    32 Bengaluru      Working professional No                           Female
## # ℹ 531 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents whose highest education is "High School"
high_school <- data[which(data[["Highest Education"]] == "High School"), ]
high_school
## # A tibble: 137 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    18 Jaipur    Student              No                                Male  
##  2    35 Delhi     Working professional Yes                               Female
##  3    23 Delhi     Student              No                                Female
##  4    50 Kolkata   Working professional No                                Female
##  5    17 Mumbai    Student              Yes                               Male  
##  6    16 Kolkata   Student              No                                Male  
##  7    16 Jaipur    Student              No                                Female
##  8    15 Chennai   Student              Yes                               Female
##  9    20 Hyderabad Student              Yes                               Male  
## 10    16 Mumbai    Student              No                                Male  
## # ℹ 127 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents who answered "Yes" to owning multiple Instagram profiles
multiple_profiles <- data[
  which(data[["Do you own multiple profiles on Instagram?"]] == "Yes"),
]
multiple_profiles
## # A tibble: 308 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    26 Bengaluru      Sabbatical           Yes                          Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    27 Delhi          Student              Yes                          Male  
##  4    22 Delhi          Sabbatical           Yes                          Male  
##  5    25 Ahmedabad      Self Employed        Yes                          Male  
##  6    35 Delhi          Working professional Yes                          Female
##  7    17 Mumbai         Student              Yes                          Male  
##  8    23 Kolkata        Student              Yes                          Female
##  9    26 Kanpur         Working professional Yes                          Male  
## 10    22 Chennai        Student              Yes                          Female
## # ℹ 298 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents who answered "No" to owning multiple Instagram profiles
single_profile <- data[
  which(data[["Do you own multiple profiles on Instagram?"]] == "No"),
]
single_profile
## # A tibble: 1,316 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    24 Delhi     Working professional No                                Female
##  2    39 Delhi     Working professional No                                Female
##  3    22 Mumbai    Working professional No                                Male  
##  4    50 Delhi     Working professional No                                Male  
##  5    52 Jaipur    Working professional No                                Male  
##  6    45 Durgapur  Sabbatical           No                                Female
##  7    25 Bengaluru Student              No                                Male  
##  8    27 Bengaluru Working professional No                                Male  
##  9    45 Delhi     Working professional No                                Female
## 10    21 Delhi     Working professional No                                Female
## # ℹ 1,306 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents with more than 500 Facebook minutes in the last week
more_than_500_fb <- data[
  which(data[["Time Spent on Facebook in last week (in minutes)"]] > 500),
]
more_than_500_fb
## # A tibble: 130 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    39 Delhi          Working professional No                           Female
##  2    26 Bengaluru      Sabbatical           Yes                          Female
##  3    50 Delhi          Working professional No                           Male  
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    25 Bengaluru      Student              No                           Male  
##  7    27 Bengaluru      Working professional No                           Male  
##  8    22 Delhi          Sabbatical           Yes                          Male  
##  9    18 Jaipur         Student              No                           Male  
## 10    25 Mumbai         Sabbatical           No                           Female
## # ℹ 120 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents with more than 500 Instagram minutes in the last week
more_than_500_insta <- data[
  which(data[["Time Spent on Instagram in last week (in minutes)"]] > 500),
]
more_than_500_insta
## # A tibble: 559 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    22 Mumbai         Working professional No                           Male  
##  3    26 Bengaluru      Sabbatical           Yes                          Female
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    25 Bengaluru      Student              No                           Male  
##  6    27 Delhi          Student              Yes                          Male  
##  7    45 Delhi          Working professional No                           Female
##  8    22 Delhi          Sabbatical           Yes                          Male  
##  9    18 Jaipur         Student              No                           Male  
## 10    25 Mumbai         Sabbatical           No                           Female
## # ℹ 549 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents with more than 500 WhatsApp minutes in the last week
more_than_500_whatsapp <- data[
  which(data[["Time Spent on WhatsApp in last week (in minutes)"]] > 500),
]
more_than_500_whatsapp
## # A tibble: 913 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    22 Mumbai         Working professional No                           Male  
##  4    26 Bengaluru      Sabbatical           Yes                          Female
##  5    50 Delhi          Working professional No                           Male  
##  6    25 Vishakhapatnam Working professional Yes                          Female
##  7    52 Jaipur         Working professional No                           Male  
##  8    45 Durgapur       Sabbatical           No                           Female
##  9    25 Bengaluru      Student              No                           Male  
## 10    27 Delhi          Student              Yes                          Male  
## # ℹ 903 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents with more than 1000 Instagram followers
more_than_1000_followers <- data[
  which(data[["How many followers do you have on Instagram?"]] > 1000),
]
more_than_1000_followers
## # A tibble: 199 × 40
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    25 Bengaluru Student              No                                Male  
##  2    23 Kolkata   Student              Yes                               Female
##  3    21 Chennai   Student              No                                Female
##  4    21 Chennai   Student              Yes                               Female
##  5    21 Mumbai    Sabbatical           Yes                               Female
##  6    57 Delhi     Sabbatical           Yes                               Male  
##  7    22 Delhi     Student              No                                Female
##  8    35 Mumbai    Working professional Yes                               Female
##  9    26 Ahmedabad Working professional No                                Male  
## 10    21 Mumbai    Student              No                                Female
## # ℹ 189 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
# Respondents with more than 100 Instagram posts
more_than_100_posts <- data[
  which(data[["How many posts do you have on Instagram?"]] > 100),
]
more_than_100_posts
## # A tibble: 444 × 40
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    25 Vishakhapatnam Working professional Yes                          Female
##  2    25 Bengaluru      Student              No                           Male  
##  3    25 Ahmedabad      Self Employed        Yes                          Male  
##  4    50 Kolkata        Working professional No                           Female
##  5    21 Chennai        Student              No                           Female
##  6    25 Kolkata        Working professional No                           Male  
##  7    45 Bagdogra       Working professional Yes                          Female
##  8    38 Bengaluru      Working professional No                           Female
##  9    24 Kolkata        Student              Yes                          Male  
## 10    21 Chennai        Student              Yes                          Female
## # ℹ 434 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, …
require(ggplot2)
## Loading required package: ggplot2
# Fit a linear regression of total social media usage on age, Instagram
# follower count and weekly Facebook minutes.
model <- lm(
  `Total Social Media Usage` ~ Age +
    `How many followers do you have on Instagram?` +
    `Time Spent on Facebook in last week (in minutes)`,
  data = data
)

# Fitted value for every respondent. `data` already carries the predictor
# columns under the names used in the formula, so it is passed as `newdata`
# directly instead of rebuilding and renaming a copy of those columns.
predictions <- predict(model, newdata = data)
predictions <- as.data.frame(predictions)

# One row per respondent: Age and Prediction as before, plus the observed
# response so that actual and predicted values can be compared.
plot_predict <- data.frame(
  Age = data$Age,
  Prediction = predictions$predictions,
  Actual = data$`Total Social Media Usage`
)

library(ggplot2)
# Scatter plot of observed vs predicted usage. The x axis previously showed
# Age although the title and axis label describe it as the actual value.
ggplot(plot_predict, aes(x = Actual, y = Prediction)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Actual vs Predicted Total Social Media Usage", x = "Actual", y = "Predicted")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Scatterplot of Instagram followers vs posts for two groups:
#   blue - respondents with more than 1000 followers
#   red  - respondents with at most 1000 followers but more than 100 posts
# plot() fixes the axis limits from the data it is given, so the limits are
# computed over BOTH groups; otherwise every red point (followers <= 1000)
# lies outside the x range set by the blue points and is never drawn.
# local() keeps the helper variables out of the global environment.
local({
  followers <- data$`How many followers do you have on Instagram?`
  posts <- data$`How many posts do you have on Instagram?`

  # which() discards rows where the comparison is NA
  many_followers <- which(followers > 1000)
  many_posts_only <- which(followers <= 1000 & posts > 100)
  shown <- c(many_followers, many_posts_only)

  plot(
    followers[many_followers], posts[many_followers],
    col = "blue",
    xlim = range(followers[shown]),
    ylim = range(posts[shown], na.rm = TRUE),
    xlab = "Number of followers", ylab = "Number of posts"
  )
  points(followers[many_posts_only], posts[many_posts_only], col = "red")
  legend("topright", legend = c("More than 1000 followers", "More than 100 posts"), col = c("blue", "red"), pch = 1)
})

require(ggplot2)

# Gender distribution as a pie chart: a single full-width stacked bar of
# counts, wrapped into polar coordinates along y
ggplot(data, aes(x = "", fill = Gender)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  ggtitle("Gender Distribution") +
  labs(fill = "Gender")

# Age distribution in 5-year bins; each bin is filled by the individual ages
# it contains
ggplot(data, aes(x = Age, fill = factor(Age))) +
  geom_histogram(color = "black", binwidth = 5) +
  scale_fill_viridis_d() +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count") +
  theme_minimal()

# Respondent counts per education level, with one dodged bar per gender and
# the count printed above each bar.
# The bar outline width is set with `linewidth`; using `size` for lines is
# deprecated since ggplot2 3.4.0 and triggers a warning.
ggplot(data, aes(x = `Highest Education`, fill = Gender)) +
  geom_bar(color = "black", linewidth = 0.5, width = 0.7, position = position_dodge()) +
  labs(title = "Education Level Distribution", x = "Education Level", y = "Count") +
  theme_minimal() +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Same dodge width as the bars so each label sits over its own bar
  geom_text(aes(label = after_stat(count)), stat = "count", position = position_dodge(width = 0.7), vjust = -0.5, size = 3)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Respondent counts per phone operating system, stacked by gender
ggplot(data, aes(x = `Phone OS`, fill = Gender)) +
  geom_bar() +
  ggtitle("Phone Operating System Distribution") +
  xlab("Phone Operating System") +
  ylab("Count")

library(ggplot2)

# Total Social Media Usage by Zone, with the genders dodged side by side.
# NOTE(review): every row is drawn as its own bar and dodging only separates
# the genders, so bars sharing a Zone and Gender presumably overlap and the
# visible height is the largest value, not a sum or mean. Confirm this is the
# intended summary.
ggplot(data, aes(x = `Zone`, y = `Total Social Media Usage`, fill = Gender)) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set3") +  # qualitative Brewer palette
  ggtitle("Gender by Zone/Area with Total Social Media Usage") +
  xlab("Zone/Area") +
  ylab("Total Social Media Usage") +
  theme_minimal()

# Phone operating system distribution as a pie chart (stacked count bar in
# polar coordinates)
ggplot(data, aes(x = "", fill = `Phone OS`)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  ggtitle("Phone Operating System Distribution") +
  labs(fill = "Phone Operating System")

# Weekly Facebook minutes against weekly Instagram minutes, one point per
# respondent
ggplot(
  data,
  aes(
    x = `Time Spent on Facebook in last week (in minutes)`,
    y = `Time Spent on Instagram in last week (in minutes)`
  )
) +
  geom_point(color = "#FFC0CB") +
  ggtitle("Time Spent on Facebook vs. Time Spent on Instagram") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Time Spent on Instagram (in minutes)")

# Distribution of Total Social Media Usage in 500-wide bins, drawn on a light
# blue plot background with enlarged black text
ggplot(data, aes(x = `Total Social Media Usage`)) +
  geom_histogram(color = "black", fill = "#FFC0CB", binwidth = 500) +
  ggtitle("Social Media Usage Distribution") +
  xlab("Total Social Media Usage (in minutes)") +
  ylab("Count") +
  theme_minimal() +
  # Applied after theme_minimal() so these overrides are not reset by it
  theme(
    plot.background = element_rect(fill = "#ADD8E6"),
    axis.text = element_text(size = 12, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black")
  )

library(ggplot2)

# Total Social Media Usage per Profession/Activity; the per-respondent values
# are stacked within each category
ggplot(
  data,
  aes(
    x = `Profession/Activity`,
    y = `Total Social Media Usage`,
    fill = `Profession/Activity`
  )
) +
  geom_col() +
  ggtitle("Total Social Media Usage by Profession/Activity") +
  xlab("Profession/Activity") +
  ylab("Total Social Media Usage") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set3")  # qualitative Brewer palette

# Distribution of weekly WhatsApp minutes in 100-minute bins, stacked by gender
ggplot(
  data,
  aes(x = `Time Spent on WhatsApp in last week (in minutes)`, fill = Gender)
) +
  geom_histogram(binwidth = 100) +
  ggtitle("Time Spent on WhatsApp Distribution") +
  xlab("Time Spent on WhatsApp (in minutes)") +
  ylab("Count")

# Mean Total Social Media Usage per education level (one bar per level)
ggplot(
  data,
  aes(
    x = `Highest Education`,
    y = `Total Social Media Usage`,
    fill = `Highest Education`
  )
) +
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Education Level vs. Social Media Usage") +
  xlab("Education Level") +
  ylab("Total Social Media Usage (in minutes)") +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90", "#FFD700")) +
  theme_minimal()

# Histogram of Instagram post counts in bins of 50
ggplot(data, aes(x = `How many posts do you have on Instagram?`)) +
  geom_histogram(fill = "#ADD8E6", binwidth = 50) +
  ggtitle("Number of Posts on Instagram Distribution") +
  xlab("Number of Posts on Instagram") +
  ylab("Count") +
  theme_minimal()

# Age against Instagram post count; points are also coloured by age
ggplot(
  data,
  aes(x = Age, y = `How many posts do you have on Instagram?`, color = Age)
) +
  geom_point() +
  ggtitle("Age vs Number of Posts on Instagram") +
  xlab("Age") +
  ylab("Number of Posts on Instagram")

# Histogram of weekly Facebook minutes in 100-minute bins
ggplot(data, aes(x = `Time Spent on Facebook in last week (in minutes)`)) +
  geom_histogram(fill = "pink", binwidth = 100) +
  ggtitle("Time Spent on Facebook Distribution") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Count") +
  theme_minimal()

# Age against weekly Facebook minutes; points are also coloured by age
ggplot(
  data,
  aes(
    x = Age,
    y = `Time Spent on Facebook in last week (in minutes)`,
    color = Age
  )
) +
  geom_point() +
  ggtitle("Age vs Time Spent on Facebook") +
  xlab("Age") +
  ylab("Time Spent on Facebook (in minutes)")

# Age against weekly Instagram minutes; points are also coloured by age
ggplot(
  data,
  aes(
    x = Age,
    y = `Time Spent on Instagram in last week (in minutes)`,
    color = Age
  )
) +
  geom_point() +
  ggtitle("Age vs Time Spent on Instagram") +
  xlab("Age") +
  ylab("Time Spent on Instagram (in minutes)")

# Histogram of Total Social Media Usage in 500-wide bins, stacked by gender,
# with the legend placed under the plot
ggplot(data, aes(x = `Total Social Media Usage`, fill = Gender)) +
  geom_histogram(binwidth = 500) +
  ggtitle("Total Social Media Usage Distribution") +
  xlab("Total Social Media Usage") +
  ylab("Count") +
  # Three colours are supplied, one more than the first two
  scale_fill_manual(values = c("#ADD8E6", "#FFC0CB", "#FF0000")) +
  theme_minimal() +
  theme(legend.position = "bottom")

library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 4.3.2
# Reload the survey CSV into `data` from an absolute path on the author's
# machine.
# NOTE(review): none of the preceding filter and plot statements reassign
# `data`, so this re-read looks redundant — confirm before removing.
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Outline of India taken from the "world" map data
india_map <- map_data("world", region = "India")

# Respondent locations (Longitude/Latitude) coloured by State, drawn with a
# fixed 1:1 aspect ratio on a plain theme, with the country outline on top
p <- ggplot(data, aes(x = Longitude, y = Latitude, color = State, fill = State)) +
  geom_point(size = 3) +
  coord_fixed(ratio = 1) +
  theme_minimal() +
  geom_polygon(
    data = india_map,
    mapping = aes(x = long, y = lat, group = group),
    color = "black",
    fill = NA
  )

print(p)

# Age against Total Social Media Usage
library(ggplot2)

# One point per respondent, also coloured by age
ggplot(data, aes(x = Age, y = `Total Social Media Usage`, color = Age)) +
  geom_point() +
  ggtitle("Age vs Total Social Media Usage") +
  xlab("Age") +
  ylab("Total Social Media Usage")

# Instagram follower count against Instagram post count
ggplot(
  data,
  aes(
    x = `How many followers do you have on Instagram?`,
    y = `How many posts do you have on Instagram?`
  )
) +
  geom_point() +
  ggtitle("Number of Followers vs Number of Posts on Instagram") +
  xlab("Number of Followers on Instagram") +
  ylab("Number of Posts on Instagram")

# Current status distribution as a pie chart (stacked count bar in polar
# coordinates)
ggplot(data, aes(x = "", fill = `Current Status`)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  ggtitle("Current Status Distribution") +
  labs(fill = "Current Status")

#.******************************************************************************************************************************************


# Read the full survey CSV into `data` again (absolute path on the author's
# machine); the select() call further down then narrows it to the age, gender
# and usage-total columns.
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the ggplot2 package
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ stringr   1.5.0
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::map()    masks maps::map()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Keep only age, gender and the overall and per-platform usage totals
data <- select(
  data,
  Age,
  Gender,
  `Total WhatsApp Usage`,
  `Total Social Media Usage`,
  `Total Facebook Usage`,
  `Total Instagram Usage`,
  `Total YouTube Usage (minutes)`
)



# Total Facebook usage by age, stacked and coloured by gender
ggplot(data, aes(x = Age, y = `Total Facebook Usage`, fill = Gender)) +
  geom_col() +
  ggtitle("Total Facebook Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)") +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "pink")) +
  theme_minimal()

# Total Instagram usage by age, stacked and coloured by gender
ggplot(data, aes(x = Age, y = `Total Instagram Usage`, fill = Gender)) +
  geom_col() +
  ggtitle("Total Instagram Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)") +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal()

# Total WhatsApp usage by age, stacked and coloured by gender
ggplot(data, aes(x = Age, y = `Total WhatsApp Usage`, fill = Gender)) +
  geom_col() +
  ggtitle("Total WhatsApp Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)") +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "pink")) +
  theme_minimal()

# Total YouTube usage by age, stacked and coloured by gender
ggplot(data, aes(x = Age, y = `Total YouTube Usage (minutes)`, fill = Gender)) +
  geom_col() +
  ggtitle("Total YouTube Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)") +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "skyblue")) +
  theme_minimal()

# Reshape to long format: the four per-platform usage columns are stacked,
# with the source column name in `Platform` and its value in `Total Usage`
data_long <- pivot_longer(
  data,
  cols = c(
    `Total Facebook Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`,
    `Total YouTube Usage (minutes)`
  ),
  names_to = "Platform",
  values_to = "Total Usage"
)

# One stacked-bar panel per platform, each panel with its own y scale
ggplot(data_long, aes(Age, `Total Usage`, fill = Gender)) +
  geom_col() +
  facet_wrap(~ Platform, scales = "free_y") +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  labs(
    title = "Social Media Usage by Age and Gender",
    x = "Age Group",
    y = "Total Usage (minutes)"
  ) +
  theme_minimal()

library(readr)
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load required libraries
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Restrict the data to the two variables used for clustering
data <- data[c("Age", "Income from YouTube (rs,month)")]

# Fit k-means with k = 2 clusters; the seed is fixed for reproducibility
# NOTE(review): the printed rows show ages in the tens and incomes in the tens
# of thousands, so the unscaled distances are presumably dominated by income.
# Consider scale() if that is not intended.
k <- 2
set.seed(123)
kmeans_result <- kmeans(x = data, centers = k)

# Store each row's cluster label as a factor column
data[["Cluster"]] <- as.factor(kmeans_result[["cluster"]])

# Show the data together with the assigned clusters
print(data)
## # A tibble: 1,628 × 3
##      Age `Income from YouTube (rs,month)` Cluster
##    <dbl>                            <dbl> <fct>  
##  1    24                            88447 2      
##  2    39                            64764 2      
##  3    22                             4387 1      
##  4    26                            99695 2      
##  5    50                            81297 2      
##  6    25                            51770 2      
##  7    52                            38003 1      
##  8    45                            54216 2      
##  9    25                            48149 1      
## 10    27                            12657 1      
## # ℹ 1,618 more rows
# Elbow method: total within-cluster sum of squares (WCSS) for k = 1..10
wcss <- numeric(10)

# Fit k-means once per candidate number of clusters and keep its total WCSS
for (i in seq_along(wcss)) {
  kmeans_temp <- kmeans(
    x = data[c("Age", "Income from YouTube (rs,month)")],
    centers = i
  )
  wcss[[i]] <- kmeans_temp[["tot.withinss"]]
}

# Draw the elbow curve and mark the value of `k` with a dashed red line
plot(
  x = seq_along(wcss), y = wcss, type = "b",
  xlab = "K (Number of Clusters)",
  ylab = "WCSS (Within-Cluster Sum of Squares)"
)
abline(v = k, col = "red", lty = 2)

# Scatter plot of the clustered points with the k-means centroids overlaid
ggplot(data, aes(Age, `Income from YouTube (rs,month)`, colour = Cluster)) +
  geom_point() +
  # Centroids come from the fitted model and are drawn as dark-red crosses
  geom_point(
    data = as.data.frame(kmeans_result$centers),
    mapping = aes(x = Age, y = `Income from YouTube (rs,month)`),
    colour = "darkred", shape = 4, size = 3
  ) +
  scale_colour_manual(values = c("blue", "green")) +
  labs(
    title = "K-Means Clustering (k = 2) with Centroids",
    x = "Age",
    y = "Income from YouTube"
  )

library(readr)
library(dplyr)
library(ggplot2)

# Load the survey data from the CSV file
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 'Age' and 'Total Facebook Usage', dropping rows with missing values
data <- data %>%
  select(Age, `Total Facebook Usage`) %>%
  na.omit()

# Make sure 'Age' is stored as numeric
data[["Age"]] <- as.numeric(data[["Age"]])

# Fit a simple linear regression of Facebook usage on age
model <- lm(formula = `Total Facebook Usage` ~ Age, data = data)

# Predicted usage at ages 31 to 35 (kept in `prediction`, not printed here)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(object = model, newdata = new_data)

# Label each row "Younger" (Age <= 35) or "Older" (Age > 35)
data <- mutate(data, Group = ifelse(Age <= 35, "Younger", "Older"))

# Scatter plot coloured by 'Group', with a single blue regression line over
# all points and a dashed vertical marker at age 35
ggplot(data, aes(Age, `Total Facebook Usage`, colour = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_colour_manual(values = c("Younger" = "green", "Older" = "purple")) +
  labs(
    title = "Age vs. Total Facebook Usage",
    x = "Age",
    y = "Total Facebook Usage"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)

# Load the survey data from the CSV file
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 'Age' and 'Total Instagram Usage', dropping rows with missing values
data <- data %>%
  select(Age, `Total Instagram Usage`) %>%
  na.omit()

# Make sure 'Age' is stored as numeric
data[["Age"]] <- as.numeric(data[["Age"]])

# Fit a simple linear regression of Instagram usage on age
model <- lm(formula = `Total Instagram Usage` ~ Age, data = data)

# Predicted usage at ages 31 to 35 (kept in `prediction`, not printed here)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(object = model, newdata = new_data)

# Label each row "Younger" (Age <= 35) or "Older" (Age > 35)
data <- mutate(data, Group = ifelse(Age <= 35, "Younger", "Older"))

# Scatter plot coloured by 'Group', with a single blue regression line over
# all points and a dashed vertical marker at age 35
ggplot(data, aes(Age, `Total Instagram Usage`, colour = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_colour_manual(values = c("Younger" = "maroon", "Older" = "blue")) +
  labs(
    title = "Age vs. Total Instagram Usage",
    x = "Age",
    y = "Total Instagram Usage"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)

# Load the survey data from the CSV file
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 'Age' and 'Total WhatsApp Usage', dropping rows with missing values
data <- data %>%
  select(Age, `Total WhatsApp Usage`) %>%
  na.omit()

# Make sure 'Age' is stored as numeric
data[["Age"]] <- as.numeric(data[["Age"]])

# Fit a simple linear regression of WhatsApp usage on age
model <- lm(formula = `Total WhatsApp Usage` ~ Age, data = data)

# Predicted usage at ages 31 to 35 (kept in `prediction`, not printed here)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(object = model, newdata = new_data)

# Label each row "Younger" (Age <= 35) or "Older" (Age > 35)
data <- mutate(data, Group = ifelse(Age <= 35, "Younger", "Older"))

# Scatter plot coloured by 'Group', with a single blue regression line over
# all points and a dashed vertical marker at age 35
ggplot(data, aes(Age, `Total WhatsApp Usage`, colour = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_colour_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(
    title = "Age vs. Total WhatsApp Usage",
    x = "Age",
    y = "Total WhatsApp Usage"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)

# Load the survey data from the CSV file
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 'Age' and 'Total Social Media Usage', dropping rows with missing values
data <- data %>%
  select(Age, `Total Social Media Usage`) %>%
  na.omit()

# Make sure 'Age' is stored as numeric
data[["Age"]] <- as.numeric(data[["Age"]])

# Fit a simple linear regression of total social media usage on age
model <- lm(formula = `Total Social Media Usage` ~ Age, data = data)

# Predicted usage at ages 31 to 35 (kept in `prediction`, not printed here)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(object = model, newdata = new_data)

# Label each row "Younger" (Age <= 35) or "Older" (Age > 35)
data <- mutate(data, Group = ifelse(Age <= 35, "Younger", "Older"))

# Scatter plot coloured by 'Group', with a single blue regression line over
# all points and a dashed vertical marker at age 35
ggplot(data, aes(Age, `Total Social Media Usage`, colour = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_colour_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(
    title = "Age vs. Total Social Media Usage",
    x = "Age",
    y = "Total Social Media Usage"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)

# Load the survey data from the CSV file
data <- read_csv(file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 'Age' plus the four usage columns and drop rows with missing values
data <- data %>%
  select(
    Age,
    `Total Facebook Usage`,
    `Total Social Media Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`
  ) %>%
  na.omit()

# Make sure 'Age' is stored as numeric
data[["Age"]] <- as.numeric(data[["Age"]])

# For each usage column, regress it on Age and print the fitted values at
# ages 31 to 35
platforms <- c("Facebook", "Social Media", "Instagram", "WhatsApp")

# The prediction grid is identical for every platform, so build it once
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))

for (platform in platforms) {
  # Column names contain spaces, hence the backticks in the formula text
  model <- lm(
    as.formula(paste0("`Total ", platform, " Usage` ~ Age")),
    data = data
  )
  prediction <- predict(object = model, newdata = new_data)
  cat("Predicted values for Total", platform, "Usage:", prediction, "\n")
}
## Predicted values for Total Facebook Usage: 284.0396 292.0368 300.0339 308.031 316.0281 
## Predicted values for Total Social Media Usage: 2002.076 1973.391 1944.706 1916.022 1887.337 
## Predicted values for Total Instagram Usage: 615.9642 590.8018 565.6394 540.477 515.3145 
## Predicted values for Total WhatsApp Usage: 1102.072 1090.553 1079.033 1067.514 1055.994
# Reshape to long format (every column except Age becomes a Platform/Usage
# pair) and label each row "Younger" (Age <= 35) or "Older" (Age > 35)
data_long <- data %>%
  pivot_longer(cols = -Age, names_to = "Platform", values_to = "Usage") %>%
  mutate(Group = ifelse(Age <= 35, "Younger", "Older"))

# One panel per usage column: points coloured by 'Group', a single blue
# regression line per panel, and a dashed vertical marker at age 35
ggplot(data_long, aes(Age, Usage, colour = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  geom_vline(xintercept = 35, linetype = "dashed") +
  facet_wrap(~Platform, scales = "free_y") +
  scale_colour_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(
    title = "Age vs. Social Media Usage by Platform",
    x = "Age",
    y = "Usage"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Closing "thank you" slide: a blank canvas with a single centred text label
ggplot() +
  # `size` is a fixed setting, so it goes outside aes(); inside aes() it is
  # treated as a mapped aesthetic (rescaled by the size scale, with a legend)
  geom_text(aes(x = 0.5, y = 0.5, label = "Thank You Mam"), size = 10) +
  theme_void()