There are 385 diamonds.
There are 21 diamonds that are over 15,000 per carat.
## cut color clarity
## 27415 Fair J I1
## 27630 Fair J I1
## 27130 Fair H I1
## 25999 Premium J I1
## 25998 Premium I I1
The average prices for each color are shown in the following table.
## D E F G H I J
## 3169.95 3076.75 3724.89 3999.14 4486.67 5091.87 5323.82
The minimum prices for each clarity are shown in the following table.
## I1 IF SI1 SI2 VS1 VS2 VVS1 VVS2
## 345 369 326 326 327 334 336 336
The maximum prices for each cut are shown in the following table.
## Fair Good Ideal Premium Very Good
## 18574 18788 18806 18823 18818
## min max count mean
## D 357 18693 6775 3169.95
## E 326 18731 9797 3076.75
## F 342 18791 9542 3724.89
## G 354 18818 11292 3999.14
## H 337 18803 8304 4486.67
## I 334 18823 5422 5091.87
## J 335 18710 2808 5323.82
import pandas as pan
import numpy as np
def count(x):
    """Return the number of non-missing values in *x*.

    This is a thin wrapper around ``pandas.Series.count``. Because it is a
    named function, pandas labels the resulting column ``count`` when it is
    passed to an aggregation.

    Args:
        x: A pandas Series.

    Returns:
        The count of non-NA entries in ``x``.
    """
    return pan.Series.count(x)
# Read the diamonds CSV published in the tidyverse/ggplot2 repository.
diamond_data = pan.read_csv(
    'https://raw.githubusercontent.com/tidyverse/ggplot2/main/data-raw/diamonds.csv'
)

# Distinct values observed in each of the three categorical columns.
Cut_Unique_values = pan.unique(diamond_data.cut)
Color_Unique_values = pan.unique(diamond_data.color)
Clarity_Unique_values = pan.unique(diamond_data.clarity)
# Question 1: how many diamonds have color F and clarity IF.
Question_1 = np.sum(
    (diamond_data.clarity == 'IF') & (diamond_data.color == 'F')
)
print(Question_1)

# Add a derived column: price divided by carat weight.
diamond_data = diamond_data.assign(
    Price_per_carat=lambda frame: frame['price'] / frame['carat']
)

# Question 2: how many diamonds have a price per carat above 15000.
Question_2 = np.sum(diamond_data.Price_per_carat > 15000)
print(Question_2)
# Question 3: diamonds weighing between 2 and 2.5 carats (both ends
# inclusive), then narrowed to the Good and Premium cuts.
Question_3 = diamond_data[diamond_data['carat'].between(2, 2.5)]
Question_3_2 = Question_3[Question_3['cut'].isin(["Good", "Premium"])]
print(Question_3_2.head(5))
# Question 4: mean price per carat of Premium-cut diamonds with IF clarity,
# rounded to two decimals for display.
Question_4 = diamond_data[
    (diamond_data.clarity == 'IF') & (diamond_data.cut == "Premium")
]
print('\n\n Q4', round(Question_4.Price_per_carat.mean(), 2))
# Question 5: sort by carat, heaviest first, and show columns 1-3
# (selected by position) for the top five rows.
Question_5 = diamond_data.sort_values('carat', ascending=False).copy()
Question_5_a = Question_5.iloc[:, 1:4]
print(Question_5_a.head(5))
# Question 6: mean price per color, rounded to two decimals.
Question_6 = (
    diamond_data.groupby('color')['price']
    .mean(numeric_only=True)
    .round(2)
)

# Question 7: lowest price within each clarity grade.
Question_7 = diamond_data.groupby('clarity').price.min()

# Question 8: highest price within each cut.
Question_8 = diamond_data.groupby('cut').price.max()

# NOTE(review): this prints the five heaviest diamonds (all columns), not
# Question_8 -- confirm whether that is intended.
print(diamond_data.sort_values(by='carat', ascending=False).head(5))
# Question 9: per-color price summary with min, max, count and mean columns.
# The aggregations are given by name instead of passing the builtin min/max
# and np.mean callables: pandas 2.x emits a FutureWarning for those callables
# inside aggregate() and recommends the string form. The string names produce
# the same column labels ('min', 'max', 'count', 'mean'), and 'count' counts
# non-missing values per group.
Question_9 = diamond_data.groupby('color')['price'].aggregate(
    ['min', 'max', 'count', 'mean']
)
# Round only the mean column; the other columns are integer prices/counts.
Question_9['mean'] = Question_9['mean'].round(2)