knitr::opts_chunk$set(echo = TRUE)

Pandas Cheat Sheets

I. Creating a DataFrame

1. From Dictionary of Lists

import pandas as pd
# Build a DataFrame from a dictionary: each key becomes a column label and
# each list holds that column's values. `index` supplies the row labels.
data_by_column = {
    'a': [11, 12, 13],
    'b': [21, 22, 21],
    'c': ['aa', 'bb', 'cc'],
}
row_labels = [1, 2, 3]
df1 = pd.DataFrame(data=data_by_column, index=row_labels)
print(df1)
##     a   b   c
## 1  11  21  aa
## 2  12  22  bb
## 3  13  21  cc

2. From List of Lists

# Build a DataFrame from a list of rows: every inner list is one record,
# so both the column labels and the row labels are passed explicitly.
records = [
    [11, 21, 'aa'],
    [12, 22, 'bb'],
    [13, 23, 'cc'],
]
df2 = pd.DataFrame(data=records, index=[1, 2, 3], columns=['a', 'b', 'c'])
print(df2)
##     a   b   c
## 1  11  21  aa
## 2  12  22  bb
## 3  13  23  cc

3. From Lists with Zip

# Three parallel lists, one per column.
ls1 = [11, 12, 13]
ls2 = [21, 22, 23]
ls3 = ['aa', 'bb', 'cc']
# zip() groups the i-th items of each list into one row tuple. No index is
# passed here, so the rows get the default 0-based labels.
zipped_rows = list(zip(ls1, ls2, ls3))
df3 = pd.DataFrame(zipped_rows, columns=['a', 'b', 'c'])
print(df3)
##     a   b   c
## 0  11  21  aa
## 1  12  22  bb
## 2  13  23  cc

4. Convert a Series to a DataFrame

# Wrap a plain list in a Series, then promote it to a one-column DataFrame;
# `name` becomes the label of that single column.
ls = [1, 2, 3]
ser = pd.Series(data=ls)
ser.to_frame(name='Name')
##    Name
## 0     1
## 1     2
## 2     3

5. Convert String Columns to Integer

# Start from a frame whose numeric-looking columns 'a' and 'b' hold strings.
df_string = pd.DataFrame(
  {'a': ['11', '12', '13'],
   'b': ['21', '22', '21'],
   'c': ['aa', 'bb', 'cc']},
   index = [1, 2, 3]
   )
print(df_string)
##     a   b   c
## 1  11  21  aa
## 2  12  22  bb
## 3  13  21  cc
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would only append a stray "None" line.
df_string.info()
## <class 'pandas.core.frame.DataFrame'>
## Int64Index: 3 entries, 1 to 3
## Data columns (total 3 columns):
##  #   Column  Non-Null Count  Dtype 
## ---  ------  --------------  ----- 
##  0   a       3 non-null      object
##  1   b       3 non-null      object
##  2   c       3 non-null      object
## dtypes: object(3)
## memory usage: 96.0+ bytes
# Cast only the columns that contain digit strings; 'c' holds non-numeric
# text and is left untouched.
numeric_cols = ['a', 'b']
df_string[numeric_cols] = df_string[numeric_cols].astype(int)
print(df_string)
##     a   b   c
## 1  11  21  aa
## 2  12  22  bb
## 3  13  21  cc
# The printed frame looks the same as before; info() confirms that the dtypes
# of 'a' and 'b' changed from object to int64.
df_string.info()
## <class 'pandas.core.frame.DataFrame'>
## Int64Index: 3 entries, 1 to 3
## Data columns (total 3 columns):
##  #   Column  Non-Null Count  Dtype 
## ---  ------  --------------  ----- 
##  0   a       3 non-null      int64 
##  1   b       3 non-null      int64 
##  2   c       3 non-null      object
## dtypes: int64(2), object(1)
## memory usage: 96.0+ bytes

To be continued.