import pandas as pd
import numpy as np6 Pandas
Xdf=pd.DataFrame(np.arange(16).reshape(4,4))
Xdf| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
| 3 | 12 | 13 | 14 | 15 |
Xdf[0]0 0
1 4
2 8
3 12
Name: 0, dtype: int64
Xdf[0:3]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
Xdf[Xdf[1]<10]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
Xdf[Xdf<10]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0.0 | 1.0 | 2.0 | 3.0 |
| 1 | 4.0 | 5.0 | 6.0 | 7.0 |
| 2 | 8.0 | 9.0 | NaN | NaN |
| 3 | NaN | NaN | NaN | NaN |
Xdf[[0,3,2]]| 0 | 3 | 2 | |
|---|---|---|---|
| 0 | 0 | 3 | 2 |
| 1 | 4 | 7 | 6 |
| 2 | 8 | 11 | 10 |
| 3 | 12 | 15 | 14 |
Xdf.loc[0:1]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
Xdf.iloc[0:1]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
Xdf[0:1]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
# Exercise 2
DF=pd.DataFrame(dict(X1=np.random.uniform(0,1,15),
X2=np.random.standard_normal(15),
X3=1))
DF| X1 | X2 | X3 | |
|---|---|---|---|
| 0 | 0.002925 | -0.693743 | 1 |
| 1 | 0.033349 | 0.774359 | 1 |
| 2 | 0.879925 | -0.317643 | 1 |
| 3 | 0.810953 | 0.087859 | 1 |
| 4 | 0.655664 | 0.336143 | 1 |
| 5 | 0.748835 | -0.021620 | 1 |
| 6 | 0.971919 | -0.463584 | 1 |
| 7 | 0.933795 | 0.763103 | 1 |
| 8 | 0.898300 | -0.311474 | 1 |
| 9 | 0.382806 | -0.697514 | 1 |
| 10 | 0.621569 | 1.838894 | 1 |
| 11 | 0.693038 | 0.949865 | 1 |
| 12 | 0.629064 | -0.204435 | 1 |
| 13 | 0.961133 | 2.522102 | 1 |
| 14 | 0.370875 | 1.190071 | 1 |
DF[(DF['X1']>0.5)&(DF['X1']<=0.8)].indexIndex([4, 5, 10, 11, 12], dtype='int64')
DF.loc[(DF['X1']>0.5)&(DF['X1']<=0.8)].indexIndex([4, 5, 10, 11, 12], dtype='int64')
DF.loc[(DF['X1']>0.5)&(DF['X1']<=0.8),:].indexIndex([4, 5, 10, 11, 12], dtype='int64')
DF.iloc[np.array((DF['X1']>0.5)&(DF['X1']<=0.8))].indexIndex([4, 5, 10, 11, 12], dtype='int64')
DF[['X3','X2','X1']]| X3 | X2 | X1 | |
|---|---|---|---|
| 0 | 1 | -0.693743 | 0.002925 |
| 1 | 1 | 0.774359 | 0.033349 |
| 2 | 1 | -0.317643 | 0.879925 |
| 3 | 1 | 0.087859 | 0.810953 |
| 4 | 1 | 0.336143 | 0.655664 |
| 5 | 1 | -0.021620 | 0.748835 |
| 6 | 1 | -0.463584 | 0.971919 |
| 7 | 1 | 0.763103 | 0.933795 |
| 8 | 1 | -0.311474 | 0.898300 |
| 9 | 1 | -0.697514 | 0.382806 |
| 10 | 1 | 1.838894 | 0.621569 |
| 11 | 1 | 0.949865 | 0.693038 |
| 12 | 1 | -0.204435 | 0.629064 |
| 13 | 1 | 2.522102 | 0.961133 |
| 14 | 1 | 1.190071 | 0.370875 |
DF.loc[DF['X2']<0,['X1']]| X1 | |
|---|---|
| 0 | 0.002925 |
| 2 | 0.879925 |
| 5 | 0.748835 |
| 6 | 0.971919 |
| 8 | 0.898300 |
| 9 | 0.382806 |
| 12 | 0.629064 |
6.0.0.1 Input / Output functions in Pandas
# read_csv, read_excel, read_table, read_fwf ...
import os
os.getcwd()'/Users/hwan/Desktop/Homepage/study_24spring'
pd.read_csv('pandas.csv')| S.No | Name | Age | City | Salary | |
|---|---|---|---|---|---|
| 0 | 1 | Tom | 28 | Toronto | 20000 |
| 1 | 2 | Lee | 32 | HongKong | 3000 |
| 2 | 3 | Steven | 43 | Bay Area | 8300 |
| 3 | 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',sep=',')| S.No | Name | Age | City | Salary | |
|---|---|---|---|---|---|
| 0 | 1 | Tom | 28 | Toronto | 20000 |
| 1 | 2 | Lee | 32 | HongKong | 3000 |
| 2 | 3 | Steven | 43 | Bay Area | 8300 |
| 3 | 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',index_col=0)| Name | Age | City | Salary | |
|---|---|---|---|---|
| S.No | ||||
| 1 | Tom | 28 | Toronto | 20000 |
| 2 | Lee | 32 | HongKong | 3000 |
| 3 | Steven | 43 | Bay Area | 8300 |
| 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',index_col='Name')| S.No | Age | City | Salary | |
|---|---|---|---|---|
| Name | ||||
| Tom | 1 | 28 | Toronto | 20000 |
| Lee | 2 | 32 | HongKong | 3000 |
| Steven | 3 | 43 | Bay Area | 8300 |
| Ram | 4 | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',header=None)| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| 0 | S.No | Name | Age | City | Salary |
| 1 | 1 | Tom | 28 | Toronto | 20000 |
| 2 | 2 | Lee | 32 | HongKong | 3000 |
| 3 | 3 | Steven | 43 | Bay Area | 8300 |
| 4 | 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',names=[0,1,2,3,4])| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| 0 | S.No | Name | Age | City | Salary |
| 1 | 1 | Tom | 28 | Toronto | 20000 |
| 2 | 2 | Lee | 32 | HongKong | 3000 |
| 3 | 3 | Steven | 43 | Bay Area | 8300 |
| 4 | 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',names=[1,2,3,4])| 1 | 2 | 3 | 4 | |
|---|---|---|---|---|
| S.No | Name | Age | City | Salary |
| 1 | Tom | 28 | Toronto | 20000 |
| 2 | Lee | 32 | HongKong | 3000 |
| 3 | Steven | 43 | Bay Area | 8300 |
| 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',dtype={'Salary':np.float64})| S.No | Name | Age | City | Salary | |
|---|---|---|---|---|---|
| 0 | 1 | Tom | 28 | Toronto | 20000.0 |
| 1 | 2 | Lee | 32 | HongKong | 3000.0 |
| 2 | 3 | Steven | 43 | Bay Area | 8300.0 |
| 3 | 4 | Ram | 38 | Hyderabad | 3900.0 |
pd.read_csv('pandas.csv',names=['a','b','c','d','e'])| a | b | c | d | e | |
|---|---|---|---|---|---|
| 0 | S.No | Name | Age | City | Salary |
| 1 | 1 | Tom | 28 | Toronto | 20000 |
| 2 | 2 | Lee | 32 | HongKong | 3000 |
| 3 | 3 | Steven | 43 | Bay Area | 8300 |
| 4 | 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',names=['a','b','c','d'],header=0,index_col=0)| a | b | c | d | |
|---|---|---|---|---|
| 1 | Tom | 28 | Toronto | 20000 |
| 2 | Lee | 32 | HongKong | 3000 |
| 3 | Steven | 43 | Bay Area | 8300 |
| 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',names=['a','b','c','d'],skiprows=1,index_col=0)| a | b | c | d | |
|---|---|---|---|---|
| 1 | Tom | 28 | Toronto | 20000 |
| 2 | Lee | 32 | HongKong | 3000 |
| 3 | Steven | 43 | Bay Area | 8300 |
| 4 | Ram | 38 | Hyderabad | 3900 |
pd.read_csv('pandas.csv',na_values=['Tom'])| S.No | Name | Age | City | Salary | |
|---|---|---|---|---|---|
| 0 | 1 | NaN | 28 | Toronto | 20000 |
| 1 | 2 | Lee | 32 | HongKong | 3000 |
| 2 | 3 | Steven | 43 | Bay Area | 8300 |
| 3 | 4 | Ram | 38 | Hyderabad | 3900 |
6.0.0.2 Binary Operations
A = pd.DataFrame(np.random.randint(0, 20, (2, 2)), columns=list('AB'))
B = pd.DataFrame(np.random.randint(0, 10, (3, 3)), columns=list('BAC'))
print( A, B, sep="\n\n") A B
0 0 1
1 18 1
B A C
0 1 9 9
1 4 6 2
2 4 1 0
A+B| A | B | C | |
|---|---|---|---|
| 0 | 9.0 | 2.0 | NaN |
| 1 | 24.0 | 5.0 | NaN |
| 2 | NaN | NaN | NaN |
A.add(B,fill_value=0)| A | B | C | |
|---|---|---|---|
| 0 | 9.0 | 2.0 | 9.0 |
| 1 | 24.0 | 5.0 | 2.0 |
| 2 | 1.0 | 4.0 | 0.0 |
B-B.loc[0,:]| B | A | C | |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 3 | -3 | -7 |
| 2 | 3 | -8 | -9 |
B-B.loc[0:0,:]| B | A | C | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | NaN | NaN | NaN |
| 2 | NaN | NaN | NaN |
B.sub(B.loc[0,:])| B | A | C | |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 3 | -3 | -7 |
| 2 | 3 | -8 | -9 |
B.sub(B['A'],axis=0)| B | A | C | |
|---|---|---|---|
| 0 | -8 | 0 | 0 |
| 1 | -2 | 0 | -4 |
| 2 | 3 | 0 | -1 |
6.0.0.3 Pandas DataFrame Manipulation
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.rand(7, 5))
df| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| 0 | 0.254541 | 0.022793 | 0.292160 | 0.256052 | 0.742068 |
| 1 | 0.959650 | 0.210256 | 0.937732 | 0.523141 | 0.660096 |
| 2 | 0.694372 | 0.674311 | 0.502167 | 0.164087 | 0.527422 |
| 3 | 0.591313 | 0.952835 | 0.800817 | 0.357809 | 0.029770 |
| 4 | 0.004058 | 0.930224 | 0.908414 | 0.104988 | 0.372187 |
| 5 | 0.062482 | 0.480021 | 0.220693 | 0.820707 | 0.578013 |
| 6 | 0.678025 | 0.481827 | 0.727163 | 0.119174 | 0.252283 |
df[[0,1]]| 0 | 1 | |
|---|---|---|
| 0 | 0.254541 | 0.022793 |
| 1 | 0.959650 | 0.210256 |
| 2 | 0.694372 | 0.674311 |
| 3 | 0.591313 | 0.952835 |
| 4 | 0.004058 | 0.930224 |
| 5 | 0.062482 | 0.480021 |
| 6 | 0.678025 | 0.481827 |