7  7. Pandas Practice

import numpy as np
import pandas as pd
import FinanceDataReader as fdr

7.1 Practice 1 : Magic formula investing

krx = fdr.StockListing('KRX')
krx.iloc[:, :4]
Code ISU_CD Name Market
0 005930 KR7005930003 삼성전자 KOSPI
1 000660 KR7000660001 SK하이닉스 KOSPI
2 373220 KR7373220003 LG에너지솔루션 KOSPI
3 005380 KR7005380001 현대차 KOSPI
4 207940 KR7207940008 삼성바이오로직스 KOSPI
... ... ... ... ...
2805 002995 KR7002991008 금호건설우 KOSPI
2806 266170 KR7266170000 뿌리깊은나무들 KONEX
2807 217320 KR7217320001 썬테크 KONEX
2808 245450 KR7245450002 씨앤에스링크 KONEX
2809 308700 KR7308700004 테크엔 KONEX

2810 rows × 4 columns

df = pd.read_csv('investment_hw/PER_ROA.csv')
df.iloc[:5, [0, 3, 5, 6, 7, 8, 9]]
종목명 등락률 거래량 시가총액 영업이익 PER ROA
0 AJ네트웍스 -1.48% 27984 2177 -213.0 4.14 2.18
1 AJ렌터카 -0.47% 50886 2325 218.0 61.40 0.28
2 AK홀딩스 -1.99% 15535 4564 2697.0 4.27 6.52
3 ARIRANG 200 -0.10% 234206 7572 NaN NaN NaN
4 ARIRANG 200동일가중 -0.19% 31 46 NaN NaN NaN
df = df.loc [ df.isnull().sum(axis=1) == 0, : ]
per = per_val = df['PER']
per_val[ per < 0 ] = np.nan
per_rank = per_val.rank( ascending=True, na_option='bottom')
per_rank.head()
0      35.0
1     519.0
2      38.0
50    222.0
51    408.0
Name: PER, dtype: float64
roa= roa_val =  df['ROA']
roa_val[roa < 0] = np.nan
roa_rank = roa_val.rank( ascending=False, na_option='bottom')
roa_rank.head()
0     355.0
1     556.0
2     110.0
50    239.5
51     37.0
Name: ROA, dtype: float64
result = per_rank + roa_rank
result_rank = result.rank( ascending=True, na_option='bottom')
result_rank[ result_rank > 10 ] = 0
result_rank [ result_rank > 0 ] = 1
result_rank
0       0.0
1       0.0
2       0.0
50      0.0
51      0.0
       ... 
1522    0.0
1523    0.0
1524    0.0
1526    0.0
1529    0.0
Length: 758, dtype: float64
result_rank.sum()
10.0
mf_df = df.loc[ result_rank > 0 , ['종목명', '시가총액']].copy()
mf_df
종목명 시가총액
96 HDC 7468
440 SIMPAC 1835
459 SK하이닉스 541634
749 대한유화 7508
1040 세아제강지주 2044
1057 신대양제지 2756
1157 에쓰씨엔지니어링 354
1298 케이씨 1694
1443 한일홀딩스 2904
1513 효성 18353
mf_stock_list = df.loc[ result_rank > 0, '종목명'].values
mf_stock_list
array(['HDC', 'SIMPAC', 'SK하이닉스', '대한유화', '세아제강지주', '신대양제지', '에쓰씨엔지니어링',
       '케이씨', '한일홀딩스', '효성'], dtype=object)
krx.iloc[:,:4].head()
Code ISU_CD Name Market
0 005930 KR7005930003 삼성전자 KOSPI
1 000660 KR7000660001 SK하이닉스 KOSPI
2 373220 KR7373220003 LG에너지솔루션 KOSPI
3 005380 KR7005380001 현대차 KOSPI
4 207940 KR7207940008 삼성바이오로직스 KOSPI
mf_df['종목코드']=''
for stock in mf_stock_list:
    mf_df.loc[ mf_df['종목명']==stock, '종목코드']=krx[krx['Name']==stock]['Code'].values
mf_df
종목명 시가총액 종목코드
96 HDC 7468 012630
440 SIMPAC 1835 009160
459 SK하이닉스 541634 000660
749 대한유화 7508 006650
1040 세아제강지주 2044 003030
1057 신대양제지 2756 016590
1157 에쓰씨엔지니어링 354 023960
1298 케이씨 1694 029460
1443 한일홀딩스 2904 003300
1513 효성 18353 004800
for x in mf_df['종목코드'].values :
    df = fdr.DataReader( x, '2019-01-01', '2019-12-31' )
    
    cum_ret = df.loc[df.index[-1], 'Close'] / df.loc[df.index[0], 'Close']-1
    mf_df.loc[mf_df['종목코드']==x, '수익률'] = cum_ret
    
    historical_max = df['Close'].cummax()
    daily_drawdown = df['Close']/historical_max - 1.
    
    MDD = daily_drawdown.min()
    mf_df.loc[mf_df['종목코드']==x, '최대낙폭'] = MDD
    
    df['daily_rtn'] = df['Close'].pct_change( periods = 1 )
    VOL = df['daily_rtn'].std()*np.sqrt(252)
    mf_df.loc[mf_df['종목코드']==x, '변동성'] = VOL
    
    df = None
mf_df.sort_values('시가총액', ascending=False)
종목명 시가총액 종목코드 수익률 최대낙폭 변동성
459 SK하이닉스 541634 000660 0.552805 -0.228606 0.353011
1513 효성 18353 004800 0.649635 -0.115124 0.300178
749 대한유화 7508 006650 -0.203390 -0.407609 0.346504
96 HDC 7468 012630 -0.332326 -0.504808 0.330864
1443 한일홀딩스 2904 003300 -0.197424 -0.363892 0.324106
1057 신대양제지 2756 016590 0.034783 -0.388466 0.364506
1040 세아제강지주 2044 003030 -0.008368 -0.228353 0.240910
440 SIMPAC 1835 009160 0.206967 -0.299566 0.407815
1298 케이씨 1694 029460 0.594017 -0.369942 0.442983
1157 에쓰씨엔지니어링 354 023960 -0.062092 -0.479592 0.565059

7.2 Practice 2 : Mean reversion strategy

df = pd.read_csv('investment_hw/SPY.csv')
df.head()
Date Open High Low Close Adj Close Volume
0 1993-01-29 43.96875 43.96875 43.75000 43.93750 26.706757 1003200
1 1993-02-01 43.96875 44.25000 43.96875 44.25000 26.896694 480500
2 1993-02-02 44.21875 44.37500 44.12500 44.34375 26.953669 201300
3 1993-02-03 44.40625 44.84375 44.37500 44.81250 27.238594 529400
4 1993-02-04 44.96875 45.09375 44.46875 45.00000 27.352570 531500
df.describe()
Open High Low Close Adj Close Volume
count 6648.000000 6648.000000 6648.000000 6648.000000 6648.000000 6.648000e+03
mean 133.762935 134.541071 132.893598 133.759854 110.399391 8.440122e+07
std 59.488006 59.671285 59.277882 59.492056 64.113369 9.837713e+07
min 43.343750 43.531250 42.812500 43.406250 26.383823 5.200000e+03
25% 96.780937 97.735000 95.726562 96.921875 71.256485 6.966775e+06
50% 124.433750 125.335938 123.500000 124.312500 93.641503 5.709990e+07
75% 151.702503 152.514999 150.810624 151.791714 125.251474 1.229908e+08
max 296.040009 296.309998 293.760010 295.859985 294.427979 8.710263e+08
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       6648 non-null   object 
 1   Open       6648 non-null   float64
 2   High       6648 non-null   float64
 3   Low        6648 non-null   float64
 4   Close      6648 non-null   float64
 5   Adj Close  6648 non-null   float64
 6   Volume     6648 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 363.7+ KB
df['Date']=pd.to_datetime(df.Date)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       6648 non-null   datetime64[ns]
 1   Open       6648 non-null   float64       
 2   High       6648 non-null   float64       
 3   Low        6648 non-null   float64       
 4   Close      6648 non-null   float64       
 5   Adj Close  6648 non-null   float64       
 6   Volume     6648 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 363.7 KB
price_df = df.loc[:,['Date', 'Adj Close']].copy()
price_df.head()
Date Adj Close
0 1993-01-29 26.706757
1 1993-02-01 26.896694
2 1993-02-02 26.953669
3 1993-02-03 27.238594
4 1993-02-04 27.352570
price_df.set_index(['Date'], inplace=True)
price_df.head()
Adj Close
Date
1993-01-29 26.706757
1993-02-01 26.896694
1993-02-02 26.953669
1993-02-03 27.238594
1993-02-04 27.352570
# 볼린저 밴드
# - 상단 밴드 : 중간밴드 + 2*20일 이동표준편차
# - 중간 밴드 : 20일 이동표준편차
# - 하단 밴드 : 중간밴드 - 2*20일 이동표준편차
n = 20
sigma = 2
def bollinger_band(price_df, n, sigma):
    bb = price_df.copy()
    bb['center']= price_df['Adj Close'].rolling(n).mean()
    bb['ub']= bb['center'] + sigma * price_df['Adj Close'].rolling(window=n).std()
    bb['lb']= bb['center'] - sigma * price_df['Adj Close'].rolling(window=n).std()
    return bb
bollinger = bollinger_band(price_df, n, sigma)
bollinger.tail()
Adj Close center ub lb
Date
2019-06-18 290.984741 282.981668 293.213256 272.750080
2019-06-19 291.641541 283.307582 294.219075 272.396089
2019-06-20 294.427979 283.816605 295.809022 271.824187
2019-06-21 294.000000 284.477884 297.200319 271.755449
2019-06-24 293.640015 285.089319 298.355028 271.823610
base_date = '2009-01-02'
sample = bollinger.loc[base_date:]
sample.head()
Adj Close center ub lb
Date
2009-01-02 75.099487 71.378963 74.538229 68.219698
2009-01-05 75.010582 71.711677 74.931608 68.491746
2009-01-06 75.511505 71.964058 75.543401 68.384716
2009-01-07 73.249435 71.980327 75.580937 68.379718
2009-01-08 73.548378 72.071645 75.736733 68.406557
def create_trade_book(sample):
    book = sample[['Adj Close']].copy()
    book['trade'] = ''
    return book
def tradings ( sample, book ):
    for i in sample.index:
        if sample.loc[i, 'Adj Close'] > sample.loc[i, 'ub']:
            book.loc[i, 'trade']=''
        elif sample.loc[i, 'lb'] > sample.loc[i, 'Adj Close']:
            if book.shift(1).loc[i, 'trade']=='buy':
                book.loc[i, 'trade']='buy'
            else :
                book.loc[i, 'trade']='buy'
        elif (sample.loc[i, 'ub']>= sample.loc[i, 'Adj Close'] and
              sample.loc[i, 'Adj Close']>=sample.loc[i, 'lb']):
            if book.shift(1).loc[i, 'trade'] == 'buy':
                book.loc[i, 'trade']='buy'
            else :
                book.loc[i, 'trade']=''
    return book
book = create_trade_book(sample)
book = tradings(sample, book)
book.tail(10)
Adj Close trade
Date
2019-06-11 287.501678 buy
2019-06-12 286.994171 buy
2019-06-13 288.178375 buy
2019-06-14 287.859955 buy
2019-06-17 287.969391 buy
2019-06-18 290.984741 buy
2019-06-19 291.641541 buy
2019-06-20 294.427979 buy
2019-06-21 294.000000 buy
2019-06-24 293.640015 buy
def returns(book):
    rtn = 1.0
    book['return']=1
    buy = 0.0
    sell = 0.0
    for i in book.index:
        #long 진입
        if book.loc[i, 'trade']=='buy' and book.shift(1).loc[i, 'trade']=='':
            buy = book.loc[i, 'Adj Close']
            print('진입일 :', i, '\n\t\t long 진입가격:', buy)
        #long 청산
        elif book.loc[i, 'trade']=='' and book.shift(1).loc[i, 'trade']=='buy':
            sell = book.loc[i, 'Adj Close']
            rtn = (sell-buy)/buy + 1 # 손익계산
            book.loc[i, 'return']= rtn
            print('청산일 :', i, '\n\t\t long 진입가격:', buy,
                  'long 청산가격:', sell, '\n\t\t| return :', round(rtn, 4))
        
        if book.loc[i, 'trade']=='': # 제로 포지션
            buy = sell = 0.0
            
    book['acc return'] = book['return'].cumprod()
    acc_rtn = book['acc return'][-1]
    print( 'Accumulated return :', round( acc_rtn, 4))
    return( round(acc_rtn, 4) )
returns(book)
진입일 : 2009-01-20 00:00:00 
         long 진입가격: 65.089966
청산일 : 2009-03-23 00:00:00 
         long 진입가격: 65.089966 long 청산가격: 66.898392 
        | return : 1.0278
진입일 : 2010-01-22 00:00:00 
         long 진입가격: 90.269791
청산일 : 2010-04-14 00:00:00 
         long 진입가격: 90.269791 long 청산가격: 100.584618 
        | return : 1.1143
진입일 : 2010-05-04 00:00:00 
         long 진입가격: 97.538597
청산일 : 2010-10-13 00:00:00 
         long 진입가격: 97.538597 long 청산가격: 98.862717 
        | return : 1.0136
진입일 : 2011-03-10 00:00:00 
         long 진입가격: 109.513054
청산일 : 2011-04-26 00:00:00 
         long 진입가격: 109.513054 long 청산가격: 114.094101 
        | return : 1.0418
진입일 : 2011-05-23 00:00:00 
         long 진입가격: 111.783257
청산일 : 2011-06-30 00:00:00 
         long 진입가격: 111.783257 long 청산가격: 112.26088 
        | return : 1.0043
진입일 : 2011-08-02 00:00:00 
         long 진입가격: 106.748672
청산일 : 2012-02-03 00:00:00 
         long 진입가격: 106.748672 long 청산가격: 115.768776 
        | return : 1.0845
진입일 : 2012-04-10 00:00:00 
         long 진입가격: 117.451515
청산일 : 2012-07-03 00:00:00 
         long 진입가격: 117.451515 long 청산가격: 119.371857 
        | return : 1.0164
진입일 : 2012-10-23 00:00:00 
         long 진입가격: 123.511292
청산일 : 2012-12-18 00:00:00 
         long 진입가격: 123.511292 long 청산가격: 126.961044 
        | return : 1.0279
진입일 : 2013-06-05 00:00:00 
         long 진입가격: 142.477417
청산일 : 2013-07-11 00:00:00 
         long 진입가격: 142.477417 long 청산가격: 148.711197 
        | return : 1.0438
진입일 : 2013-08-15 00:00:00 
         long 진입가격: 147.769791
청산일 : 2013-09-11 00:00:00 
         long 진입가격: 147.769791 long 청산가격: 150.45195 
        | return : 1.0182
진입일 : 2013-10-08 00:00:00 
         long 진입가격: 147.686783
청산일 : 2013-10-17 00:00:00 
         long 진입가격: 147.686783 long 청산가격: 154.594528 
        | return : 1.0468
진입일 : 2014-01-24 00:00:00 
         long 진입가격: 160.521667
청산일 : 2014-04-02 00:00:00 
         long 진입가격: 160.521667 long 청산가격: 170.233917 
        | return : 1.0605
진입일 : 2014-04-11 00:00:00 
         long 진입가격: 163.591492
청산일 : 2014-05-27 00:00:00 
         long 진입가격: 163.591492 long 청산가격: 172.613312 
        | return : 1.0551
진입일 : 2014-07-31 00:00:00 
         long 진입가격: 174.862244
청산일 : 2014-09-18 00:00:00 
         long 진입가격: 174.862244 long 청산가격: 182.768143 
        | return : 1.0452
진입일 : 2014-09-25 00:00:00 
         long 진입가격: 178.636536
청산일 : 2016-04-13 00:00:00 
         long 진입가격: 178.636536 long 청산가격: 195.218124 
        | return : 1.0928
진입일 : 2016-06-24 00:00:00 
         long 진입가격: 191.742584
청산일 : 2016-08-11 00:00:00 
         long 진입가격: 191.742584 long 청산가격: 206.280853 
        | return : 1.0758
진입일 : 2016-09-09 00:00:00 
         long 진입가격: 201.21463
청산일 : 2016-12-07 00:00:00 
         long 진입가격: 201.21463 long 청산가격: 212.964615 
        | return : 1.0584
진입일 : 2017-03-21 00:00:00 
         long 진입가격: 223.89772
청산일 : 2017-04-24 00:00:00 
         long 진입가격: 223.89772 long 청산가격: 227.193008 
        | return : 1.0147
진입일 : 2017-07-06 00:00:00 
         long 진입가격: 231.55455
청산일 : 2017-07-14 00:00:00 
         long 진입가격: 231.55455 long 청산가격: 236.377182 
        | return : 1.0208
진입일 : 2017-08-10 00:00:00 
         long 진입가격: 234.644501
청산일 : 2017-09-11 00:00:00 
         long 진입가격: 234.644501 long 청산가격: 239.890701 
        | return : 1.0224
진입일 : 2018-02-05 00:00:00 
         long 진입가격: 256.626129
청산일 : 2018-05-10 00:00:00 
         long 진입가격: 256.626129 long 청산가격: 265.551544 
        | return : 1.0348
진입일 : 2018-06-27 00:00:00 
         long 진입가격: 264.125763
청산일 : 2018-08-07 00:00:00 
         long 진입가격: 264.125763 long 청산가격: 280.040985 
        | return : 1.0603
진입일 : 2018-10-10 00:00:00 
         long 진입가격: 274.137573
청산일 : 2019-03-18 00:00:00 
         long 진입가격: 274.137573 long 청산가격: 280.96347 
        | return : 1.0249
진입일 : 2019-05-13 00:00:00 
         long 진입가격: 279.50058
Accumulated return : 2.6528
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:15: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '1.027783483555668' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  book.loc[i, 'return']= rtn
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:23: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  acc_rtn = book['acc return'][-1]
2.6528
book.tail()
Adj Close trade return acc return
Date
2019-06-18 290.984741 buy 1.0 2.652793
2019-06-19 291.641541 buy 1.0 2.652793
2019-06-20 294.427979 buy 1.0 2.652793
2019-06-21 294.000000 buy 1.0 2.652793
2019-06-24 293.640015 buy 1.0 2.652793
result = book['return'].resample('A').aggregate(
                [np.prod, lambda x: (x-1).std() * np.sqrt(252) ])
result.columns = ['Return', 'Vol']
result
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: 'A' is deprecated and will be removed in a future version, please use 'YE' instead.
  result = book['return'].resample('A').aggregate(
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: The provided callable <function prod at 0x105f46dd0> is currently using SeriesGroupBy.prod. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "prod" instead.
  result = book['return'].resample('A').aggregate(
Return Vol
Date
2009-12-31 1.027783 0.027783
2010-12-31 1.129393 0.115017
2011-12-31 1.046283 0.042032
2012-12-31 1.133016 0.090658
2013-12-31 1.112403 0.066348
2014-12-31 1.169581 0.093156
2015-12-31 1.000000 0.000000
2016-12-31 1.244337 0.132818
2017-12-31 1.059011 0.033853
2018-12-31 1.097132 0.069591
2019-12-31 1.024900 0.036083
book[(book['trade']=='buy') &
     (book['trade'].shift(1)=='')].index.year.value_counts().sort_index()
Date
2009    1
2010    2
2011    3
2012    2
2013    3
2014    4
2016    2
2017    3
2018    3
2019    1
Name: count, dtype: int64
sample[(book['trade']=='') & (book['trade'].shift(1)=='buy')].head()
Adj Close center ub lb
Date
2009-03-23 66.898392 60.247113 66.842427 53.651799
2010-04-14 100.584618 97.853165 100.219620 95.486710
2010-10-13 98.862717 96.240554 98.860432 93.620676
2011-04-26 114.094101 112.277610 114.046010 110.509210
2011-06-30 112.260880 109.184084 111.624576 106.743592