Python Final Exam

Problem 1.

Answer 1.

import pandas as pd
import numpy as np

def MVportfolio(asset, mu_p):
    # Calculate log returns (first column is date.)
    return_asset = np.log(asset.iloc[:, 1:]).diff()

    # Calculate expected log returns
    expect_asset = return_asset.mean()
    expect_asset_np = np.array(expect_asset)

    # Calculate covariance matrix
    cov_asset = return_asset.cov()
    cov_asset_np = np.array(cov_asset)

    # Use lagrange multipliers method for optimazation
    num_assets = return_asset.shape[1]
    u = np.ones(num_assets)

    a = np.block([
        [cov_asset_np, u[:, np.newaxis], expect_asset_np[:, np.newaxis]],
        [u[np.newaxis, :], np.zeros(1), np.zeros(1)],
        [expect_asset_np[np.newaxis, :], np.zeros(1), np.zeros(1)]
    ])

    b = np.concatenate([np.zeros(num_assets), [1], [mu_p]])

    lagrange = np.linalg.solve(a, b)
    
    # Optimal weights
    result = lagrange[:num_assets]

    # Portfolio variance
    Var_p = np.dot(result, np.dot(cov_asset, result))

    return result, Var_p

# Test
it = pd.read_csv('it.csv')
MVportfolio(asset = it, mu_p = -0.001)
(array([ 0.16186582,  0.25684189, -0.35114871,  0.44970549,  0.48273551]),
 0.00014638968239383282)
import matplotlib.pyplot as plot

# Range of mu_p
mu_p_values = np.arange(-0.001, 0.0011, 0.0001)

# Looping
portfolio_variances = []
for mu_p in mu_p_values:
    tmp = MVportfolio(asset=it, mu_p=mu_p)
    Var_p = tmp[1]
    portfolio_variances.append(Var_p)

# Plotting
plot.plot(portfolio_variances, mu_p_values)
plot.xlabel('Portfolio Variance')
plot.ylabel('Portfolio expected return')
Text(0, 0.5, 'Portfolio expected return')

Problem 2.

Answer 2.

# 1. 'price.csv'의 'date'를 DatetimeIndex로 하는 pd.DataFrame price 생성
price = pd.read_csv('price.csv')
price['date'] = pd.to_datetime(price['date'])
price = price.set_index('date')
price
X005930 X000660 X051910 X207940 X035420 X035720 X005380 X006400 X068270 X000270 ... X058220 X038160 X033600 X195440 X058420 X121890 X114570 X347140 X080440 X158310
date
2018-05-23 51800 95300.0 343500.0 399500.0 136994 22580 144500.0 204500 237286 33650 ... 2500.0 7790.0 7460.0 63720.0 13298.0 2905.0 26630.0 NaN 5079.0 10250.0
2018-05-24 51400 94600.0 345000.0 418000.0 136193 22681 140000.0 204500 243737 32700 ... 2383.0 7880.0 7310.0 63536.0 13226.0 2890.0 25339.0 NaN 5005.0 9300.0
2018-05-25 52700 95200.0 352500.0 430500.0 137194 22480 139000.0 207000 253413 32250 ... 2400.0 7770.0 7200.0 63997.0 13265.0 2830.0 25574.0 NaN 4870.0 8920.0
2018-05-28 52300 94500.0 352000.0 429000.0 136193 21677 138500.0 209000 249726 32300 ... 2403.0 7980.0 7370.0 62800.0 13226.0 2790.0 25339.0 NaN 4980.0 8740.0
2018-05-29 51300 94800.0 343500.0 433000.0 133789 21677 140000.0 211500 249266 31650 ... 2350.0 8930.0 7240.0 60866.0 13121.0 2740.0 26281.0 NaN 4631.0 8720.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2020-08-31 54000 75100.0 740000.0 778000.0 322500 81691 176500.0 452500 291656 42450 ... 275.0 862.0 179.0 2075.0 5968.0 739.0 620.0 2015.0 498.0 505.0
2020-09-01 54200 75200.0 743000.0 781000.0 324500 80587 180000.0 454500 292638 43150 ... 275.0 862.0 179.0 2075.0 5961.0 739.0 620.0 2015.0 498.0 505.0
2020-09-02 54400 75500.0 742000.0 770000.0 332500 82695 176000.0 452000 295093 42200 ... 275.0 862.0 179.0 2075.0 5891.0 739.0 620.0 2015.0 498.0 505.0
2020-09-03 56400 78700.0 768000.0 779000.0 339000 82293 176500.0 452500 294111 42700 ... 275.0 862.0 179.0 2075.0 6080.0 739.0 620.0 2010.0 498.0 505.0
2020-09-04 55600 78700.0 743000.0 773000.0 328500 80688 172500.0 439000 288710 42150 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

565 rows × 2203 columns

# 2. price에서 2019년만 선택해서 price_sub로 저장
price_sub=price.loc['2019']
price_sub
X005930 X000660 X051910 X207940 X035420 X035720 X005380 X006400 X068270 X000270 ... X058220 X038160 X033600 X195440 X058420 X121890 X114570 X347140 X080440 X158310
date
2019-01-02 38750 60600.0 337000.0 374000.0 118000 20473 114000.0 210500 201192 32800 ... 2525.0 7320.0 1810.0 38781.0 9939.0 1845.0 17675.0 NaN 3250.0 2700.0
2019-01-03 37600 57700.0 328000.0 376500.0 122000 20573 116500.0 203000 197440 32850 ... 2550.0 7260.0 1730.0 35054.0 9939.0 1870.0 17581.0 NaN 3155.0 2675.0
2019-01-04 37450 58300.0 330500.0 381000.0 125500 20774 119500.0 201000 206350 34850 ... 2475.0 7450.0 1780.0 35330.0 9972.0 1885.0 18806.0 NaN 3095.0 2670.0
2019-01-07 38750 58700.0 343000.0 388000.0 131500 20673 120500.0 217000 202599 34200 ... 2483.0 7750.0 1785.0 35330.0 10391.0 1885.0 18806.0 NaN 3095.0 2715.0
2019-01-08 38100 59200.0 349500.0 386000.0 130000 20051 119500.0 218500 200254 33800 ... 2445.0 7790.0 1740.0 34502.0 10391.0 1975.0 18099.0 NaN 3050.0 2895.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-23 55500 94600.0 313000.0 422500.0 184000 29806 122500.0 228000 174929 44750 ... 1265.0 3765.0 446.0 6130.0 11982.0 2520.0 3270.0 NaN 1420.0 2705.0
2019-12-24 55000 93800.0 312500.0 417000.0 182000 29405 122000.0 225000 173521 44700 ... 1265.0 3740.0 407.0 5750.0 11255.0 2385.0 3020.0 NaN 1210.0 2360.0
2019-12-26 55400 94800.0 308000.0 413500.0 180500 29706 122500.0 222500 177743 45100 ... 1375.0 3815.0 425.0 5520.0 10908.0 2245.0 3240.0 NaN 1215.0 2100.0
2019-12-27 56500 96000.0 310500.0 429500.0 183500 30810 120500.0 233000 180689 44350 ... 1450.0 3835.0 418.0 5270.0 10875.0 2340.0 3350.0 NaN 1215.0 1890.0
2019-12-30 55800 94100.0 317500.0 433000.0 186500 30810 120500.0 236000 177743 44300 ... 1585.0 3805.0 413.0 5210.0 10594.0 2350.0 3210.0 NaN 1155.0 2455.0

246 rows × 2203 columns

# 3. 열 별로 직전 대비 변동률을 구하고, 누적 곱하여 cum_ret 생성
returns = price_sub.pct_change()
cum_ret = (1 + returns).prod(axis=0) - 1
# 기간누적수익률 산출을 위해 (-1) 추가. 안하는 경우 (-) 수익률인 주식의 샤프비율 고평가됌
cum_ret
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_1730/3098040869.py:2: FutureWarning:

The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.
X005930    0.440000
X000660    0.552805
X051910   -0.057864
X207940    0.157754
X035420    0.580508
             ...   
X121890    0.273713
X114570   -0.818388
X347140    0.000000
X080440   -0.644615
X158310   -0.090741
Length: 2203, dtype: float64
# 4. cum_ret에서 높은 순서로 10개 출력 및 가장 높은 종목의 시계열 그림 출력
top10_cum_ret = cum_ret.nlargest(10)
top10_cum_ret
X088290    13.576271
X101360     7.400000
X078130     4.155172
X139670     4.049401
X032500     3.443478
X009190     2.992042
X138080     2.719443
X336370     2.693285
X033250     2.536524
X214150     2.498146
dtype: float64
top_index = top10_cum_ret.index[0]
price_sub[top_index].plot()
plot.xlabel('Date')
plot.ylabel('Price')
Text(0, 0.5, 'Price')

# 5. 열 별로 직전 대비 변동률의 표준편차를 계산하여 std 생성
std = returns.std(axis=0) * np.sqrt(252)
std
X005930    0.230925
X000660    0.353011
X051910    0.248067
X207940    0.367346
X035420    0.328026
             ...   
X121890    0.941212
X114570    0.709787
X347140         NaN
X080440    0.995452
X158310    1.000060
Length: 2203, dtype: float64
# 6. std에서 0이거나 NaN인 경우 제외
std = std[(std != 0) & (~std.isna())]
std
X005930    0.230925
X000660    0.353011
X051910    0.248067
X207940    0.367346
X035420    0.328026
             ...   
X058420    0.680404
X121890    0.941212
X114570    0.709787
X080440    0.995452
X158310    1.000060
Length: 2097, dtype: float64
# 7. 종목 별 Sharpe ratio=cum_ret/std를 구하여 shrp 생성
shrp = cum_ret / std
shrp
X000020   -0.412952
X000040   -1.106285
X000050   -0.795542
X000060   -0.651532
X000070   -0.593173
             ...   
X361390         NaN
X361610         NaN
X363280         NaN
X375500         NaN
X378850         NaN
Length: 2203, dtype: float64
# 8. shrp에 NaN이 포함된 경우 shar의 최소값으로 대체
shrp.fillna(shrp.min(), inplace=True)
shrp
X000020   -0.412952
X000040   -1.106285
X000050   -0.795542
X000060   -0.651532
X000070   -0.593173
             ...   
X361390   -2.164228
X361610   -2.164228
X363280   -2.164228
X375500   -2.164228
X378850   -2.164228
Length: 2203, dtype: float64
# 9. shrp를 값이 큰 순서로 10개 종목 출력
top10_shrp = shrp.nlargest(10)
top10_shrp
X088290    12.317314
X032500     5.582321
X101360     5.364799
X139670     4.570944
X138080     4.033614
X078130     3.760220
X214150     3.711147
X036540     3.707145
X097520     3.624222
X000990     3.552103
dtype: float64
# 10. 위 10개 종목에 대한 cum_ret, std, shrp 값을 각 열로 하는 pd.DataFrame  final_result 생성
final_result = pd.DataFrame({
    'cum_ret': cum_ret[top10_shrp.index],
    'std': std[top10_shrp.index],
    'shrp': shrp[top10_shrp.index]
})
final_result
cum_ret std shrp
X088290 13.576271 1.102210 12.317314
X032500 3.443478 0.616854 5.582321
X101360 7.400000 1.379362 5.364799
X139670 4.049401 0.885900 4.570944
X138080 2.719443 0.674195 4.033614
X078130 4.155172 1.105034 3.760220
X214150 2.498146 0.673147 3.711147
X036540 2.142336 0.577894 3.707145
X097520 1.903846 0.525312 3.624222
X000990 1.628571 0.458481 3.552103

Problem 3.

Answer 3.

def ECallSimul_2(S0, K, T, r, sigma, M, I=250000):
    import numpy as np
    S = np.zeros((I,M+1))
    S[:,0] = S0
    dt = T/M
    z = np.random.standard_normal(size=(I,M))
    for t in range(1,M+1):
        S[:,t] = S[:,t-1]*np.exp((r-0.5*sigma**2)*dt + sigma*np.sqrt(dt)*z[:,t-1])
    sum_val = np.maximum(S[:,-1]-K, 0)
    C0 = np.exp(-r*T)*np.mean(sum_val)
    return ( round(C0, 3) )
%time ECallSimul_1(S0=100.,K=105.,T=1.,r=0.05,sigma=0.2,M=50,I=250000)
CPU times: user 10.8 s, sys: 98.9 ms, total: 10.9 s
Wall time: 10.8 s
7.925
%time ECallSimul_2(S0=100.,K=105.,T=1.,r=0.05,sigma=0.2,M=50,I=250000)
CPU times: user 691 ms, sys: 32.2 ms, total: 723 ms
Wall time: 723 ms
7.979