# Q6
#
# 2022-06-04 09:03:17  Author: Internet
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

def plot_ec(ec, ret_next='ret_next', fac='factor'):
    '''
    Plot the cumulative equity curve of every group of a single-variable sort.

    Parameters
    ----------
    ec : DataFrame with a ``date`` column, a ``group`` column holding 1-based
        group numbers, and the next-period return column named by ``ret_next``.
    ret_next : name of the next-period return column in ``ec``.
    fac : name of the sorting variable; only used to build the legend labels.

    Side effects
    ------------
    Prints the final equity value of group 1 and shows the figure with
    ``plt.show()``. The caller's frame is not modified.

    Raises
    ------
    ValueError : if no row is left after dropping rows with missing values.
    '''
    # set_index returns a new frame, so the caller's data is left untouched.
    ec = ec.set_index('date').dropna()
    if ec.empty:
        raise ValueError('plot_ec: no complete rows left to plot')

    # GroupBy.cumprod returns a result on the original index, so it can be
    # assigned straight back; groupby.apply would prepend the group key on
    # recent pandas versions and break the assignment.
    ec['equity_curve'] = ec[ret_next] + 1
    ec['equity_curve'] = ec.groupby('group')['equity_curve'].cumprod()

    n_groups = int(ec['group'].max())
    for number in range(1, n_groups + 1):
        if number == 1:
            label = 'low-' + fac
        elif number == n_groups:
            label = 'high-' + fac
        else:
            label = 'g' + str(number)

        curve = ec.loc[ec['group'] == number, 'equity_curve']
        if number == 1 and not curve.empty:
            # .iloc: the index holds date labels, so [-1] would rely on the
            # deprecated positional fallback.
            print('第1组最后曲线净值：', curve.iloc[-1])
        plt.plot(curve.index, curve, label=label)
    plt.legend()
    plt.show()

def set_group(x: pd.Series, group=10):
    '''
    Return the 1-based bucket number of every value in ``x``; meant to be used
    inside ``groupby(...).apply`` / ``transform``.

    Values are ranked ascending (ties broken by order of appearance) and the
    ranks are cut into ``group`` buckets of near-equal size. Missing values
    keep a missing (NaN) bucket number.

    Parameters
    ----------
    x : values to bucket.
    group : number of buckets.

    Returns
    -------
    Float Series on the index of ``x`` with values in ``1..group`` (or NaN).
    '''
    size = int(x.count())  # number of non-missing observations
    if size == 0:
        # Nothing to rank: every bucket number is missing.
        return pd.Series(np.nan, index=x.index, name=x.name, dtype=float)

    ranks = x.rank(method='first')
    # Multiply before dividing: rank * group is an exact integer, so the
    # quotient is exact whenever it is a whole number. The former
    # rank / (size / group) rounded twice and could overshoot, e.g.
    # 17 / (17 / 7) == 7.000000000000001, which ceil turned into bucket 8.
    return np.ceil(ranks * group / size)

def read_data(data_dir='./data'):
    '''
    Load the three pickled inputs of the script.

    Parameters
    ----------
    data_dir : directory containing ``CSI500_factor_data.pkl``,
        ``benchmark.pkl`` and ``price_data.pkl``. Defaults to ``./data``.

    Returns
    -------
    Tuple ``(CSI500_factor, benchmark, price_data)`` with the unpickled
    objects, in that order.

    Raises
    ------
    FileNotFoundError (or another OSError) if a file cannot be opened.
    '''
    from pathlib import Path  # local import: the file does not import pathlib

    base = Path(data_dir)
    loaded = []
    for filename in ('CSI500_factor_data.pkl', 'benchmark.pkl', 'price_data.pkl'):
        # The with-block closes each handle; they used to be left open.
        with open(base / filename, 'rb') as handle:
            # NOTE: pickle.load can execute arbitrary code; only load files
            # that come from a trusted source.
            loaded.append(pickle.load(handle))

    CSI500_factor, benchmark, price_data = loaded
    return CSI500_factor, benchmark, price_data

def stock_transform(price_data):
    '''
    Reshape a wide price table into a long table with next-period returns.

    Parameters
    ----------
    price_data : DataFrame with one row per stock (row label = stock code)
        and one column per date (column label = date), holding close prices.

    Returns
    -------
    DataFrame with columns ``date``, ``stkcd``, ``close`` and ``ret_next``,
    one row per (stock, date), stocks in their original row order. Within a
    stock, ``ret_next`` is ``close`` of the next column divided by the current
    ``close`` minus 1; it is NaN for the last date of each stock. The result
    keeps the date labels as its (repeating) index.
    '''
    columns = ['date', 'stkcd', 'close', 'ret_next']

    frames = []
    for stkcd, close in price_data.iterrows():
        frame = pd.DataFrame({'date': price_data.columns,
                              'stkcd': stkcd,
                              'close': close})
        frame['ret_next'] = frame['close'].shift(-1) / frame['close'] - 1
        frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=columns)
    # One concat at the end instead of one per stock: linear instead of
    # quadratic copying, and no empty seed frame to influence the dtypes.
    return pd.concat(frames, axis=0)

def factor_merge(stock, CSI500_factor):
    '''
    Attach the factor value to every row of the long stock table.

    Both ``date`` columns are cut to their first six characters (presumably
    ``YYYYMM`` -- confirm against the data) and the tables are left-joined on
    ``date`` and ``stkcd``, so stock rows without a factor keep a missing
    ``factor_value``.

    Parameters
    ----------
    stock : long table with at least ``date`` and ``stkcd`` columns; its index
        is discarded.
    CSI500_factor : object whose ``reset_index()`` yields exactly three
        columns, taken in order as stock code, date and factor value.

    Returns
    -------
    New DataFrame: ``stock`` with a fresh integer index, the shortened
    ``date`` and an added ``factor_value`` column.
    '''
    def first_six(day):
        # Keep only the leading six characters of the date string.
        return day[:6]

    merged = stock.reset_index(drop=True)
    merged['date'] = merged['date'].map(first_six)

    factor = CSI500_factor.reset_index()
    factor.columns = ['stkcd', 'date', 'factor_value']
    factor['date'] = factor['date'].map(first_six)

    return merged.merge(factor, how='left', on=['date', 'stkcd'])


if __name__ == '__main__':
    # Script entry point: load the data, sort stocks into factor deciles,
    # plot the decile equity curves, then report IC/IR and yearly statistics
    # of the benchmark-hedged return.
    CSI500_factor, benchmark, price_data = read_data()
    stock = stock_transform(price_data)
    stock = factor_merge(stock, CSI500_factor)

    # transform keeps the result on stock's own index, so the assignment is
    # aligned regardless of how the pandas version labels groupby.apply output.
    stock['group'] = stock.groupby('date')['factor_value'].transform(set_group, group=10)

    # Equal-weighted decile portfolios: mean next-period return per date/group.
    ec = stock.groupby(['date', 'group'])['ret_next'].agg('mean').reset_index()
    del stock['group']
    plot = True
    if plot:
        plot_ec(ec, fac='factor_value')

    ####################################################################################################################
    # IC & IR
    ####################################################################################################################
    # IC: per-date correlation between the factor value and the next-period
    # return; IR: mean IC divided by the standard deviation of IC.
    IC = stock.set_index('date')[['factor_value', 'ret_next']].groupby('date').apply(lambda x: x.corr().iloc[0, 1])
    IC_value = IC.mean()
    IR = IC_value / IC.std()
    print('CSI500 factor', '\n 因子IC值：', round(IC_value, 5), '\n 因子IR值：', round(IR, 4))

    ####################################################################################################################
    # Hedged portfolio return
    ####################################################################################################################

    # NOTE(review): this takes the best-performing group of each date, not a
    # fixed decile -- confirm that this is the intended long leg.
    max_ret = ec.groupby(['date'])[['ret_next']].agg('max')
    benchmark.columns = ['zz500']

    benchmark_ = benchmark.copy()
    benchmark_.index = pd.to_datetime(benchmark_.index)
    month = benchmark_.index.to_period('M')
    # Benchmark rows at the first and at the last available date of each month.
    benchmark_min = benchmark_.loc[benchmark_.groupby(month).apply(lambda x: x.index.min())]
    benchmark_max = benchmark_.loc[benchmark_.groupby(month).apply(lambda x: x.index.max())]

    # Month keys built from the first six characters of the raw index labels;
    # assumes the labels are date strings in chronological order -- TODO confirm.
    index_ = np.unique([i[:6] for i in benchmark.index])
    benchmark_m = pd.DataFrame(columns=['zz_ret'], index=index_)
    # Use the 1-D column values so a plain vector is assigned to the column.
    benchmark_m['zz_ret'] = benchmark_max['zz500'].values / benchmark_min['zz500'].values - 1

    hege_ = pd.merge(max_ret, benchmark_m, left_index=True, right_index=True, how='left')
    hege_['ret'] = hege_['ret_next'] - hege_['zz_ret']
    hege_['year'] = [i[:4] for i in hege_.index]

    # Yearly statistics of the hedged return 'ret'. They were previously
    # computed on the unhedged 'ret_next', which left 'ret' unused.
    year_ = pd.DataFrame()
    year_['year_ret'] = hege_.groupby(['year'])['ret'].agg('sum')
    year_['year_std'] = hege_.groupby(['year'])['ret'].agg('std')
    # NOTE(review): 'year_recall' is the largest return of the year; if a
    # drawdown measure was intended, this needs a different computation.
    year_['year_recall'] = hege_.groupby(['year'])['ret'].agg('max')
# Tags: index, Q6, benchmark, ret, factor, data, stock
# Source: https://www.cnblogs.com/RankFan/p/Q6.html