# Contents
#
# Fetch_stock_daily_data_from_akshare
#
# Contents
#!/usr/bin/env python
# coding: utf-8

# # 获取所有股票的所有日频价格

# ## ascii从历史代码中找到提取价格的代码并保存为pkl
# 
# 通过`akshare`的各个接口下载股票数据,并保存为二进制文件。另外,为了方便后续的`backtrader`分析,需要考虑将单个股票的文件集中放到同一个目录下。

# In[1]:

import os
import pickle
import re
import sys
import json
import multiprocessing
import time
import datetime

import pandas as pd
import numpy as np

import akshare as ak

# Basic Setup
# 1. In a notebook, display the result of every expression in a cell instead of
#    only the last one. IPython is needed for nothing else in this file, so the
#    import is optional: the script must still start under a plain interpreter
#    (cron job, multiprocessing worker) where IPython may not be installed.
try:
    from IPython.core.interactiveshell import InteractiveShell
except ImportError:
    InteractiveShell = None
else:
    InteractiveShell.ast_node_interactivity = "all"

# 2. Render floats with two decimals instead of scientific notation.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NOTE: the disabled tushare configuration that used to sit here was out of date
# and has been removed; it embedded an API token, which must not live in source.

# Run date formatted as YYYYMMDD.
today = datetime.datetime.now().strftime("%Y%m%d")


# install ipywidgets to show progress bar
# !pip install ipywidgets


# - “6”开头,一般为上海A股股票:
# 上海主板A股:600***、601***、603***、605***  
# 上海科创板股票:688***
# 
# - “0”开头,一般为深圳A股股票:
# 深圳主板股票:000***  
# 深圳中小板股票:002***、003***、004***
# 
# - “3”开头,一般为深圳创业板股票。
# 创业板代码区间:300000-399999  
# 
# - “400”、“420”开头,一般为退市股票
# 两网及退市公司A股股票:400***  
# 退市公司B股股票:420***
# 
# - “8”开头,一般为北交所股票
# 北交所的股票以8开头,包括82、83、87、88:82开头的表示优先股;83和87开头的表示普通股票;88开头的表示公开发行的股票。

# In[3]:


def code_to_symbol(code: str):
    """Prefix a six-character stock code with its exchange abbreviation.

    Leading "6" maps to "sh", leading "0" or "3" to "sz" and leading "8" to
    "bj". Anything else -- a code of another length or with an unlisted
    leading digit -- is handed back untouched.

    Args:
        code: Stock code as a string, e.g. "600000".

    Returns:
        The prefixed symbol (e.g. "sh600000"), or ``code`` itself when no
        rule applies.
    """
    # Only six-character codes are eligible for a prefix.
    if len(code) != 6:
        return code

    # (leading characters, exchange prefix), checked in order.
    prefix_rules = (
        ("6", "sh"),
        (("0", "3"), "sz"),
        ("8", "bj"),
    )
    for leading, exchange in prefix_rules:
        if code.startswith(leading):
            return f"{exchange}{code}"
    return code

def get_code_and_symbol():
    """Return the stock codes and their exchange-prefixed symbols.

    The listing comes from ``ak.stock_info_a_code_name()``; only entries of
    its ``code`` column that start with "0", "3", "6" or "8" are kept.

    Returns:
        A ``(stock_codes, stock_symbols)`` tuple of two parallel lists, where
        each symbol is ``code_to_symbol`` applied to the matching code.
    """
    listing = ak.stock_info_a_code_name()
    # simple filter on the leading digit
    accepted_leads = ('0', '3', '6', '8')
    stock_codes = [code for code in listing['code'] if code.startswith(accepted_leads)]
    stock_symbols = list(map(code_to_symbol, stock_codes))
    return stock_codes, stock_symbols


# Build the code/symbol universe once at module level; the download step below
# iterates over `stock_codes`.
# NOTE(review): this runs on every import of the module (presumably a network
# request -- confirm). Under multiprocessing's "spawn"/"forkserver" start methods
# each worker re-imports the main module and would repeat the call; consider
# moving it under the `__main__` guard if nothing imports these names.
stock_codes, stock_symbols = get_code_and_symbol()


# ak.stock_zh_a_spot_em()
# ak.stock_zh_a_spot()


# def cache_hist(codes: list, adjust='') -> pd.core.frame.DataFrame:
#     hist = []
#     for code in codes:
#         hist.append(ak.stock_zh_a_hist(code, period = 'daily', start_date = '19700101', end_date = '30000101', adjust = adjust))
#     return pd.concat([df for df in hist if df is not None], axis=0)

# # qfq for show
# stock_daily_qfq = cache_hist(stock_codes, adjust='qfq')
# # hfq for analysis
# stock_daily_hfq = cache_hist(stock_codes, adjust='hfq')


class Stock():
    """Thin accessor around the akshare queries for a single stock.

    Attributes are plain values; the two properties call akshare on every
    access and do not cache the result.
    """

    def __init__(self, code: str, adjust: str = 'qfq'):
        """Store the query parameters for one stock.

        Args:
            code: Stock code, e.g. "600000".
            adjust: Price-adjustment mode forwarded to ``ak.stock_zh_a_hist``.
        """
        symbol = code_to_symbol(code)
        self.code, self.adjust = code, adjust
        self.symbol = symbol
        # First two characters of the symbol ("sh"/"sz"/"bj" for recognised codes).
        self.market = symbol[:2]

    @property
    def hist(self):
        """Daily history from ``ak.stock_zh_a_hist`` over the widest date range."""
        query = dict(
            period='daily',
            start_date='19700101',
            end_date='30000101',
            adjust=self.adjust,
        )
        return ak.stock_zh_a_hist(self.code, **query)

    @property
    def main_fund(self):
        """Fund-flow table from ``ak.stock_individual_fund_flow`` for this stock.

        The '日期' (date) column is converted to datetimes, a ``stock_code``
        column holding ``self.code`` is added, and the '涨跌幅' (percent
        change) column is dropped.
        """
        flow = ak.stock_individual_fund_flow(stock=self.code, market=self.market)
        tagged = flow.assign(
            日期=lambda frame: pd.to_datetime(frame['日期']),
            stock_code=self.code,
        )
        return tagged.drop('涨跌幅', axis=1)


def get_daily(code, adjust):
    """Return ``Stock(code, adjust).hist``, the daily history for one stock.

    Args:
        code: Stock code passed to ``Stock``.
        adjust: Price-adjustment mode passed to ``Stock``.
    """
    stock = Stock(code, adjust)
    return stock.hist

def get_main_fund(code):
    """Return ``Stock(code).main_fund``, the fund-flow table for one stock.

    Args:
        code: Stock code passed to ``Stock``.
    """
    stock = Stock(code)
    return stock.main_fund


def _fetch_and_store(pool, worker, jobs, path):
    """Run ``worker`` over ``jobs`` in ``pool``, stack the results and pickle them.

    Args:
        pool: ``multiprocessing.Pool`` used to fan the calls out.
        worker: Module-level callable executed through ``pool.starmap``.
        jobs: List of argument tuples, one per ``worker`` call.
        path: Destination file for the pickled result.

    Returns:
        The row-wise concatenation of every non-``None`` result.

    Raises:
        ValueError: Propagated from ``pd.concat`` when there is nothing to
            concatenate (no jobs, or every result was ``None``).
    """
    frames = [df for df in pool.starmap(worker, jobs) if df is not None]
    combined = pd.concat(frames, axis=0)
    # Persist immediately so a failure in a later download cannot discard
    # a dataset that has already been fetched.
    with open(path, 'wb') as f:
        pickle.dump(combined, f)
    return combined


if __name__ == '__main__':

    # store data: one pickle per dataset, prefixed with the run date
    store_dir = "/home/larry/data/stock/stock_daily"
    store_path_qfq = os.path.join(store_dir, "{}_qfq.pkl".format(today))
    store_path_hfq = os.path.join(store_dir, "{}_hfq.pkl".format(today))
    store_path_main_fund = os.path.join(store_dir, "{}_main_fund.pkl".format(today))

    # Make sure the target directory exists *before* the downloads start,
    # instead of discovering a missing directory once all the work is done.
    os.makedirs(store_dir, exist_ok=True)

    # Fixed worker count (the former cpu_count()-based value was immediately
    # overwritten by this constant and never used).
    pool_size = 4
    with multiprocessing.Pool(processes=pool_size) as pool:
        stock_daily_qfq = _fetch_and_store(
            pool, get_daily, [(code, 'qfq') for code in stock_codes], store_path_qfq
        )
        stock_daily_hfq = _fetch_and_store(
            pool, get_daily, [(code, 'hfq') for code in stock_codes], store_path_hfq
        )
        stock_main_fund = _fetch_and_store(
            pool, get_main_fund, [(code,) for code in stock_codes], store_path_main_fund
        )