'RNN' 태그의 글 목록

RNN

강화학습을 이용한 비트코인 매매프로그램(10) - CNN+RNN 모델 확장 2022.12.21

강화학습을 이용한 비트코인 매매프로그램(10) - CNN+RNN 모델 확장

아빠는 벌레잡이 2022. 12. 21. 14:49

2022. 12. 21. 14:49

지난 회차의 CNN+RNN 모델에서 변경 가능한 파라미터는 hidden_dim 값일 것입니다. 여기서 한 가지 궁금한 점이 생기는데, CNN+RNN, CNN+LSTM, CNN+GRU 사이에 성능 차이가 있을까 하는 것입니다.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNRNN2(nn.Module):
    """Three conv/batch-norm stages followed by an Elman RNN and a linear head.

    The conv stack turns an image batch into a ``(batch, hdnsize, H', W')``
    feature map.  Every spatial position is treated as one time step: its
    ``hdnsize``-channel feature vector is projected to ``embedding_size`` by
    ``encoder``, the resulting sequence is run through ``rnn`` and the output
    of the last time step is mapped to ``outputs`` scores by ``decoder``.

    Args:
        device: stored on ``self.device`` for callers; ``forward`` follows the
            device of the module's own weights instead.
        h, w: only used as ``h * w`` for the channel count of ``conv1``.
        embedding_size: input width of the recurrent layer.
        outputs: number of values returned per sample.
        hdnsize: channel count of conv2/conv3 and RNN hidden size.
        num_layer: number of stacked recurrent layers.
    """

    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.embedding_size = embedding_size
        self.num_layer = num_layer

        # 4 input channels -- presumably RGBA chart images; TODO confirm.
        # NOTE(review): conv1 emits h*w channels (e.g. 14000 for h=100, w=140),
        # which is very large -- confirm this is intended.
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)

        # A lookup-table nn.Embedding only accepts integer indices, so it
        # cannot consume float conv features; a linear projection fills the
        # same role (feature vector -> embedding_size) for every position.
        self.encoder = nn.Linear(hdnsize, embedding_size)
        self.rnn = nn.RNN(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self, batch_size=8, device=None, dtype=None):
        """Return a zero initial hidden state.

        Returns:
            Tensor of shape ``(num_layer, batch_size, hidden_size)``, the
            layout ``nn.RNN`` expects for ``h_0`` with sequence-first input.
        """
        return torch.zeros(self.num_layer, batch_size, self.hidden_size,
                           device=device, dtype=dtype)

    def forward(self, x):
        """Map an image batch ``(batch, 4, H, W)`` to ``(batch, outputs)``."""
        # Follow the weights rather than self.device: under nn.DataParallel
        # each replica lives on its own GPU while self.device stays fixed.
        x = x.to(self.conv1.weight.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # (batch, hdnsize, H', W') -> (H'*W', batch, hdnsize): one time step
        # per spatial position, sequence-first as the recurrent layer expects.
        seq = x.flatten(2).permute(2, 0, 1)
        seq = self.encoder(seq)

        # Size the state from the actual batch instead of a hard-coded 8.
        hidden = self.init_hidden(seq.size(1), seq.device, seq.dtype)
        out, hidden = self.rnn(seq, hidden)

        # Last time step summarises the whole sequence: (batch, hdnsize).
        return self.decoder(out[-1])

embedding을 포함한 cnn+rnn 모델인 cnnrnn2.py입니다.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNRNN2(nn.Module):
    """Three conv/batch-norm stages followed by an LSTM and a linear head.

    The conv stack turns an image batch into a ``(batch, hdnsize, H', W')``
    feature map.  Every spatial position is treated as one time step: its
    ``hdnsize``-channel feature vector is projected to ``embedding_size`` by
    ``encoder``, the resulting sequence is run through the LSTM and the output
    of the last time step is mapped to ``outputs`` scores by ``decoder``.

    Args:
        device: stored on ``self.device`` for callers; ``forward`` follows the
            device of the module's own weights instead.
        h, w: only used as ``h * w`` for the channel count of ``conv1``.
        embedding_size: input width of the recurrent layer.
        outputs: number of values returned per sample.
        hdnsize: channel count of conv2/conv3 and LSTM hidden size.
        num_layer: number of stacked LSTM layers.
    """

    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.embedding_size = embedding_size
        self.num_layer = num_layer

        # 4 input channels -- presumably RGBA chart images; TODO confirm.
        # NOTE(review): conv1 emits h*w channels (e.g. 14000 for h=100, w=140),
        # which is very large -- confirm this is intended.
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)

        # A lookup-table nn.Embedding only accepts integer indices, so it
        # cannot consume float conv features; a linear projection fills the
        # same role (feature vector -> embedding_size) for every position.
        self.encoder = nn.Linear(hdnsize, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self, batch_size=8, device=None, dtype=None):
        """Return zero initial ``(hidden, cell)`` states.

        Both tensors have shape ``(num_layer, batch_size, hidden_size)``.
        The default ``batch_size`` of 8 keeps the previously hard-coded value.
        """
        shape = (self.num_layer, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, device=device, dtype=dtype)
        cell = torch.zeros(shape, device=device, dtype=dtype)
        return hidden, cell

    def forward(self, x):
        """Map an image batch ``(batch, 4, H, W)`` to ``(batch, outputs)``."""
        # Follow the weights rather than self.device: under nn.DataParallel
        # each replica lives on its own GPU while self.device stays fixed.
        x = x.to(self.conv1.weight.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # (batch, hdnsize, H', W') -> (H'*W', batch, hdnsize): one time step
        # per spatial position, sequence-first as the recurrent layer expects.
        seq = x.flatten(2).permute(2, 0, 1)
        seq = self.encoder(seq)

        # Size the states from the actual batch instead of a hard-coded 8.
        hidden, cell = self.init_hidden(seq.size(1), seq.device, seq.dtype)
        out, (hidden, cell) = self.rnn(seq, (hidden, cell))

        # Last time step summarises the whole sequence: (batch, hdnsize).
        return self.decoder(out[-1])

embedding을 포함한 cnn+lstm모델인 cnnlstm.py입니다.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNRNN2(nn.Module):
    """Three conv/batch-norm stages followed by a GRU and a linear head.

    The conv stack turns an image batch into a ``(batch, hdnsize, H', W')``
    feature map.  Every spatial position is treated as one time step: its
    ``hdnsize``-channel feature vector is projected to ``embedding_size`` by
    ``encoder``, the resulting sequence is run through the GRU and the output
    of the last time step is mapped to ``outputs`` scores by ``decoder``.

    Args:
        device: stored on ``self.device`` for callers; ``forward`` follows the
            device of the module's own weights instead.
        h, w: only used as ``h * w`` for the channel count of ``conv1``.
        embedding_size: input width of the recurrent layer.
        outputs: number of values returned per sample.
        hdnsize: channel count of conv2/conv3 and GRU hidden size.
        num_layer: number of stacked GRU layers.
    """

    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.embedding_size = embedding_size
        self.num_layer = num_layer

        # 4 input channels -- presumably RGBA chart images; TODO confirm.
        # NOTE(review): conv1 emits h*w channels (e.g. 14000 for h=100, w=140),
        # which is very large -- confirm this is intended.
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)

        # A lookup-table nn.Embedding only accepts integer indices, so it
        # cannot consume float conv features; a linear projection fills the
        # same role (feature vector -> embedding_size) for every position.
        self.encoder = nn.Linear(hdnsize, embedding_size)
        self.rnn = nn.GRU(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self, batch_size=8, device=None, dtype=None):
        """Return a zero initial hidden state.

        Returns:
            Tensor of shape ``(num_layer, batch_size, hidden_size)``, the
            layout ``nn.GRU`` expects for ``h_0`` with sequence-first input.
        """
        return torch.zeros(self.num_layer, batch_size, self.hidden_size,
                           device=device, dtype=dtype)

    def forward(self, x):
        """Map an image batch ``(batch, 4, H, W)`` to ``(batch, outputs)``."""
        # Follow the weights rather than self.device: under nn.DataParallel
        # each replica lives on its own GPU while self.device stays fixed.
        x = x.to(self.conv1.weight.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # (batch, hdnsize, H', W') -> (H'*W', batch, hdnsize): one time step
        # per spatial position, sequence-first as the recurrent layer expects.
        seq = x.flatten(2).permute(2, 0, 1)
        seq = self.encoder(seq)

        # Size the state from the actual batch instead of a hard-coded 8.
        hidden = self.init_hidden(seq.size(1), seq.device, seq.dtype)
        out, hidden = self.rnn(seq, hidden)

        # Last time step summarises the whole sequence: (batch, hdnsize).
        return self.decoder(out[-1])

embedding을 포함한 cnn+gru모델인 cnngru.py입니다.

import random
from collections import deque, namedtuple
from IPython.display import display, Math
from Account import Account
import math
from itertools import count
import os

import sys
import os.path as path
import plotly as plt
import torch
import torch.nn as nn
import torch.optim as optim
from Market import Market
import torchvision
import time
from cnnrnn2 import CNNRNN2
from memory import ReplayMemory, Experience
# from transformers import get_cosine_schedule_with_warmup
import transformers

# ---------------------------------------------------------------------------
# Run configuration and module-level training objects.
# ---------------------------------------------------------------------------
action_kind = 3          # number of network outputs; select_action returns 0, 1 or 2
max_episode = 5000       # epochs per data reload in main()
screen_height = 100      # passed to CNNRNN2 as h
screen_width  = 140      # passed to CNNRNN2 as w
data_size = 250          # first row index visited and max_data passed to get_chart
visit_cnt = [0] * action_kind
# replay_buffer = deque()
epsilon = 0.3
dis = 0.9

BATCH_SIZE = 8
# GAMMA = 0.999
GAMMA = 0.99
# EPS_* and steps_done are only referenced by the commented-out
# epsilon-greedy variant of select_action.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
TARGET_UPDATE = 10       # main() saves a checkpoint whenever idx % TARGET_UPDATE == 0
steps_done = 0
loss = any               # placeholder; the training functions bind their own local `loss`
WINDOW_START = 0
WINDOW_SIZE  = 1500

# Fall back to the CPU instead of crashing at import time when CUDA is absent.
# device_str is also part of the checkpoint file name used by main().
device_str = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_str)

# NOTE(review): the replay memory is constructed with BATCH_SIZE as its only
# argument -- presumably its capacity; confirm against memory.ReplayMemory.
memory = ReplayMemory(BATCH_SIZE)
converter = torchvision.transforms.ToTensor()
market = Market()
train_net = CNNRNN2(device, screen_height, screen_width, 10, action_kind, 32, 2).to(device)
# Keep the DataParallel wrapper on every host so state_dict keys stay the same,
# but only name GPUs that actually exist (at most the two used originally).
_gpu_ids = list(range(min(2, torch.cuda.device_count())))
train_net = nn.DataParallel(train_net, device_ids=_gpu_ids or None).to(device)

episode_durations = []
optimizer = optim.RMSprop(train_net.parameters(), lr=0.0001, eps=0.00000001)
# scheduler = get_cosine_schedule_with_warmup(optimizer, 5, base_lr=0.3, final_lr=0.01)
# NOTE(review): main() calls scheduler.step() once per epoch, i.e. far more
# often than num_training_steps=25 -- confirm the schedule length is intended.
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer,
                                                         num_warmup_steps=5,
                                                         num_training_steps=25)
def optimize_action(memory):
    """Run one optimisation step on a batch popped from ``memory``.

    Pops ``BATCH_SIZE`` experiences, evaluates ``train_net`` on their states,
    picks the output at each stored action index and minimises the smooth-L1
    distance between that value and the action index itself.

    NOTE(review): the regression target is the action index and the stored
    reward is never read -- confirm this is the intended training signal.

    Args:
        memory: replay memory supporting ``len()`` and ``pop(n)``.

    Returns:
        The loss tensor of this step, or ``None`` when ``memory`` holds fewer
        than ``BATCH_SIZE`` experiences (nothing is popped in that case).
    """
    if len(memory) < BATCH_SIZE:
        return None
    optimizer.zero_grad()
    epsode = memory.pop(BATCH_SIZE)
    # Transpose a list of experiences into one Experience of tuples.
    batch = Experience(*zip(*epsode))
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    state_action_values = train_net(state_batch).gather(1, action_batch)

    criterion = nn.SmoothL1Loss()
    # action_batch is an integer index tensor; the loss needs a target with
    # the same floating dtype as the prediction.
    target = action_batch.to(state_action_values.dtype)
    loss = criterion(state_action_values, target)

    # Optimize the model
    loss.backward()
    # Clamp every gradient to [-1, 1]; unlike touching param.grad directly,
    # this skips parameters that received no gradient in this step.
    torch.nn.utils.clip_grad_value_(train_net.parameters(), 1.0)
    optimizer.step()
    return loss

# def select_action(df, idx):
#     try:
#         global steps_done
#         sample = random.random()
#         eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
#         steps_done += 1
#         if sample > eps_threshold:
#             if df.loc[idx, "closemax"] == df.loc[idx, "close"]:
#                 action = 2
#                 return  action
#             elif df.loc[idx, "closemin"] == df.loc[idx, "close"]:
#                 action = 1
#                 return  action
#             else:
#                 return 0
#         else:
#             action = random.randrange(action_kind)
#             return  action
#     except Exception as ex:
#         exc_type, exc_obj, exc_tb = sys.exc_info()
#         fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
#         print("`select_action -> exception! %s : %s %d" % (str(ex) , fname, exc_tb.tb_lineno))
#         return 0
def select_action(df, idx):
    """Pick the action for row ``idx`` of ``df`` from its price columns.

    Returns 2 when ``closemax`` equals ``close``, otherwise 1 when
    ``closemin`` equals ``close``, otherwise 0.  Any exception raised while
    reading the row is reported on stdout and answered with 0.
    """
    try:
        upper = df.loc[idx, "closemax"]
        price = df.loc[idx, "close"]
        if upper == price:
            return 2
        if df.loc[idx, "closemin"] == price:
            return 1
        return 0
    except Exception as ex:
        # Report where the failure happened: file name and line of the frame.
        trace = ex.__traceback__
        source_file = os.path.basename(trace.tb_frame.f_code.co_filename)
        print("`select_action -> exception! %s : %s %d" % (str(ex) , source_file, trace.tb_lineno))
        return 0
        
def get_chart(market, idx, max_data):
    """Fetch the chart image for ``idx`` and return it as a batched tensor.

    Asks ``market.get_chart(idx, max_data=max_data)`` for an image.  When the
    market returns ``None`` this returns ``None`` as well; otherwise the image
    goes through the module-level ``converter`` and gains a leading axis via
    ``unsqueeze(0)``.  The tensor is not moved to any device here.
    """
    image = market.get_chart(idx, max_data=max_data)
    return None if image is None else converter(image).unsqueeze(0)

def plot_durations(last_chart, curr_chart):
    """Show a chart image and plot the values stored in ``episode_durations``.

    Draws ``last_chart`` as an image, plots the module-level
    ``episode_durations`` list (plus a 100-entry moving average once at least
    100 entries exist), then replaces the image data with ``curr_chart``.

    Args:
        last_chart: tensor; moved to CPU, squeezed on dim 0 and permuted with
            (1, 2, 0) before display -- presumably (1, C, H, W); TODO confirm.
        curr_chart: tensor handled the same way as ``last_chart``.

    NOTE(review): in this file ``plt`` is bound by ``import plotly as plt`` and
    ``display`` by ``from IPython.display import display``, while the calls
    below (``plt.figure``, ``plt.imshow``, ``plt.pause``,
    ``display.clear_output``, ``display.display``) look like the
    matplotlib.pyplot and IPython.display *module* APIs -- confirm the imports
    before relying on this function.  It is not called anywhere in the
    visible code.
    """
    plt.figure()
    # plt.subplot(1,2,1)
    # Drop the leading axis and reorder to (dim1, dim2, dim0) for image display.
    img = plt.imshow(last_chart.cpu().squeeze(0).permute(1, 2, 0).numpy(), interpolation='none')
    plt.title('Example extracted screen')
    plt.figure(2)
    # plt.subplot(1,2,2)
    plt.clf()
    durations_t = torch.tensor(episode_durations, dtype=torch.float)
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())
    # plt.show()
    # Take 100 episode averages and plot them too
    if len(durations_t) >= 100:
        # Sliding windows of 100 entries, averaged; the 99 leading zeros pad
        # the curve so it has the same length as durations_t.
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())

    # Swap the displayed image to the current chart.
    img.set_data(curr_chart.cpu().squeeze(0).permute(1, 2, 0).numpy())
    plt.pause(0.01)  # pause a bit so that plots are updated
    display.clear_output(wait=True)
    display.display(plt.gcf())
            
def main():
    """Train ``train_net`` on chart images labelled by ``select_action``.

    Loads an existing checkpoint if one is present, then for each of 10 data
    reloads runs ``max_episode`` epochs.  In every epoch the rows of the
    market data are visited starting at index ``data_size``: the chart for the
    row is rendered, the action is chosen and executed on a fresh ``Account``,
    the experience is pushed to ``memory`` and ``optimize_action`` is run
    while a full batch is available.  A checkpoint is written whenever
    ``idx % TARGET_UPDATE == 0`` and the LR scheduler is stepped once per
    epoch.

    Errors raised while processing a single row are printed and the loop
    moves on to the next row (best-effort training).
    """
    checkpoint_path = "pt/train_cnnrnn2_{}.pt".format(device_str)
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    if path.exists(checkpoint_path):
        # map_location lets a checkpoint written on another device load here.
        train_net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    train_net.train()

    for _ in range(10):
        df = market.get_data()
        # df = df.head(WINDOW_SIZE)

        for epoch in range(max_episode):
            account = Account(df, 50000000)
            account.reset()
            for idx, _row_label in enumerate(df.index, start=data_size):
                try:
                    since = time.time()
                    curr_chart = get_chart(market, idx, data_size)
                    num_action = select_action(df, idx)
                    reward, _real_action = account.exec_action(num_action, idx)
                    print("idx:%d==>action:%d, price:%.2f"%(idx, num_action, df.loc[idx, 'close']))
                    reward = torch.tensor([reward], device=device)
                    action = torch.tensor([[num_action]], device=device, dtype=torch.int64)

                    # get_chart returns None when no image is available; such
                    # a state cannot be batched, so it is not stored.
                    if curr_chart is not None:
                        memory.push(curr_chart, action, reward)
                    while len(memory) >= BATCH_SIZE:
                        loss = optimize_action(memory)
                        if loss is None:
                            break
                        print("epoch[%d:%d] epsode is next loss[%.10f]" % (epoch, idx, loss.item()))

                    if idx % TARGET_UPDATE == 0:
                        torch.save(train_net.state_dict(), checkpoint_path)

                    spend = time.time() - since
                    print("idx:%d price [%.4f] unit[%.4f] used time[%.2f] agent rate:%.05f remind money:%.02f"
                            % (idx, df.loc[idx, 'close'], account.unit, spend, account.rate, account.balance + account.unit * df.loc[idx, 'close']))
                    if account.is_bankrupt():
                        break
                    if idx == df.index.max():
                        break
                except Exception as ex:
                    # Best-effort: report the failing row and keep training.
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                    print("`recall_training -> exception! %s : %s %d" % (str(ex) , fname, exc_tb.tb_lineno))
            scheduler.step()

        print("end training DQN")

    print('Complete Training')


if __name__ == "__main__":
    main()

위의 3가지 모델을 학습하기 위한 학습 파일인 main_cnnrnn2.py입니다. 세 모델 모두 클래스 이름이 CNNRNN2로 같으므로, import 문의 모듈 이름(cnnrnn2 / cnnlstm / cnngru)만 변경하면 각 모델의 훈련이 가능할 것으로 예상됩니다.

대표이미지 출처:https://dgkim5360.tistory.com/entry/understanding-long-short-term-memory-lstm-kr

Long Short-Term Memory (LSTM) 이해하기

이 글은 Christopher Olah가 2015년 8월에 쓴 글을 우리 말로 번역한 것이다. Recurrent neural network의 개념을 쉽게 설명했고, 그 중 획기적인 모델인 LSTM을 이론적으로 이해할 수 있도록 좋은 그림과 함께

dgkim5360.tistory.com

LIST

'python > 자동매매 프로그램' 카테고리의 다른 글

강화학습을 이용한 비트코인 매매프로그램(11) - ResNet + RNN 적용 모델 (1)	2022.12.25
강화학습을 이용한 비트코인 매매프로그램(7)-Back Test (1)	2022.12.24
강화학습을 이용한 비트코인 매매프로그램(9) - CNN+RNN 모델 (0)	2022.12.20
강화학습을 이용한 비트코인 매매프로그램(8) - wsl이용하기 (3)	2022.12.12
강화학습을 이용한 비트코인 매매프로그램(6)-Buroto Force학습 (17)	2022.12.04

PREV 이전 1 NEXT 다음

오늘도 아빠는 벌레잡는 중

RNN

강화학습을 이용한 비트코인 매매프로그램(10) - CNN+RNN 모델 확장

'python > 자동매매 프로그램' 카테고리의 다른 글

+ Recent posts

티스토리툴바