In the CNN+RNN model from the previous post, the parameter we can experiment with is the hidden_dim value. This raises a natural question: is there any performance difference among CNN+RNN, CNN+LSTM, and CNN+GRU?
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNNRNN2(nn.Module):
    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)
        self.embedding_size = embedding_size
        self.num_layer = num_layer
        self.encoder = nn.Embedding(54 * hdnsize, embedding_size)
        self.rnn = nn.RNN(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self):
        # hidden state for nn.RNN: (num_layers, batch, hidden_size); 8 = batch_size
        return torch.zeros(self.num_layer, 8, self.hidden_size)

    def forward(self, x):
        x = x.to(self.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        hidden = self.init_hidden().to(self.device)
        x, hidden = self.rnn(x, hidden)
        x = self.decoder(x.view(8, -1))
        return x
This is cnnrnn2.py, the CNN+RNN model that includes an embedding layer.
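Since hidden_dim (hdnsize here) is the main knob to experiment with, here is a minimal usage sketch. It reuses the constructor arguments passed in main_cnnrnn2.py further below (height 100, width 140, embedding size 10, 3 actions, 2 recurrent layers) and only varies hdnsize; the net_32/net_64 names are just for illustration.

import torch
from cnnrnn2 import CNNRNN2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# same constructor arguments as in main_cnnrnn2.py; only hdnsize differs between the two nets
net_32 = CNNRNN2(device, 100, 140, 10, 3, 32, 2).to(device)
net_64 = CNNRNN2(device, 100, 140, 10, 3, 64, 2).to(device)

The same constructor signature applies to the cnnlstm.py and cnngru.py variants below.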
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNNRNN2(nn.Module):
    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)
        self.embedding_size = embedding_size
        self.num_layer = num_layer
        self.encoder = nn.Embedding(126 * hdnsize, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self):
        # an LSTM needs both a hidden state and a cell state of shape (num_layers, batch, hidden_size)
        hidden = torch.zeros(self.num_layer, 8, self.hidden_size)  # 8 = batch_size
        cell = torch.zeros(self.num_layer, 8, self.hidden_size)    # 8 = batch_size
        return hidden, cell

    def forward(self, x):
        x = x.to(self.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        hidden, cell = self.init_hidden()
        hidden, cell = hidden.to(self.device), cell.to(self.device)
        x, (hidden, cell) = self.rnn(x, (hidden, cell))
        x = self.decoder(x.view(8, -1))
        return x
This is cnnlstm.py, the CNN+LSTM model that includes an embedding layer.
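The only real difference from cnnrnn2.py is how the recurrent state is handled: nn.LSTM takes and returns a (hidden, cell) tuple, while nn.RNN works with a single hidden tensor. A small standalone sketch (the dimensions are arbitrary example values) to illustrate:

import torch
import torch.nn as nn

seq_len, batch, emb, hidden_size, layers = 5, 8, 10, 32, 2  # example sizes only
x = torch.randn(seq_len, batch, emb)

rnn = nn.RNN(emb, hidden_size, layers)
out, h_n = rnn(x)                 # a single hidden-state tensor

lstm = nn.LSTM(emb, hidden_size, layers)
out, (h_n, c_n) = lstm(x)         # hidden state and cell state come back as a tuple

This is why cnnlstm.py needs init_hidden to return two tensors and forward to unpack them.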
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNNRNN2(nn.Module):
    def __init__(self, device, h, w, embedding_size, outputs, hdnsize, num_layer):
        super(CNNRNN2, self).__init__()
        self.device = device
        self.hidden_size = hdnsize
        self.conv1 = nn.Conv2d(4, h*w, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(h*w)
        self.conv2 = nn.Conv2d(h*w, hdnsize, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(hdnsize)
        self.conv3 = nn.Conv2d(hdnsize, hdnsize, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(hdnsize)
        self.embedding_size = embedding_size
        self.num_layer = num_layer
        self.encoder = nn.Embedding(126 * hdnsize, embedding_size)
        self.rnn = nn.GRU(embedding_size, hdnsize, num_layer)
        self.decoder = nn.Linear(hdnsize, outputs)

    def init_hidden(self):
        # like the RNN version, a GRU only needs a hidden state: (num_layers, batch, hidden_size)
        return torch.zeros(self.num_layer, 8, self.hidden_size)  # 8 = batch_size

    def forward(self, x):
        x = x.to(self.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        hidden = self.init_hidden().to(self.device)
        x, hidden = self.rnn(x, hidden)
        x = self.decoder(x.view(8, -1))
        return x
This is cnngru.py, the CNN+GRU model that includes an embedding layer.
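As for whether the three variants should behave differently: the models are identical except for the recurrent layer, and the cells differ mainly in their gating, which shows up directly in the parameter count. A quick sketch comparing just the recurrent layers, using the same sizes that main_cnnrnn2.py passes below (embedding_size=10, hdnsize=32, num_layer=2):

import torch.nn as nn

embedding_size, hdnsize, num_layer = 10, 32, 2  # same values used in main_cnnrnn2.py below
for name, cls in [("RNN", nn.RNN), ("LSTM", nn.LSTM), ("GRU", nn.GRU)]:
    layer = cls(embedding_size, hdnsize, num_layer)
    print(name, sum(p.numel() for p in layer.parameters()))
# the LSTM has roughly 4x and the GRU roughly 3x the parameters of the vanilla RNN,
# so capacity and training cost differ even with the same hidden_dim

Whether that extra capacity translates into better trading performance is exactly what the training runs below are meant to show.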
import random
from collections import deque, namedtuple
from IPython.display import display, clear_output
from Account import Account
import math
from itertools import count
import os
import sys
import os.path as path
import matplotlib.pyplot as plt  # plot_durations below uses matplotlib-style calls
import torch
import torch.nn as nn
import torch.optim as optim
from Market import Market
import torchvision
import time
from cnnrnn2 import CNNRNN2
from memory import ReplayMemory, Experience
# from transformers import get_cosine_schedule_with_warmup
import transformers
action_kind = 3
max_episode = 5000
screen_height = 100
screen_width = 140
data_size = 250
visit_cnt = [0] * action_kind
# replay_buffer = deque()
epsilon = 0.3
dis = 0.9
BATCH_SIZE = 8
# GAMMA = 0.999
GAMMA = 0.99
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
TARGET_UPDATE = 10
steps_done = 0
loss = None
WINDOW_START = 0
WINDOW_SIZE = 1500
# for i in range(torch.cuda.device_count()):
# print(torch.cuda.get_device_name(i))
device_str = "cuda"
device = torch.device(device_str)
memory = ReplayMemory(BATCH_SIZE)
converter = torchvision.transforms.ToTensor()
market = Market()
train_net = CNNRNN2(device, screen_height, screen_width, 10, action_kind, 32, 2).to(device)
train_net = nn.DataParallel(train_net, device_ids=[0,1]).to(device)
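# note: nn.DataParallel with device_ids=[0, 1] assumes two visible CUDA GPUs;
# on a single-GPU machine drop this wrapper or use device_ids=[0]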
episode_durations = []
optimizer = optim.RMSprop(train_net.parameters(), lr=0.0001, eps=0.00000001)
# scheduler = get_cosine_schedule_with_warmup(optimizer, 5, base_lr=0.3, final_lr=0.01)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer,
                                                         num_warmup_steps=5,
                                                         num_training_steps=25)
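# the warmup scheduler raises the learning rate linearly from 0 to the optimizer's initial lr
# over num_warmup_steps calls to scheduler.step(), then decays it along a cosine curve toward 0
# by num_training_steps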
def optimize_action(memory):
    if len(memory) < BATCH_SIZE:
        return None
    optimizer.zero_grad()
    epsode = memory.pop(BATCH_SIZE)
    batch = Experience(*zip(*epsode))
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    state_action_values = train_net(state_batch).gather(1, action_batch)
    # optimizer = optim.RMSprop(train_net.parameters(), 0.01)
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, action_batch)
    # Optimize the model
    # optimizer.zero_grad()
    loss.backward()
    for param in train_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
    return loss
# def select_action(df, idx):
# try:
# global steps_done
# sample = random.random()
# eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
# steps_done += 1
# if sample > eps_threshold:
# if df.loc[idx, "closemax"] == df.loc[idx, "close"]:
# action = 2
# return action
# elif df.loc[idx, "closemin"] == df.loc[idx, "close"]:
# action = 1
# return action
# else:
# return 0
# else:
# action = random.randrange(action_kind)
# return action
# except Exception as ex:
# exc_type, exc_obj, exc_tb = sys.exc_info()
# fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
# print("`select_action -> exception! %s : %s %d" % (str(ex) , fname, exc_tb.tb_lineno))
# return 0
def select_action(df, idx):
    try:
        if df.loc[idx, "closemax"] == df.loc[idx, "close"]:
            action = 2
            return action
        elif df.loc[idx, "closemin"] == df.loc[idx, "close"]:
            action = 1
            return action
        else:
            return 0
    except Exception as ex:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print("`select_action -> exception! %s : %s %d" % (str(ex), fname, exc_tb.tb_lineno))
        return 0
def get_chart(market, idx, max_data):
    img = market.get_chart(idx, max_data=max_data)
    if img is None:
        return None
    # img = Image.fromarray(np.uint8(cm.gist_earth(plt.io.to_image(fig, format='png')*255)))
    # im = Image.fromarray(img, bytes=True)
    # im = Image.fromarray(np.uint8(cm.gist_earth(img))/255)
    # im = Image.fromarray(np.uint8(img)/255)
    # img = img.resize((700, 500), resample=Image.BICUBIC)
    # img = Image.fromarray(cm.gist_earth(plt.io.to_image(fig, format='png'), bytes=True))
    # display(img)
    # chart = converter(img).unsqueeze(0).to(device)
    chart = converter(img).unsqueeze(0)
    return chart
def plot_durations(last_chart, curr_chart):
    plt.figure()
    # plt.subplot(1,2,1)
    img = plt.imshow(last_chart.cpu().squeeze(0).permute(1, 2, 0).numpy(), interpolation='none')
    plt.title('Example extracted screen')
    plt.figure(2)
    # plt.subplot(1,2,2)
    plt.clf()
    durations_t = torch.tensor(episode_durations, dtype=torch.float)
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())
    # plt.show()
    # Take 100-episode averages and plot them too
    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())
    img.set_data(curr_chart.cpu().squeeze(0).permute(1, 2, 0).numpy())
    plt.pause(0.01)  # pause a bit so that plots are updated
    clear_output(wait=True)
    display(plt.gcf())
def main():
    if path.exists("pt/train_cnnrnn2_{}.pt".format(device_str)):
        train_net.load_state_dict(torch.load("pt/train_cnnrnn2_{}.pt".format(device_str)))
    train_net.train()
    for _ in range(10):
        df = market.get_data()
        # df = df.head(WINDOW_SIZE)
        for epoch in range(max_episode):
            account = Account(df, 50000000)
            account.reset()
            for idx, _ in enumerate(df.index, start=data_size):
                try:
                    since = time.time()
                    curr_chart = get_chart(market, idx, data_size)
                    reward = 0
                    num_action = select_action(df, idx)
                    reward, real_action = account.exec_action(num_action, idx)
                    print("idx:%d==>action:%d, price:%.2f" % (idx, num_action, df.loc[idx, 'close']))
                    reward = torch.tensor([reward], device=device)
                    action = torch.tensor([[num_action]], device=device, dtype=torch.int64)
                    memory.push(curr_chart, action, reward)
                    while len(memory) >= BATCH_SIZE:
                        optimizer.zero_grad()
                        epsode = memory.pop(BATCH_SIZE)
                        batch = Experience(*zip(*epsode))
                        state_batch = torch.cat(batch.state)
                        action_batch = torch.cat(batch.action)
                        state_action_values = train_net(state_batch).gather(1, action_batch)
                        # optimizer = optim.RMSprop(train_net.parameters(), 0.01)
                        criterion = nn.SmoothL1Loss()
                        loss = criterion(state_action_values, action_batch)
                        # Optimize the model
                        # optimizer.zero_grad()
                        loss.backward()
                        for param in train_net.parameters():
                            param.grad.data.clamp_(-1, 1)
                        optimizer.step()
                        if loss is not None:
                            print("epoch[%d:%d] epsode is next loss[%.10f]" % (epoch, idx, loss.item()))
                    if idx % TARGET_UPDATE == 0:
                        torch.save(train_net.state_dict(), "pt/train_cnnrnn2_{}.pt".format(device_str))
                    spend = time.time() - since
                    print("idx:%d price [%.4f] unit[%.4f] used time[%.2f] agent rate:%.05f remind money:%.02f"
                          % (idx, df.loc[idx, 'close'], account.unit, spend, account.rate, account.balance + account.unit * df.loc[idx, 'close']))
                    if account.is_bankrupt():
                        break
                    if idx == df.index.max():
                        break
                except Exception as ex:
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                    print("`recall_training -> exception! %s : %s %d" % (str(ex), fname, exc_tb.tb_lineno))
            scheduler.step()
        print("end training DQN")
    print('Complete Training')


if __name__ == "__main__":
    main()
Above is main_cnnrnn2.py, the training script for the three models. It should be possible to train each of them by changing only the import line.
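For example, assuming each model file is saved under the name given in its caption and all three expose a class called CNNRNN2 as shown above, switching networks is a one-line change at the top of main_cnnrnn2.py:

# from cnnrnn2 import CNNRNN2   # CNN + RNN
from cnnlstm import CNNRNN2     # CNN + LSTM
# from cnngru import CNNRNN2    # CNN + GRU

Everything else (the constructor arguments, the replay memory, and the training loop) stays the same.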
Featured image source: https://dgkim5360.tistory.com/entry/understanding-long-short-term-memory-lstm-kr