In [1]:
import wandb
import os
import pandas as pd
import plotly.graph_objs as go
from pytorch_forecasting.models.temporal_fusion_transformer import TemporalFusionTransformer
from pytorch_forecasting.data.timeseries import TimeSeriesDataSet
import numpy as np
import torch

from ml.model import load_model_from_wandb
from ml.data import get_dataset_from_wandb, get_train_validation_split, build_time_series_dataset

## Evaluate model predicting price (with Quantiles)

In [2]:
RUN_ID = "filipstefaniuk/wne-masters-thesis-testing/x352wmas"

# Resolve the W&B run; its config drives both the split and the dataset construction below.
run = wandb.Api().run(RUN_ID)

# In-sample data is divided into train/validation; out-of-sample data is kept as the test split.
in_sample, out_of_sample = get_dataset_from_wandb(run)
train_data, valid_data = get_train_validation_split(run.config, in_sample)
test_data = out_of_sample

# Build one time-series dataset per split, in train -> valid -> test order.
train, valid, test = (
    build_time_series_dataset(run.config, split_data)
    for split_data in (train_data, valid_data, test_data)
)

# Model loaded from the same run.
model = load_model_from_wandb(run)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Downloading large artifact btc-usdt-5m:latest, 310.03MB. 10 files... 
[34m[1mwandb[0m:   10 of 10 files downloaded.  
Done. 0:0:0.4
/home/filip/anaconda3/envs/wne-msc-thesis/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/filip/anaconda3/envs/wne-msc-thesis/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [5]:
# Raw-mode inference on the validation split. `return_index=True` makes the result
# carry the index alongside the output (used as `predictions.index` in the next cell).
# To score the out-of-sample split instead, pass
# `test.to_dataloader(train=False, batch_size=64)` here and evaluate against the
# matching data in the next cell rather than `valid_data`.
predictions = model.predict(
    valid.to_dataloader(train=False, batch_size=64),
    mode="raw",
    return_index=True,
    trainer_kwargs={"logger": False},  # passes logger=False through to the trainer
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.



In [6]:
from strategy.strategy import BuyAndHoldStrategy, PriceQuantilePredictionStrategy
from strategy.evaluation import evaluate_strategy

# Start from the index returned by `predict(..., return_index=True)` and attach the
# model output. `list(...)` splits the squeezed array along its first axis, so each
# row stores one element of it (presumably the per-row quantile vector — confirm shape).
model_predictions = pd.DataFrame(predictions.index)
model_predictions['prediction'] = list(torch.squeeze(predictions.output.prediction).cpu().numpy())

# NOTE(review): `exchange_fee` is passed only to the strategy run; the baseline uses
# whatever default `evaluate_strategy` has. Confirm the comparison is like-for-like.
result_baseline = evaluate_strategy(valid_data, BuyAndHoldStrategy())
result_strategy = evaluate_strategy(valid_data, PriceQuantilePredictionStrategy(model_predictions), exchange_fee=0.001)

print('Baseline returns', result_baseline['total_return'])
print('Strategy returns', result_strategy['total_return'])

# Name the traces and label the figure so the comparison is readable on its own
# (unnamed traces show up as "trace 0" / "trace 1" in the legend).
go.Figure(
    data=[
        go.Scatter(y=result_baseline['portfolio_value'], name='Buy and hold (baseline)'),
        go.Scatter(y=result_strategy['portfolio_value'], name='Quantile prediction strategy'),
    ],
    layout={
        'title': {'text': 'Portfolio value on validation data: quantile strategy vs. buy and hold'},
        'xaxis': {'title': {'text': 'Evaluation step'}},
        'yaxis': {'title': {'text': 'Portfolio value'}},
    },
).show()

Baseline returns 0.22730028053392815
Strategy returns 0.15714438080714976


## Evaluate model predicting returns (with GMADL loss)

In [8]:
RUN_ID = "filipstefaniuk/wne-masters-thesis-testing/m6zetdhf"

# Resolve the second W&B run; its config drives the split and dataset construction below.
run = wandb.Api().run(RUN_ID)
in_sample, out_of_sample = get_dataset_from_wandb(run)

# Rebind `test_data` for this run too. The price-model section above sets it, and
# without this line it would keep pointing at that earlier run's out-of-sample data
# while `test` below is built from the current run.
test_data = out_of_sample
train_data, valid_data = get_train_validation_split(run.config, in_sample)

# One time-series dataset per split, all built with this run's config.
train = build_time_series_dataset(run.config, train_data)
valid = build_time_series_dataset(run.config, valid_data)
test = build_time_series_dataset(run.config, test_data)

# Model loaded from the same run.
model = load_model_from_wandb(run)

[34m[1mwandb[0m: Downloading large artifact btc-usdt-5m:latest, 310.03MB. 10 files... 
[34m[1mwandb[0m:   10 of 10 files downloaded.  
Done. 0:0:0.4

Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.



In [9]:
# Raw-mode inference on the validation split. `return_index=True` makes the result
# carry the index alongside the output (used as `predictions.index` in the next cell).
# To score the out-of-sample split instead, pass
# `test.to_dataloader(train=False, batch_size=64)` here and evaluate against the
# matching data in the next cell rather than `valid_data`.
predictions = model.predict(
    valid.to_dataloader(train=False, batch_size=64),
    mode="raw",
    return_index=True,
    trainer_kwargs={"logger": False},  # passes logger=False through to the trainer
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.



In [13]:
from strategy.evaluation import evaluate_strategy
from strategy.strategy import BuyAndHoldStrategy, ReturnsPredictionStrategy

# Start from the index returned by `predict(..., return_index=True)` and attach the
# model output flattened to 1-D (assumes exactly one predicted value per index row;
# pandas rejects the assignment if the lengths differ).
model_predictions = pd.DataFrame(predictions.index)
model_predictions['prediction'] = predictions.output.prediction.reshape(-1).cpu().numpy()

# NOTE(review): no `exchange_fee` is passed here, whereas the quantile-strategy
# section passes `exchange_fee=0.001` for its strategy run. Confirm which fee
# assumption is intended before comparing results across the two sections.
result_baseline = evaluate_strategy(valid_data, BuyAndHoldStrategy())
result_strategy = evaluate_strategy(valid_data, ReturnsPredictionStrategy(model_predictions, threshold=0.001))

print('Baseline returns', result_baseline['total_return'])
print('Strategy returns', result_strategy['total_return'])

# Name the traces and label the figure so the comparison is readable on its own
# (unnamed traces show up as "trace 0" / "trace 1" in the legend).
go.Figure(
    data=[
        go.Scatter(y=result_baseline['portfolio_value'], name='Buy and hold (baseline)'),
        go.Scatter(y=result_strategy['portfolio_value'], name='Returns prediction strategy'),
    ],
    layout={
        'title': {'text': 'Portfolio value on validation data: returns strategy vs. buy and hold'},
        'xaxis': {'title': {'text': 'Evaluation step'}},
        'yaxis': {'title': {'text': 'Portfolio value'}},
    },
).show()

Baseline returns 0.2240889082270856
Strategy returns -0.3902088328046026


In [7]:
# Plot predictions
# NOTE(review): `model_predictions` is assigned by both evaluation cells in this
# notebook, so this figure shows whichever one ran most recently. Run it directly
# after the evaluation cell whose predictions you want to inspect.
go.Figure(
    data=[go.Scatter(y=model_predictions['prediction'], name='Prediction')],
    layout={
        'title': {'text': 'Model predictions'},
        'xaxis': {'title': {'text': 'Prediction index'}},
        'yaxis': {'title': {'text': 'Predicted value'}},
    },
).show()