diff --git a/.gitignore b/.gitignore index f69ab02..602dd0b 100644 --- a/.gitignore +++ b/.gitignore @@ -200,4 +200,6 @@ lightning_logs/ wne-masters-thesis-testing/ notebooks/cache/ notebooks/images/ -.DS_Store \ No newline at end of file +.DS_Store +data/ +venv-py310/ diff --git a/configs/evaluations/informer-btcusdt-5m-gmadl.yaml b/configs/evaluations/informer-btcusdt-5m-gmadl.yaml index 9a6088a..bfa9469 100644 --- a/configs/evaluations/informer-btcusdt-5m-gmadl.yaml +++ b/configs/evaluations/informer-btcusdt-5m-gmadl.yaml @@ -17,9 +17,10 @@ parameters: data: parameters: dataset: - value: "btc-usdt-5m:latest" + value: "btc-5m-features-full:latest" validation: value: 0.2 sliding_window: - min: 0 - max: 5 + # min: 0 # Use values for grid search + # max: 5 + values: [0, 1, 2, 3, 4, 5] # Explicitly list values for grid search diff --git a/configs/experiments/informer-btcusdt-5m-gmadl.yaml b/configs/experiments/informer-btcusdt-5m-gmadl.yaml index 7f15d5d..45a616d 100644 --- a/configs/experiments/informer-btcusdt-5m-gmadl.yaml +++ b/configs/experiments/informer-btcusdt-5m-gmadl.yaml @@ -8,7 +8,7 @@ max_epochs: value: 40 data: value: - dataset: "btc-usdt-5m:latest" + dataset: "btc-5m-features-full:latest" sliding_window: 0 validation: 0.2 fields: diff --git a/configs/experiments/informer-btcusdt-5m-quantile.yaml b/configs/experiments/informer-btcusdt-5m-quantile.yaml index 03cf019..ffab292 100644 --- a/configs/experiments/informer-btcusdt-5m-quantile.yaml +++ b/configs/experiments/informer-btcusdt-5m-quantile.yaml @@ -8,9 +8,11 @@ max_epochs: value: 30 data: value: - dataset: "btc-usdt-5m:latest" + # in_sample_artifact_name: "btc-5m-features-in_sample:latest" # Reverted + # out_of_sample_artifact_name: "btc-5m-features-out_of_sample:latest" # Reverted + dataset: "btc-5m-features-full:latest" # Use a single artifact name sliding_window: 0 - validation: 0.2 + validation: 0.2 # This likely controls the in-sample vs out-of-sample split in train.py fields: value: time_index: "time_index" diff --git a/configs/sweeps/informer-btcusdt-5m-gmadl.yaml b/configs/sweeps/informer-btcusdt-5m-gmadl.yaml index d78fee2..fe66044 100644 --- a/configs/sweeps/informer-btcusdt-5m-gmadl.yaml +++ b/configs/sweeps/informer-btcusdt-5m-gmadl.yaml @@ -8,7 +8,7 @@ command: - "./configs/experiments/informer-btcusdt-5m-gmadl.yaml" - "--patience" - "15" -method: random +method: bayes metric: goal: minimize name: val_loss diff --git a/configs/sweeps/informer-btcusdt-5m-quantile.yaml b/configs/sweeps/informer-btcusdt-5m-quantile.yaml index 4b54f00..66bf6af 100644 --- a/configs/sweeps/informer-btcusdt-5m-quantile.yaml +++ b/configs/sweeps/informer-btcusdt-5m-quantile.yaml @@ -13,6 +13,8 @@ metric: goal: minimize name: val_loss parameters: + val_check_interval: + value: 1.0 # Validate once per epoch past_window: distribution: int_uniform min: 20 diff --git a/data/.gitignore b/data/.gitignore deleted file mode 100644 index f59ec20..0000000 --- a/data/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/notebooks/btcusdt_5m_evaluation.ipynb b/notebooks/btcusdt_5m_evaluation.ipynb index e19fca1..84587e4 100644 --- a/notebooks/btcusdt_5m_evaluation.ipynb +++ b/notebooks/btcusdt_5m_evaluation.ipynb @@ -2,7 +2,34 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload 2 " + ] + }, + { + "cell_type": "code", + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -41,24 +68,23 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Downloading large artifact btc-usdt-5m:latest, 745.12MB. 12 files... \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 12 of 12 files downloaded. \n", - "Done. 0:0:1.3\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: Downloading large artifact btc-5m-features-full:latest, 72.63MB. 1 files... \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n", + "Done. 0:0:0.3\n" ] } ], "source": [ "data_windows = get_data_windows(\n", " 'wne-masters-thesis-testing',\n", - " 'btc-usdt-5m:latest',\n", + " \"btc-5m-features-full:latest\",\n", " min_window=0, \n", " max_window=5\n", ")" @@ -66,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -89,19 +115,145 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "100%|██████████| 3840/3840 [00:18<00:00, 206.53it/s]\n", - "100%|██████████| 3840/3840 [00:18<00:00, 205.12it/s]\n", - "100%|██████████| 3840/3840 [00:18<00:00, 205.54it/s]\n", - "100%|██████████| 3840/3840 [00:18<00:00, 204.08it/s]\n", - "100%|██████████| 3840/3840 [00:18<00:00, 202.37it/s]\n", - "100%|██████████| 3840/3840 [00:18<00:00, 202.96it/s]\n" + "Running 3840 tasks in parallel with joblib (n_jobs=4)...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "817c3009ddf94691a6b152c4daf25984", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/3840 [00:00fast_window_size", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "slow_window_size", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "signal_window_size", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "short_sell", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "mod_ir", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "610", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "1597", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "False", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "6.8", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "610", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "1597", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "False", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "6.56", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "1597", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "610", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "False", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "6.55", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "987", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "False", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "6.02", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "610", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "2584", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "False", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "4.36", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + } + ], + "coloraxis": { + "cmid": 1, + "colorbar": { + "len": 0.5, + "thickness": 40, + "title": { + "text": "mod_ir" + }, + "x": 0.95, + "xanchor": "right", + "y": 1, + "yanchor": "top" + }, + "colorscale": [ + [ + 0, + "rgb(208, 88, 126)" + ], + [ + 0.16666666666666666, + "rgb(217, 137, 148)" + ], + [ + 0.3333333333333333, + "rgb(229, 185, 173)" + ], + [ + 0.5, + "rgb(241, 234, 200)" + ], + [ + 0.6666666666666666, + "rgb(177, 199, 179)" + ], + [ + 0.8333333333333334, + "rgb(114, 170, 161)" + ], + [ + 1, + "rgb(0, 147, 146)" + ] + ] + }, + "height": 530, + "margin": { + "b": 20, + "l": 20, + "r": 20, + "t": 40 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 28 + }, + "text": "Hyperparameters search results", + "x": 0.5, + "xanchor": "center", + "y": 0.98, + "yanchor": "top" + }, + "xaxis": { + "dtick": 1, + "gridwidth": 2, + "showticklabels": false, + "tick0": -0.5, + "ticks": "", + "zeroline": false + }, + "yaxis": { + "autorange": "reversed", + "domain": [ + 0, + 0.4 + ], + "dtick": 1, + "gridwidth": 2, + "showticklabels": false, + "tick0": 0.5, + "ticks": "", + "zeroline": false + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# plot_sweep_results(pd.DataFrame([result for result, _ in macd_sweep_results[0]]), parameters=MACD_PARAMS.keys(), objective=METRIC)\n", - "# macd_sweep_results[0][:3]\n", - "# macd_best_strategies" + "\n", + "display(plot_sweep_results(pd.DataFrame([result for result, _ in macd_sweep_results[0]]), parameters=MACD_PARAMS.keys(), objective=METRIC))\n", + "macd_sweep_results[0][:3]\n", + "macd_best_strategies" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 18, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "100%|██████████| 11088/11088 [00:51<00:00, 216.64it/s]\n", - "100%|██████████| 11088/11088 [00:52<00:00, 210.53it/s]\n", - "100%|██████████| 11088/11088 [00:52<00:00, 210.59it/s]\n", - "100%|██████████| 11088/11088 [00:52<00:00, 210.06it/s]\n", - "100%|██████████| 11088/11088 [00:52<00:00, 209.24it/s]\n", - "100%|██████████| 11088/11088 [00:52<00:00, 209.32it/s]\n" + "Running 11088 tasks in parallel with joblib (n_jobs=7)...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e9ce10f364e34bec8ae52d8b50000c54", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/11088 [00:00window_size", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "enter_long", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "exit_long", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "enter_short", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "exit_short", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#ffffff" + }, + "showarrow": false, + "text": "mod_ir", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 0, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "13.0", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "90.0", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "15.0", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "87.44", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 1, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "13.0", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "90.0", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "15.0", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "5.0", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "63.66", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 2, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "13.0", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "90.0", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "15.0", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "63.66", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 3, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "13.0", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "90.0", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "15.0", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "10.0", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "45.78", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 4, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "8.0", + "x": -0.45, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "95.0", + "x": 0.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 1.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "10.0", + "x": 2.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "nan", + "x": 3.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + }, + { + "align": "left", + "font": { + "color": "#000000" + }, + "showarrow": false, + "text": "42.58", + "x": 4.55, + "xanchor": "left", + "xref": "x", + "y": 5, + "yref": "y" + } + ], + "coloraxis": { + "cmid": 1, + "colorbar": { + "len": 0.5, + "thickness": 40, + "title": { + "text": "mod_ir" + }, + "x": 0.95, + "xanchor": "right", + "y": 1, + "yanchor": "top" + }, + "colorscale": [ + [ + 0, + "rgb(208, 88, 126)" + ], + [ + 0.16666666666666666, + "rgb(217, 137, 148)" + ], + [ + 0.3333333333333333, + "rgb(229, 185, 173)" + ], + [ + 0.5, + "rgb(241, 234, 200)" + ], + [ + 0.6666666666666666, + "rgb(177, 199, 179)" + ], + [ + 0.8333333333333334, + "rgb(114, 170, 161)" + ], + [ + 1, + "rgb(0, 147, 146)" + ] + ] + }, + "height": 530, + "margin": { + "b": 20, + "l": 20, + "r": 20, + "t": 40 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 28 + }, + "text": "Hyperparameters search results", + "x": 0.5, + "xanchor": "center", + "y": 0.98, + "yanchor": "top" + }, + "xaxis": { + "dtick": 1, + "gridwidth": 2, + "showticklabels": false, + "tick0": -0.5, + "ticks": "", + "zeroline": false + }, + "yaxis": { + "autorange": "reversed", + "domain": [ + 0, + 0.4 + ], + "dtick": 1, + "gridwidth": 2, + "showticklabels": false, + "tick0": 0.5, + "ticks": "", + "zeroline": false + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# plot_sweep_results(pd.DataFrame([result for result, _ in rsi_sweep_results[0]]), parameters=RSI_PARAMS.keys(), objective=METRIC)" + "plot_sweep_results(pd.DataFrame([result for result, _ in rsi_sweep_results[0]]), parameters=RSI_PARAMS.keys(), objective=METRIC)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -220,20 +100354,73 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "100%|██████████| 1176/1176 [02:10<00:00, 9.04it/s]\n", - "100%|██████████| 1176/1176 [02:07<00:00, 9.19it/s]\n", - "100%|██████████| 1176/1176 [02:10<00:00, 9.02it/s]\n", - "100%|██████████| 1176/1176 [02:16<00:00, 8.59it/s]\n", - "100%|██████████| 1176/1176 [02:13<00:00, 8.82it/s]\n", - "100%|██████████| 1176/1176 [02:12<00:00, 8.85it/s]\n" + "Running 1176 tasks in parallel with joblib (n_jobs=7)...\n" ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8482ca53c5934b9781af871d5836392f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1176 [00:00 0: + logging.warning(f"Dropped {rows_dropped} rows containing NaNs after forward filling.") + + # Final check + if final_df.isna().any().any(): + logging.warning(f"NaN values still present after processing:\n{final_df.isna().sum()[final_df.isna().sum() > 0]}") + else: + logging.info("No remaining NaN values detected.") + + + if final_df.empty: + logging.error("Final DataFrame is empty after processing and NaN handling.") + return + + # --- Removed Data Splitting Logic --- + # split_ratio = 0.8 # Use 80% for in-sample + # split_index = int(len(final_df) * split_ratio) + # + # in_sample_df = final_df.iloc[:split_index] + # out_of_sample_df = final_df.iloc[split_index:] + # + # logging.info(f"Split data: {len(in_sample_df)} in-sample rows, {len(out_of_sample_df)} out-of-sample rows.") + # logging.info(f"In-sample time range: {in_sample_df['time_index'].min()} to {in_sample_df['time_index'].max()}") + # logging.info(f"Out-of-sample time range: {out_of_sample_df['time_index'].min()} to {out_of_sample_df['time_index'].max()}") + # --- End Split Removal --- + + # --- Log Single Artifact to W&B --- Modified + logging.info(f"Logging full dataset artifact to W&B project '{wandb.run.project}', run '{wandb.run.name}'...") + + try: + with tempfile.TemporaryDirectory() as tempdir: + # Save the entire final_df + full_data_path = os.path.join(tempdir, 'full_data.parquet') + final_df.to_parquet(full_data_path, index=False) + logging.info(f"Temporary file saved to {tempdir}") + + # Create and log the single artifact + full_artifact = wandb.Artifact( + name=args.full_dataset_artifact_name, # Use new arg + type='dataset', + description=f'Full BTC 5min features data ({len(final_df)} rows). Prepared by run {wandb.run.id}.', + metadata={'rows': len(final_df)} + ) + full_artifact.add_file(full_data_path) + wandb.log_artifact(full_artifact) + logging.info(f"Logged full dataset artifact: {args.full_dataset_artifact_name}") + + # --- Removed logging for separate artifacts --- + # # Create and log the IN-SAMPLE artifact + # in_sample_artifact = wandb.Artifact( + # name=args.in_sample_artifact_name, # Use arg + # type='dataset', + # description=f'In-sample BTC 5min data ({len(in_sample_df)} rows). Prepared by run {wandb.run.id}.', + # metadata={'rows': len(in_sample_df), 'split': 'in_sample'} + # ) + # in_sample_artifact.add_file(in_sample_path) + # wandb.log_artifact(in_sample_artifact) + # logging.info(f"Logged in-sample artifact: {args.in_sample_artifact_name}") + # + # # Create and log the OUT-OF-SAMPLE artifact + # out_of_sample_artifact = wandb.Artifact( + # name=args.out_of_sample_artifact_name, # Use arg + # type='dataset', + # description=f'Out-of-sample BTC 5min data ({len(out_of_sample_df)} rows). Prepared by run {wandb.run.id}.', + # metadata={'rows': len(out_of_sample_df), 'split': 'out_of_sample'} + # ) + # out_of_sample_artifact.add_file(out_of_sample_path) + # wandb.log_artifact(out_of_sample_artifact) + # logging.info(f"Logged out-of-sample artifact: {args.out_of_sample_artifact_name}") + + logging.info("Artifact logged successfully.") + + except Exception as e: + logging.error(f"Error logging artifacts to W&B: {e}") + wandb.run.finish(exit_code=1) # Finish run with error + return + # --- End W&B Logging --- + + wandb.run.finish() # Finish run successfully + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Prepare BTC-USDT 5-minute data and log to W&B.") + parser.add_argument( + "--db-pattern", + default="/home/yasha/develop/data/combined.coinbase_1min_hist.db", + help="Pattern or exact path to find input SQLite database file(s)." + ) + parser.add_argument( + "--db-table", + default="combined_hist_1min", + help="Name of the table containing kline data within the SQLite files." + ) + parser.add_argument( + "--vix-file", + default="data/vix_daily.csv", + help="Path to the VIX index CSV file." + ) + parser.add_argument( + "--fear-greed-file", + default="data/fear_greed_index.csv", + help="Path to the Crypto Fear & Greed Index CSV file." + ) + parser.add_argument( + "--eff-rate-file", + default="data/DFF.csv", + help="Path to the Effective Rates CSV file." + ) + parser.add_argument( + "--wandb-project", + default="wne-masters-thesis-testing", + help="W&B project name." + ) + parser.add_argument( + "--wandb-run-name", + default="prepare-btc-data", + help="W&B run name for this preparation job." + ) + parser.add_argument( + "--wandb-notes", + default=None, + help="Optional notes for the W&B run." + ) + parser.add_argument( + "--full-dataset-artifact-name", + default="btc-5m-features-full", # Match YAML default + help="Name for the single W&B artifact containing the full dataset." + ) + args = parser.parse_args() + + # --- Initialize W&B Run --- New + run = wandb.init( + project=args.wandb_project, + name=args.wandb_run_name, + notes=args.wandb_notes, + job_type="data-preparation", + config=vars(args) # Log command line args + ) + # --- End W&B Init --- + + # --- Pass args to main --- Modified + main( + db_pattern=args.db_pattern, + db_table=args.db_table, + vix_file=args.vix_file, + fear_greed_file=args.fear_greed_file, + eff_rate_file=args.eff_rate_file, + args=args # Pass all args for artifact names etc. + ) \ No newline at end of file diff --git a/scripts/train.py b/scripts/train.py index 15ed1cc..2cde5db 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -7,6 +7,7 @@ import tempfile import torch import lightning.pytorch as pl import pandas as pd +import warnings from lightning.pytorch.utilities.model_summary import ModelSummary from lightning.pytorch.callbacks.early_stopping import EarlyStopping @@ -21,6 +22,13 @@ from ml.data import ( build_time_series_dataset ) +# --- Suppress specific sklearn UserWarning --- +warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.utils.validation") +# --- + +# --- Set Matmul Precision for Tensor Cores --- +torch.set_float32_matmul_precision('medium') +# --- def get_args(): parser = argparse.ArgumentParser( @@ -60,7 +68,7 @@ def get_args(): parser.add_argument( '-v', '--val-check-interval', - default=300, + default=100, type=int, help="Run validation every n batches." ) diff --git a/src/ml/data.py b/src/ml/data.py index b84cdbc..d57e6b5 100644 --- a/src/ml/data.py +++ b/src/ml/data.py @@ -1,60 +1,155 @@ import os - -import pandas as pd import wandb -from pytorch_forecasting.data.timeseries import TimeSeriesDataSet +import pandas as pd +import logging + +from pytorch_forecasting.data import TimeSeriesDataSet -def get_dataset_from_wandb(run, window=None): - artifact_name = f"{run.project}/{run.config['data']['dataset']}" - artifact = wandb.Api().artifact(artifact_name) - base_path = artifact.download() +def get_dataset_from_wandb(run): + """Downloads the specified dataset artifact and splits it based on sliding window and validation split.""" + + # Construct artifact name from run config + # Example: "btc-5m-features-full:latest" + dataset_artifact_name = run.config.get('data', {}).get('dataset', None) + if not dataset_artifact_name: + raise ValueError("Dataset artifact name not found in run configuration (run.config.data.dataset)") + + full_artifact_name = f"{run.project}/{dataset_artifact_name}" + logging.info(f"Attempting to download artifact: {full_artifact_name}") + + try: + artifact = wandb.Api().artifact(full_artifact_name) + base_path = artifact.download() + logging.info(f"Artifact downloaded to: {base_path}") + except Exception as e: + logging.error(f"Failed to download artifact {full_artifact_name}: {e}") + raise # Re-raise the exception - name = artifact.metadata['name'] - in_sample_name =\ - f"in-sample-{window or run.config['data']['sliding_window']}" - in_sample_data = pd.read_csv(os.path.join( - base_path, name + '-' + in_sample_name + '.csv')) - out_of_sample_name =\ - f"out-of-sample-{window or run.config['data']['sliding_window']}" - out_of_sample_data = pd.read_csv(os.path.join( - base_path, name + '-' + out_of_sample_name + '.csv')) + # Load the single parquet file + full_data_file = os.path.join(base_path, 'full_data.parquet') + logging.info(f"Loading full dataset from: {full_data_file}") + if not os.path.exists(full_data_file): + raise FileNotFoundError(f"Expected parquet file 'full_data.parquet' not found in artifact directory: {base_path}") + + full_df = pd.read_parquet(full_data_file) + logging.info(f"Loaded full dataset with shape: {full_df.shape}") - return in_sample_data, out_of_sample_data + # --- Get Parameters for Splitting --- Modified + # Validation split now defines the size of the FINAL TEST SET + test_set_fraction = run.config.get('data', {}).get('validation', 0.2) + if not (0 < test_set_fraction < 1): + raise ValueError(f"Invalid final test set fraction (config.data.validation): {test_set_fraction}.") + + # Sliding window index determines the end of the current in-sample data + sliding_window_idx = run.config.get('data', {}).get('sliding_window', 0) + # *** Assumption: Total number of windows (e.g., 0 to 5 means 6 windows) *** + # This should ideally match the sweep range if sweeping over sliding_window + total_num_windows = 6 # Hardcoded assumption - Adjust if needed or make configurable + if not (0 <= sliding_window_idx < total_num_windows): + raise ValueError(f"Invalid sliding_window index: {sliding_window_idx}. Must be between 0 and {total_num_windows-1}.") + # --- + + # --- Calculate Splits --- Modified + N = len(full_df) + # End index of the pool used for all training/validation windows (excludes final test set) + train_val_pool_end_idx = int(N * (1 - test_set_fraction)) + # Size of each window's data block within the training/validation pool + window_block_size = train_val_pool_end_idx // total_num_windows + if window_block_size == 0: + raise ValueError("Dataset too small for the number of windows and test set fraction.") + + # End index for the current window's in-sample data (expanding window) + current_in_sample_end_idx = window_block_size * (sliding_window_idx + 1) + # Ensure the last window uses all data up to the test set + if sliding_window_idx == total_num_windows - 1: + current_in_sample_end_idx = train_val_pool_end_idx + + in_sample_df = full_df.iloc[0 : current_in_sample_end_idx].copy() + # Out-of-sample is now the fixed final test set + out_of_sample_df = full_df.iloc[train_val_pool_end_idx :].copy() + + logging.info(f"Sliding Window: {sliding_window_idx}, Test Fraction: {test_set_fraction:.1%}") + logging.info(f"In-sample indices: 0:{current_in_sample_end_idx}, Out-of-sample (Test) indices: {train_val_pool_end_idx}:{N}") + logging.info(f"Returned data shapes: In-sample={in_sample_df.shape}, Out-of-sample={out_of_sample_df.shape}") + # --- + + return in_sample_df, out_of_sample_df -def get_train_validation_split(config, in_sample_data): - validation_part = config['data']['validation'] - train_data = in_sample_data.iloc[:int( - len(in_sample_data) * (1 - validation_part))] - val_data = in_sample_data.iloc[len(train_data) - config['past_window']:] - - return train_data, val_data +def get_train_validation_split(config, data): + """Splits the provided (in-sample) data into training and validation sets.""" + # data here is the in_sample_df for the current sliding window + + # Use the 'validation' fraction again, but now it defines the val set size *within* the in-sample data + # This takes the LATEST part of the current window for validation. + validation_fraction_within_window = config.get('data', {}).get('validation', 0.2) + if not (0 < validation_fraction_within_window < 1): + raise ValueError(f"Invalid validation fraction for train/val split (config.data.validation): {validation_fraction_within_window}.") + + N_in_sample = len(data) + # Calculate the start index of the validation set within the current in-sample data + validation_start_idx = int(N_in_sample * (1 - validation_fraction_within_window)) + + train_data = data.iloc[:validation_start_idx] + valid_data = data.iloc[validation_start_idx:] + + logging.info(f"Split in-sample data ({1-validation_fraction_within_window:.1%} / {validation_fraction_within_window:.1%}): {len(train_data)} train, {len(valid_data)} validation rows.") + return train_data, valid_data def build_time_series_dataset(config, data): - data = data.copy() - # TODO: Fix in dataset - data['weekday'] = data['weekday'].astype('str') - data['hour'] = data['hour'].astype('str') + """Builds TimeSeriesDataSet from configuration and data.""" + + fields = config.get('fields', {}) + time_idx = fields.get('time_index', 'time_idx') # Default if not specified + target = fields.get('target', 'target') + group_ids = fields.get('group_ids', []) + + # Extract features based on types defined in config + time_varying_known_reals = fields.get('dynamic_known_real', []) + time_varying_known_categoricals = fields.get('dynamic_known_cat', []) + time_varying_unknown_reals = fields.get('dynamic_unknown_real', []) + time_varying_unknown_categoricals = fields.get('dynamic_unknown_cat', []) + static_reals = fields.get('static_real', []) + static_categoricals = fields.get('static_cat', []) + + # Max lengths from config + max_encoder_length = config.get('past_window', 24) + max_prediction_length = config.get('future_window', 6) - time_series_dataset = TimeSeriesDataSet( - data, - time_idx=config['fields']['time_index'], - target=config['fields']['target'], - group_ids=config['fields']['group_ids'], - min_encoder_length=config['past_window'], - max_encoder_length=config['past_window'], - min_prediction_length=config['future_window'], - max_prediction_length=config['future_window'], - static_reals=config['fields']['static_real'], - static_categoricals=config['fields']['static_cat'], - time_varying_known_reals=config['fields']['dynamic_known_real'], - time_varying_known_categoricals=config['fields']['dynamic_known_cat'], - time_varying_unknown_reals=config['fields']['dynamic_unknown_real'], - time_varying_unknown_categoricals=config['fields'][ - 'dynamic_unknown_cat'], - randomize_length=False, + # Ensure all specified columns exist in the dataframe + required_cols = ( + [time_idx, target] + group_ids + + time_varying_known_reals + time_varying_known_categoricals + + time_varying_unknown_reals + time_varying_unknown_categoricals + + static_reals + static_categoricals ) + missing_cols = [col for col in required_cols if col not in data.columns] + if missing_cols: + raise ValueError(f"Missing required columns in DataFrame: {missing_cols}") - return time_series_dataset + logging.info("Building TimeSeriesDataSet...") + # Ensure target is float for regression/quantile tasks + # data[target] = data[target].astype(float) + + dataset = TimeSeriesDataSet( + data, + time_idx=time_idx, + target=target, + group_ids=group_ids, + max_encoder_length=max_encoder_length, + max_prediction_length=max_prediction_length, + static_categoricals=static_categoricals, + static_reals=static_reals, + time_varying_known_categoricals=time_varying_known_categoricals, + time_varying_known_reals=time_varying_known_reals, + time_varying_unknown_categoricals=time_varying_unknown_categoricals, + time_varying_unknown_reals=time_varying_unknown_reals, + add_relative_time_idx=True, # Often useful + add_target_scales=True, # Often useful + add_encoder_length=True, # Often useful + allow_missing_timesteps=True # Set based on your data characteristics + ) + logging.info("TimeSeriesDataSet built successfully.") + return dataset diff --git a/src/ml/model.py b/src/ml/model.py index 91bebbf..c103621 100644 --- a/src/ml/model.py +++ b/src/ml/model.py @@ -109,88 +109,104 @@ class Informer(BaseModelWithCovariates): output_size: Union[int, List[int]] = 1, loss=None, logging_metrics: nn.ModuleList = None, + actual_n_encoder_reals: int = -1, **kwargs): + # --- Call super().__init__ first --- super().__init__( loss=loss, logging_metrics=logging_metrics, **kwargs) + # --- + # Save hparams after super().__init__ so dataset parameters are available self.save_hyperparameters(ignore=['loss']) self.attention_type = attention_type - assert not static_reals - assert not static_categoricals + # --- Calculate n_encoder_reals using self.hparams (populated by save_hyperparameters) --- + n_encoder_reals = len(self.hparams.x_reals) + print(f"Initializing enc_real_embeddings with {n_encoder_reals} channels (derived from len(hparams.x_reals)).") + # --- + # assertions (can remain commented) + # assert isinstance(loss, PyTorchMetric), "Loss has to be PyTorch Metric" + # assert not static_reals # Ensure this line remains commented out + + # --- Use self.hparams for MultiEmbedding as well --- self.cat_embeddings = MultiEmbedding( - embedding_sizes=embedding_sizes, - embedding_paddings=embedding_paddings, - categorical_groups=categorical_groups, - x_categoricals=x_categoricals, + embedding_sizes=self.hparams.embedding_sizes, + embedding_paddings=self.hparams.embedding_paddings, + categorical_groups=self.hparams.categorical_groups, + x_categoricals=self.hparams.x_categoricals, ) - self.enc_real_embeddings = TokenEmbedding( - len(time_varying_reals_encoder), d_model) - self.enc_positional_embeddings = PositionalEmbedding(d_model) + # Initialize with the derived total number of continuous encoder variables + self.enc_real_embeddings = TokenEmbedding(n_encoder_reals, self.hparams.d_model) + self.enc_positional_embeddings = PositionalEmbedding(self.hparams.d_model) + + # Decoder embedding initialization using hparams + decoder_reals_list = self.hparams.time_varying_reals_decoder + print(f"Initializing dec_real_embeddings with {len(decoder_reals_list)} channels.") self.dec_real_embeddings = TokenEmbedding( - len(time_varying_reals_decoder), d_model) - self.dec_positional_embeddings = PositionalEmbedding(d_model) + len(decoder_reals_list), self.hparams.d_model) + self.dec_positional_embeddings = PositionalEmbedding(self.hparams.d_model) Attention = ProbSparseAttention \ - if attention_type == "prob" else FullAttention + if self.hparams.attention_type == "prob" else FullAttention + # --- Initialize Encoder/Decoder using self.hparams --- self.encoder = Encoder( [ EncoderLayer( AttentionLayer( - Attention(False, factor, attention_dropout=dropout, - output_attention=output_attention), - d_model, - n_attention_heads, + Attention(False, self.hparams.factor, attention_dropout=self.hparams.dropout, + output_attention=self.hparams.output_attention), + self.hparams.d_model, + self.hparams.n_attention_heads, mix=False, ), - d_model, - d_fully_connected, - dropout=dropout, - activation=activation, + self.hparams.d_model, + self.hparams.d_fully_connected, + dropout=self.hparams.dropout, + activation=self.hparams.activation, ) - for _ in range(n_encoder_layers) + for _ in range(self.hparams.n_encoder_layers) ], - [SelfAttentionDistil(d_model) for _ in range( - n_encoder_layers - 1)] if distil else None, - nn.LayerNorm(d_model), + [SelfAttentionDistil(self.hparams.d_model) for _ in range( + self.hparams.n_encoder_layers - 1)] if self.hparams.distil else None, + nn.LayerNorm(self.hparams.d_model), ) self.decoder = Decoder( [ DecoderLayer( AttentionLayer( - Attention(True, factor, attention_dropout=dropout, + Attention(True, self.hparams.factor, attention_dropout=self.hparams.dropout, output_attention=False), - d_model, - n_attention_heads, - mix=mix_attention, + self.hparams.d_model, + self.hparams.n_attention_heads, + mix=self.hparams.mix_attention, ), AttentionLayer( FullAttention( False, - factor, - attention_dropout=dropout, + self.hparams.factor, + attention_dropout=self.hparams.dropout, output_attention=False), - d_model, - n_attention_heads, + self.hparams.d_model, + self.hparams.n_attention_heads, mix=False, ), - d_model, - d_fully_connected, - dropout=dropout, - activation=activation, + self.hparams.d_model, + self.hparams.d_fully_connected, + dropout=self.hparams.dropout, + activation=self.hparams.activation, ) - for _ in range(n_decoder_layers) + for _ in range(self.hparams.n_decoder_layers) ], - nn.LayerNorm(d_model), + nn.LayerNorm(self.hparams.d_model), ) - self.projection = nn.Linear(d_model, output_size) + self.projection = nn.Linear(self.hparams.d_model, self.hparams.output_size) def forward( self, @@ -230,6 +246,7 @@ class Informer(BaseModelWithCovariates): ): new_kwargs = copy(kwargs) new_kwargs.update(cls.deduce_default_output_parameters( - dataset, kwargs, QuantileLoss())) + dataset, kwargs, QuantileLoss())) # Using QuantileLoss for defaults might be okay + # Let super().from_dataset handle populating dataset_parameters correctly return super().from_dataset(dataset, **new_kwargs) diff --git a/src/strategy/evaluation.py b/src/strategy/evaluation.py index 48f7bed..f22a7fa 100644 --- a/src/strategy/evaluation.py +++ b/src/strategy/evaluation.py @@ -3,8 +3,9 @@ import itertools import pandas as pd import numpy as np import functools -from tqdm import tqdm -from multiprocessing import Pool +# from tqdm import tqdm # Remove standard tqdm +from tqdm.notebook import tqdm # Import notebook-specific tqdm +from joblib import Parallel, delayed from strategy import metrics from strategy.strategy import LONG_POSITION, SHORT_POSITION, EXIT_POSITION from strategy.strategy import StrategyBase @@ -30,26 +31,33 @@ def parameter_sweep( result = [] total = len(param_sets) - # Evaluate sets of different hyperparameters in parallel - with Pool(num_workers) as pool, tqdm(total=total) as pbar: - for chunk in (param_sets[i:i + log_every] - for i in range(0, total, log_every)): - tmp = list( - pool.map( - functools.partial( + # Prepare the function with fixed arguments + evaluate_func_partial = functools.partial( evaluate_strategy, data, exchange_fee=exchange_fee, interval=interval, padding=padding, - include_arrays=False), - map( - lambda p: strategy_class( - **p), chunk))) - pbar.update(len(tmp)) - result += list(zip(tmp, map( - lambda p: strategy_class( - **p), chunk))) + include_arrays=False + ) + + # Prepare the list of delayed strategy instantiations and function calls + tasks = [ + delayed(evaluate_func_partial)(strategy_class(**p)) + for p in param_sets + ] + + print(f"Running {total} tasks in parallel with joblib (n_jobs={num_workers})...") + # Run in parallel + # Using loky backend for potentially better pickling robustness + tmp_results = Parallel(n_jobs=num_workers, backend="loky")( + tqdm(tasks) + ) + + # Let's re-instantiate for simplicity here, though less efficient + evaluated_strategies = [strategy_class(**p) for p in param_sets] + result = list(zip(tmp_results, evaluated_strategies)) + print("Parallel processing finished.") return sorted(result, key=lambda x: x[0][sort_by], reverse=True) diff --git a/src/strategy/strategy.py b/src/strategy/strategy.py index a00920f..ff25e2c 100644 --- a/src/strategy/strategy.py +++ b/src/strategy/strategy.py @@ -241,9 +241,17 @@ class ModelPredictionsStrategyBase(StrategyBase): data, self.predictions, on=['time_index', 'group_id'], how='left') - return self.get_positions(merged_data) + # merged_data['prediction'] = merged_data['prediction'].fillna(0).infer_objects(copy=False) # Old fix + # Explicitly convert to numeric, coercing errors, then fill NaNs + merged_data['prediction'] = pd.to_numeric(merged_data['prediction'], errors='coerce').fillna(0) + arr_preds = merged_data['prediction'].to_numpy() - def get_positions(self, data): + # arr_preds = arr_preds[:, self.future, 0] # Incorrect indexing for 1D array + # No change needed if arr_preds is already the correct 1D array of predictions + + return self._get_positions(merged_data, arr_preds) + + def _get_positions(self, data, arr_preds): raise NotImplementedError() @@ -277,11 +285,7 @@ class ModelGmadlPredictionsStrategy(ModelPredictionsStrategyBase): 'exit_short': self.exit_short } - def get_positions(self, data): - # bfill() is a hack to make it work with non predicted data - arr_preds = np.stack(data['prediction'].ffill().bfill().to_numpy()) - arr_preds = arr_preds[:, self.future, 0] - + def _get_positions(self, data, arr_preds): enter_long = arr_preds > (self.enter_long or np.infty) exit_long = arr_preds < (self.exit_long or -np.infty) enter_short = arr_preds < ( @@ -344,7 +348,7 @@ class ModelQuantilePredictionsStrategy(ModelPredictionsStrategyBase): 'quantile_exit_short': self.quantile_exit_short } - def get_positions(self, data): + def _get_positions(self, data, arr_preds): if self.new_impl: return self.get_positions2(data) return self.get_positions1(data) @@ -528,12 +532,7 @@ class ModelQuantileReturnsPredictionsStrategy(ModelPredictionsStrategyBase): 'quantile_exit_short': self.quantile_exit_short } - def get_positions(self, data): - arr_target = data[self.target].to_numpy() - arr_preds = np.stack( - # bfill() is a hack to make it work with non predicted data - data['prediction'].ffill().bfill().to_numpy()) - + def _get_positions(self, data, arr_preds): enter_long = (((arr_preds[ :, self.future - 1, self.get_quantile_idx( round(1 - self.quantile_enter_long, 2))] diff --git a/src/strategy/util.py b/src/strategy/util.py index 0ad26ad..f1446c3 100644 --- a/src/strategy/util.py +++ b/src/strategy/util.py @@ -5,6 +5,7 @@ import pandas as pd import numpy as np from numba import jit from numba import int32, float64, optional +import logging def get_sweep_data_windows(sweep_id): @@ -26,22 +27,66 @@ def get_sweep_data_windows(sweep_id): def get_data_windows(project, dataset_name, min_window=0, max_window=5): artifact_name = f"{project}/{dataset_name}" + logging.info(f"Downloading artifact: {artifact_name}") artifact = wandb.Api().artifact(artifact_name) base_path = artifact.download() - name = artifact.metadata['name'] + # name = artifact.name # We don't need the artifact name itself anymore + + # --- Load the single full dataset file --- Modified + full_data_file = os.path.join(base_path, 'full_data.parquet') + logging.info(f"Loading full dataset from: {full_data_file}") + if not os.path.exists(full_data_file): + raise FileNotFoundError(f"Expected parquet file not found in artifact: {full_data_file}") + full_df = pd.read_parquet(full_data_file) + logging.info(f"Loaded full dataset with shape: {full_df.shape}") + # --- End Load --- result = [] - for i in range(min_window, max_window+1): - in_sample_name =\ - f"in-sample-{i}" - in_sample_data = pd.read_csv(os.path.join( - base_path, name + '-' + in_sample_name + '.csv')) - out_of_sample_name =\ - f"out-of-sample-{i}" - out_of_sample_data = pd.read_csv(os.path.join( - base_path, name + '-' + out_of_sample_name + '.csv')) + N = len(full_df) + # Determine how many out-of-sample windows we need based on the loop + num_oos_windows = max_window - min_window + 1 + if N < num_oos_windows: + raise ValueError(f"Dataset length ({N}) is too short for the requested number of windows ({num_oos_windows})") + + # Simplistic split: Assume the last ~20% is for OOS, split into required windows + # This might need refinement based on how splits were actually done in training sweeps. + # A more robust approach might get split info from artifact metadata if available. + oos_total_size = int(N * 0.2) # Example: use last 20% for all out-of-sample periods + if oos_total_size < num_oos_windows: + raise ValueError(f"Calculated out-of-sample size ({oos_total_size}) is too small for {num_oos_windows} windows.") + + oos_chunk_size = oos_total_size // num_oos_windows + start_of_oos_data = N - oos_total_size + logging.info(f"Total OOS size: {oos_total_size}, Num OOS windows: {num_oos_windows}, OOS chunk size: {oos_chunk_size}, OOS starts at index: {start_of_oos_data}") + + for i in range(min_window, max_window + 1): + # Calculate split points for this specific window i + # In-sample data is everything *before* the current OOS chunk starts + current_oos_start_index = start_of_oos_data + (i - min_window) * oos_chunk_size + current_oos_end_index = current_oos_start_index + oos_chunk_size + + # Ensure the last chunk goes to the end if division wasn't perfect + if i == max_window: + current_oos_end_index = N + + in_sample_data = full_df.iloc[0:current_oos_start_index].copy() + out_of_sample_data = full_df.iloc[current_oos_start_index:current_oos_end_index].copy() + + logging.info(f"Window {i}: In-sample indices 0:{current_oos_start_index}, Out-of-sample indices {current_oos_start_index}:{current_oos_end_index}") result.append((in_sample_data, out_of_sample_data)) + # --- Old file reading logic removed --- + # in_sample_name =\ + # f"in-sample-{i}" + # in_sample_data = pd.read_csv(os.path.join( + # base_path, name + '-' + in_sample_name + '.csv')) + # out_of_sample_name =\ + # f"out-of-sample-{i}" + # out_of_sample_data = pd.read_csv(os.path.join( + # base_path, name + '-' + out_of_sample_name + '.csv')) + # result.append((in_sample_data, out_of_sample_data)) + # --- End removal --- + return result @@ -61,9 +106,15 @@ def get_sweep_window_predictions(sweep_id, part): artifact_path = window_prediction.download() index = torch.load(os.path.join( - artifact_path, 'index.pt'), map_location=torch.device('cpu')) + artifact_path, 'index.pt'), + map_location=torch.device('cpu'), + weights_only=False # Allow loading non-tensor objects + ) preds = torch.load(os.path.join( - artifact_path, 'predictions.pt'), map_location=torch.device('cpu')) + artifact_path, 'predictions.pt'), + map_location=torch.device('cpu'), + weights_only=False # Allow loading non-tensor objects (safer to add here too) + ) result.append((window_num, index, preds.numpy()))