jupyter_gpu/tensorflow/notebooks/leo/LSTM_All_Crypto_01.ipynb
2024-06-13 20:17:22 +00:00

685 lines
68 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 164,
"id": "1023f2c1-e45f-4e1c-9a1b-66f59f128196",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Panda Version: 2.2.2\n",
"Today date is: 2024-06-13\n"
]
}
],
"source": [
"import sqlite3\n",
"\n",
"import os\n",
"import re\n",
"\n",
"import numpy as np\n",
"# It is apparently officially accepted to explicitly\n",
"# list all the functions you need from numpy:\n",
"from numpy import array, zeros, exp, random, dot\n",
"from numpy import shape, reshape, meshgrid, linspace\n",
"from numpy import hstack, vstack\n",
"\n",
"import pandas as pd\n",
"print('Panda Version:', pd.__version__)\n",
"\n",
"# Set ipython's max row display\n",
"pd.set_option('display.max_row', 100, 'display.max_columns', 25)\n",
"\n",
"import matplotlib.pyplot as plt # for plotting\n",
"import matplotlib\n",
"matplotlib.rcParams['figure.dpi'] = 100 # highres display\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import Variable\n",
"\n",
"from tensorflow.keras import Sequential\n",
"from tensorflow.keras import Model\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.layers import Dropout\n",
"from tensorflow.keras.layers import TimeDistributed, RepeatVector\n",
"\n",
"from keras.optimizers import SGD\n",
"\n",
"from keras.models import load_model\n",
"from keras.callbacks import EarlyStopping\n",
"from keras.callbacks import ModelCheckpoint\n",
"\n",
"from keras.layers import LSTM, Dense, Concatenate\n",
"\n",
"from keras.optimizers import SGD\n",
"\n",
"import collections\n",
"from collections import Counter\n",
"\n",
"# Import date class from datetime module\n",
"import time\n",
"import datetime\n",
"# import datetime as dt\n",
"from datetime import date\n",
"print(\"Today date is: \", date.today())"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "c09a37a6-f0d9-48e3-a1d1-65ddaf2c489c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/workspace/leo\n",
"total 16400\n",
"drwxrwxr-x 1 1000 1000 648 Jun 13 10:01 .\n",
"drwxrwxr-x 1 1000 1000 18 Jun 3 23:40 ..\n",
"-rw------- 1 1000 1000 1449984 Jun 4 00:49 20240601.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1445888 Jun 3 23:44 20240602.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1437696 Jun 4 16:45 20240603.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1269760 Jun 5 10:00 20240604.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1081344 Jun 6 10:00 20240605.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1441792 Jun 7 10:00 20240606.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1445888 Jun 8 10:00 20240607.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1449984 Jun 9 10:00 20240608.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1437696 Jun 10 10:00 20240609.mktdata.ohlcv.db\n",
"-rw-r--r-- 1 1000 1000 0 Jun 12 15:29 20240609.mktdata.ohlcvdb\n",
"-rw------- 1 1000 1000 1437696 Jun 11 10:00 20240610.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1449984 Jun 12 10:01 20240611.mktdata.ohlcv.db\n",
"-rw------- 1 1000 1000 1445888 Jun 13 10:01 20240612.mktdata.ohlcv.db\n"
]
}
],
"source": [
"# Print the kernel's working directory, then list the market-data folder\n",
"# (the same path the load cell assigns to `directory`).\n",
"!pwd\n",
"\n",
"!ls -la /workspace/data/crypto_md/"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "8d72d849-e8df-4564-a006-03ab646b9330",
"metadata": {},
"outputs": [],
"source": [
"# db_conn = sqlite3.connect(mktdata_db_file)\n",
"# tables_df = pd.read_sql_query(\"select * from sqlite_master where type = 'table'\", db_conn)\n",
"# print (tables_df_20240601.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4df4b0d4-e92e-42a9-8747-ff60669a4e10",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 167,
"id": "5d2aed0b-8c9d-4f5a-9166-785da4811390",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"66803\n"
]
}
],
"source": [
"directory = \"/workspace/data/crypto_md\"\n",
"\n",
"# Trading days to load, one SQLite file per day:\n",
"# <directory>/<YYYYMMDD>.mktdata.ohlcv.db\n",
"dates = ['20240601',\n",
"         '20240602',\n",
"         '20240603',\n",
"         '20240604',\n",
"         '20240605',\n",
"         '20240606',\n",
"         '20240607',\n",
"         '20240608',\n",
"         '20240609',\n",
"         '20240610',\n",
"         '20240611',\n",
"         '20240612']\n",
"\n",
"# Read each daily file into its own frame and concatenate once at the end;\n",
"# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows.\n",
"# The loop variable is not called `date` on purpose: the import cell binds that\n",
"# name to datetime.date, and rebinding it to a string breaks date.today().\n",
"daily_frames = []\n",
"for trade_date in dates:\n",
"    filename = f\"{directory}/{trade_date}.mktdata.ohlcv.db\"\n",
"\n",
"    # mode=ro opens the file read-only, so a mistyped path raises instead of\n",
"    # silently creating an empty database next to the real ones.\n",
"    conn = sqlite3.connect(f\"file:{filename}?mode=ro\", uri=True)\n",
"    try:\n",
"        daily_frames.append(\n",
"            pd.read_sql_query(\"select * from coinbase_ohlcv_1min\", conn)\n",
"        )\n",
"    finally:\n",
"        # Release the file handle even when the query fails.\n",
"        conn.close()\n",
"\n",
"# All days stacked row-wise with a fresh 0..n-1 index.\n",
"df_concat = pd.concat(daily_frames, axis=0, ignore_index=True)\n",
"print(df_concat.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 174,
"id": "cd83027c-eaa1-44d9-98ad-8506099c323d",
"metadata": {},
"outputs": [],
"source": [
"# Parse the bar timestamps and put the rows in time order.\n",
"# NOTE: df_concat is rebound below without its 'tstamp' column, so re-running\n",
"# this cell requires re-running the load cell first.\n",
"bars = df_concat.assign(tstamp=pd.to_datetime(df_concat['tstamp']))\n",
"bars = bars.sort_values(by='tstamp', kind='stable')\n",
"\n",
"# Minute-resolution key used to join the instruments. It is zero-padded and\n",
"# starts with the year, so string order equals time order. The earlier\n",
"# day-hour-minute key was unpadded ('1-0-10' sorted before '1-0-2') and had no\n",
"# month, so any sorted merge on it scrambled the series and keys from\n",
"# different months could collide.\n",
"bars['date'] = bars['tstamp'].dt.strftime('%Y-%m-%d %H:%M')\n",
"\n",
"# Keep only the columns the per-instrument split needs.\n",
"selected_columns = ['date', 'instrument_id', 'close', 'volume', 'vwap']\n",
"df_concat = bars[selected_columns]"
]
},
{
"cell_type": "code",
"execution_count": 179,
"id": "316c0cfb-c73d-4dad-9d49-eb80daa229ec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"========================================\n",
" date close_xrp volume_xrp vwap_xrp close_eth volume_eth vwap_eth \\\n",
"0 1-0-0 0.5173 28809.165339 0.517217 0.376204 37.497964 0.376108 \n",
"1 1-0-1 0.5170 10184.511212 0.517060 0.375942 11.703305 0.376013 \n",
"2 1-0-10 0.5176 15515.348778 0.517601 0.375938 11.922897 0.375935 \n",
"\n",
" close_ltc volume_ltc vwap_ltc close_btc volume_btc vwap_btc \n",
"0 0.8317 110.056253 0.831662 0.675060 2.045049 0.674969 \n",
"1 0.8312 176.868598 0.831441 0.674947 0.646759 0.675009 \n",
"2 0.8311 58.951033 0.831037 0.675507 1.201335 0.675478 \n",
"test sample size: 16698\n"
]
}
],
"source": [
"def split_instrument(bars, instrument_id, suffix, price_scale=1.0):\n",
"    \"\"\"Return one instrument's rows as 'date' plus suffixed price/volume columns.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    bars : DataFrame with 'date', 'instrument_id', 'close', 'volume', 'vwap'.\n",
"    instrument_id : value of 'instrument_id' to keep, e.g. 'PAIR-BTC-USD'.\n",
"    suffix : appended to the output column names, e.g. 'btc' -> 'close_btc'.\n",
"    price_scale : divisor applied to 'close' and 'vwap'; 'volume' is untouched.\n",
"\n",
"    Returns\n",
"    -------\n",
"    DataFrame with columns date, close_<suffix>, volume_<suffix>, vwap_<suffix>\n",
"    and a fresh 0..n-1 index, in the row order of `bars`.\n",
"    \"\"\"\n",
"    rows = bars[bars['instrument_id'] == instrument_id].reset_index(drop=True)\n",
"    return pd.DataFrame({\n",
"        'date': rows['date'],\n",
"        'close_' + suffix: rows['close'] / price_scale,\n",
"        'volume_' + suffix: rows['volume'],\n",
"        'vwap_' + suffix: rows['vwap'] / price_scale,\n",
"    })\n",
"\n",
"\n",
"# One frame per instrument. Prices are divided by a fixed per-instrument\n",
"# constant (presumably to bring the four series to a similar magnitude).\n",
"df_concat_btc = split_instrument(df_concat, 'PAIR-BTC-USD', 'btc', 100000.00)\n",
"df_concat_ltc = split_instrument(df_concat, 'PAIR-LTC-USD', 'ltc', 100.00)\n",
"df_concat_eth = split_instrument(df_concat, 'PAIR-ETH-USD', 'eth', 10000.00)\n",
"df_concat_xrp = split_instrument(df_concat, 'PAIR-XRP-USD', 'xrp')\n",
"\n",
"# Join on the minute key. An inner join keeps the row order of the left frame,\n",
"# whereas an outer join sorts the string keys lexicographically and so reorders\n",
"# the bars. Rows missing from any instrument would be removed by the dropna\n",
"# below anyway, so the resulting set of rows is the same.\n",
"df_M1 = pd.merge(df_concat_xrp, df_concat_eth, on='date', how='inner')\n",
"df_M2 = pd.merge(df_M1, df_concat_ltc, on='date', how='inner')\n",
"df_M3 = pd.merge(df_M2, df_concat_btc, on='date', how='inner')\n",
"\n",
"# Drop rows with any NaN value and assign the result to a new DataFrame.\n",
"# (This assignment was commented out, leaving `sample` undefined on a fresh kernel.)\n",
"sample = df_M3.dropna(axis=0).reset_index(drop=True)\n",
"print('========================================')\n",
"print(sample.head(3))\n",
"sample_size = sample.shape[0]\n",
"print('test sample size: ', sample_size)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"id": "92700c76-8eac-4ebb-86d3-27066486c437",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Two stacked panels for BTC: scaled close on top, scaled VWAP below.\n",
"fig, ax = plt.subplots(2, 1)\n",
"\n",
"ax[0].plot(df_concat_btc['date'], df_concat_btc['close_btc'], 'blue')\n",
"# Title matches the instrument_id the frame was filtered on (USD, not USDT).\n",
"ax[0].set_title('PAIR-BTC-USD')\n",
"ax[0].set_xlabel('date')\n",
"ax[0].set_ylabel('close')\n",
"\n",
"ax[1].plot(df_concat_btc['date'], df_concat_btc['vwap_btc'], 'green')\n",
"ax[1].set_xlabel('date')\n",
"ax[1].set_ylabel('vwap')\n",
"\n",
"# 'date' holds strings, so matplotlib treats the x axis as categorical and\n",
"# would draw one tick label per row; cap the number of ticks instead.\n",
"for panel in ax:\n",
"    panel.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(nbins=8, integer=True))\n",
"\n",
"fig.tight_layout()\n",
"\n",
"# Show the plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 180,
"id": "3c6d80b3-4a46-432b-9b88-6a264f9b7a7e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"First part of the DataFrame:\n",
" date close_xrp volume_xrp vwap_xrp close_eth volume_eth \\\n",
"13356 7-23-25 0.4995 8673.755143 0.499557 0.368412 53.830657 \n",
"13357 7-23-26 0.4996 34403.036949 0.499605 0.368370 89.163024 \n",
"\n",
" vwap_eth close_ltc volume_ltc vwap_ltc close_btc volume_btc \\\n",
"13356 0.368548 0.8003 74.882298 0.800537 0.69375 1.598973 \n",
"13357 0.368411 0.8002 30.666566 0.800233 0.69381 1.612419 \n",
"\n",
" vwap_btc \n",
"13356 0.693999 \n",
"13357 0.693838 \n",
"\n",
"Second part of the DataFrame:\n",
" date close_xrp volume_xrp vwap_xrp close_eth volume_eth \\\n",
"13358 7-23-27 0.4995 8240.166990 0.499561 0.368369 108.118487 \n",
"13359 7-23-28 0.4995 21256.045405 0.499585 0.367813 229.373936 \n",
"\n",
" vwap_eth close_ltc volume_ltc vwap_ltc close_btc volume_btc \\\n",
"13358 0.368353 0.8002 43.931145 0.800134 0.693709 1.765308 \n",
"13359 0.368127 0.8002 22.408370 0.800188 0.693200 2.643365 \n",
"\n",
" vwap_btc \n",
"13358 0.693748 \n",
"13359 0.693508 \n"
]
}
],
"source": [
"# Position of the cut: the first 80% of the rows go to sample_test, the rest\n",
"# to sample_valid.\n",
"split_index = round(sample.shape[0] * 0.8)\n",
"\n",
"# .copy() gives each part its own data. Without it the parts are slices of\n",
"# `sample`, and assigning a column to one of them later raises pandas'\n",
"# SettingWithCopyWarning.\n",
"sample_test = sample.iloc[:split_index].copy()\n",
"sample_valid = sample.iloc[split_index:].copy()\n",
"\n",
"print('\\nFirst part of the DataFrame:')\n",
"print(sample_test.tail(2))\n",
"\n",
"print('\\nSecond part of the DataFrame:')\n",
"print(sample_valid.head(2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4edcdbdb-85fb-415e-a6c4-71cc383c4410",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 182,
"id": "71c8bf0d-7762-47ef-a8a1-6d57ee5c8bb7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sample_size: 13358\n"
]
}
],
"source": [
"# Report the row count currently bound to sample_size.\n",
"size_label = 'sample_size: '\n",
"print(size_label, sample_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0802e693-29a6-4eda-a496-9504f3008c2e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 211,
"id": "c3dee439-a3e8-4843-8f8b-33572b4170e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sample_size: 13358\n",
"[ 1108 13291]\n",
"[[5.18272282e-01 1.68025214e-04 3.77189791e-01 1.45151777e-04\n",
" 8.31595503e-01 3.95645238e-04 6.76253699e-01 2.15301382e-04]\n",
" [5.18303634e-01 1.68025214e-04 3.77154892e-01 1.45151777e-04\n",
" 8.32082769e-01 3.95645238e-04 6.76271128e-01 2.15301382e-04]\n",
" [5.18562378e-01 1.68025214e-04 3.77397110e-01 1.45151777e-04\n",
" 8.30954599e-01 3.95645238e-04 6.76918294e-01 2.15301382e-04]\n",
" [5.18341398e-01 1.68025214e-04 3.77160454e-01 1.45151777e-04\n",
" 8.31766697e-01 3.95645238e-04 6.76294450e-01 2.15301382e-04]\n",
" [5.18497467e-01 1.68025214e-04 3.77261769e-01 1.45151777e-04\n",
" 8.31990904e-01 3.95645238e-04 6.76269889e-01 2.15301382e-04]\n",
" [5.18698229e-01 1.68025214e-04 3.77392003e-01 1.45151777e-04\n",
" 8.31954856e-01 3.95645238e-04 6.76339962e-01 2.15301382e-04]\n",
" [5.18693630e-01 1.68025214e-04 3.77533283e-01 1.45151777e-04\n",
" 8.32184637e-01 3.95645238e-04 6.76516962e-01 2.15301382e-04]\n",
" [5.18672691e-01 1.68025214e-04 3.77517862e-01 1.45151777e-04\n",
" 8.32276407e-01 3.95645238e-04 6.76558233e-01 2.15301382e-04]\n",
" [4.99269291e-01 5.45867382e-04 3.68653612e-01 1.43205019e-04\n",
" 7.99339573e-01 3.86775482e-04 6.92654223e-01 3.72953569e-04]\n",
" [4.99424699e-01 5.45867382e-04 3.68590151e-01 1.43205019e-04\n",
" 7.99839862e-01 3.86775482e-04 6.92737055e-01 3.72953569e-04]\n",
" [4.99437041e-01 5.45867382e-04 3.68505112e-01 1.43205019e-04\n",
" 7.99388893e-01 3.86775482e-04 6.92646980e-01 3.72953569e-04]\n",
" [4.99561765e-01 5.45867382e-04 3.68547552e-01 1.43205019e-04\n",
" 7.99601900e-01 3.86775482e-04 6.92723486e-01 3.72953569e-04]\n",
" [4.99664819e-01 5.45867382e-04 3.68704191e-01 1.43205019e-04\n",
" 7.99877210e-01 3.86775482e-04 6.92928189e-01 3.72953569e-04]\n",
" [4.99833312e-01 5.45867382e-04 3.68823620e-01 1.43205019e-04\n",
" 7.99375187e-01 3.86775482e-04 6.93065604e-01 3.72953569e-04]\n",
" [4.99855255e-01 5.45867382e-04 3.68840191e-01 1.43205019e-04\n",
" 7.99747347e-01 3.86775482e-04 6.93183729e-01 3.72953569e-04]\n",
" [4.98030589e-01 5.45867382e-04 3.68931667e-01 1.43205019e-04\n",
" 7.98594073e-01 3.86775482e-04 6.91868385e-01 3.72953569e-04]]\n",
"XXXX ====== Actual Input ====== XXXX\n",
"(2, 8, 8)\n",
"[[0.51863729 0.37762073 0.83232141 0.6765906 ]\n",
" [0.49999486 0.36888691 0.79994226 0.69325938]]\n",
"(2, 1, 1)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_28/500398063.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" sample_test['Selected'] = 0\n"
]
}
],
"source": [
"# 05.17.2024\n",
"# ============================\n",
"# Build LSTM inputs from sample_test: for each randomly drawn row position,\n",
"# X holds the `sl` rows before it and y the `frwd` rows starting at it.\n",
"sample_size = sample_test.shape[0]\n",
"print('sample_size: ', sample_size)\n",
"\n",
"sl = 8          # <--- sequence length\n",
"batch = 2       # <--- batch size\n",
"n_features = 8  # <---- no. of variables\n",
"n_out = 1       # <----- no. of predicted variables\n",
"frwd = 1        # <--- how many to predict\n",
"\n",
"# Series that feed the model, in the order they appear in X and y.\n",
"vwap_cols = ['vwap_xrp', 'vwap_eth', 'vwap_ltc', 'vwap_btc']\n",
"# Each series contributes two features: its values and their window std.\n",
"assert n_features == 2 * len(vwap_cols)\n",
"\n",
"# Draw `batch` distinct row positions without replacement. Positions (not\n",
"# index labels) are drawn because the windows are cut with positional slicing,\n",
"# and sample_test itself is left unmodified (no helper column is added to it).\n",
"np.random.seed(125)\n",
"selected_indices = np.random.choice(np.arange(sample_size), size=batch, replace=False)\n",
"print(selected_indices)\n",
"\n",
"# Keep only positions where a full look-back window (sl rows) and a full\n",
"# look-ahead window (frwd rows) fit inside the frame. idx == sl and\n",
"# idx == sample_size - frwd are valid and are kept.\n",
"anchors = np.sort(selected_indices)\n",
"usable = (anchors >= sl) & (anchors + frwd <= sample_size)\n",
"cnt_thrown_idx = int((~usable).sum())\n",
"anchors = anchors[usable]\n",
"\n",
"# Plain float matrix of the four series, shape (rows, 4).\n",
"vwap = sample_test[vwap_cols].to_numpy(dtype=float)\n",
"\n",
"X_windows = []\n",
"y_rows = []\n",
"for idx in anchors:\n",
"    before = vwap[idx - sl: idx]      # (sl, 4) history\n",
"    after = vwap[idx: idx + frwd]     # (frwd, 4) values to predict\n",
"\n",
"    # Standard deviation of each series over the window (np.std default,\n",
"    # ddof=0), repeated on every time step.\n",
"    spread = np.broadcast_to(before.std(axis=0), before.shape)\n",
"\n",
"    # Interleave value/std per series:\n",
"    # xrp, xrp_std, eth, eth_std, ltc, ltc_std, btc, btc_std\n",
"    window = np.empty((sl, n_features))\n",
"    window[:, 0::2] = before\n",
"    window[:, 1::2] = spread\n",
"\n",
"    X_windows.append(window)\n",
"    y_rows.append(after)\n",
"\n",
"# ======== Model Input =========\n",
"if X_windows:\n",
"    X = np.stack(X_windows)           # (windows, sl, n_features)\n",
"    y_all = np.vstack(y_rows)         # (windows * frwd, 4)\n",
"else:\n",
"    # Every drawn position was too close to an edge.\n",
"    X = np.empty((0, sl, n_features))\n",
"    y_all = np.empty((0, len(vwap_cols)))\n",
"\n",
"# Flat (time step, feature) view of all windows, printed for inspection.\n",
"X0 = X.reshape(len(anchors) * sl, n_features)\n",
"print(X0)\n",
"\n",
"print('XXXX ====== Actual Input ====== XXXX')\n",
"print(X.shape)\n",
"\n",
"# Targets for all four series, printed for inspection.\n",
"print(y_all)\n",
"\n",
"# The model target is the BTC series only.\n",
"y = y_all[:, vwap_cols.index('vwap_btc')].reshape(len(anchors), frwd, n_out)\n",
"# print ('YYYY ====== Actual Input ====== YYYY')\n",
"print(y.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88e292cc-1244-462a-b311-3e964773c742",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0rc1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}