{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Rolling Window Market Movement Analysis\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fetched 316 rows of data.\n" ] } ], "source": [ "import sys\n", "sys.path.append('../')\n", "import asyncio\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from datetime import datetime, timedelta\n", "from IPython.display import display\n", "from tqdm.notebook import tqdm\n", "\n", "from market_predictor.market_data_fetcher import MarketDataFetcher\n", "from market_predictor.data_processor import MarketDataProcessor\n", "from market_predictor.rag_engine import RAGEngine\n", "from market_predictor.performance_metrics import PerformanceMetrics\n", "from market_predictor.prediction_service import PredictionService\n", "\n", "\n", "symbol = \"BTC-USD\"\n", "end_date = datetime.now()\n", "start_date = end_date - timedelta(days=5)\n", "fetcher = MarketDataFetcher(symbol)\n", "market_data = fetcher.fetch_data(\n", " start_date=start_date.strftime('%Y-%m-%d'),\n", " end_date=end_date.strftime('%Y-%m-%d'),\n", " interval='5m'\n", ")\n", "print(f\"Fetched {len(market_data)} rows of data.\")\n", "\n", "rag_engine = RAGEngine()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define Rolling Window Prediction Functions" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Processing: 100%|█████████████████████████████| 244/244 [16:34<00:00, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Debug Counts:\n", "Initial DataFrame rows: 244\n", "Skipped: No next VWAP for timestamp 2025-01-31 16:00:00+00:00\n", "\n", "Processing Summary:\n", "Total rows initially: 244\n", "Valid predictions: 243\n", "Skipped timestamps: 1\n", "Final predictions count: 243\n", "\n", "\n", "Class distributions:\n", "Actual: {'down': 171, 'up': 72}\n", "Predicted: {'down': 164, 'up': 79}\n", "\n", "Confusion Matrix:\n", " Pred Down Pred Up\n", "True Down 137 34\n", "True Up 27 45\n", "\n", "Performance Report:\n", "\n", "Performance Report\n", "=================\n", "Total Predictions: 243\n", "Accuracy: 74.90%\n", "Precision: 56.96%\n", "Recall: 62.50%\n", "F1 Score: 59.60%\n", "\n", "Direction Distribution:\n", "-------------------\n", "Up: 79\n", "Down: 164\n", "\n", "Confidence Analysis:\n", "-----------------\n", "Average Confidence: 80.08%\n", "High Confidence Accuracy: 74.90%\n", "\n", "Trading Metrics:\n", "--------------\n", "Avg Expected VWAP Change: 0.12%\n", "Avg Volatility Estimate: 198.74%\n", "\n", "Price Target Analysis:\n", "-------------------\n", "Entry Success Rate: 74.90%\n", "Stop Loss Hits: 9.05%\n", "Take Profit Hits: 9.47%\n", "Avg Risk/Reward Ratio: 1.02\n", "\n", "Top Signals:\n", "----------\n", "Decreasing volume trend: 151\n", "Price below VWAP: 127\n", "Increasing volume suggests strong momentum.: 69\n", "Price above VWAP supports bullish momentum.: 68\n", "Decreasing volume suggests potential trend weakness.: 16\n", "\n", "Time Coverage:\n", "-----------\n", "Start: 2025-01-28 15:30:00+00:00\n", "End: 2025-01-31 15:55:00+00:00\n", "\n", "\n", "Predictions Summary:\n", " vwap_direction_next_5min confidence_score expected_vwap_change \\\n", "0 down 0.8 0.000000 \n", "1 up 0.8 0.000433 \n", "2 up 0.8 0.045066 \n", "3 down 0.8 0.000000 \n", "4 down 0.8 0.000000 \n", "\n", " volatility_estimate suggested_entry suggested_stop_loss \\\n", "0 0.000000 102547.785632 102849.709244 \n", "1 0.102457 102830.485685 102778.485685 \n", "2 10.792993 103057.394632 102757.894632 \n", "3 7.125226 103057.394967 103357.394967 \n", "4 6.469257 103057.394968 103357.394968 \n", "\n", " suggested_take_profit key_signals \\\n", "0 102245.862020 [Decreasing volume trend, VWAP below price] \n", "1 102882.485685 [Increasing volume suggests strong momentum., ... \n", "2 103356.894632 [Increasing volume suggests strong momentum., ... \n", "3 102757.394967 [Decreasing volume trend, Price below VWAP] \n", "4 102757.394968 [Decreasing volume trend, Price below VWAP, MA... \n", "\n", " reasoning \\\n", "0 The decreasing volume trend suggests potential... \n", "1 The increasing volume indicates strong momentu... \n", "2 The significant increase in volume indicates s... \n", "3 The decreasing volume trend suggests potential... \n", "4 The decreasing volume trend suggests potential... \n", "\n", " timestamp_prediction historical_start \\\n", "0 2025-01-28 15:30:00+00:00 2025-01-28 09:30:00+00:00 \n", "1 2025-01-28 15:35:00+00:00 2025-01-28 09:35:00+00:00 \n", "2 2025-01-28 15:40:00+00:00 2025-01-28 09:40:00+00:00 \n", "3 2025-01-28 15:45:00+00:00 2025-01-28 09:45:00+00:00 \n", "4 2025-01-28 15:50:00+00:00 2025-01-28 09:50:00+00:00 \n", "\n", " historical_end current_window_start \\\n", "0 2025-01-28 14:25:00+00:00 2025-01-28 14:30:00+00:00 \n", "1 2025-01-28 14:30:00+00:00 2025-01-28 14:35:00+00:00 \n", "2 2025-01-28 14:35:00+00:00 2025-01-28 14:40:00+00:00 \n", "3 2025-01-28 14:40:00+00:00 2025-01-28 14:45:00+00:00 \n", "4 2025-01-28 14:45:00+00:00 2025-01-28 14:50:00+00:00 \n", "\n", " current_window_end prediction_timestamp actual_movement \n", "0 2025-01-28 15:25:00+00:00 2025-01-28 15:30:00+00:00 up \n", "1 2025-01-28 15:30:00+00:00 2025-01-28 15:35:00+00:00 down \n", "2 2025-01-28 15:35:00+00:00 2025-01-28 15:40:00+00:00 down \n", "3 2025-01-28 15:40:00+00:00 2025-01-28 15:45:00+00:00 down \n", "4 2025-01-28 15:45:00+00:00 2025-01-28 15:50:00+00:00 down \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import asyncio\n", "import pandas as pd\n", "from tqdm import tqdm\n", "import nest_asyncio\n", "\n", "# Enable nested event loops\n", "nest_asyncio.apply()\n", "\n", "async def analyze_market_data(\n", " market_data: pd.DataFrame,\n", " training_window_size: int = 36,\n", " inference_window_size: int = 12,\n", " inference_offset: int = 0\n", ") -> pd.DataFrame:\n", " processor = MarketDataProcessor(market_data)\n", " processed_data = processor.df\n", " \n", " service = PredictionService(\n", " market_data=processed_data,\n", " training_window_size=training_window_size,\n", " inference_window_size=inference_window_size,\n", " inference_offset=inference_offset\n", " )\n", " \n", " total_size = training_window_size + inference_offset + inference_window_size\n", " total_windows = len(processed_data) - total_size\n", " \n", " predictions = []\n", " with tqdm(total=total_windows, desc=\"Processing\", ncols=80) as pbar:\n", " async for pred in service.generate_rolling_predictions():\n", " if pred:\n", " # print(pred)\n", " predictions.append(pred)\n", " pbar.update(1)\n", " \n", " return pd.DataFrame(predictions) if predictions else pd.DataFrame()\n", "\n", "# Run analysis\n", "try:\n", " predictions_df = await analyze_market_data(\n", " market_data,\n", " training_window_size=60,\n", " inference_window_size=12,\n", " inference_offset=0\n", " )\n", " \n", " if not predictions_df.empty:\n", " metrics = PerformanceMetrics(predictions_df, market_data)\n", " report = metrics.generate_report()\n", " print(\"\\nPerformance Report:\")\n", " print(report)\n", " \n", " print(\"\\nPredictions Summary:\")\n", " print(predictions_df.head())\n", " else:\n", " print(\"No predictions generated\")\n", " \n", "except Exception as e:\n", " print(f\"Analysis failed: {str(e)}\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:httpx:HTTP Request: GET https://api.openai.com/v1/fine_tuning/jobs?limit=10 \"HTTP/1.1 200 OK\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-wIUgfbwCoeUXBzBdb2jNqkMW', created_at=1738624595, error=Error(code='invalid_n_examples', message='Training file has 8 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=941448681, status='failed', trained_tokens=None, training_file='file-JFJ8pqwiCvNSbqhfEYsJfH', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-wrfsXikgf6WDmBTsyP9bzWho', created_at=1738622626, error=Error(code='invalid_n_examples', message='Training file has 4 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=1006076083, status='failed', trained_tokens=None, training_file='file-VJKnjsMD4nT4p33yhnwiWU', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-IqQ7CJ2kXCGl3vXtXIWbbSEV', created_at=1738621366, error=Error(code='invalid_n_examples', message='Training file has 3 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=553846108, status='failed', trained_tokens=None, training_file='file-3w9821tRBCAaQr274SzBNE', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-mOw8bph9RRH9W6SNKpywklWy', created_at=1738575118, error=Error(code='invalid_training_file', message='The job failed due to an invalid training file. Unexpected file format, expected either prompt/completion pairs or chat messages.', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=814473608, status='failed', trained_tokens=None, training_file='file-5ivaRBKXpFqY24CdSEiGoc', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-obgdBUjJ9cIsxjeE7YZeu3EE', created_at=1738574894, error=Error(code='invalid_training_file', message='The job failed due to an invalid training file. Unexpected file format, expected either prompt/completion pairs or chat messages.', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=1621977344, status='failed', trained_tokens=None, training_file='file-WMgYRKkUBSjdRrsLRtJUcT', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-qDBemboNOg8rKWMvreUtKYqr', created_at=1738570690, error=Error(code='invalid_n_examples', message='Training file has 3 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=1950862311, status='failed', trained_tokens=None, training_file='file-J7NkngveduCT4ob3RGMoFX', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-WxdpAXpuzwaOi0jitrQCH2hd', created_at=1738570510, error=Error(code='invalid_n_examples', message='Training file has 3 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=1531489607, status='failed', trained_tokens=None, training_file='file-34VoUAXgesMZ5Tipx8HDSh', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-P35JtWmiBvijQSIi8fDmX3es', created_at=1738570109, error=Error(code='invalid_n_examples', message='Training file has 3 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=667822474, status='failed', trained_tokens=None, training_file='file-KyvSRkzxex4phv5UYZu2eK', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-zInUs8778guYModhgpZyMM6j', created_at=1738570001, error=Error(code='invalid_n_examples', message='Training file has 3 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=1152709743, status='failed', trained_tokens=None, training_file='file-FHgehWp6fGjtqAv8uo2nZX', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-1iRubnLEfXrTACm5xWObJk2U', created_at=1738568241, error=Error(code='invalid_n_examples', message='Training file has 2 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=242651269, status='failed', trained_tokens=None, training_file='file-V1y1qrdmrjapMLGUvwAayY', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None)], object='list', has_more=True)\n" ] } ], "source": [ "# filepath: rolling_window_analysis.ipynb\n", "import sys\n", "import os\n", "sys.path.append(os.path.abspath(\"..\")) # Adjust the path as needed\n", "\n", "from market_predictor.config import OPENAI_API_KEY\n", "\n", "from market_predictor.config import OPENAI_API_KEY\n", "from openai import OpenAI\n", "client = OpenAI(api_key = OPENAI_API_KEY)\n", "print(client.fine_tuning.jobs.list(limit=10))\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | vwap_direction_next_5min | \n", "confidence_score | \n", "expected_vwap_change | \n", "volatility_estimate | \n", "suggested_entry | \n", "suggested_stop_loss | \n", "suggested_take_profit | \n", "key_signals | \n", "reasoning | \n", "timestamp_prediction | \n", "actual_vwap_change | \n", "actual_price_change | \n", "actual_volume | \n", "actual_return | \n", "actual_movement | \n", "prediction_correct | \n", "hour | \n", "day_of_week | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "up | \n", "0.85 | \n", "0.01 | \n", "0.02 | \n", "701.50 | \n", "700.5 | \n", "702.5 | \n", "['Consistent upward VWAP movement', 'Price abo... | \n", "The current market window shows a consistent u... | \n", "2025-01-31 10:50:00-05:00 | \n", "0.000048 | \n", "0.000126 | \n", "149963.0 | \n", "0.000048 | \n", "up | \n", "True | \n", "10 | \n", "4 | \n", "
| 1 | \n", "up | \n", "0.85 | \n", "0.01 | \n", "0.02 | \n", "699.50 | \n", "698.5 | \n", "700.5 | \n", "['Consistent upward VWAP movement', 'Price sta... | \n", "The current market window shows a consistent u... | \n", "2025-01-31 10:55:00-05:00 | \n", "0.000044 | \n", "-0.000251 | \n", "135296.6 | \n", "0.000044 | \n", "up | \n", "True | \n", "10 | \n", "4 | \n", "
| 2 | \n", "up | \n", "0.85 | \n", "0.01 | \n", "0.02 | \n", "700.00 | \n", "698.5 | \n", "701.5 | \n", "['Consistent upward VWAP movement', 'Price sta... | \n", "The current market window shows a consistent u... | \n", "2025-01-31 11:00:00-05:00 | \n", "0.000043 | \n", "0.000080 | \n", "131817.6 | \n", "0.000043 | \n", "up | \n", "True | \n", "11 | \n", "4 | \n", "
| 3 | \n", "up | \n", "0.85 | \n", "0.01 | \n", "0.02 | \n", "700.84 | \n", "699.5 | \n", "702.0 | \n", "['Consistent upward VWAP movement', 'Price sta... | \n", "The current market window shows a consistent u... | \n", "2025-01-31 11:05:00-05:00 | \n", "0.000041 | \n", "-0.000457 | \n", "131642.2 | \n", "0.000041 | \n", "up | \n", "True | \n", "11 | \n", "4 | \n", "
| 4 | \n", "up | \n", "0.85 | \n", "0.01 | \n", "0.02 | \n", "699.50 | \n", "698.5 | \n", "700.5 | \n", "['Consistent upward VWAP movement', 'Price sta... | \n", "The current market window shows a consistent u... | \n", "2025-01-31 11:10:00-05:00 | \n", "0.000044 | \n", "-0.000564 | \n", "139622.6 | \n", "0.000044 | \n", "up | \n", "True | \n", "11 | \n", "4 | \n", "