added new ft model

This commit is contained in:
Yasha Sheynin 2025-02-02 21:54:32 -05:00
parent 5dcab576c3
commit 7b1f8c6d9a
5 changed files with 451 additions and 1885 deletions

View File

@ -9,7 +9,7 @@ if not OPENAI_API_KEY:
raise ValueError("OpenAI API key not found in environment variables") raise ValueError("OpenAI API key not found in environment variables")
# Model Configuration # Model Configuration
MODEL_NAME = "ft:gpt-4o-mini-2024-07-18:yasha-sheynin::Awacdfg6" MODEL_NAME = 'ft:gpt-4o-mini-2024-07-18:yasha-sheynin::AwgWhL48' #"gpt-4o-2024-08-06" #"ft:gpt-4o-mini-2024-07-18:yasha-sheynin::Awacdfg6"
# RAG Configuration # RAG Configuration
VECTOR_STORE_TYPE = "faiss" VECTOR_STORE_TYPE = "faiss"

View File

@ -1,3 +1,5 @@
import sys
import os
import asyncio import asyncio
import json import json
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -5,213 +7,149 @@ import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from openai import OpenAI from openai import OpenAI
from typing import List, Dict from typing import List, Dict
from .config import OPENAI_API_KEY from collections import Counter
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from main import analyze_market_data
from .market_data_fetcher import MarketDataFetcher from .market_data_fetcher import MarketDataFetcher
from .data_processor import MarketDataProcessor from .data_processor import MarketDataProcessor
from .config import OPENAI_API_KEY
from .rag_engine import RAGEngine
class FineTuneDatasetGenerator: class FineTuneDatasetGenerator:
def __init__(self, symbols: List[str], lookback_days: int = 30): def __init__(self,
symbols: List[str],
lookback_days: int = 30,
training_window_size: int = 60,
inference_window_size: int = 12,
inference_offset: int = 0,
interval: str = '5m'):
self.symbols = symbols self.symbols = symbols
self.lookback_days = lookback_days self.lookback_days = lookback_days
self.training_window_size = training_window_size
self.inference_window_size = inference_window_size
self.inference_offset = inference_offset
self.interval = interval
self.client = OpenAI(api_key=OPENAI_API_KEY) self.client = OpenAI(api_key=OPENAI_API_KEY)
self.rag_engine = RAGEngine()
async def generate_dataset(self) -> List[Dict]: async def generate_dataset(self) -> List[Dict]:
"""Generate labeled dataset for fine-tuning""" """Generate labeled dataset using correct predictions only"""
examples = [] examples = []
for symbol in tqdm(self.symbols, desc="Processing symbols"): for symbol in tqdm(self.symbols, desc="Processing symbols"):
# Fetch historical data
end_date = datetime.now() end_date = datetime.now()
start_date = end_date - timedelta(days=self.lookback_days) start_date = end_date - timedelta(days=self.lookback_days)
# Get predictions using analyze_market_data
fetcher = MarketDataFetcher(symbol) fetcher = MarketDataFetcher(symbol)
market_data = fetcher.fetch_data( market_data = fetcher.fetch_data(
start_date=start_date.strftime('%Y-%m-%d'), start_date=start_date.strftime('%Y-%m-%d'),
end_date=end_date.strftime('%Y-%m-%d'), end_date=end_date.strftime('%Y-%m-%d'),
interval='5m' interval=self.interval
) )
# Process market data predictions_df = await analyze_market_data(
processor = MarketDataProcessor(market_data) market_data=market_data,
processed_data = processor.df training_window_size=self.training_window_size,
inference_window_size=self.inference_window_size,
# Generate training examples inference_offset=self.inference_offset
examples.extend(self._generate_examples(processed_data)) )
if not predictions_df.empty:
correct_examples = self._convert_to_training_examples(predictions_df)
examples.extend(correct_examples)
print(f"Added {len(correct_examples)} examples from {symbol}")
return examples return examples
def _generate_examples(self, data: pd.DataFrame) -> List[Dict]: def _convert_to_training_examples(self, predictions_df: pd.DataFrame) -> List[Dict]:
"""Generate labeled examples from processed market data""" """Convert correct predictions to training examples with validation"""
examples = [] examples = []
window_size = 12 # 1-hour context
for i in range(len(data) - window_size): # Print DataFrame info for debugging
window = data.iloc[i:i+window_size] print("\nDataFrame Info:")
next_row = data.iloc[i+window_size] if i+window_size < len(data) else None print(predictions_df.info())
print("\nSample row:")
if next_row is not None: print(predictions_df.iloc[0])
# Create market state description
context = self._create_context(window) # Filter for correct predictions
correct_mask = predictions_df['vwap_direction_next_5min'] == predictions_df['actual_movement']
# Generate label correct_predictions = predictions_df[correct_mask].copy()
label = self._create_label(window, next_row)
print(f"Found {len(correct_predictions)} correct predictions out of {len(predictions_df)} total")
for _, pred in correct_predictions.iterrows():
try:
context = self._create_market_context(pred)
label = self._create_prediction_label(pred)
examples.append({ examples.append({
"messages": [ "messages": [
{"role": "system", "content": "You are a market analysis AI that predicts short-term price movements."}, {"role": "system", "content": self.rag_engine.system_prompt},
{"role": "user", "content": context}, {"role": "user", "content": context},
{"role": "assistant", "content": json.dumps(label)} {"role": "assistant", "content": json.dumps(label)}
] ]
}) })
except Exception as e:
print(f"Error processing prediction: {str(e)}")
continue
return examples return examples
def _create_context(self, window: pd.DataFrame) -> str: def _create_market_context(self, row: pd.Series) -> str:
"""Create market state description using DataProcessor format""" """Create market state description with column validation"""
# Ensure window has required columns # Print available columns for debugging
required_cols = ['MA5', 'MA20', 'Volume_MA5'] print(f"Available columns: {row.index.tolist()}")
missing = [col for col in required_cols if col not in window.columns]
if missing:
window = window.copy()
if "Close" in window.columns:
window["MA5"] = window["Close"].rolling(window=5, min_periods=1).mean().bfill()
window["MA20"] = window["Close"].rolling(window=20, min_periods=1).mean().bfill()
else:
window["MA5"] = 0
window["MA20"] = 0
if "Volume" in window.columns:
window["Volume_MA5"] = window["Volume"].rolling(window=5, min_periods=1).mean().bfill()
else:
window["Volume_MA5"] = 0
latest = window.iloc[-1]
prev = window.iloc[-2] if len(window) > 1 else latest
# Calculate changes
volume_change = ((latest['Volume'] - prev['Volume'])/prev['Volume']*100) if prev['Volume'] > 0 else 0
vwap_change = (latest["VWAP"] - prev["VWAP"]) / prev["VWAP"] * 100
vwap_direction = "up" if vwap_change > 0 else "down"
# Use safe column access with defaults
return f"""Current Market State: return f"""Current Market State:
Current Price: {latest['Close']:.2f} Current Price: {row.get('close', row.get('Close', 0.0)):.2f}
VWAP: {latest['VWAP']:.2f} VWAP: {row.get('vwap', row.get('VWAP', 0.0)):.2f}
Volume: {latest['Volume']} Volume: {row.get('volume', row.get('Volume', 0))}
MA5: {latest['MA5']:.2f} MA5: {row.get('ma5', row.get('MA5', 0.0)):.2f}
MA20: {latest['MA20']:.2f} MA20: {row.get('ma20', row.get('MA20', 0.0)):.2f}
Volume MA5: {latest['Volume_MA5']:.2f} Volume MA5: {row.get('volume_ma5', row.get('Volume_MA5', 0.0)):.2f}
Price Change: {((latest['Close'] - prev['Close'])/prev['Close']*100):.2f}% Price Change: {row.get('price_change', 0.0):.2f}%
Volume Change: {volume_change:.2f}% Volume Change: {row.get('volume_change', 0.0):.2f}%
Previous 5min VWAP Movement: {vwap_direction} ({vwap_change:.2f}%) Previous VWAP Movement: {row.get('prev_vwap_direction', 'none')}
Time: {latest.name} Time: {row.name}
""" """
def _create_label(self, window: pd.DataFrame, next_row: pd.Series) -> Dict: def _create_prediction_label(self, row: pd.Series) -> Dict:
"""Create labeled output""" """Create prediction label from actual data"""
current_vwap = window.iloc[-1]['VWAP']
next_vwap = next_row['VWAP']
direction = 'up' if next_vwap > current_vwap else 'down'
return { return {
"vwap_direction_next_5min": direction, "vwap_direction_next_5min": row['vwap_direction_next_5min'],
"confidence_score": 0.8, "confidence_score": row['confidence_score'],
"expected_vwap_change": ((next_vwap - current_vwap) / current_vwap) * 100, "expected_vwap_change": row['expected_vwap_change'],
"volatility_estimate": window['VWAP'].std(), "volatility_estimate": row['volatility_estimate'],
"suggested_entry": current_vwap, "suggested_entry": row['suggested_entry'],
"suggested_stop_loss": current_vwap * 0.997 if direction == 'up' else current_vwap * 1.003, "suggested_stop_loss": row['suggested_stop_loss'],
"suggested_take_profit": current_vwap * 1.003 if direction == 'up' else current_vwap * 0.997, "suggested_take_profit": row['suggested_take_profit'],
"key_signals": self._identify_signals(window), "key_signals": row['key_signals'],
"reasoning": self._generate_reasoning(window, direction) "reasoning": row['reasoning']
} }
def _identify_signals(self, window: pd.DataFrame) -> Dict:
"""
Identify technical signals from the market data window
Args:
window (pd.DataFrame): DataFrame containing market data for analysis
Returns:
Dict: Dictionary containing identified signals
"""
return {
"trend": self._calculate_trend(window),
"volume_trend": "increasing" if window['Volume'].iloc[-1] > window['Volume_MA5'].iloc[-1] else "decreasing"
}
def _calculate_trend(self, window: pd.DataFrame) -> str:
"""
Calculate the price trend based on moving averages
Args:
window (pd.DataFrame): Market data window with MA5 and MA20 columns
Returns:
str: Trend direction ('upward', 'downward', or 'sideways')
"""
last_row = window.iloc[-1]
ma5 = last_row['MA5']
ma20 = last_row['MA20']
# Calculate trend based on MA crossover
if ma5 > ma20 * 1.02: # 2% threshold
return "upward"
elif ma5 < ma20 * 0.98: # 2% threshold
return "downward"
else:
return "sideways"
def _generate_reasoning(self, window: pd.DataFrame, direction: str) -> str:
"""
Generate reasoning for the market prediction
Args:
window (pd.DataFrame): Market data window
direction (str): Predicted price direction ('up' or 'down')
Returns:
str: Generated reasoning for the prediction
"""
signals = self._identify_signals(window)
last_row = window.iloc[-1]
reasoning_parts = []
# Analyze trend
if signals['trend'] == direction:
reasoning_parts.append(f"The {signals['trend']} trend supports this prediction")
# Analyze volume
if signals['volume_trend'] == 'increasing':
reasoning_parts.append("Increasing volume suggests strong momentum")
else:
reasoning_parts.append("Decreasing volume suggests potential trend weakness")
# VWAP analysis
vwap = last_row['VWAP']
close = last_row['Close']
if close > vwap and direction == 'up':
reasoning_parts.append("Price above VWAP supports bullish momentum")
elif close < vwap and direction == 'down':
reasoning_parts.append("Price below VWAP supports bearish momentum")
return ". ".join(reasoning_parts) + "."
async def create_fine_tuning_job(self, examples: List[Dict]): async def create_fine_tuning_job(self, examples: List[Dict]):
"""Create and monitor fine-tuning job""" """Create and monitor fine-tuning job"""
if not examples:
raise ValueError("No examples provided for fine-tuning")
# Save examples to JSONL file # Save examples to JSONL file
with open('training_data.jsonl', 'w') as f: output_path = 'training_data.jsonl'
with open(output_path, 'w') as f:
for example in examples: for example in examples:
f.write(json.dumps(example) + '\n') f.write(json.dumps(example) + '\n')
# Upload training file - remove await print(f"Saved {len(examples)} examples to {output_path}")
# Upload training file
training_file = self.client.files.create( training_file = self.client.files.create(
file=open('training_data.jsonl', 'rb'), file=open(output_path, 'rb'),
purpose='fine-tune' purpose='fine-tune'
) )
# Create fine-tuning job - remove await # Create fine-tuning job
job = self.client.fine_tuning.jobs.create( job = self.client.fine_tuning.jobs.create(
training_file=training_file.id, training_file=training_file.id,
model="gpt-4o-mini-2024-07-18", model="gpt-4o-mini-2024-07-18",
@ -224,16 +162,23 @@ Time: {latest.name}
return job.id return job.id
async def main(): async def main():
symbols = ['BTC-USD'] """Run dataset generation and fine-tuning"""
generator = FineTuneDatasetGenerator(symbols) symbols = ['BTC-USD', 'NVDA', 'META', 'LTC-USD']
generator = FineTuneDatasetGenerator(
symbols=symbols,
lookback_days=2,
training_window_size=60,
inference_window_size=12,
inference_offset=0,
interval='5m'
)
# Generate dataset
examples = await generator.generate_dataset() examples = await generator.generate_dataset()
print(f"Generated {len(examples)} training examples") print(f"Generated {len(examples)} training examples")
# Create fine-tuning job if examples:
job_id = await generator.create_fine_tuning_job(examples) job_id = await generator.create_fine_tuning_job(examples)
print(f"Fine-tuning job started. Monitor progress using: openai api fine_tunes.follow -i {job_id}") print(f"Fine-tuning job started. Monitor progress using job ID: {job_id}")
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())

View File

@ -8,10 +8,26 @@ class PerformanceMetrics:
def __init__(self, predictions_df: pd.DataFrame, market_data: pd.DataFrame): def __init__(self, predictions_df: pd.DataFrame, market_data: pd.DataFrame):
self.predictions_df = predictions_df self.predictions_df = predictions_df
self.market_data = market_data self.market_data = market_data
self._calculate_actual_movements()
self.metrics = self._calculate_metrics() # Calculate actual movements first
self.predictions_df['actual_movement'] = self._calculate_actual_movements()
# Map vwap_direction_next_5min to predicted_movement if it exists
if 'vwap_direction_next_5min' in self.predictions_df.columns:
self.predictions_df['predicted_movement'] = self.predictions_df['vwap_direction_next_5min']
elif 'direction' in self.predictions_df.columns:
self.predictions_df['predicted_movement'] = self.predictions_df['direction']
else:
raise ValueError("No prediction column found in DataFrame. Expected 'vwap_direction_next_5min' or 'direction'")
# Now extract y_true and y_pred
y_true = self.predictions_df['actual_movement']
y_pred = self.predictions_df['predicted_movement']
# Calculate metrics
self.metrics = self._calculate_metrics(y_true, y_pred)
def _calculate_actual_movements(self): def _calculate_actual_movements(self) -> pd.Series:
"""Calculate actual VWAP movements with detailed logging""" """Calculate actual VWAP movements with detailed logging"""
print("\nDebug Counts:") print("\nDebug Counts:")
print(f"Initial DataFrame rows: {len(self.predictions_df)}") print(f"Initial DataFrame rows: {len(self.predictions_df)}")
@ -37,79 +53,76 @@ class PerformanceMetrics:
except KeyError: except KeyError:
movements.append(None) movements.append(None)
skipped_timestamps += 1 skipped_timestamps += 1
print(f"Skipped: Timestamp not found {timestamp}") print(f"Skipped: Timestamp {timestamp} not found in market data")
print(f"\nProcessing Summary:") print(f"\nProcessing Summary:")
print(f"Total rows initially: {len(self.predictions_df)}") print(f"Total rows initially: {len(self.predictions_df)}")
print(f"Valid predictions: {valid_predictions}") print(f"Valid predictions: {valid_predictions}")
print(f"Skipped timestamps: {skipped_timestamps}") print(f"Skipped timestamps: {skipped_timestamps}")
self.predictions_df['actual_movement'] = movements return pd.Series(movements, index=self.predictions_df.index)
valid_mask = self.predictions_df['actual_movement'].notna()
self.predictions_df = self.predictions_df[valid_mask].copy()
print(f"Final predictions count: {len(self.predictions_df)}\n")
def _calculate_metrics(self) -> dict: def _calculate_metrics(self, y_true: pd.Series, y_pred: pd.Series) -> Dict:
if len(self.predictions_df) == 0: """Calculate performance metrics with None value handling"""
return self._empty_metrics() # Filter out rows with None values
valid_mask = y_true.notna() & y_pred.notna()
y_true = self.predictions_df['actual_movement'] y_true_clean = y_true[valid_mask]
y_pred = self.predictions_df['vwap_direction_next_5min'] y_pred_clean = y_pred[valid_mask]
print("\nClass distributions:") if len(y_true_clean) == 0:
print("Actual:", y_true.value_counts().to_dict()) return self._get_empty_metrics()
print("Predicted:", y_pred.value_counts().to_dict())
acc = accuracy_score(y_true, y_pred) # Calculate base metrics
prec = precision_score(y_true, y_pred, pos_label='up', zero_division=0)
rec = recall_score(y_true, y_pred, pos_label='up', zero_division=0)
f1 = f1_score(y_true, y_pred, pos_label='up', zero_division=0)
# High confidence metrics
high_conf_mask = self.predictions_df['confidence_score'] >= 0.7
if high_conf_mask.any():
high_conf_correct = ((y_pred == y_true) & high_conf_mask).sum()
high_conf_acc = high_conf_correct / high_conf_mask.sum()
else:
high_conf_acc = 0.0
# Print confusion matrix for debugging
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:")
print(pd.DataFrame(
cm,
columns=['Pred Down', 'Pred Up'],
index=['True Down', 'True Up']
))
# Keep existing metrics calculation
metrics = { metrics = {
'total_predictions': len(self.predictions_df), 'total_predictions': len(y_true),
'class_distribution': y_pred.value_counts().to_dict(), 'valid_predictions': len(y_true_clean),
'avg_confidence': self.predictions_df['confidence_score'].mean(), 'accuracy': accuracy_score(y_true_clean, y_pred_clean),
'accuracy': acc, 'precision': precision_score(y_true_clean, y_pred_clean, pos_label='up', zero_division=0),
'precision': prec, 'recall': recall_score(y_true_clean, y_pred_clean, pos_label='up', zero_division=0),
'recall': rec, 'f1': f1_score(y_true_clean, y_pred_clean, pos_label='up', zero_division=0),
'f1': f1, 'class_distribution': y_pred_clean.value_counts().to_dict()
'high_confidence_accuracy': high_conf_acc
} }
# Add trading metrics # Add confidence and VWAP change metrics
metrics.update({ metrics.update({
'avg_confidence': self.predictions_df['confidence_score'].mean(),
'high_confidence_accuracy': self._calculate_high_confidence_accuracy(y_true_clean, y_pred_clean),
'avg_expected_vwap_change': self.predictions_df['expected_vwap_change'].mean(), 'avg_expected_vwap_change': self.predictions_df['expected_vwap_change'].mean(),
'avg_volatility_estimate': self.predictions_df['volatility_estimate'].mean(), 'avg_volatility_estimate': self.predictions_df['volatility_estimate'].mean()
'price_targets': {
'entry_success_rate': self._calculate_entry_success(),
'stop_loss_hits': self._calculate_stop_loss_hits(),
'take_profit_hits': self._calculate_take_profit_hits(),
'avg_risk_reward': self._calculate_risk_reward_ratio()
},
'signals': self._analyze_signals()
}) })
# Add price targets metrics
metrics['price_targets'] = {
'entry_success_rate': self._calculate_entry_success(),
'stop_loss_hits': self._calculate_stop_loss_hits(),
'take_profit_hits': self._calculate_take_profit_hits(),
'avg_risk_reward': self._calculate_avg_risk_reward()
}
return metrics return metrics
def _get_empty_metrics(self) -> Dict:
"""Return empty metrics dictionary with zero values"""
return {
'total_predictions': 0,
'valid_predictions': 0,
'accuracy': 0.0,
'precision': 0.0,
'recall': 0.0,
'f1': 0.0,
'high_confidence_accuracy': 0.0,
'class_distribution': {},
'avg_confidence': 0.0,
'avg_expected_vwap_change': 0.0,
'avg_volatility_estimate': 0.0,
'price_targets': {
'entry_success_rate': 0.0,
'stop_loss_hits': 0.0,
'take_profit_hits': 0.0,
'avg_risk_reward': 0.0
}
}
def _calculate_entry_success(self) -> float: def _calculate_entry_success(self) -> float:
"""Calculate rate of successful entries""" """Calculate rate of successful entries"""
successes = 0 successes = 0
@ -129,39 +142,68 @@ class PerformanceMetrics:
return successes / total if total > 0 else 0.0 return successes / total if total > 0 else 0.0
def _calculate_stop_loss_hits(self) -> float: def _calculate_stop_loss_hits(self) -> float:
"""Calculate stop loss hit rate""" """Calculate stop loss hit rate with proper index bounds checking"""
hits = 0 hits = 0
total = len(self.predictions_df) total = 0
for _, row in self.predictions_df.iterrows(): for _, row in self.predictions_df.iterrows():
stop_loss = row.get('suggested_stop_loss') stop_loss = row.get('suggested_stop_loss')
if stop_loss is None: if stop_loss is None:
continue continue
# Check if price hit stop loss # Get next VWAP value safely
next_vwap = self.market_data.loc[row['prediction_timestamp']:].iloc[1]['VWAP'] try:
if (row['vwap_direction_next_5min'] == 'up' and next_vwap <= stop_loss) or \ timestamp_idx = self.market_data.index.get_loc(row['prediction_timestamp'])
(row['vwap_direction_next_5min'] == 'down' and next_vwap >= stop_loss): if timestamp_idx + 1 >= len(self.market_data):
hits += 1 continue
next_vwap = self.market_data.iloc[timestamp_idx + 1]['VWAP']
total += 1
# Check if price hit stop loss
if (row['predicted_movement'] == 'up' and next_vwap <= stop_loss) or \
(row['predicted_movement'] == 'down' and next_vwap >= stop_loss):
hits += 1
except (KeyError, IndexError):
print(f"Warning: Could not find next VWAP for timestamp {row['prediction_timestamp']}")
continue
return hits / total if total > 0 else 0.0 return hits / total if total > 0 else 0.0
def _calculate_take_profit_hits(self) -> float: def _calculate_take_profit_hits(self) -> float:
"""Calculate take profit hit rate""" """
Calculate take profit hit rate with proper index bounds checking
Returns:
float: Ratio of take profit hits to total valid predictions
"""
hits = 0 hits = 0
total = len(self.predictions_df) total = 0
for _, row in self.predictions_df.iterrows(): for _, row in self.predictions_df.iterrows():
take_profit = row.get('suggested_take_profit') take_profit = row.get('suggested_take_profit')
if take_profit is None: if take_profit is None:
continue continue
# Check if price hit take profit try:
next_vwap = self.market_data.loc[row['prediction_timestamp']:].iloc[1]['VWAP'] # Get next VWAP value safely using index location
if (row['vwap_direction_next_5min'] == 'up' and next_vwap >= take_profit) or \ timestamp_idx = self.market_data.index.get_loc(row['prediction_timestamp'])
(row['vwap_direction_next_5min'] == 'down' and next_vwap <= take_profit): if timestamp_idx + 1 >= len(self.market_data):
hits += 1 continue
next_vwap = self.market_data.iloc[timestamp_idx + 1]['VWAP']
total += 1
# Check if price hit take profit level
if (row['predicted_movement'] == 'up' and next_vwap >= take_profit) or \
(row['predicted_movement'] == 'down' and next_vwap <= take_profit):
hits += 1
except (KeyError, IndexError):
print(f"Warning: Could not find next VWAP for timestamp {row['prediction_timestamp']}")
continue
return hits / total if total > 0 else 0.0 return hits / total if total > 0 else 0.0
def _calculate_risk_reward_ratio(self) -> float: def _calculate_risk_reward_ratio(self) -> float:
@ -183,6 +225,39 @@ class PerformanceMetrics:
return np.mean(ratios) if ratios else 0.0 return np.mean(ratios) if ratios else 0.0
def _calculate_avg_risk_reward(self) -> float:
"""
Calculate average risk/reward ratio across all trades
Returns:
float: Average risk/reward ratio, or 0.0 if no valid trades
"""
risk_rewards = []
for _, row in self.predictions_df.iterrows():
entry = row.get('suggested_entry')
stop_loss = row.get('suggested_stop_loss')
take_profit = row.get('suggested_take_profit')
if None in (entry, stop_loss, take_profit):
continue
# Calculate risk and reward
if row['predicted_movement'] == 'up':
risk = entry - stop_loss
reward = take_profit - entry
else: # down
risk = stop_loss - entry
reward = entry - take_profit
# Avoid division by zero
if risk <= 0 or reward <= 0:
continue
risk_rewards.append(reward / risk)
return np.mean(risk_rewards) if risk_rewards else 0.0
def _format_top_signals(self) -> str: def _format_top_signals(self) -> str:
"""Format signal analysis for report""" """Format signal analysis for report"""
all_signals = [] all_signals = []
@ -339,4 +414,28 @@ Start: {self.predictions_df['prediction_timestamp'].min()}
End: {self.predictions_df['prediction_timestamp'].max()} End: {self.predictions_df['prediction_timestamp'].max()}
""" """
return existing_report + trading_metrics return existing_report + trading_metrics
def _calculate_high_confidence_accuracy(self, y_true: pd.Series, y_pred: pd.Series) -> float:
"""
Calculate accuracy for high confidence predictions (confidence >= 0.8)
Args:
y_true (pd.Series): True labels
y_pred (pd.Series): Predicted labels
Returns:
float: Accuracy score for high confidence predictions
"""
# Get high confidence mask
high_conf_mask = self.predictions_df['confidence_score'] >= 0.8
if not high_conf_mask.any():
return 0.0
# Filter predictions by confidence
high_conf_true = y_true[high_conf_mask]
high_conf_pred = y_pred[high_conf_mask]
# Calculate accuracy
return accuracy_score(high_conf_true, high_conf_pred)

View File

@ -63,14 +63,143 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Processing: 58%|████████████████▉ | 142/244 [10:00<18:10, 10.69s/it]" "Processing: 100%|█████████████████████████████| 244/244 [16:34<00:00, 4.08s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Debug Counts:\n",
"Initial DataFrame rows: 244\n",
"Skipped: No next VWAP for timestamp 2025-01-31 16:00:00+00:00\n",
"\n",
"Processing Summary:\n",
"Total rows initially: 244\n",
"Valid predictions: 243\n",
"Skipped timestamps: 1\n",
"Final predictions count: 243\n",
"\n",
"\n",
"Class distributions:\n",
"Actual: {'down': 171, 'up': 72}\n",
"Predicted: {'down': 164, 'up': 79}\n",
"\n",
"Confusion Matrix:\n",
" Pred Down Pred Up\n",
"True Down 137 34\n",
"True Up 27 45\n",
"\n",
"Performance Report:\n",
"\n",
"Performance Report\n",
"=================\n",
"Total Predictions: 243\n",
"Accuracy: 74.90%\n",
"Precision: 56.96%\n",
"Recall: 62.50%\n",
"F1 Score: 59.60%\n",
"\n",
"Direction Distribution:\n",
"-------------------\n",
"Up: 79\n",
"Down: 164\n",
"\n",
"Confidence Analysis:\n",
"-----------------\n",
"Average Confidence: 80.08%\n",
"High Confidence Accuracy: 74.90%\n",
"\n",
"Trading Metrics:\n",
"--------------\n",
"Avg Expected VWAP Change: 0.12%\n",
"Avg Volatility Estimate: 198.74%\n",
"\n",
"Price Target Analysis:\n",
"-------------------\n",
"Entry Success Rate: 74.90%\n",
"Stop Loss Hits: 9.05%\n",
"Take Profit Hits: 9.47%\n",
"Avg Risk/Reward Ratio: 1.02\n",
"\n",
"Top Signals:\n",
"----------\n",
"Decreasing volume trend: 151\n",
"Price below VWAP: 127\n",
"Increasing volume suggests strong momentum.: 69\n",
"Price above VWAP supports bullish momentum.: 68\n",
"Decreasing volume suggests potential trend weakness.: 16\n",
"\n",
"Time Coverage:\n",
"-----------\n",
"Start: 2025-01-28 15:30:00+00:00\n",
"End: 2025-01-31 15:55:00+00:00\n",
"\n",
"\n",
"Predictions Summary:\n",
" vwap_direction_next_5min confidence_score expected_vwap_change \\\n",
"0 down 0.8 0.000000 \n",
"1 up 0.8 0.000433 \n",
"2 up 0.8 0.045066 \n",
"3 down 0.8 0.000000 \n",
"4 down 0.8 0.000000 \n",
"\n",
" volatility_estimate suggested_entry suggested_stop_loss \\\n",
"0 0.000000 102547.785632 102849.709244 \n",
"1 0.102457 102830.485685 102778.485685 \n",
"2 10.792993 103057.394632 102757.894632 \n",
"3 7.125226 103057.394967 103357.394967 \n",
"4 6.469257 103057.394968 103357.394968 \n",
"\n",
" suggested_take_profit key_signals \\\n",
"0 102245.862020 [Decreasing volume trend, VWAP below price] \n",
"1 102882.485685 [Increasing volume suggests strong momentum., ... \n",
"2 103356.894632 [Increasing volume suggests strong momentum., ... \n",
"3 102757.394967 [Decreasing volume trend, Price below VWAP] \n",
"4 102757.394968 [Decreasing volume trend, Price below VWAP, MA... \n",
"\n",
" reasoning \\\n",
"0 The decreasing volume trend suggests potential... \n",
"1 The increasing volume indicates strong momentu... \n",
"2 The significant increase in volume indicates s... \n",
"3 The decreasing volume trend suggests potential... \n",
"4 The decreasing volume trend suggests potential... \n",
"\n",
" timestamp_prediction historical_start \\\n",
"0 2025-01-28 15:30:00+00:00 2025-01-28 09:30:00+00:00 \n",
"1 2025-01-28 15:35:00+00:00 2025-01-28 09:35:00+00:00 \n",
"2 2025-01-28 15:40:00+00:00 2025-01-28 09:40:00+00:00 \n",
"3 2025-01-28 15:45:00+00:00 2025-01-28 09:45:00+00:00 \n",
"4 2025-01-28 15:50:00+00:00 2025-01-28 09:50:00+00:00 \n",
"\n",
" historical_end current_window_start \\\n",
"0 2025-01-28 14:25:00+00:00 2025-01-28 14:30:00+00:00 \n",
"1 2025-01-28 14:30:00+00:00 2025-01-28 14:35:00+00:00 \n",
"2 2025-01-28 14:35:00+00:00 2025-01-28 14:40:00+00:00 \n",
"3 2025-01-28 14:40:00+00:00 2025-01-28 14:45:00+00:00 \n",
"4 2025-01-28 14:45:00+00:00 2025-01-28 14:50:00+00:00 \n",
"\n",
" current_window_end prediction_timestamp actual_movement \n",
"0 2025-01-28 15:25:00+00:00 2025-01-28 15:30:00+00:00 up \n",
"1 2025-01-28 15:30:00+00:00 2025-01-28 15:35:00+00:00 down \n",
"2 2025-01-28 15:35:00+00:00 2025-01-28 15:40:00+00:00 down \n",
"3 2025-01-28 15:40:00+00:00 2025-01-28 15:45:00+00:00 down \n",
"4 2025-01-28 15:45:00+00:00 2025-01-28 15:50:00+00:00 down \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
] ]
} }
], ],
@ -136,6 +265,26 @@
" print(f\"Analysis failed: {str(e)}\")" " print(f\"Analysis failed: {str(e)}\")"
] ]
}, },
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-hggoauCsSRZHZkQyPoPRn05W', created_at=1738549484, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:yasha-sheynin::AwgWhL48', finished_at=1738549856, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=['file-Cr1t8xFGTdjpjQCXBGqE5L'], seed=288523345, status='succeeded', trained_tokens=58239, training_file='file-WwWvai4rxePmvifhN2KVmz', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-PS4EXlVz5SdInps2MU4f64B2', created_at=1738549280, error=Error(code='invalid_n_examples', message='Training file has 4 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=[], seed=912538967, status='failed', trained_tokens=None, training_file='file-2YxyNCNuFnLeoyCsmwRFh4', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=3)), type='supervised'), user_provided_suffix=None), FineTuningJob(id='ftjob-CdU86w4P5d5sAIeW2exVJJPo', created_at=1738524908, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:yasha-sheynin::Awacdfg6', finished_at=1738527160, hyperparameters=Hyperparameters(batch_size=3, learning_rate_multiplier=1.8, n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-EfEdakLI3PxeXpcffWaFxdol', result_files=['file-6DUBwAYAsFk94P8Qe8n8nL'], seed=223641031, status='succeeded', trained_tokens=1320606, training_file='file-BNa5KfcVuuSY9HmbwwXoWb', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=3, learning_rate_multiplier=1.8, n_epochs=3)), type='supervised'), user_provided_suffix=None)], object='list', has_more=False)\n"
]
}
],
"source": [
"from market_predictor.config import OPENAI_API_KEY\n",
"from openai import OpenAI\n",
"client = OpenAI(api_key = OPENAI_API_KEY)\n",
"print(client.fine_tuning.jobs.list(limit=10))\n"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

File diff suppressed because it is too large Load Diff