# algo_trading_book/converted_code/TU_mom_hypothesisTest.py
# 2025-06-05 08:48:33 +02:00
#
# 194 lines
# 7.0 KiB
# Python

import numpy as np
from scipy.stats import pearson3, skew, kurtosis
import warnings
warnings.filterwarnings('ignore')
# Import our utility functions
from .backshift import backshift
from .data_loader import load_futures_data
def pearsrnd(mu, sigma, skewness, kurt, m, n):
    """
    Generate an ``m`` x ``n`` array of random numbers with the given moments.

    Samples are drawn from a Pearson type III distribution
    (``scipy.stats.pearson3``), which reproduces the requested mean,
    standard deviation and skewness.

    Parameters
    ----------
    mu : float
        Target mean.
    sigma : float
        Target standard deviation.
    skewness : float
        Target skewness.
    kurt : float
        Target (Pearson) kurtosis. Accepted for signature compatibility
        only: a Pearson type III distribution has no free fourth moment
        (its kurtosis is fixed at ``3 + 1.5 * skewness**2``), so this value
        cannot be matched independently.
    m, n : int
        Number of rows and columns of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(m, n)``.

    Notes
    -----
    Draws use NumPy's global random state, so ``np.random.seed`` makes the
    output reproducible.
    """
    shape = (m, n)

    # pearson3 needs a positive scale and a finite shape parameter. For
    # degenerate inputs keep the plain Gaussian draw (which returns ``mu``
    # everywhere when sigma is zero and raises ValueError when it is negative).
    if not (np.isfinite(skewness) and sigma > 0):
        return np.random.normal(mu, sigma, shape)

    # For pearson3, ``loc`` is the mean, ``scale`` the standard deviation
    # and the shape parameter is the skewness itself.
    return pearson3.rvs(skewness, loc=mu, scale=sigma, size=shape)
def _momentum_signals(prices, lookback):
    """Return boolean ``(longs, shorts)`` arrays: price above / below its value ``lookback`` bars earlier."""
    reference = backshift(lookback, prices)
    return prices > reference, prices < reference


def _build_positions(longs, shorts, holddays):
    """Sum the entry signals of the current and previous ``holddays - 1`` bars into a net position."""
    long_signal = longs.astype(float)
    short_signal = shorts.astype(float)
    pos = np.zeros(len(longs))
    for h in range(holddays):
        # The code treats the NaNs that backshift returns for the shifted-in
        # bars as "no signal".
        long_lag = np.nan_to_num(backshift(h, long_signal), nan=0).astype(bool)
        short_lag = np.nan_to_num(backshift(h, short_signal), nan=0).astype(bool)
        pos[long_lag] += 1
        pos[short_lag] -= 1
    return pos


def _strategy_returns(pos, market_ret, holddays):
    """Return per-bar strategy returns: previous bar's position times market return, scaled by ``holddays``."""
    ret = backshift(1, pos) * market_ret / holddays
    return np.nan_to_num(ret, nan=0)


def main():
    """
    Python implementation of the TU_mom_hypothesisTest.m file.

    Performs hypothesis testing on the TU momentum strategy and prints the
    results. Three tests are run on the strategy's returns:

    1. Gaussian test: ``mean / std * sqrt(N)`` of the strategy returns.
    2. Randomized prices: simulate market returns with ``pearsrnd`` using
       the moments of the observed market returns, rerun the strategy on
       each simulated price path, and report the fraction of simulations
       whose mean return is at least the observed mean.
    3. Randomized trades: randomly permute the observed long/short signals,
       apply them to the observed market returns, and report the same
       fraction.

    If ``load_futures_data`` raises ``FileNotFoundError`` or ``KeyError``,
    a seeded synthetic price series is used instead.
    """
    print("TU Momentum Strategy - Hypothesis Testing")
    print("=" * 50)

    try:
        # Try to load real Treasury futures data
        print("Loading Treasury futures data...")
        data = load_futures_data('TU', '20120813')
        tday = data['tday']
        cl = data['cl'][:, 0]  # Use first contract for simplicity
        print(f"Loaded {len(tday)} days of Treasury futures data")
    except (FileNotFoundError, KeyError) as e:
        print(f"Could not load real data ({e}), using synthetic data for demonstration...")
        # Synthetic data for demonstration
        np.random.seed(42)
        n_days = 2000
        cl = 100 * np.cumprod(1 + np.random.normal(0, 0.01, n_days))

    # Strategy parameters
    lookback = 250
    holddays = 25
    print("Strategy Parameters:")
    print(f"Lookback: {lookback} days")
    print(f"Hold days: {holddays} days")

    # Observed signals, positions and returns
    longs, shorts = _momentum_signals(cl, lookback)
    pos = _build_positions(longs, shorts, holddays)

    prev_cl = backshift(1, cl)
    market_ret = np.nan_to_num((cl - prev_cl) / prev_cl, nan=0)

    ret = _strategy_returns(pos, market_ret, holddays)

    # These are compared against in every simulation below, so compute once.
    observed_mean = np.mean(ret)
    observed_std = np.std(ret)

    # Gaussian hypothesis test
    if observed_std > 0:
        gaussian_test_stat = observed_mean / observed_std * np.sqrt(len(ret))
        print(f"\nGaussian Test statistic: {gaussian_test_stat:.2f}")
    else:
        print("\nGaussian Test: Cannot compute (zero standard deviation)")

    # Randomized market returns hypothesis test
    print("\nPerforming randomized market returns hypothesis test...")

    # Moments of the observed market returns drive the simulated returns
    ret_mu = np.mean(market_ret)
    ret_sigma = np.std(market_ret)
    ret_skew = skew(market_ret)
    ret_kurt = kurtosis(market_ret, fisher=False)  # Pearson kurtosis

    num_samples_better = 0
    num_simulations = 1000  # Reduced from 10000 for faster execution
    print(f"Running {num_simulations} simulations...")
    for sample in range(num_simulations):
        if sample % 100 == 0:
            print(f" Simulation {sample}/{num_simulations}")

        # Simulated market returns and the price path they imply
        market_ret_sim = pearsrnd(ret_mu, ret_sigma, ret_skew, ret_kurt,
                                  len(market_ret), 1).flatten()
        cl_sim = np.cumprod(1 + market_ret_sim)

        # Rerun the strategy on the simulated path
        longs_sim, shorts_sim = _momentum_signals(cl_sim, lookback)
        pos_sim = _build_positions(longs_sim, shorts_sim, holddays)
        ret_sim = _strategy_returns(pos_sim, market_ret_sim, holddays)

        # Count simulations that do at least as well as the observed strategy
        if np.mean(ret_sim) >= observed_mean:
            num_samples_better += 1

    p_value_randomized_prices = num_samples_better / num_simulations
    print(f"Randomized prices: p-value = {p_value_randomized_prices:.6f}")

    # Randomized entry trades hypothesis test
    print("\nPerforming randomized entry trades hypothesis test...")
    num_samples_better = 0
    num_simulations = 10000  # Can use more simulations here as it's faster
    print(f"Running {num_simulations} simulations...")
    for sample in range(num_simulations):
        if sample % 1000 == 0:
            print(f" Simulation {sample}/{num_simulations}")

        # Apply the same random permutation to both signal arrays so a bar
        # keeps its long/short pairing.
        P = np.random.permutation(len(longs))
        pos_sim = _build_positions(longs[P], shorts[P], holddays)

        # Shuffled trades are evaluated on the observed market returns
        ret_sim = _strategy_returns(pos_sim, market_ret, holddays)

        if np.mean(ret_sim) >= observed_mean:
            num_samples_better += 1

    p_value_randomized_trades = num_samples_better / num_simulations
    print(f"Randomized trades: p-value = {p_value_randomized_trades:.6f}")

    # Summary
    print("\nHypothesis Test Results:")
    print(f"Strategy mean return: {observed_mean:.6f}")
    print(f"Strategy std return: {observed_std:.6f}")
    print(f"Randomized prices p-value: {p_value_randomized_prices:.6f}")
    print(f"Randomized trades p-value: {p_value_randomized_trades:.6f}")
# Run the hypothesis tests only when this file is executed as a script,
# not when it is imported. Because the module uses relative imports, it has
# to be launched as part of its package (e.g. with ``python -m``).
if __name__ == "__main__":
    main()