Add strategy evaluation

2024-09-06 11:17:48 +02:00 · 2024-09-06 11:17:48 +02:00 · 24f7cd60c2
commit 24f7cd60c2
parent f36b7a8e30
6 changed files with 64369 additions and 1 deletions
--- a/notebooks/evaluate.ipynb
+++ b/notebooks/evaluate.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,8 @@ build-backend = "hatchling.build"

 [tool.hatch.build.targets.wheel]
 packages = [
-    "src/ml"
+    "src/ml",
+    "src/strategy"
 ]

 [project]
--- a/src/strategy/init.py
+++ b/src/strategy/init.py
--- a/src/strategy/evaluation.py
+++ b/src/strategy/evaluation.py
@ -0,0 +1,67 @@
+import pandas as pd
+import numpy as np
+from strategy import metrics
+from strategy.strategy import LONG_POSITION, SHORT_POSITION, EXIT_POSITION
+from strategy.strategy import StrategyBase
+
+
+def evaluate_strategy(
+        data: pd.DataFrame,
+        strategy: StrategyBase,
+        exchange_fee: float = 0.001,
+        interval: str = "5min"):
+    """Evaluate a trading strategy on a price series.
+
+    The position emitted for row ``t`` is held over the price move from
+    row ``t`` to row ``t + 1``. The position for the last row is dropped
+    because there is no later price to score it against.
+
+    Args:
+        data: Frame with ``close_price`` and ``close_time`` columns. It is
+            also passed unchanged to ``strategy.run``.
+        strategy: Strategy whose ``run(data)`` returns one position code
+            per row of ``data``.
+        exchange_fee: Proportional fee deducted on every step where the
+            held position differs from the previous step's position. The
+            portfolio starts in ``EXIT_POSITION``, so the first entry is
+            charged as well.
+        interval: Interval label forwarded to the annualised metrics.
+
+    Returns:
+        A dict with the scalar results ``value``, ``total_return``,
+        ``arc``, ``asd``, ``ir``, ``md``, ``n_trades``, ``long_pos`` and
+        ``short_pos``, plus the arrays ``portfolio_value``,
+        ``strategy_returns``, ``strategy_positions`` and ``time``, each
+        with one entry per row of ``data``.
+    """
+
+    # Get strategy positions,
+    # position at time t is used at t+1.
+    # Skip last position as it cannot be evaluated.
+    positions = strategy.run(data)[:-1]
+
+    # Compute returns for long and short positions.
+    close_price = data['close_price'].to_numpy()
+    long_returns = (
+        (close_price[1:] - close_price[:-1]) / close_price[:-1])
+    short_returns = (
+        (close_price[:-1] - close_price[1:]) / close_price[1:])
+    assert positions.shape == long_returns.shape
+    assert positions.shape == short_returns.shape
+
+    # timestamps = data['close_time'].astype('datetime64[s]').to_numpy()
+    timestamps = data['close_time'].to_numpy()
+    assert positions.shape[0] == timestamps.shape[0] - 1
+
+    # Compute returns of the strategy; steps spent in EXIT_POSITION stay 0.
+    is_long = positions == LONG_POSITION
+    is_short = positions == SHORT_POSITION
+    strategy_returns = np.zeros_like(positions, dtype=np.float64)
+    strategy_returns[is_long] = long_returns[is_long]
+    strategy_returns[is_short] = short_returns[is_short]
+
+    # Full position path: start flat, follow the strategy, end flat.
+    position_path = np.concatenate(
+        ([EXIT_POSITION], positions, [EXIT_POSITION]))
+
+    # Include exchange fees on every step whose position differs from the
+    # previous one (position_path[:-2] is "positions shifted by one step").
+    positions_changed = position_path[:-2] != positions
+    strategy_returns[positions_changed] = (
+        strategy_returns[positions_changed] + 1.0) * (1.0 - exchange_fee) - 1.0
+
+    # Prepend a zero return so the portfolio starts at 1.0 and every array
+    # lines up with the rows of ``data``.
+    strategy_returns = np.append([0.], strategy_returns)
+    portfolio_value = np.cumprod(strategy_returns + 1)
+
+    # A trade is any change between consecutive entries of the position
+    # path, including the initial entry and the implicit close-out at the
+    # end. Comparing each entry with its direct predecessor counts every
+    # change exactly once.
+    n_trades = np.sum(position_path[1:] != position_path[:-1])
+
+    # Compute all the metrics
+    result = {
+        'value': portfolio_value[-1],
+        'total_return': portfolio_value[-1] - 1,
+        'arc': metrics.arc(portfolio_value, interval=interval),
+        'asd': metrics.asd(portfolio_value, interval=interval),
+        'ir': metrics.ir(portfolio_value, interval=interval),
+        'md': metrics.max_drawdown(portfolio_value),
+        'n_trades': n_trades,
+        'long_pos': np.sum(is_long) / positions.size,
+        'short_pos': np.sum(is_short) / positions.size,
+        # Arrays
+        'portfolio_value': portfolio_value,
+        'strategy_returns': strategy_returns,
+        'strategy_positions': np.append([EXIT_POSITION], positions),
+        'time': timestamps
+    }
+
+    return result
--- a/src/strategy/metrics.py
+++ b/src/strategy/metrics.py
@ -0,0 +1,54 @@
+from typing import Any
+import numpy as np
+from numpy.typing import NDArray
+
+
+# Number of intervals of each supported length in a 365-day year.
+# The annualised metrics below look up their scaling factor here by
+# interval label.
+NUM_INTERVALS = {
+    'min': 365 * 24 * 60,
+    '5min': 365 * 24 * 12,
+    'hour': 365 * 24,
+    'day': 365
+}
+
+
+def investment_return(array: NDArray[Any]):
+    """Relative change from the first to the last element of ``array``."""
+    first, last = array[0], array[-1]
+    return (last - first) / first
+
+
+def arc(array: NDArray[Any], interval: str = '5min'):
+    """Annualised Return Compounded over the whole of ``array``.
+
+    The total growth factor ``array[-1] / array[0]`` is raised to the
+    number of ``interval`` periods per year divided by ``array.size``.
+    """
+    growth = array[-1] / array[0]
+    exponent = NUM_INTERVALS[interval] / array.size
+    return np.power(growth, exponent) - 1
+
+
+def asd(array: NDArray[Any], interval: str = '5min'):
+    """Annualised Standard Deviation of the simple returns of ``array``.
+
+    The sum of squared deviations of the step-to-step simple returns is
+    scaled by the number of ``interval`` periods per year divided by
+    ``array.size`` before the square root is taken.
+    """
+    previous, current = array[:-1], array[1:]
+    simple_returns = (current - previous) / previous
+    deviations = simple_returns - np.mean(simple_returns)
+    scale = NUM_INTERVALS[interval] / array.size
+    return np.sqrt(scale * np.sum(np.power(deviations, 2)))
+
+
+def ir(array: NDArray[Any], interval: str = '5min'):
+    """Information Ratio: annualised return per unit of annualised risk.
+
+    Returns ``0.0`` when the annualised standard deviation is zero, so the
+    division is never attempted in that case.
+    """
+    risk = asd(array, interval=interval)
+    if not risk:
+        return 0.0
+    return arc(array, interval=interval) / risk
+
+
+def max_drawdown(array: NDArray[Any]):
+    """Largest drop below the running peak, as a fraction of that peak."""
+    running_peak = np.maximum.accumulate(array)
+    drawdowns = (running_peak - array) / running_peak
+    return np.max(drawdowns)
+
+
+# def modified_ir(array: NDArray[Any]):
+#     """Information Ratio adjusted by drawdown and ARC."""
+# return ir(array) * arc(array) * (np.sign(arc(array)) /
+# max_drawdown(array))
--- a/src/strategy/strategy.py
+++ b/src/strategy/strategy.py
@ -0,0 +1,72 @@
+import numpy as np
+import pandas as pd
+from typing import Dict, Any
+
+# Integer codes used to label positions in the arrays returned by
+# strategies' ``run`` methods.
+EXIT_POSITION = 0
+LONG_POSITION = 1
+SHORT_POSITION = 2
+
+
+class StrategyBase:
+    """Base class for investment strategies.
+
+    Subclasses override ``info`` and ``run``; the implementations here
+    only raise ``NotImplementedError``.
+    """
+
+    def info(self) -> Dict[str, Any]:
+        """Return general information about the strategy.
+
+        Raises:
+            NotImplementedError: Always, unless overridden by a subclass.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} must implement info()")
+
+    def run(self, data: pd.DataFrame):
+        """Run the strategy on ``data``.
+
+        Raises:
+            NotImplementedError: Always, unless overridden by a subclass.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} must implement run()")
+
+
+class BuyAndHoldStrategy(StrategyBase):
+    """Benchmark strategy that holds a long position on every row."""
+
+    NAME = "BUY_AND_HOLD"
+
+    def info(self) -> Dict[str, Any]:
+        """Return the strategy name under the ``strategy_name`` key."""
+        return dict(strategy_name=BuyAndHoldStrategy.NAME)
+
+    def run(self, data: pd.DataFrame):
+        """Return an int32 array of ``LONG_POSITION``.
+
+        The array has the same shape as the ``close_price`` column of
+        ``data``.
+        """
+        prices = data['close_price'].to_numpy()
+        return np.full_like(prices, LONG_POSITION, dtype=np.int32)
+
+
+class ModelReturnsPredictionStrategy(StrategyBase):
+    """Strategy that selects position based on returns predictions.
+
+    A prediction above ``threshold`` opens (or keeps) a long position, a
+    prediction below ``-threshold`` exits, and anything in between --
+    including rows without a prediction -- keeps the previous position.
+    The strategy starts in ``EXIT_POSITION``.
+    """
+
+    def __init__(
+            self,
+            predictions,
+            threshold=0.001,
+            name=None):
+        """Store the predictions and the decision threshold.
+
+        Args:
+            predictions: Frame with ``time_index``, ``group_id`` and
+                ``prediction`` columns.
+            threshold: Magnitude a prediction must exceed to change the
+                position.
+            name: Optional display name; a default is used when falsy.
+
+        Raises:
+            ValueError: If ``predictions`` lacks a required column.
+        """
+        # Validate explicitly: ``assert`` statements are removed when
+        # Python runs with -O, which would let bad input through.
+        required = ('time_index', 'group_id', 'prediction')
+        missing = [col for col in required if col not in predictions.columns]
+        if missing:
+            raise ValueError(
+                f"predictions is missing required column(s): {missing}")
+
+        self.predictions = predictions
+        self.name = name or "ML Returns prediction"
+        self.threshold = threshold
+
+    def info(self) -> Dict[str, Any]:
+        """Return the strategy name under the ``strategy_name`` key."""
+        return {'strategy_name': self.name}
+
+    def run(self, data):
+        """Return one int32 position code per row of the merged frame.
+
+        ``data`` is left-joined with the stored predictions on
+        ``time_index`` and ``group_id``; rows without a match get a NaN
+        prediction and therefore keep the previous position.
+        """
+        predicted = pd.merge(
+            data, self.predictions, on=['time_index', 'group_id'],
+            how='left')['prediction'].to_numpy()
+
+        # NaN means "no new signal on this row". EXIT is written first so
+        # that LONG wins if both conditions hold (negative threshold),
+        # matching the priority of an if/elif chain that tests LONG first.
+        signal = np.full(predicted.shape, np.nan)
+        signal[predicted < -self.threshold] = EXIT_POSITION
+        signal[predicted > self.threshold] = LONG_POSITION
+
+        # Carry the last signal forward; rows before the first signal
+        # default to EXIT_POSITION.
+        held = pd.Series(signal).ffill().fillna(EXIT_POSITION)
+        return held.to_numpy().astype(np.int32)