Factor Analysis API¶

`FactorDataLoader` ¶

Fetch Fama-French factor data from Kenneth French Data Library.

Uses pandas-datareader to download factor data and provides local caching to avoid repeated downloads. Supports US and international regional factors.

Parameters:

Name	Type	Description	Default
`cache_dir`	`str`	Directory for caching factor data. If None, uses a temp directory.	`None`

Examples:

>>> loader = FactorDataLoader()
>>> ff3 = loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> print(ff3.columns.tolist())
['Mkt-RF', 'SMB', 'HML', 'RF']

>>> ff5_intl = loader.get_ff5_factors('2015-01-01', '2023-12-31',
...                                   region='developed_ex_us')

Source code in portfolio_analysis/factors/data.py

class FactorDataLoader:
    """
    Fetch Fama-French factor data from Kenneth French Data Library.

    Uses pandas-datareader to download factor data and provides local caching
    to avoid repeated downloads. Supports US and international regional factors.

    Parameters
    ----------
    cache_dir : str, optional
        Directory for caching factor data. If None, uses a temp directory.

    Examples
    --------
    >>> loader = FactorDataLoader()
    >>> ff3 = loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> print(ff3.columns.tolist())
    ['Mkt-RF', 'SMB', 'HML', 'RF']

    >>> ff5_intl = loader.get_ff5_factors('2015-01-01', '2023-12-31',
    ...                                   region='developed_ex_us')
    """

    # French data library dataset names (US — kept for backward compatibility)
    FF3_DAILY = "F-F_Research_Data_Factors_daily"
    FF3_MONTHLY = "F-F_Research_Data_Factors"
    FF5_DAILY = "F-F_Research_Data_5_Factors_2x3_daily"
    FF5_MONTHLY = "F-F_Research_Data_5_Factors_2x3"
    MOM_DAILY = "F-F_Momentum_Factor_daily"
    MOM_MONTHLY = "F-F_Momentum_Factor"

    # Regional dataset names from Kenneth French Data Library
    REGIONS = {
        "us": {
            "ff3_daily": "F-F_Research_Data_Factors_daily",
            "ff3_monthly": "F-F_Research_Data_Factors",
            "ff5_daily": "F-F_Research_Data_5_Factors_2x3_daily",
            "ff5_monthly": "F-F_Research_Data_5_Factors_2x3",
            "mom_daily": "F-F_Momentum_Factor_daily",
            "mom_monthly": "F-F_Momentum_Factor",
        },
        "developed": {
            "ff3_daily": "Developed_3_Factors_Daily",
            "ff3_monthly": "Developed_3_Factors",
            "ff5_daily": "Developed_5_Factors_Daily",
            "ff5_monthly": "Developed_5_Factors",
            "mom_daily": "Developed_Mom_Factor_Daily",
            "mom_monthly": "Developed_Mom_Factor",
        },
        "developed_ex_us": {
            "ff3_daily": "Developed_ex_US_3_Factors_Daily",
            "ff3_monthly": "Developed_ex_US_3_Factors",
            "ff5_daily": "Developed_ex_US_5_Factors_Daily",
            "ff5_monthly": "Developed_ex_US_5_Factors",
            "mom_daily": "Developed_ex_US_Mom_Factor_Daily",
            "mom_monthly": "Developed_ex_US_Mom_Factor",
        },
        "emerging": {
            "ff3_daily": None,
            "ff3_monthly": None,
            "ff5_daily": None,
            "ff5_monthly": "Emerging_5_Factors",
            "mom_daily": None,
            "mom_monthly": "Emerging_MOM_Factor",
        },
        "europe": {
            "ff3_daily": "Europe_3_Factors_Daily",
            "ff3_monthly": "Europe_3_Factors",
            "ff5_daily": "Europe_5_Factors_Daily",
            "ff5_monthly": "Europe_5_Factors",
            "mom_daily": "Europe_Mom_Factor_Daily",
            "mom_monthly": "Europe_Mom_Factor",
        },
        "japan": {
            "ff3_daily": "Japan_3_Factors_Daily",
            "ff3_monthly": "Japan_3_Factors",
            "ff5_daily": "Japan_5_Factors_Daily",
            "ff5_monthly": "Japan_5_Factors",
            "mom_daily": "Japan_Mom_Factor_Daily",
            "mom_monthly": "Japan_Mom_Factor",
        },
        "asia_pacific_ex_japan": {
            "ff3_daily": "Asia_Pacific_ex_Japan_3_Factors_Daily",
            "ff3_monthly": "Asia_Pacific_ex_Japan_3_Factors",
            "ff5_daily": "Asia_Pacific_ex_Japan_5_Factors_Daily",
            "ff5_monthly": "Asia_Pacific_ex_Japan_5_Factors",
            "mom_daily": "Asia_Pacific_ex_Japan_MOM_Factor_Daily",
            "mom_monthly": "Asia_Pacific_ex_Japan_MOM_Factor",
        },
        "north_america": {
            "ff3_daily": "North_America_3_Factors_Daily",
            "ff3_monthly": "North_America_3_Factors",
            "ff5_daily": "North_America_5_Factors_Daily",
            "ff5_monthly": "North_America_5_Factors",
            "mom_daily": "North_America_Mom_Factor_Daily",
            "mom_monthly": "North_America_Mom_Factor",
        },
    }

    def __init__(self, cache_dir: Optional[str] = None):
        if cache_dir is None:
            import tempfile

            cache_dir = os.path.join(tempfile.gettempdir(), "ff_factors_cache")
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

    @classmethod
    def get_available_regions(cls) -> list:
        """
        Return list of supported region names.

        Returns
        -------
        list of str
            Supported region names for use with the ``region`` parameter.
        """
        return list(cls.REGIONS.keys())

    def _get_dataset_name(self, model: str, frequency: str, region: str) -> str:
        """
        Look up the French library dataset name for a model/frequency/region.

        Parameters
        ----------
        model : str
            Factor model key: 'ff3', 'ff5', or 'mom'.
        frequency : str
            'daily' or 'monthly'.
        region : str
            Region name (must be a key in ``REGIONS``).

        Returns
        -------
        str
            Dataset name string for ``pandas_datareader``.

        Raises
        ------
        ValueError
            If the region is unknown or the requested combination is unavailable.
        """
        if region not in self.REGIONS:
            available = ", ".join(sorted(self.REGIONS.keys()))
            raise ValueError(
                f"Unknown region '{region}'. " f"Available regions: {available}"
            )

        key = f"{model}_{frequency}"
        region_datasets = self.REGIONS[region]

        if key not in region_datasets:
            raise ValueError(
                f"Invalid model/frequency combination: '{model}' / '{frequency}'."
            )

        dataset = region_datasets[key]
        if dataset is None:
            raise ValueError(
                f"Daily factor data is not available for region '{region}'. "
                f"Use frequency='monthly'."
            )

        return dataset

    def _get_cache_path(self, dataset: str) -> str:
        """Generate cache file path for a dataset."""
        return os.path.join(self.cache_dir, f"{dataset.replace('-', '_')}.parquet")

    def _load_from_cache(
        self, dataset: str, max_age_days: int = 7
    ) -> Optional[pd.DataFrame]:
        """Load data from cache if available and not stale."""
        cache_path = self._get_cache_path(dataset)
        if not os.path.exists(cache_path):
            return None

        # Check age
        mtime = os.path.getmtime(cache_path)
        age_days = (datetime.now().timestamp() - mtime) / (24 * 3600)
        if age_days > max_age_days:
            return None

        try:
            return pd.read_parquet(cache_path)
        except Exception:
            return None

    def _save_to_cache(self, data: pd.DataFrame, dataset: str) -> None:
        """Save data to cache."""
        cache_path = self._get_cache_path(dataset)
        try:
            data.to_parquet(cache_path)
        except Exception:
            pass  # Silently fail if caching doesn't work

    def _fetch_french_data(self, dataset: str, use_cache: bool = True) -> pd.DataFrame:
        """
        Fetch data from Kenneth French Data Library.

        Parameters
        ----------
        dataset : str
            Dataset name from French library
        use_cache : bool, default True
            Whether to use local cache

        Returns
        -------
        pd.DataFrame
            Factor data with datetime index
        """
        # Try cache first
        if use_cache:
            cached = self._load_from_cache(dataset)
            if cached is not None:
                return cached

        try:
            import pandas_datareader.data as web
        except ImportError:
            raise ImportError(
                "pandas-datareader is required for factor data. "
                "Install with: pip install pandas-datareader"
            )

        # Fetch from French library
        data = web.DataReader(dataset, "famafrench", start="1900-01-01")

        # web.DataReader returns a dict with multiple tables
        # First table (index 0) is typically the main data
        df = data[0]

        # Convert from percentage to decimal
        df = df / 100.0

        # Ensure datetime index
        if not isinstance(df.index, pd.DatetimeIndex):
            df.index = pd.to_datetime(df.index.astype(str))

        # Cache the result
        if use_cache:
            self._save_to_cache(df, dataset)

        return df

    def _filter_dates(
        self,
        data: pd.DataFrame,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
    ) -> pd.DataFrame:
        """Filter data to date range."""
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)
        return data[(data.index >= start) & (data.index <= end)]

    def get_ff3_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Fama-French 3-factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, RF
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("ff3", frequency, region)
        data = self._fetch_french_data(dataset)
        return self._filter_dates(data, start_date, end_date)

    def get_ff5_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Fama-French 5-factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, RMW, CMA, RF
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("ff5", frequency, region)
        data = self._fetch_french_data(dataset)
        return self._filter_dates(data, start_date, end_date)

    def get_momentum_factor(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.Series:
        """
        Get momentum factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.Series
            Momentum factor (MOM or WML)
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("mom", frequency, region)
        data = self._fetch_french_data(dataset)
        filtered = self._filter_dates(data, start_date, end_date)

        # Return as Series, column name varies
        if "Mom" in filtered.columns:
            return filtered["Mom"]
        elif "WML" in filtered.columns:
            return filtered["WML"]
        else:
            return filtered.iloc[:, 0]

    def get_carhart_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Carhart 4-factor data (FF3 + Momentum).

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, MOM, RF
        """
        ff3 = self.get_ff3_factors(start_date, end_date, frequency, region)
        mom = self.get_momentum_factor(start_date, end_date, frequency, region)

        # Align dates
        common_dates = ff3.index.intersection(mom.index)
        result = ff3.loc[common_dates].copy()
        result["MOM"] = mom.loc[common_dates]

        # Reorder columns to put MOM before RF
        cols = ["Mkt-RF", "SMB", "HML", "MOM", "RF"]
        return result[cols]

`get_available_regions()` `classmethod` ¶

Return list of supported region names.

Returns:

Type	Description
`list of str`	Supported region names for use with the `region` parameter.

Source code in portfolio_analysis/factors/data.py

@classmethod
def get_available_regions(cls) -> list:
    """
    Return list of supported region names.

    Returns
    -------
    list of str
        Supported region names for use with the ``region`` parameter.
    """
    return list(cls.REGIONS.keys())

`get_ff3_factors(start_date, end_date, frequency='daily', region='us')` ¶

Get Fama-French 3-factor data.

Parameters:

Name	Type	Description	Default
`start_date`	`str or datetime`	Start date for data	required
`end_date`	`str or datetime`	End date for data	required
`frequency`	`str`	Data frequency: 'daily' or 'monthly'	`'daily'`
`region`	`str`	Geographic region for factor data. Use `FactorDataLoader.get_available_regions()` for the full list.	`'us'`

Returns:

Type	Description
`DataFrame`	DataFrame with columns: Mkt-RF, SMB, HML, RF

Source code in portfolio_analysis/factors/data.py

def get_ff3_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Get Fama-French 3-factor data.

    Parameters
    ----------
    start_date : str or datetime
        Start date for data
    end_date : str or datetime
        End date for data
    frequency : str, default 'daily'
        Data frequency: 'daily' or 'monthly'
    region : str, default 'us'
        Geographic region for factor data. Use
        ``FactorDataLoader.get_available_regions()`` for the full list.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: Mkt-RF, SMB, HML, RF
    """
    if frequency not in ("daily", "monthly"):
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    dataset = self._get_dataset_name("ff3", frequency, region)
    data = self._fetch_french_data(dataset)
    return self._filter_dates(data, start_date, end_date)

`get_ff5_factors(start_date, end_date, frequency='daily', region='us')` ¶

Get Fama-French 5-factor data.

Parameters:

Name	Type	Description	Default
`start_date`	`str or datetime`	Start date for data	required
`end_date`	`str or datetime`	End date for data	required
`frequency`	`str`	Data frequency: 'daily' or 'monthly'	`'daily'`
`region`	`str`	Geographic region for factor data. Use `FactorDataLoader.get_available_regions()` for the full list.	`'us'`

Returns:

Type	Description
`DataFrame`	DataFrame with columns: Mkt-RF, SMB, HML, RMW, CMA, RF

Source code in portfolio_analysis/factors/data.py

def get_ff5_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Get Fama-French 5-factor data.

    Parameters
    ----------
    start_date : str or datetime
        Start date for data
    end_date : str or datetime
        End date for data
    frequency : str, default 'daily'
        Data frequency: 'daily' or 'monthly'
    region : str, default 'us'
        Geographic region for factor data. Use
        ``FactorDataLoader.get_available_regions()`` for the full list.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: Mkt-RF, SMB, HML, RMW, CMA, RF
    """
    if frequency not in ("daily", "monthly"):
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    dataset = self._get_dataset_name("ff5", frequency, region)
    data = self._fetch_french_data(dataset)
    return self._filter_dates(data, start_date, end_date)

`get_momentum_factor(start_date, end_date, frequency='daily', region='us')` ¶

Get momentum factor data.

Parameters:

Name	Type	Description	Default
`start_date`	`str or datetime`	Start date for data	required
`end_date`	`str or datetime`	End date for data	required
`frequency`	`str`	Data frequency: 'daily' or 'monthly'	`'daily'`
`region`	`str`	Geographic region for factor data. Use `FactorDataLoader.get_available_regions()` for the full list.	`'us'`

Returns:

Type	Description
`Series`	Momentum factor (MOM or WML)

Source code in portfolio_analysis/factors/data.py

def get_momentum_factor(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.Series:
    """
    Get momentum factor data.

    Parameters
    ----------
    start_date : str or datetime
        Start date for data
    end_date : str or datetime
        End date for data
    frequency : str, default 'daily'
        Data frequency: 'daily' or 'monthly'
    region : str, default 'us'
        Geographic region for factor data. Use
        ``FactorDataLoader.get_available_regions()`` for the full list.

    Returns
    -------
    pd.Series
        Momentum factor (MOM or WML)
    """
    if frequency not in ("daily", "monthly"):
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    dataset = self._get_dataset_name("mom", frequency, region)
    data = self._fetch_french_data(dataset)
    filtered = self._filter_dates(data, start_date, end_date)

    # Return as Series, column name varies
    if "Mom" in filtered.columns:
        return filtered["Mom"]
    elif "WML" in filtered.columns:
        return filtered["WML"]
    else:
        return filtered.iloc[:, 0]

`get_carhart_factors(start_date, end_date, frequency='daily', region='us')` ¶

Get Carhart 4-factor data (FF3 + Momentum).

Parameters:

Name	Type	Description	Default
`start_date`	`str or datetime`	Start date for data	required
`end_date`	`str or datetime`	End date for data	required
`frequency`	`str`	Data frequency: 'daily' or 'monthly'	`'daily'`
`region`	`str`	Geographic region for factor data. Use `FactorDataLoader.get_available_regions()` for the full list.	`'us'`

Returns:

Type	Description
`DataFrame`	DataFrame with columns: Mkt-RF, SMB, HML, MOM, RF

Source code in portfolio_analysis/factors/data.py

def get_carhart_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Get Carhart 4-factor data (FF3 + Momentum).

    Parameters
    ----------
    start_date : str or datetime
        Start date for data
    end_date : str or datetime
        End date for data
    frequency : str, default 'daily'
        Data frequency: 'daily' or 'monthly'
    region : str, default 'us'
        Geographic region for factor data. Use
        ``FactorDataLoader.get_available_regions()`` for the full list.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: Mkt-RF, SMB, HML, MOM, RF
    """
    ff3 = self.get_ff3_factors(start_date, end_date, frequency, region)
    mom = self.get_momentum_factor(start_date, end_date, frequency, region)

    # Align dates
    common_dates = ff3.index.intersection(mom.index)
    result = ff3.loc[common_dates].copy()
    result["MOM"] = mom.loc[common_dates]

    # Reorder columns to put MOM before RF
    cols = ["Mkt-RF", "SMB", "HML", "MOM", "RF"]
    return result[cols]

`FactorModel` ¶

Bases: Enum

Supported factor models.

Attributes:

Name	Type	Description
`CAPM`	`Single-factor market model`
`FF3`	`Fama-French 3-factor model (Mkt-RF, SMB, HML)`
`FF5`	`Fama-French 5-factor model (+ RMW, CMA)`
`CARHART`	`Carhart 4-factor model (FF3 + MOM)`

Source code in portfolio_analysis/factors/models.py

class FactorModel(Enum):
    """
    Supported factor models.

    Attributes
    ----------
    CAPM : Single-factor market model
    FF3 : Fama-French 3-factor model (Mkt-RF, SMB, HML)
    FF5 : Fama-French 5-factor model (+ RMW, CMA)
    CARHART : Carhart 4-factor model (FF3 + MOM)
    """

    CAPM = ["Mkt-RF"]
    FF3 = ["Mkt-RF", "SMB", "HML"]
    FF5 = ["Mkt-RF", "SMB", "HML", "RMW", "CMA"]
    CARHART = ["Mkt-RF", "SMB", "HML", "MOM"]

`RegressionResults` `dataclass` ¶

Results from a factor regression.

Attributes:

Name	Type	Description
`alpha`	`float`	Jensen's alpha (annualized intercept)
`alpha_pvalue`	`float`	P-value for alpha significance test
`betas`	`dict`	Factor loadings (sensitivities)
`beta_pvalues`	`dict`	P-values for each beta
`beta_tstats`	`dict`	T-statistics for each beta
`r_squared`	`float`	R-squared (explained variance)
`adj_r_squared`	`float`	Adjusted R-squared
`residual_std`	`float`	Standard deviation of residuals (annualized)
`n_observations`	`int`	Number of observations used
`model`	`str`	Model name used for regression
`factors`	`list`	Factor names used in the model

Source code in portfolio_analysis/factors/models.py

@dataclass
class RegressionResults:
    """
    Results from a factor regression.

    Attributes
    ----------
    alpha : float
        Jensen's alpha (annualized intercept)
    alpha_pvalue : float
        P-value for alpha significance test
    betas : dict
        Factor loadings (sensitivities)
    beta_pvalues : dict
        P-values for each beta
    beta_tstats : dict
        T-statistics for each beta
    r_squared : float
        R-squared (explained variance)
    adj_r_squared : float
        Adjusted R-squared
    residual_std : float
        Standard deviation of residuals (annualized)
    n_observations : int
        Number of observations used
    model : str
        Model name used for regression
    factors : list
        Factor names used in the model
    """

    alpha: float
    alpha_pvalue: float
    betas: dict[str, float]
    beta_pvalues: dict[str, float]
    beta_tstats: dict[str, float]
    r_squared: float
    adj_r_squared: float
    residual_std: float
    n_observations: int
    model: str
    factors: list[str]

    def summary(self) -> str:
        """Generate a text summary of regression results."""
        lines = [
            f"\n{'=' * 60}",
            f"Factor Regression Results: {self.model}",
            f"{'=' * 60}",
            f"Observations: {self.n_observations}",
            f"R-squared: {self.r_squared:.4f}",
            f"Adj R-squared: {self.adj_r_squared:.4f}",
            f"Residual Std: {self.residual_std * 100:.2f}% (annualized)",
            f"\n{'Coefficient':<12} {'Value':>10} {'T-stat':>10} {'P-value':>10}",
            f"{'-' * 42}",
            f"{'Alpha':<12} {self.alpha * 100:>9.2f}% {self._alpha_tstat():>10.2f} {self.alpha_pvalue:>10.4f}",
        ]
        for factor in self.factors:
            lines.append(
                f"{factor:<12} {self.betas[factor]:>10.3f} "
                f"{self.beta_tstats[factor]:>10.2f} {self.beta_pvalues[factor]:>10.4f}"
            )
        lines.append("=" * 60)
        return "\n".join(lines)

    def _alpha_tstat(self) -> float:
        """Approximate t-stat for alpha from p-value."""
        from scipy import stats

        if self.alpha_pvalue >= 1.0:
            return 0.0
        if self.alpha_pvalue <= 0.0:
            return np.inf if self.alpha > 0 else -np.inf
        return stats.norm.ppf(1 - self.alpha_pvalue / 2) * np.sign(self.alpha)

    def __repr__(self) -> str:
        return (
            f"RegressionResults(model='{self.model}', alpha={self.alpha:.4f}, "
            f"r_squared={self.r_squared:.4f})"
        )

`summary()` ¶

Generate a text summary of regression results.

Source code in portfolio_analysis/factors/models.py

def summary(self) -> str:
    """Generate a text summary of regression results."""
    lines = [
        f"\n{'=' * 60}",
        f"Factor Regression Results: {self.model}",
        f"{'=' * 60}",
        f"Observations: {self.n_observations}",
        f"R-squared: {self.r_squared:.4f}",
        f"Adj R-squared: {self.adj_r_squared:.4f}",
        f"Residual Std: {self.residual_std * 100:.2f}% (annualized)",
        f"\n{'Coefficient':<12} {'Value':>10} {'T-stat':>10} {'P-value':>10}",
        f"{'-' * 42}",
        f"{'Alpha':<12} {self.alpha * 100:>9.2f}% {self._alpha_tstat():>10.2f} {self.alpha_pvalue:>10.4f}",
    ]
    for factor in self.factors:
        lines.append(
            f"{factor:<12} {self.betas[factor]:>10.3f} "
            f"{self.beta_tstats[factor]:>10.2f} {self.beta_pvalues[factor]:>10.4f}"
        )
    lines.append("=" * 60)
    return "\n".join(lines)

`FactorRegression` ¶

Run factor regressions on portfolio or asset returns.

Parameters:

Name	Type	Description	Default
`returns`	`Series`	Portfolio or asset returns with datetime index	required
`factor_data`	`DataFrame`	Factor data from FactorDataLoader	required
`annualization_factor`	`int`	Number of periods per year (252 for daily, 12 for monthly)	`252`

Examples:

>>> from portfolio_analysis.factors import FactorDataLoader, FactorRegression
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> regression = FactorRegression(portfolio_returns, ff3)
>>> results = regression.run_regression('ff3')
>>> print(results.summary())

Source code in portfolio_analysis/factors/models.py

class FactorRegression:
    """
    Run factor regressions on portfolio or asset returns.

    Parameters
    ----------
    returns : pd.Series
        Portfolio or asset returns with datetime index
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader
    annualization_factor : int, default 252
        Number of periods per year (252 for daily, 12 for monthly)

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorDataLoader, FactorRegression
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> regression = FactorRegression(portfolio_returns, ff3)
    >>> results = regression.run_regression('ff3')
    >>> print(results.summary())
    """

    TRADING_DAYS = 252

    def __init__(
        self,
        returns: pd.Series,
        factor_data: pd.DataFrame,
        annualization_factor: Optional[int] = None,
    ):
        self.raw_returns = returns
        self.raw_factor_data = factor_data

        # Align data
        self.excess_returns, self.factor_data = align_returns_with_factors(
            returns, factor_data, compute_excess=True
        )

        # Auto-detect frequency if not specified
        if annualization_factor is None:
            # Check average days between observations
            if len(self.excess_returns) > 1:
                avg_days = (
                    self.excess_returns.index[-1] - self.excess_returns.index[0]
                ).days / len(self.excess_returns)
                if avg_days > 20:  # Monthly
                    annualization_factor = 12
                else:  # Daily
                    annualization_factor = self.TRADING_DAYS
            else:
                annualization_factor = self.TRADING_DAYS

        self.annualization_factor = annualization_factor

    def _get_model_factors(self, model: Union[str, FactorModel]) -> list[str]:
        """Get factor names for a model."""
        if isinstance(model, str):
            model = model.lower()
            if model == "capm":
                return FactorModel.CAPM.value
            elif model == "ff3":
                return FactorModel.FF3.value
            elif model == "ff5":
                return FactorModel.FF5.value
            elif model == "carhart":
                return FactorModel.CARHART.value
            else:
                raise ValueError(
                    f"Unknown model: {model}. Use 'capm', 'ff3', 'ff5', or 'carhart'"
                )
        return model.value

    def _ols_regression(self, y: np.ndarray, X: np.ndarray) -> tuple:
        """
        Run OLS regression with statistical inference.

        Returns coefficients, t-stats, p-values, and fit statistics.
        """
        from scipy import stats

        n = len(y)
        k = X.shape[1]

        # Add constant for alpha
        X_with_const = np.column_stack([np.ones(n), X])

        # OLS: (X'X)^-1 X'y
        XtX = X_with_const.T @ X_with_const
        XtX_inv = np.linalg.inv(XtX)
        coeffs = XtX_inv @ X_with_const.T @ y

        # Residuals and variance
        y_hat = X_with_const @ coeffs
        residuals = y - y_hat
        sse = residuals @ residuals
        dof = n - k - 1  # degrees of freedom

        if dof <= 0:
            raise ValueError("Not enough observations for regression")

        mse = sse / dof
        residual_std = np.sqrt(mse)

        # Coefficient standard errors
        se = np.sqrt(np.diag(XtX_inv) * mse)

        # T-statistics and p-values
        t_stats = coeffs / se
        p_values = 2 * (1 - stats.t.cdf(np.abs(t_stats), dof))

        # R-squared
        ss_total = ((y - y.mean()) ** 2).sum()
        r_squared = 1 - sse / ss_total if ss_total > 0 else 0

        # Adjusted R-squared
        adj_r_squared = 1 - (1 - r_squared) * (n - 1) / dof

        return {
            "coeffs": coeffs,
            "t_stats": t_stats,
            "p_values": p_values,
            "r_squared": r_squared,
            "adj_r_squared": adj_r_squared,
            "residual_std": residual_std,
            "n": n,
        }

    def run_regression(
        self, model: Union[str, FactorModel] = "ff3"
    ) -> RegressionResults:
        """
        Run a factor regression.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'

        Returns
        -------
        RegressionResults
            Regression results with alpha, betas, and statistics
        """
        factors = self._get_model_factors(model)

        # Validate factors exist in data
        missing = [f for f in factors if f not in self.factor_data.columns]
        if missing:
            raise ValueError(
                f"Factors not in data: {missing}. Available: {self.factor_data.columns.tolist()}"
            )

        # Prepare data
        y = self.excess_returns.values
        X = self.factor_data[factors].values

        # Run regression
        results = self._ols_regression(y, X)

        # Extract results
        alpha = results["coeffs"][0]
        betas = dict(zip(factors, results["coeffs"][1:]))
        beta_tstats = dict(zip(factors, results["t_stats"][1:]))
        beta_pvalues = dict(zip(factors, results["p_values"][1:]))

        # Annualize alpha and residual std
        alpha_annual = alpha * self.annualization_factor
        residual_std_annual = results["residual_std"] * np.sqrt(
            self.annualization_factor
        )

        model_name = model.name if isinstance(model, FactorModel) else model.upper()

        return RegressionResults(
            alpha=alpha_annual,
            alpha_pvalue=results["p_values"][0],
            betas=betas,
            beta_pvalues=beta_pvalues,
            beta_tstats=beta_tstats,
            r_squared=results["r_squared"],
            adj_r_squared=results["adj_r_squared"],
            residual_std=residual_std_annual,
            n_observations=results["n"],
            model=model_name,
            factors=factors,
        )

    def run_rolling_regression(
        self, model: Union[str, FactorModel] = "ff3", window: int = 60
    ) -> pd.DataFrame:
        """
        Run rolling factor regressions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use
        window : int, default 60
            Rolling window size (number of periods)

        Returns
        -------
        pd.DataFrame
            DataFrame with rolling alpha and betas, indexed by date
        """
        factors = self._get_model_factors(model)
        y = self.excess_returns
        X = self.factor_data[factors]

        results = []
        dates = []

        for i in range(window, len(y) + 1):
            y_window = y.iloc[i - window : i].values
            X_window = X.iloc[i - window : i].values

            try:
                reg = self._ols_regression(y_window, X_window)
                result = {"alpha": reg["coeffs"][0] * self.annualization_factor}
                for j, factor in enumerate(factors):
                    result[factor] = reg["coeffs"][j + 1]
                result["r_squared"] = reg["r_squared"]
                results.append(result)
                dates.append(y.index[i - 1])
            except Exception:
                continue

        return pd.DataFrame(results, index=pd.DatetimeIndex(dates))

    def compare_models(self) -> pd.DataFrame:
        """
        Compare different factor models.

        Returns
        -------
        pd.DataFrame
            Comparison table with alpha, R-squared, and key betas for each model
        """
        models = ["capm", "ff3"]

        # Add ff5 if factors available
        if all(f in self.factor_data.columns for f in FactorModel.FF5.value):
            models.append("ff5")

        # Add carhart if momentum available
        if "MOM" in self.factor_data.columns:
            models.append("carhart")

        results = []
        for model in models:
            try:
                reg = self.run_regression(model)
                result = {
                    "Model": reg.model,
                    "Alpha (%)": reg.alpha * 100,
                    "Alpha p-value": reg.alpha_pvalue,
                    "R-squared": reg.r_squared,
                    "Adj R-squared": reg.adj_r_squared,
                    "Mkt Beta": reg.betas.get("Mkt-RF", np.nan),
                }
                # Add other betas if available
                for factor in ["SMB", "HML", "RMW", "CMA", "MOM"]:
                    if factor in reg.betas:
                        result[factor] = reg.betas[factor]
                results.append(result)
            except Exception:
                continue

        return pd.DataFrame(results)

`run_regression(model='ff3')` ¶

Run a factor regression.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'	`'ff3'`

Returns:

Type	Description
`RegressionResults`	Regression results with alpha, betas, and statistics

Source code in portfolio_analysis/factors/models.py

def run_regression(
    self, model: Union[str, FactorModel] = "ff3"
) -> RegressionResults:
    """
    Run a factor regression.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'

    Returns
    -------
    RegressionResults
        Regression results with alpha, betas, and statistics
    """
    factors = self._get_model_factors(model)

    # Validate factors exist in data
    missing = [f for f in factors if f not in self.factor_data.columns]
    if missing:
        raise ValueError(
            f"Factors not in data: {missing}. Available: {self.factor_data.columns.tolist()}"
        )

    # Prepare data
    y = self.excess_returns.values
    X = self.factor_data[factors].values

    # Run regression
    results = self._ols_regression(y, X)

    # Extract results
    alpha = results["coeffs"][0]
    betas = dict(zip(factors, results["coeffs"][1:]))
    beta_tstats = dict(zip(factors, results["t_stats"][1:]))
    beta_pvalues = dict(zip(factors, results["p_values"][1:]))

    # Annualize alpha and residual std
    alpha_annual = alpha * self.annualization_factor
    residual_std_annual = results["residual_std"] * np.sqrt(
        self.annualization_factor
    )

    model_name = model.name if isinstance(model, FactorModel) else model.upper()

    return RegressionResults(
        alpha=alpha_annual,
        alpha_pvalue=results["p_values"][0],
        betas=betas,
        beta_pvalues=beta_pvalues,
        beta_tstats=beta_tstats,
        r_squared=results["r_squared"],
        adj_r_squared=results["adj_r_squared"],
        residual_std=residual_std_annual,
        n_observations=results["n"],
        model=model_name,
        factors=factors,
    )

`run_rolling_regression(model='ff3', window=60)` ¶

Run rolling factor regressions.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use	`'ff3'`
`window`	`int`	Rolling window size (number of periods)	`60`

Returns:

Type	Description
`DataFrame`	DataFrame with rolling alpha and betas, indexed by date

Source code in portfolio_analysis/factors/models.py

def run_rolling_regression(
    self, model: Union[str, FactorModel] = "ff3", window: int = 60
) -> pd.DataFrame:
    """
    Run rolling factor regressions.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use
    window : int, default 60
        Rolling window size (number of periods)

    Returns
    -------
    pd.DataFrame
        DataFrame with rolling alpha and betas, indexed by date
    """
    factors = self._get_model_factors(model)
    y = self.excess_returns
    X = self.factor_data[factors]

    results = []
    dates = []

    for i in range(window, len(y) + 1):
        y_window = y.iloc[i - window : i].values
        X_window = X.iloc[i - window : i].values

        try:
            reg = self._ols_regression(y_window, X_window)
            result = {"alpha": reg["coeffs"][0] * self.annualization_factor}
            for j, factor in enumerate(factors):
                result[factor] = reg["coeffs"][j + 1]
            result["r_squared"] = reg["r_squared"]
            results.append(result)
            dates.append(y.index[i - 1])
        except Exception:
            continue

    return pd.DataFrame(results, index=pd.DatetimeIndex(dates))

`compare_models()` ¶

Compare different factor models.

Returns:

Type	Description
`DataFrame`	Comparison table with alpha, R-squared, and key betas for each model

Source code in portfolio_analysis/factors/models.py

def compare_models(self) -> pd.DataFrame:
    """
    Compare different factor models.

    Returns
    -------
    pd.DataFrame
        Comparison table with alpha, R-squared, and key betas for each model
    """
    models = ["capm", "ff3"]

    # Add ff5 if factors available
    if all(f in self.factor_data.columns for f in FactorModel.FF5.value):
        models.append("ff5")

    # Add carhart if momentum available
    if "MOM" in self.factor_data.columns:
        models.append("carhart")

    results = []
    for model in models:
        try:
            reg = self.run_regression(model)
            result = {
                "Model": reg.model,
                "Alpha (%)": reg.alpha * 100,
                "Alpha p-value": reg.alpha_pvalue,
                "R-squared": reg.r_squared,
                "Adj R-squared": reg.adj_r_squared,
                "Mkt Beta": reg.betas.get("Mkt-RF", np.nan),
            }
            # Add other betas if available
            for factor in ["SMB", "HML", "RMW", "CMA", "MOM"]:
                if factor in reg.betas:
                    result[factor] = reg.betas[factor]
            results.append(result)
        except Exception:
            continue

    return pd.DataFrame(results)

`FactorExposures` ¶

Calculate characteristic-based factor exposures for a portfolio.

This class estimates factor tilts based on security characteristics (market cap, valuation ratios, momentum, etc.) rather than regression.

Parameters:

Name	Type	Description	Default
`tickers`	`list of str`	List of ticker symbols in the portfolio	required
`weights`	`list of float`	Portfolio weights for each ticker (must sum to 1.0)	required

Examples:

>>> exposures = FactorExposures(['VTI', 'VBR', 'VTV'], [0.5, 0.25, 0.25])
>>> tilts = exposures.get_all_tilts()
>>> print(f"Size tilt: {tilts['size']:.2f}")
>>> print(f"Value tilt: {tilts['value']:.2f}")

Source code in portfolio_analysis/factors/exposures.py

class FactorExposures:
    """
    Calculate characteristic-based factor exposures for a portfolio.

    This class estimates factor tilts based on security characteristics
    (market cap, valuation ratios, momentum, etc.) rather than regression.

    Parameters
    ----------
    tickers : list of str
        List of ticker symbols in the portfolio
    weights : list of float
        Portfolio weights for each ticker (must sum to 1.0)

    Examples
    --------
    >>> exposures = FactorExposures(['VTI', 'VBR', 'VTV'], [0.5, 0.25, 0.25])
    >>> tilts = exposures.get_all_tilts()
    >>> print(f"Size tilt: {tilts['size']:.2f}")
    >>> print(f"Value tilt: {tilts['value']:.2f}")
    """

    # Market cap thresholds (in billions)
    LARGE_CAP_THRESHOLD = 10.0
    SMALL_CAP_THRESHOLD = 2.0

    def __init__(self, tickers: list[str], weights: list[float]):
        if len(tickers) != len(weights):
            raise ValueError("Number of tickers must match number of weights")

        if abs(sum(weights) - 1.0) > 0.01:
            raise ValueError("Weights must sum to 1.0")

        self.tickers = tickers
        self.weights = np.array(weights)
        self._characteristics: Optional[pd.DataFrame] = None

    def _fetch_characteristics(self) -> pd.DataFrame:
        """Fetch fundamental characteristics for all tickers."""
        if self._characteristics is not None:
            return self._characteristics

        data = []
        for ticker in self.tickers:
            try:
                info = yf.Ticker(ticker).info
                data.append(
                    {
                        "ticker": ticker,
                        "market_cap": info.get("marketCap", None),
                        "pe_ratio": info.get("trailingPE", info.get("forwardPE", None)),
                        "pb_ratio": info.get("priceToBook", None),
                        "dividend_yield": info.get("dividendYield", 0) or 0,
                        "beta": info.get("beta", None),
                        "profit_margin": info.get("profitMargins", None),
                        "roe": info.get("returnOnEquity", None),
                        "debt_to_equity": info.get("debtToEquity", None),
                        "revenue_growth": info.get("revenueGrowth", None),
                        "earnings_growth": info.get("earningsGrowth", None),
                    }
                )
            except Exception:
                # Use defaults for ETFs or failed lookups
                data.append(
                    {
                        "ticker": ticker,
                        "market_cap": None,
                        "pe_ratio": None,
                        "pb_ratio": None,
                        "dividend_yield": 0,
                        "beta": 1.0,
                        "profit_margin": None,
                        "roe": None,
                        "debt_to_equity": None,
                        "revenue_growth": None,
                        "earnings_growth": None,
                    }
                )

        self._characteristics = pd.DataFrame(data).set_index("ticker")
        return self._characteristics

    def _calculate_momentum(self, lookback_months: int = 12) -> pd.Series:
        """Calculate momentum based on historical returns."""
        from datetime import datetime, timedelta

        end_date = datetime.now()
        start_date = end_date - timedelta(days=lookback_months * 30 + 30)

        try:
            prices = yf.download(
                self.tickers, start=start_date, end=end_date, progress=False
            )

            # Handle column format
            if isinstance(prices.columns, pd.MultiIndex):
                if "Adj Close" in prices.columns.get_level_values(0):
                    prices = prices["Adj Close"]
                else:
                    prices = prices["Close"]

            # Calculate momentum (skip most recent month)
            if len(prices) > 21:
                momentum = (prices.iloc[-22] / prices.iloc[0]) - 1
            else:
                momentum = pd.Series(0, index=self.tickers)

            return momentum

        except Exception:
            return pd.Series(0, index=self.tickers)

    def calculate_size_tilt(self) -> float:
        """
        Calculate portfolio size tilt (SMB exposure).

        Returns
        -------
        float
            Size tilt from -1 (large cap) to +1 (small cap)
            0 indicates market-neutral size exposure
        """
        chars = self._fetch_characteristics()

        # Convert market cap to billions
        market_caps = chars["market_cap"].fillna(chars["market_cap"].median())
        market_caps_b = market_caps / 1e9

        # Score each holding: -1 for large, 0 for mid, +1 for small
        scores = []
        for mc in market_caps_b:
            if mc is None or pd.isna(mc):
                scores.append(0)
            elif mc > self.LARGE_CAP_THRESHOLD:
                scores.append(-1)
            elif mc < self.SMALL_CAP_THRESHOLD:
                scores.append(1)
            else:
                # Linear interpolation for mid-cap
                scores.append(
                    (self.LARGE_CAP_THRESHOLD - mc)
                    / (self.LARGE_CAP_THRESHOLD - self.SMALL_CAP_THRESHOLD)
                    * 2
                    - 1
                )

        return float(np.dot(scores, self.weights))

    def calculate_value_tilt(self) -> float:
        """
        Calculate portfolio value tilt (HML exposure).

        Returns
        -------
        float
            Value tilt from -1 (growth) to +1 (value)
            0 indicates market-neutral value exposure
        """
        chars = self._fetch_characteristics()

        # Use P/B ratio primarily, P/E as backup
        pb_ratios = chars["pb_ratio"].fillna(chars["pb_ratio"].median())
        pe_ratios = chars["pe_ratio"].fillna(chars["pe_ratio"].median())

        # Score based on valuation: low P/B = value (+1), high P/B = growth (-1)
        # Typical P/B ranges: <1 deep value, 1-3 neutral, >3 growth
        scores = []
        for i, (pb, pe) in enumerate(zip(pb_ratios, pe_ratios)):
            # Use P/B if available, else P/E
            if pd.notna(pb) and pb > 0:
                if pb < 1.5:
                    score = 1.0
                elif pb > 4.0:
                    score = -1.0
                else:
                    # Linear interpolation
                    score = 1.0 - (pb - 1.5) / 2.5 * 2
            elif pd.notna(pe) and pe > 0:
                # P/E based scoring: <15 value, >25 growth
                if pe < 15:
                    score = 1.0
                elif pe > 25:
                    score = -1.0
                else:
                    score = 1.0 - (pe - 15) / 10 * 2
            else:
                score = 0.0
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def calculate_momentum_tilt(self) -> float:
        """
        Calculate portfolio momentum tilt (MOM exposure).

        Returns
        -------
        float
            Momentum tilt from -1 (low momentum) to +1 (high momentum)
        """
        momentum = self._calculate_momentum(lookback_months=12)

        # Normalize momentum scores
        # Market average is roughly 10% annual, winners >20%, losers <0%
        scores = []
        for ticker in self.tickers:
            mom = momentum.get(ticker, 0)
            if mom > 0.2:
                score = 1.0
            elif mom < 0.0:
                score = -1.0
            else:
                # Linear interpolation
                score = mom / 0.2 * 2 - 1
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def calculate_quality_tilt(self) -> float:
        """
        Calculate portfolio quality tilt (RMW-like exposure).

        Quality is based on profitability and financial health.

        Returns
        -------
        float
            Quality tilt from -1 (low quality) to +1 (high quality)
        """
        chars = self._fetch_characteristics()

        scores = []
        for i, ticker in enumerate(self.tickers):
            row = chars.loc[ticker]

            # Score components
            profit_score = 0
            roe_score = 0
            debt_score = 0

            # Profit margin (>15% good, <5% poor)
            pm = row.get("profit_margin")
            if pd.notna(pm):
                if pm > 0.15:
                    profit_score = 1
                elif pm < 0.05:
                    profit_score = -1
                else:
                    profit_score = (pm - 0.05) / 0.10 * 2 - 1

            # ROE (>15% good, <8% poor)
            roe = row.get("roe")
            if pd.notna(roe):
                if roe > 0.15:
                    roe_score = 1
                elif roe < 0.08:
                    roe_score = -1
                else:
                    roe_score = (roe - 0.08) / 0.07 * 2 - 1

            # Debt/Equity (low is better: <50% good, >150% poor)
            de = row.get("debt_to_equity")
            if pd.notna(de):
                de_ratio = de / 100  # Often reported as percentage
                if de_ratio < 0.5:
                    debt_score = 1
                elif de_ratio > 1.5:
                    debt_score = -1
                else:
                    debt_score = 1 - (de_ratio - 0.5) / 1.0 * 2

            # Average available scores
            available = [s for s in [profit_score, roe_score, debt_score] if s != 0]
            scores.append(np.mean(available) if available else 0)

        return float(np.dot(scores, self.weights))

    def calculate_investment_tilt(self) -> float:
        """
        Calculate portfolio investment tilt (CMA-like exposure).

        Conservative investment (low asset growth) vs aggressive.

        Returns
        -------
        float
            Investment tilt from -1 (aggressive) to +1 (conservative)
        """
        chars = self._fetch_characteristics()

        scores = []
        for ticker in self.tickers:
            row = chars.loc[ticker]

            # Use revenue growth as proxy for investment aggressiveness
            rev_growth = row.get("revenue_growth")
            if pd.notna(rev_growth):
                # High growth = aggressive (-1), low growth = conservative (+1)
                if rev_growth > 0.20:
                    score = -1.0
                elif rev_growth < 0.05:
                    score = 1.0
                else:
                    score = 1.0 - (rev_growth - 0.05) / 0.15 * 2
            else:
                score = 0.0
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def get_all_tilts(self) -> dict[str, float]:
        """
        Calculate all factor tilts for the portfolio.

        Returns
        -------
        dict
            Dictionary with all factor tilts:
            - size: SMB-like exposure
            - value: HML-like exposure
            - momentum: MOM-like exposure
            - quality: RMW-like exposure
            - investment: CMA-like exposure
        """
        return {
            "size": self.calculate_size_tilt(),
            "value": self.calculate_value_tilt(),
            "momentum": self.calculate_momentum_tilt(),
            "quality": self.calculate_quality_tilt(),
            "investment": self.calculate_investment_tilt(),
        }

    def get_characteristics_table(self) -> pd.DataFrame:
        """
        Get a table of fundamental characteristics for all holdings.

        Returns
        -------
        pd.DataFrame
            Characteristics for each holding with portfolio weight
        """
        chars = self._fetch_characteristics().copy()
        chars["weight"] = self.weights

        # Reorder columns
        cols = [
            "weight",
            "market_cap",
            "pe_ratio",
            "pb_ratio",
            "dividend_yield",
            "beta",
            "profit_margin",
            "roe",
            "debt_to_equity",
            "revenue_growth",
            "earnings_growth",
        ]
        available_cols = [c for c in cols if c in chars.columns]

        return chars[available_cols]

    def summary(self) -> str:
        """Generate a text summary of factor exposures."""
        tilts = self.get_all_tilts()

        lines = [
            f"\n{'=' * 50}",
            "Portfolio Factor Exposures (Characteristic-Based)",
            f"{'=' * 50}",
            f"{'Factor':<15} {'Tilt':>10} {'Interpretation':<25}",
            f"{'-' * 50}",
        ]

        interpretations = {
            "size": lambda x: (
                "Small Cap" if x > 0.3 else ("Large Cap" if x < -0.3 else "Neutral")
            ),
            "value": lambda x: (
                "Value" if x > 0.3 else ("Growth" if x < -0.3 else "Blend")
            ),
            "momentum": lambda x: (
                "High Mom" if x > 0.3 else ("Low Mom" if x < -0.3 else "Neutral")
            ),
            "quality": lambda x: (
                "High Quality"
                if x > 0.3
                else ("Low Quality" if x < -0.3 else "Neutral")
            ),
            "investment": lambda x: (
                "Conservative" if x > 0.3 else ("Aggressive" if x < -0.3 else "Neutral")
            ),
        }

        for factor, tilt in tilts.items():
            interp = interpretations[factor](tilt)
            lines.append(f"{factor.capitalize():<15} {tilt:>10.2f} {interp:<25}")

        lines.append("=" * 50)
        return "\n".join(lines)

`calculate_size_tilt()` ¶

Calculate portfolio size tilt (SMB exposure).

Returns:

Type	Description
`float`	Size tilt from -1 (large cap) to +1 (small cap) 0 indicates market-neutral size exposure

Source code in portfolio_analysis/factors/exposures.py

def calculate_size_tilt(self) -> float:
    """
    Calculate portfolio size tilt (SMB exposure).

    Returns
    -------
    float
        Size tilt from -1 (large cap) to +1 (small cap)
        0 indicates market-neutral size exposure
    """
    chars = self._fetch_characteristics()

    # Convert market cap to billions
    market_caps = chars["market_cap"].fillna(chars["market_cap"].median())
    market_caps_b = market_caps / 1e9

    # Score each holding: -1 for large, 0 for mid, +1 for small
    scores = []
    for mc in market_caps_b:
        if mc is None or pd.isna(mc):
            scores.append(0)
        elif mc > self.LARGE_CAP_THRESHOLD:
            scores.append(-1)
        elif mc < self.SMALL_CAP_THRESHOLD:
            scores.append(1)
        else:
            # Linear interpolation for mid-cap
            scores.append(
                (self.LARGE_CAP_THRESHOLD - mc)
                / (self.LARGE_CAP_THRESHOLD - self.SMALL_CAP_THRESHOLD)
                * 2
                - 1
            )

    return float(np.dot(scores, self.weights))

`calculate_value_tilt()` ¶

Calculate portfolio value tilt (HML exposure).

Returns:

Type	Description
`float`	Value tilt from -1 (growth) to +1 (value) 0 indicates market-neutral value exposure

Source code in portfolio_analysis/factors/exposures.py

def calculate_value_tilt(self) -> float:
    """
    Calculate portfolio value tilt (HML exposure).

    Returns
    -------
    float
        Value tilt from -1 (growth) to +1 (value)
        0 indicates market-neutral value exposure
    """
    chars = self._fetch_characteristics()

    # Use P/B ratio primarily, P/E as backup
    pb_ratios = chars["pb_ratio"].fillna(chars["pb_ratio"].median())
    pe_ratios = chars["pe_ratio"].fillna(chars["pe_ratio"].median())

    # Score based on valuation: low P/B = value (+1), high P/B = growth (-1)
    # Typical P/B ranges: <1 deep value, 1-3 neutral, >3 growth
    scores = []
    for i, (pb, pe) in enumerate(zip(pb_ratios, pe_ratios)):
        # Use P/B if available, else P/E
        if pd.notna(pb) and pb > 0:
            if pb < 1.5:
                score = 1.0
            elif pb > 4.0:
                score = -1.0
            else:
                # Linear interpolation
                score = 1.0 - (pb - 1.5) / 2.5 * 2
        elif pd.notna(pe) and pe > 0:
            # P/E based scoring: <15 value, >25 growth
            if pe < 15:
                score = 1.0
            elif pe > 25:
                score = -1.0
            else:
                score = 1.0 - (pe - 15) / 10 * 2
        else:
            score = 0.0
        scores.append(score)

    return float(np.dot(scores, self.weights))

`calculate_momentum_tilt()` ¶

Calculate portfolio momentum tilt (MOM exposure).

Returns:

Type	Description
`float`	Momentum tilt from -1 (low momentum) to +1 (high momentum)

Source code in portfolio_analysis/factors/exposures.py

def calculate_momentum_tilt(self) -> float:
    """
    Calculate portfolio momentum tilt (MOM exposure).

    Returns
    -------
    float
        Momentum tilt from -1 (low momentum) to +1 (high momentum)
    """
    momentum = self._calculate_momentum(lookback_months=12)

    # Normalize momentum scores
    # Market average is roughly 10% annual, winners >20%, losers <0%
    scores = []
    for ticker in self.tickers:
        mom = momentum.get(ticker, 0)
        if mom > 0.2:
            score = 1.0
        elif mom < 0.0:
            score = -1.0
        else:
            # Linear interpolation
            score = mom / 0.2 * 2 - 1
        scores.append(score)

    return float(np.dot(scores, self.weights))

`calculate_quality_tilt()` ¶

Calculate portfolio quality tilt (RMW-like exposure).

Quality is based on profitability and financial health.

Returns:

Type	Description
`float`	Quality tilt from -1 (low quality) to +1 (high quality)

Source code in portfolio_analysis/factors/exposures.py

def calculate_quality_tilt(self) -> float:
    """
    Calculate portfolio quality tilt (RMW-like exposure).

    Quality is based on profitability and financial health.

    Returns
    -------
    float
        Quality tilt from -1 (low quality) to +1 (high quality)
    """
    chars = self._fetch_characteristics()

    scores = []
    for i, ticker in enumerate(self.tickers):
        row = chars.loc[ticker]

        # Score components
        profit_score = 0
        roe_score = 0
        debt_score = 0

        # Profit margin (>15% good, <5% poor)
        pm = row.get("profit_margin")
        if pd.notna(pm):
            if pm > 0.15:
                profit_score = 1
            elif pm < 0.05:
                profit_score = -1
            else:
                profit_score = (pm - 0.05) / 0.10 * 2 - 1

        # ROE (>15% good, <8% poor)
        roe = row.get("roe")
        if pd.notna(roe):
            if roe > 0.15:
                roe_score = 1
            elif roe < 0.08:
                roe_score = -1
            else:
                roe_score = (roe - 0.08) / 0.07 * 2 - 1

        # Debt/Equity (low is better: <50% good, >150% poor)
        de = row.get("debt_to_equity")
        if pd.notna(de):
            de_ratio = de / 100  # Often reported as percentage
            if de_ratio < 0.5:
                debt_score = 1
            elif de_ratio > 1.5:
                debt_score = -1
            else:
                debt_score = 1 - (de_ratio - 0.5) / 1.0 * 2

        # Average available scores
        available = [s for s in [profit_score, roe_score, debt_score] if s != 0]
        scores.append(np.mean(available) if available else 0)

    return float(np.dot(scores, self.weights))

`calculate_investment_tilt()` ¶

Calculate portfolio investment tilt (CMA-like exposure).

Conservative investment (low asset growth) vs aggressive.

Returns:

Type	Description
`float`	Investment tilt from -1 (aggressive) to +1 (conservative)

Source code in portfolio_analysis/factors/exposures.py

def calculate_investment_tilt(self) -> float:
    """
    Calculate portfolio investment tilt (CMA-like exposure).

    Conservative investment (low asset growth) vs aggressive.

    Returns
    -------
    float
        Investment tilt from -1 (aggressive) to +1 (conservative)
    """
    chars = self._fetch_characteristics()

    scores = []
    for ticker in self.tickers:
        row = chars.loc[ticker]

        # Use revenue growth as proxy for investment aggressiveness
        rev_growth = row.get("revenue_growth")
        if pd.notna(rev_growth):
            # High growth = aggressive (-1), low growth = conservative (+1)
            if rev_growth > 0.20:
                score = -1.0
            elif rev_growth < 0.05:
                score = 1.0
            else:
                score = 1.0 - (rev_growth - 0.05) / 0.15 * 2
        else:
            score = 0.0
        scores.append(score)

    return float(np.dot(scores, self.weights))

`get_all_tilts()` ¶

Calculate all factor tilts for the portfolio.

Returns:

Type	Description
`dict`	Dictionary with all factor tilts: - size: SMB-like exposure - value: HML-like exposure - momentum: MOM-like exposure - quality: RMW-like exposure - investment: CMA-like exposure

Source code in portfolio_analysis/factors/exposures.py

def get_all_tilts(self) -> dict[str, float]:
    """
    Calculate all factor tilts for the portfolio.

    Returns
    -------
    dict
        Dictionary with all factor tilts:
        - size: SMB-like exposure
        - value: HML-like exposure
        - momentum: MOM-like exposure
        - quality: RMW-like exposure
        - investment: CMA-like exposure
    """
    return {
        "size": self.calculate_size_tilt(),
        "value": self.calculate_value_tilt(),
        "momentum": self.calculate_momentum_tilt(),
        "quality": self.calculate_quality_tilt(),
        "investment": self.calculate_investment_tilt(),
    }

`get_characteristics_table()` ¶

Get a table of fundamental characteristics for all holdings.

Returns:

Type	Description
`DataFrame`	Characteristics for each holding with portfolio weight

Source code in portfolio_analysis/factors/exposures.py

def get_characteristics_table(self) -> pd.DataFrame:
    """
    Get a table of fundamental characteristics for all holdings.

    Returns
    -------
    pd.DataFrame
        Characteristics for each holding with portfolio weight
    """
    chars = self._fetch_characteristics().copy()
    chars["weight"] = self.weights

    # Reorder columns
    cols = [
        "weight",
        "market_cap",
        "pe_ratio",
        "pb_ratio",
        "dividend_yield",
        "beta",
        "profit_margin",
        "roe",
        "debt_to_equity",
        "revenue_growth",
        "earnings_growth",
    ]
    available_cols = [c for c in cols if c in chars.columns]

    return chars[available_cols]

`summary()` ¶

Generate a text summary of factor exposures.

Source code in portfolio_analysis/factors/exposures.py

def summary(self) -> str:
    """Generate a text summary of factor exposures."""
    tilts = self.get_all_tilts()

    lines = [
        f"\n{'=' * 50}",
        "Portfolio Factor Exposures (Characteristic-Based)",
        f"{'=' * 50}",
        f"{'Factor':<15} {'Tilt':>10} {'Interpretation':<25}",
        f"{'-' * 50}",
    ]

    interpretations = {
        "size": lambda x: (
            "Small Cap" if x > 0.3 else ("Large Cap" if x < -0.3 else "Neutral")
        ),
        "value": lambda x: (
            "Value" if x > 0.3 else ("Growth" if x < -0.3 else "Blend")
        ),
        "momentum": lambda x: (
            "High Mom" if x > 0.3 else ("Low Mom" if x < -0.3 else "Neutral")
        ),
        "quality": lambda x: (
            "High Quality"
            if x > 0.3
            else ("Low Quality" if x < -0.3 else "Neutral")
        ),
        "investment": lambda x: (
            "Conservative" if x > 0.3 else ("Aggressive" if x < -0.3 else "Neutral")
        ),
    }

    for factor, tilt in tilts.items():
        interp = interpretations[factor](tilt)
        lines.append(f"{factor.capitalize():<15} {tilt:>10.2f} {interp:<25}")

    lines.append("=" * 50)
    return "\n".join(lines)

`FactorAttribution` ¶

Decompose portfolio returns and risk into factor contributions.

This class uses factor regression to attribute portfolio performance to systematic factors and idiosyncratic (alpha) components.

Parameters:

Name	Type	Description	Default
`returns`	`Series`	Portfolio returns with datetime index	required
`factor_data`	`DataFrame`	Factor data from FactorDataLoader	required
`annualization_factor`	`int`	Number of periods per year (252 for daily, 12 for monthly)	`252`

Examples:

>>> from portfolio_analysis.factors import FactorAttribution, FactorDataLoader
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> attribution = FactorAttribution(portfolio_returns, ff3)
>>> decomp = attribution.decompose_returns()
>>> print(f"Market contribution: {decomp['Mkt-RF']:.2%}")
>>> print(f"Alpha: {decomp['alpha']:.2%}")

Source code in portfolio_analysis/factors/attribution.py

class FactorAttribution:
    """
    Decompose portfolio returns and risk into factor contributions.

    This class uses factor regression to attribute portfolio performance
    to systematic factors and idiosyncratic (alpha) components.

    Parameters
    ----------
    returns : pd.Series
        Portfolio returns with datetime index
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader
    annualization_factor : int, default 252
        Number of periods per year (252 for daily, 12 for monthly)

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorAttribution, FactorDataLoader
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> attribution = FactorAttribution(portfolio_returns, ff3)
    >>> decomp = attribution.decompose_returns()
    >>> print(f"Market contribution: {decomp['Mkt-RF']:.2%}")
    >>> print(f"Alpha: {decomp['alpha']:.2%}")
    """

    TRADING_DAYS = 252

    def __init__(
        self,
        returns: pd.Series,
        factor_data: pd.DataFrame,
        annualization_factor: int | None = None,
    ):
        self.raw_returns = returns
        self.raw_factor_data = factor_data

        # Align data
        self.excess_returns, self.factor_data = align_returns_with_factors(
            returns, factor_data, compute_excess=True
        )

        # Auto-detect frequency
        if annualization_factor is None:
            if len(self.excess_returns) > 1:
                avg_days = (
                    self.excess_returns.index[-1] - self.excess_returns.index[0]
                ).days / len(self.excess_returns)
                annualization_factor = 12 if avg_days > 20 else self.TRADING_DAYS
            else:
                annualization_factor = self.TRADING_DAYS

        self.annualization_factor = annualization_factor

        # Create regression object for analysis
        self._regression = FactorRegression(returns, factor_data, annualization_factor)

    def decompose_returns(self, model: str | FactorModel = "ff3") -> dict[str, float]:
        """
        Decompose total returns into factor contributions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use for decomposition

        Returns
        -------
        dict
            Dictionary with:
            - 'total': Total annualized return
            - 'risk_free': Risk-free contribution
            - One key per factor with its return contribution
            - 'alpha': Idiosyncratic return (Jensen's alpha)
        """
        # Run regression to get betas
        reg_results = self._regression.run_regression(model)
        factors = reg_results.factors

        # Calculate average factor returns (annualized)
        avg_factor_returns = (
            self.factor_data[factors].mean() * self.annualization_factor
        )

        # Risk-free rate contribution
        rf_return = self.factor_data["RF"].mean() * self.annualization_factor

        # Total return
        total_return = (
            self.raw_returns.loc[self.excess_returns.index].mean()
            * self.annualization_factor
        )

        # Factor contributions = beta * average factor return
        contributions = {}
        contributions["total"] = total_return
        contributions["risk_free"] = rf_return

        for factor in factors:
            contributions[factor] = (
                reg_results.betas[factor] * avg_factor_returns[factor]
            )

        # Alpha is the residual
        contributions["alpha"] = reg_results.alpha

        return contributions

    def decompose_risk(self, model: str | FactorModel = "ff3") -> dict[str, float]:
        """
        Decompose portfolio variance into factor contributions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use for decomposition

        Returns
        -------
        dict
            Dictionary with:
            - 'total': Total annualized variance
            - One key per factor with its variance contribution
            - 'idiosyncratic': Residual (unexplained) variance
            - 'r_squared': Fraction explained by factors
        """
        # Run regression
        reg_results = self._regression.run_regression(model)
        factors = reg_results.factors

        # Factor covariance matrix (annualized)
        factor_cov = self.factor_data[factors].cov() * self.annualization_factor

        # Total variance (annualized)
        total_variance = self.excess_returns.var() * self.annualization_factor

        # Systematic variance = beta' * Cov(factors) * beta
        betas = np.array([reg_results.betas[f] for f in factors])
        systematic_variance = betas @ factor_cov.values @ betas

        # Individual factor contributions (marginal)
        contributions = {"total": total_variance}

        for i, factor in enumerate(factors):
            # Factor contribution = beta_i^2 * var(factor_i)
            factor_var = self.factor_data[factor].var() * self.annualization_factor
            contributions[factor] = reg_results.betas[factor] ** 2 * factor_var

        # Idiosyncratic variance
        contributions["idiosyncratic"] = total_variance - systematic_variance
        contributions["r_squared"] = reg_results.r_squared

        return contributions

    def get_rolling_attribution(
        self, model: str | FactorModel = "ff3", window: int = 60
    ) -> pd.DataFrame:
        """
        Calculate rolling return attribution over time.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use
        window : int, default 60
            Rolling window size (number of periods)

        Returns
        -------
        pd.DataFrame
            DataFrame with rolling factor contributions, indexed by date
        """
        # Get factors for the model
        if isinstance(model, str):
            model_enum = {
                "capm": FactorModel.CAPM,
                "ff3": FactorModel.FF3,
                "ff5": FactorModel.FF5,
                "carhart": FactorModel.CARHART,
            }.get(model.lower())
            factors = model_enum.value if model_enum else FactorModel.FF3.value
        else:
            factors = model.value

        # Get rolling betas
        rolling_betas = self._regression.run_rolling_regression(model, window)

        # Calculate rolling factor returns
        results = []
        for date in rolling_betas.index:
            # Get window data
            loc = self.factor_data.index.get_loc(date)
            start_loc = max(0, loc - window + 1)
            factor_window = self.factor_data.iloc[start_loc : loc + 1]

            # Average factor returns (annualized)
            avg_returns = factor_window[factors].mean() * self.annualization_factor

            # Contributions
            row = {"date": date}
            for factor in factors:
                beta = rolling_betas.loc[date, factor]
                row[f"{factor}_contrib"] = beta * avg_returns[factor]
            row["alpha"] = rolling_betas.loc[date, "alpha"]

            results.append(row)

        df = pd.DataFrame(results)
        if "date" in df.columns:
            df = df.set_index("date")

        return df

    def get_attribution_summary(self, model: str | FactorModel = "ff3") -> pd.DataFrame:
        """
        Get a summary table of return and risk attribution.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use

        Returns
        -------
        pd.DataFrame
            Summary table with return and risk contributions
        """
        return_decomp = self.decompose_returns(model)
        risk_decomp = self.decompose_risk(model)

        # Build summary
        rows = []

        # Total
        rows.append(
            {
                "Component": "Total",
                "Return (%)": return_decomp["total"] * 100,
                "Variance": risk_decomp["total"],
                "Std Dev (%)": np.sqrt(risk_decomp["total"]) * 100,
            }
        )

        # Risk-free
        rows.append(
            {
                "Component": "Risk-Free",
                "Return (%)": return_decomp["risk_free"] * 100,
                "Variance": 0,
                "Std Dev (%)": 0,
            }
        )

        # Factors
        reg_results = self._regression.run_regression(model)
        for factor in reg_results.factors:
            rows.append(
                {
                    "Component": factor,
                    "Return (%)": return_decomp[factor] * 100,
                    "Variance": risk_decomp.get(factor, 0),
                    "Std Dev (%)": np.sqrt(risk_decomp.get(factor, 0)) * 100,
                }
            )

        # Alpha / Idiosyncratic
        rows.append(
            {
                "Component": "Alpha (Idiosyncratic)",
                "Return (%)": return_decomp["alpha"] * 100,
                "Variance": risk_decomp["idiosyncratic"],
                "Std Dev (%)": np.sqrt(max(0, risk_decomp["idiosyncratic"])) * 100,
            }
        )

        return pd.DataFrame(rows)

    @classmethod
    def from_composite(
        cls,
        composite_results: CompositeRegressionResults,
        returns_dict: dict[str, pd.Series],
        factor_data_dict: dict[str, pd.DataFrame],
    ) -> dict[str, float]:
        """
        Decompose composite portfolio returns into factor contributions.

        Computes a weighted-average return attribution across all constituents,
        each using its own regional factor data.

        Parameters
        ----------
        composite_results : CompositeRegressionResults
            Results from ``CompositeFactorRegression.run_composite_regression()``.
        returns_dict : dict[str, pd.Series]
            Per-ticker return series ({ticker: returns}).
        factor_data_dict : dict[str, pd.DataFrame]
            Per-region factor data ({region: factor_df}).

        Returns
        -------
        dict[str, float]
            Decomposition with keys: total, risk_free, each factor name, alpha.
        """
        total_weight = composite_results.coverage
        if total_weight == 0:
            raise ValueError("Composite results have zero coverage.")

        scale = 1.0 / total_weight
        contributions: dict[str, float] = {}

        for ticker, result in composite_results.constituent_results.items():
            weight = composite_results.portfolio_weights.get(ticker, 0)
            region = composite_results.region_map.get(ticker, "us")
            factor_data = factor_data_dict.get(region)

            if factor_data is None or ticker not in returns_dict:
                continue

            ret = returns_dict[ticker]

            # Detect annualization factor
            if len(ret) > 1:
                avg_days = (ret.index[-1] - ret.index[0]).days / len(ret)
                ann_factor = 12 if avg_days > 20 else 252
            else:
                ann_factor = 252

            # Average factor returns (annualized)
            common = ret.index.intersection(factor_data.index)
            if len(common) == 0:
                continue
            aligned_factors = factor_data.loc[common]

            for factor in result.factors:
                if factor in aligned_factors.columns:
                    avg_ret = aligned_factors[factor].mean() * ann_factor
                    contrib = result.betas[factor] * avg_ret * weight
                    contributions[factor] = contributions.get(factor, 0) + contrib

            # Risk-free
            if "RF" in aligned_factors.columns:
                rf = aligned_factors["RF"].mean() * ann_factor * weight
                contributions["risk_free"] = contributions.get("risk_free", 0) + rf

            # Total return
            total_ret = ret.loc[common].mean() * ann_factor * weight
            contributions["total"] = contributions.get("total", 0) + total_ret

        # Scale all by coverage
        contributions = {k: v * scale for k, v in contributions.items()}

        # Alpha from composite results
        contributions["alpha"] = composite_results.weighted_alpha

        return contributions

    def summary(self, model: str | FactorModel = "ff3") -> str:
        """Generate a text summary of factor attribution."""
        return_decomp = self.decompose_returns(model)
        risk_decomp = self.decompose_risk(model)

        model_name = model.name if isinstance(model, FactorModel) else model.upper()

        lines = [
            f"\n{'=' * 60}",
            f"Factor Attribution Summary: {model_name}",
            f"{'=' * 60}",
            "",
            "RETURN ATTRIBUTION",
            f"{'-' * 40}",
            f"{'Component':<20} {'Return':>12}",
            f"{'-' * 40}",
            f"{'Total':<20} {return_decomp['total']*100:>11.2f}%",
            f"{'Risk-Free':<20} {return_decomp['risk_free']*100:>11.2f}%",
        ]

        # Factor contributions
        reg_results = self._regression.run_regression(model)
        for factor in reg_results.factors:
            lines.append(f"{factor:<20} {return_decomp[factor]*100:>11.2f}%")
        lines.append(f"{'Alpha':<20} {return_decomp['alpha']*100:>11.2f}%")

        lines.extend(
            [
                "",
                "RISK ATTRIBUTION",
                f"{'-' * 40}",
                f"{'Component':<20} {'Variance':>12} {'% of Total':>12}",
                f"{'-' * 40}",
            ]
        )

        total_var = risk_decomp["total"]
        for factor in reg_results.factors:
            pct = risk_decomp[factor] / total_var * 100 if total_var > 0 else 0
            lines.append(f"{factor:<20} {risk_decomp[factor]:>12.6f} {pct:>11.1f}%")

        idio_pct = (
            risk_decomp["idiosyncratic"] / total_var * 100 if total_var > 0 else 0
        )
        lines.append(
            f"{'Idiosyncratic':<20} {risk_decomp['idiosyncratic']:>12.6f} {idio_pct:>11.1f}%"
        )
        lines.append(f"\nR-squared: {risk_decomp['r_squared']:.4f}")
        lines.append("=" * 60)

        return "\n".join(lines)

`decompose_returns(model='ff3')` ¶

Decompose total returns into factor contributions.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use for decomposition	`'ff3'`

Returns:

Type	Description
`dict`	Dictionary with: - 'total': Total annualized return - 'risk_free': Risk-free contribution - One key per factor with its return contribution - 'alpha': Idiosyncratic return (Jensen's alpha)

Source code in portfolio_analysis/factors/attribution.py

def decompose_returns(self, model: str | FactorModel = "ff3") -> dict[str, float]:
    """
    Decompose total returns into factor contributions.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use for decomposition

    Returns
    -------
    dict
        Dictionary with:
        - 'total': Total annualized return
        - 'risk_free': Risk-free contribution
        - One key per factor with its return contribution
        - 'alpha': Idiosyncratic return (Jensen's alpha)
    """
    # Run regression to get betas
    reg_results = self._regression.run_regression(model)
    factors = reg_results.factors

    # Calculate average factor returns (annualized)
    avg_factor_returns = (
        self.factor_data[factors].mean() * self.annualization_factor
    )

    # Risk-free rate contribution
    rf_return = self.factor_data["RF"].mean() * self.annualization_factor

    # Total return
    total_return = (
        self.raw_returns.loc[self.excess_returns.index].mean()
        * self.annualization_factor
    )

    # Factor contributions = beta * average factor return
    contributions = {}
    contributions["total"] = total_return
    contributions["risk_free"] = rf_return

    for factor in factors:
        contributions[factor] = (
            reg_results.betas[factor] * avg_factor_returns[factor]
        )

    # Alpha is the residual
    contributions["alpha"] = reg_results.alpha

    return contributions

`decompose_risk(model='ff3')` ¶

Decompose portfolio variance into factor contributions.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use for decomposition	`'ff3'`

Returns:

Type	Description
`dict`	Dictionary with: - 'total': Total annualized variance - One key per factor with its variance contribution - 'idiosyncratic': Residual (unexplained) variance - 'r_squared': Fraction explained by factors

Source code in portfolio_analysis/factors/attribution.py

def decompose_risk(self, model: str | FactorModel = "ff3") -> dict[str, float]:
    """
    Decompose portfolio variance into factor contributions.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use for decomposition

    Returns
    -------
    dict
        Dictionary with:
        - 'total': Total annualized variance
        - One key per factor with its variance contribution
        - 'idiosyncratic': Residual (unexplained) variance
        - 'r_squared': Fraction explained by factors
    """
    # Run regression
    reg_results = self._regression.run_regression(model)
    factors = reg_results.factors

    # Factor covariance matrix (annualized)
    factor_cov = self.factor_data[factors].cov() * self.annualization_factor

    # Total variance (annualized)
    total_variance = self.excess_returns.var() * self.annualization_factor

    # Systematic variance = beta' * Cov(factors) * beta
    betas = np.array([reg_results.betas[f] for f in factors])
    systematic_variance = betas @ factor_cov.values @ betas

    # Individual factor contributions (marginal)
    contributions = {"total": total_variance}

    for i, factor in enumerate(factors):
        # Factor contribution = beta_i^2 * var(factor_i)
        factor_var = self.factor_data[factor].var() * self.annualization_factor
        contributions[factor] = reg_results.betas[factor] ** 2 * factor_var

    # Idiosyncratic variance
    contributions["idiosyncratic"] = total_variance - systematic_variance
    contributions["r_squared"] = reg_results.r_squared

    return contributions

`get_rolling_attribution(model='ff3', window=60)` ¶

Calculate rolling return attribution over time.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use	`'ff3'`
`window`	`int`	Rolling window size (number of periods)	`60`

Returns:

Type	Description
`DataFrame`	DataFrame with rolling factor contributions, indexed by date

Source code in portfolio_analysis/factors/attribution.py

def get_rolling_attribution(
    self, model: str | FactorModel = "ff3", window: int = 60
) -> pd.DataFrame:
    """
    Calculate rolling return attribution over time.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use
    window : int, default 60
        Rolling window size (number of periods)

    Returns
    -------
    pd.DataFrame
        DataFrame with rolling factor contributions, indexed by date
    """
    # Get factors for the model
    if isinstance(model, str):
        model_enum = {
            "capm": FactorModel.CAPM,
            "ff3": FactorModel.FF3,
            "ff5": FactorModel.FF5,
            "carhart": FactorModel.CARHART,
        }.get(model.lower())
        factors = model_enum.value if model_enum else FactorModel.FF3.value
    else:
        factors = model.value

    # Get rolling betas
    rolling_betas = self._regression.run_rolling_regression(model, window)

    # Calculate rolling factor returns
    results = []
    for date in rolling_betas.index:
        # Get window data
        loc = self.factor_data.index.get_loc(date)
        start_loc = max(0, loc - window + 1)
        factor_window = self.factor_data.iloc[start_loc : loc + 1]

        # Average factor returns (annualized)
        avg_returns = factor_window[factors].mean() * self.annualization_factor

        # Contributions
        row = {"date": date}
        for factor in factors:
            beta = rolling_betas.loc[date, factor]
            row[f"{factor}_contrib"] = beta * avg_returns[factor]
        row["alpha"] = rolling_betas.loc[date, "alpha"]

        results.append(row)

    df = pd.DataFrame(results)
    if "date" in df.columns:
        df = df.set_index("date")

    return df

`get_attribution_summary(model='ff3')` ¶

Get a summary table of return and risk attribution.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model to use	`'ff3'`

Returns:

Type	Description
`DataFrame`	Summary table with return and risk contributions

Source code in portfolio_analysis/factors/attribution.py

def get_attribution_summary(self, model: str | FactorModel = "ff3") -> pd.DataFrame:
    """
    Get a summary table of return and risk attribution.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use

    Returns
    -------
    pd.DataFrame
        Summary table with return and risk contributions
    """
    return_decomp = self.decompose_returns(model)
    risk_decomp = self.decompose_risk(model)

    # Build summary
    rows = []

    # Total
    rows.append(
        {
            "Component": "Total",
            "Return (%)": return_decomp["total"] * 100,
            "Variance": risk_decomp["total"],
            "Std Dev (%)": np.sqrt(risk_decomp["total"]) * 100,
        }
    )

    # Risk-free
    rows.append(
        {
            "Component": "Risk-Free",
            "Return (%)": return_decomp["risk_free"] * 100,
            "Variance": 0,
            "Std Dev (%)": 0,
        }
    )

    # Factors
    reg_results = self._regression.run_regression(model)
    for factor in reg_results.factors:
        rows.append(
            {
                "Component": factor,
                "Return (%)": return_decomp[factor] * 100,
                "Variance": risk_decomp.get(factor, 0),
                "Std Dev (%)": np.sqrt(risk_decomp.get(factor, 0)) * 100,
            }
        )

    # Alpha / Idiosyncratic
    rows.append(
        {
            "Component": "Alpha (Idiosyncratic)",
            "Return (%)": return_decomp["alpha"] * 100,
            "Variance": risk_decomp["idiosyncratic"],
            "Std Dev (%)": np.sqrt(max(0, risk_decomp["idiosyncratic"])) * 100,
        }
    )

    return pd.DataFrame(rows)

`from_composite(composite_results, returns_dict, factor_data_dict)` `classmethod` ¶

Decompose composite portfolio returns into factor contributions.

Computes a weighted-average return attribution across all constituents, each using its own regional factor data.

Parameters:

Name	Type	Description	Default
`composite_results`	`CompositeRegressionResults`	Results from `CompositeFactorRegression.run_composite_regression()`.	required
`returns_dict`	`dict[str, Series]`	Per-ticker return series ({ticker: returns}).	required
`factor_data_dict`	`dict[str, DataFrame]`	Per-region factor data ({region: factor_df}).	required

Returns:

Type	Description
`dict[str, float]`	Decomposition with keys: total, risk_free, each factor name, alpha.

Source code in portfolio_analysis/factors/attribution.py

@classmethod
def from_composite(
    cls,
    composite_results: CompositeRegressionResults,
    returns_dict: dict[str, pd.Series],
    factor_data_dict: dict[str, pd.DataFrame],
) -> dict[str, float]:
    """
    Decompose composite portfolio returns into factor contributions.

    Computes a weighted-average return attribution across all constituents,
    each using its own regional factor data.

    Parameters
    ----------
    composite_results : CompositeRegressionResults
        Results from ``CompositeFactorRegression.run_composite_regression()``.
    returns_dict : dict[str, pd.Series]
        Per-ticker return series ({ticker: returns}).
    factor_data_dict : dict[str, pd.DataFrame]
        Per-region factor data ({region: factor_df}).

    Returns
    -------
    dict[str, float]
        Decomposition with keys: total, risk_free, each factor name, alpha.
    """
    total_weight = composite_results.coverage
    if total_weight == 0:
        raise ValueError("Composite results have zero coverage.")

    scale = 1.0 / total_weight
    contributions: dict[str, float] = {}

    for ticker, result in composite_results.constituent_results.items():
        weight = composite_results.portfolio_weights.get(ticker, 0)
        region = composite_results.region_map.get(ticker, "us")
        factor_data = factor_data_dict.get(region)

        if factor_data is None or ticker not in returns_dict:
            continue

        ret = returns_dict[ticker]

        # Detect annualization factor
        if len(ret) > 1:
            avg_days = (ret.index[-1] - ret.index[0]).days / len(ret)
            ann_factor = 12 if avg_days > 20 else 252
        else:
            ann_factor = 252

        # Average factor returns (annualized)
        common = ret.index.intersection(factor_data.index)
        if len(common) == 0:
            continue
        aligned_factors = factor_data.loc[common]

        for factor in result.factors:
            if factor in aligned_factors.columns:
                avg_ret = aligned_factors[factor].mean() * ann_factor
                contrib = result.betas[factor] * avg_ret * weight
                contributions[factor] = contributions.get(factor, 0) + contrib

        # Risk-free
        if "RF" in aligned_factors.columns:
            rf = aligned_factors["RF"].mean() * ann_factor * weight
            contributions["risk_free"] = contributions.get("risk_free", 0) + rf

        # Total return
        total_ret = ret.loc[common].mean() * ann_factor * weight
        contributions["total"] = contributions.get("total", 0) + total_ret

    # Scale all by coverage
    contributions = {k: v * scale for k, v in contributions.items()}

    # Alpha from composite results
    contributions["alpha"] = composite_results.weighted_alpha

    return contributions

`summary(model='ff3')` ¶

Generate a text summary of factor attribution.

Source code in portfolio_analysis/factors/attribution.py

def summary(self, model: str | FactorModel = "ff3") -> str:
    """Generate a text summary of factor attribution."""
    return_decomp = self.decompose_returns(model)
    risk_decomp = self.decompose_risk(model)

    model_name = model.name if isinstance(model, FactorModel) else model.upper()

    lines = [
        f"\n{'=' * 60}",
        f"Factor Attribution Summary: {model_name}",
        f"{'=' * 60}",
        "",
        "RETURN ATTRIBUTION",
        f"{'-' * 40}",
        f"{'Component':<20} {'Return':>12}",
        f"{'-' * 40}",
        f"{'Total':<20} {return_decomp['total']*100:>11.2f}%",
        f"{'Risk-Free':<20} {return_decomp['risk_free']*100:>11.2f}%",
    ]

    # Factor contributions
    reg_results = self._regression.run_regression(model)
    for factor in reg_results.factors:
        lines.append(f"{factor:<20} {return_decomp[factor]*100:>11.2f}%")
    lines.append(f"{'Alpha':<20} {return_decomp['alpha']*100:>11.2f}%")

    lines.extend(
        [
            "",
            "RISK ATTRIBUTION",
            f"{'-' * 40}",
            f"{'Component':<20} {'Variance':>12} {'% of Total':>12}",
            f"{'-' * 40}",
        ]
    )

    total_var = risk_decomp["total"]
    for factor in reg_results.factors:
        pct = risk_decomp[factor] / total_var * 100 if total_var > 0 else 0
        lines.append(f"{factor:<20} {risk_decomp[factor]:>12.6f} {pct:>11.1f}%")

    idio_pct = (
        risk_decomp["idiosyncratic"] / total_var * 100 if total_var > 0 else 0
    )
    lines.append(
        f"{'Idiosyncratic':<20} {risk_decomp['idiosyncratic']:>12.6f} {idio_pct:>11.1f}%"
    )
    lines.append(f"\nR-squared: {risk_decomp['r_squared']:.4f}")
    lines.append("=" * 60)

    return "\n".join(lines)

`FactorOptimizer` ¶

Factor-aware portfolio optimization.

Optimize portfolios to achieve target factor exposures, minimize factor exposure, or generate factor-efficient frontiers.

Parameters:

Name	Type	Description	Default
`price_data`	`DataFrame`	Historical price data with datetime index and tickers as columns	required
`factor_data`	`DataFrame`	Factor data from FactorDataLoader	required
`risk_free_rate`	`float`	Annual risk-free rate for Sharpe calculations	`0.02`

Examples:

>>> from portfolio_analysis.factors import FactorOptimizer, FactorDataLoader
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> optimizer = FactorOptimizer(price_data, ff3)
>>> result = optimizer.optimize_target_exposures(
...     target_betas={'Mkt-RF': 1.0, 'SMB': 0.3, 'HML': 0.2}
... )
>>> print(result['weights'])

Source code in portfolio_analysis/factors/optimization.py

class FactorOptimizer:
    """
    Factor-aware portfolio optimization.

    Optimize portfolios to achieve target factor exposures, minimize
    factor exposure, or generate factor-efficient frontiers.

    Parameters
    ----------
    price_data : pd.DataFrame
        Historical price data with datetime index and tickers as columns
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader
    risk_free_rate : float, default 0.02
        Annual risk-free rate for Sharpe calculations

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorOptimizer, FactorDataLoader
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> optimizer = FactorOptimizer(price_data, ff3)
    >>> result = optimizer.optimize_target_exposures(
    ...     target_betas={'Mkt-RF': 1.0, 'SMB': 0.3, 'HML': 0.2}
    ... )
    >>> print(result['weights'])
    """

    TRADING_DAYS = 252

    def __init__(
        self,
        price_data: pd.DataFrame,
        factor_data: pd.DataFrame,
        risk_free_rate: float = 0.02,
    ):
        self.price_data = price_data
        self.tickers = list(price_data.columns)
        self.n_assets = len(self.tickers)
        self.risk_free_rate = risk_free_rate

        # Calculate returns
        self.returns = price_data.pct_change().dropna()

        # Align with factor data
        common_dates = self.returns.index.intersection(factor_data.index)
        if len(common_dates) == 0:
            raise ValueError("No overlapping dates between price data and factor data")

        self.returns = self.returns.loc[common_dates]
        self.factor_data = factor_data.loc[common_dates]

        # Calculate excess returns for each asset
        self.excess_returns = self.returns.sub(self.factor_data["RF"], axis=0)

        # Pre-compute individual asset betas for all factors
        self._asset_betas = self._compute_asset_betas()

        # Annualized statistics
        self.mean_returns = self.returns.mean() * self.TRADING_DAYS
        self.cov_matrix = self.returns.cov() * self.TRADING_DAYS

    def _compute_asset_betas(self) -> pd.DataFrame:
        """Compute factor betas for each individual asset."""
        factors = [
            f
            for f in ["Mkt-RF", "SMB", "HML", "RMW", "CMA", "MOM"]
            if f in self.factor_data.columns
        ]

        betas = {}
        for ticker in self.tickers:
            y = self.excess_returns[ticker].values
            X = self.factor_data[factors].values

            # Add constant
            X_const = np.column_stack([np.ones(len(X)), X])

            # OLS
            coeffs = np.linalg.lstsq(X_const, y, rcond=None)[0]
            betas[ticker] = dict(zip(factors, coeffs[1:]))

        return pd.DataFrame(betas).T

    def _portfolio_betas(self, weights: np.ndarray) -> dict[str, float]:
        """Calculate portfolio-level factor betas given weights."""
        portfolio_betas = {}
        for factor in self._asset_betas.columns:
            portfolio_betas[factor] = float(np.dot(weights, self._asset_betas[factor]))
        return portfolio_betas

    def _portfolio_return(self, weights: np.ndarray) -> float:
        """Calculate expected portfolio return."""
        return float(np.dot(weights, self.mean_returns))

    def _portfolio_volatility(self, weights: np.ndarray) -> float:
        """Calculate portfolio volatility."""
        return float(np.sqrt(np.dot(weights.T, np.dot(self.cov_matrix, weights))))

    def _portfolio_sharpe(self, weights: np.ndarray) -> float:
        """Calculate portfolio Sharpe ratio."""
        ret = self._portfolio_return(weights)
        vol = self._portfolio_volatility(weights)
        return (ret - self.risk_free_rate) / vol if vol > 0 else 0

    def optimize_target_exposures(
        self,
        target_betas: dict[str, float],
        weight_bounds: tuple[float, float] = (0, 1),
        tolerance: float = 0.1,
    ) -> dict:
        """
        Optimize portfolio to achieve target factor exposures.

        Minimizes tracking error to target betas while maximizing Sharpe ratio.

        Parameters
        ----------
        target_betas : dict
            Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset
        tolerance : float, default 0.1
            Allowed deviation from target betas

        Returns
        -------
        dict
            Optimal weights, achieved betas, return, volatility, and Sharpe ratio
        """
        # Validate factors exist
        for factor in target_betas:
            if factor not in self._asset_betas.columns:
                raise ValueError(
                    f"Factor '{factor}' not available. "
                    f"Available: {self._asset_betas.columns.tolist()}"
                )

        def objective(weights):
            # Maximize Sharpe (minimize negative Sharpe)
            return -self._portfolio_sharpe(weights)

        def beta_constraint(weights, factor, target):
            """Constraint: achieved beta should be close to target."""
            achieved = np.dot(weights, self._asset_betas[factor])
            return tolerance - abs(achieved - target)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

        # Add beta constraints
        for factor, target in target_betas.items():
            constraints.append(
                {
                    "type": "ineq",
                    "fun": lambda x, f=factor, t=target: beta_constraint(x, f, t),
                }
            )

        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
            options={"maxiter": 500},
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "target_betas": target_betas,
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def optimize_factor_neutral(
        self,
        factors: list[str],
        weight_bounds: tuple[float, float] = (0, 1),
        tolerance: float = 0.05,
    ) -> dict:
        """
        Optimize portfolio to be neutral to specified factors.

        Parameters
        ----------
        factors : list of str
            Factors to neutralize (e.g., ['SMB', 'HML'])
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset
        tolerance : float, default 0.05
            Maximum allowed absolute beta for neutral factors

        Returns
        -------
        dict
            Optimal weights with near-zero exposure to specified factors
        """
        # Validate factors
        for factor in factors:
            if factor not in self._asset_betas.columns:
                raise ValueError(f"Factor '{factor}' not available")

        def objective(weights):
            return -self._portfolio_sharpe(weights)

        def neutrality_constraint(weights, factor):
            """Beta should be close to zero."""
            beta = np.dot(weights, self._asset_betas[factor])
            return tolerance - abs(beta)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

        for factor in factors:
            constraints.append(
                {"type": "ineq", "fun": lambda x, f=factor: neutrality_constraint(x, f)}
            )

        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
            options={"maxiter": 500},
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "neutralized_factors": factors,
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def optimize_max_alpha(
        self,
        model: Union[str, FactorModel] = "ff3",
        weight_bounds: tuple[float, float] = (0, 1),
    ) -> dict:
        """
        Optimize portfolio to maximize expected alpha.

        Uses pre-computed asset alphas to find the highest-alpha portfolio.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model for alpha calculation
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset

        Returns
        -------
        dict
            Portfolio weights maximizing expected alpha
        """
        # Get model factors
        if isinstance(model, str):
            model_factors = {
                "capm": ["Mkt-RF"],
                "ff3": ["Mkt-RF", "SMB", "HML"],
                "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
                "carhart": ["Mkt-RF", "SMB", "HML", "MOM"],
            }.get(model.lower(), ["Mkt-RF", "SMB", "HML"])
        else:
            model_factors = model.value

        factors = [f for f in model_factors if f in self.factor_data.columns]

        # Compute alpha for each asset
        alphas = []
        for ticker in self.tickers:
            y = self.excess_returns[ticker].values
            X = self.factor_data[factors].values
            X_const = np.column_stack([np.ones(len(X)), X])
            coeffs = np.linalg.lstsq(X_const, y, rcond=None)[0]
            alphas.append(coeffs[0] * self.TRADING_DAYS)  # Annualized alpha

        alphas = np.array(alphas)

        def objective(weights):
            # Negative alpha (minimize)
            return -np.dot(weights, alphas)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]
        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "expected_alpha": float(np.dot(optimal_weights, alphas)),
            "asset_alphas": dict(zip(self.tickers, alphas)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def generate_factor_frontier(
        self,
        factor: str,
        n_points: int = 20,
        weight_bounds: tuple[float, float] = (0, 1),
    ) -> pd.DataFrame:
        """
        Generate efficient frontier varying one factor's exposure.

        Parameters
        ----------
        factor : str
            Factor to vary (e.g., 'SMB', 'HML')
        n_points : int, default 20
            Number of points on the frontier
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset

        Returns
        -------
        pd.DataFrame
            Frontier with columns: factor_beta, return, volatility, sharpe_ratio
        """
        if factor not in self._asset_betas.columns:
            raise ValueError(f"Factor '{factor}' not available")

        # Find beta range
        asset_betas = self._asset_betas[factor].values
        min_beta = asset_betas.min()
        max_beta = asset_betas.max()

        target_betas = np.linspace(min_beta, max_beta, n_points)

        frontier = []
        for target in target_betas:
            try:
                result = self.optimize_target_exposures(
                    target_betas={factor: target},
                    weight_bounds=weight_bounds,
                    tolerance=0.05,
                )
                if result["success"]:
                    frontier.append(
                        {
                            f"{factor}_beta": result["achieved_betas"][factor],
                            "return": result["return"],
                            "volatility": result["volatility"],
                            "sharpe_ratio": result["sharpe_ratio"],
                        }
                    )
            except Exception:
                continue

        return pd.DataFrame(frontier)

    def get_asset_betas(self) -> pd.DataFrame:
        """
        Get factor betas for all individual assets.

        Returns
        -------
        pd.DataFrame
            DataFrame with assets as rows and factors as columns
        """
        return self._asset_betas.copy()

    def summary(self) -> str:
        """Generate a summary of optimization capabilities and asset betas."""
        lines = [
            f"\n{'=' * 60}",
            "Factor Optimizer Summary",
            f"{'=' * 60}",
            f"Assets: {len(self.tickers)}",
            f"Observations: {len(self.returns)}",
            f"Available factors: {self._asset_betas.columns.tolist()}",
            "",
            "Asset Factor Betas:",
            f"{'-' * 60}",
        ]

        # Format asset betas table
        beta_str = self._asset_betas.to_string()
        lines.append(beta_str)

        lines.append("=" * 60)
        return "\n".join(lines)

`optimize_target_exposures(target_betas, weight_bounds=(0, 1), tolerance=0.1)` ¶

Optimize portfolio to achieve target factor exposures.

Minimizes tracking error to target betas while maximizing Sharpe ratio.

Parameters:

Name	Type	Description	Default
`target_betas`	`dict`	Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})	required
`weight_bounds`	`tuple`	Min and max weight for each asset	`(0, 1)`
`tolerance`	`float`	Allowed deviation from target betas	`0.1`

Returns:

Type	Description
`dict`	Optimal weights, achieved betas, return, volatility, and Sharpe ratio

Source code in portfolio_analysis/factors/optimization.py

def optimize_target_exposures(
    self,
    target_betas: dict[str, float],
    weight_bounds: tuple[float, float] = (0, 1),
    tolerance: float = 0.1,
) -> dict:
    """
    Optimize portfolio to achieve target factor exposures.

    Minimizes tracking error to target betas while maximizing Sharpe ratio.

    Parameters
    ----------
    target_betas : dict
        Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset
    tolerance : float, default 0.1
        Allowed deviation from target betas

    Returns
    -------
    dict
        Optimal weights, achieved betas, return, volatility, and Sharpe ratio
    """
    # Validate factors exist
    for factor in target_betas:
        if factor not in self._asset_betas.columns:
            raise ValueError(
                f"Factor '{factor}' not available. "
                f"Available: {self._asset_betas.columns.tolist()}"
            )

    def objective(weights):
        # Maximize Sharpe (minimize negative Sharpe)
        return -self._portfolio_sharpe(weights)

    def beta_constraint(weights, factor, target):
        """Constraint: achieved beta should be close to target."""
        achieved = np.dot(weights, self._asset_betas[factor])
        return tolerance - abs(achieved - target)

    constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

    # Add beta constraints
    for factor, target in target_betas.items():
        constraints.append(
            {
                "type": "ineq",
                "fun": lambda x, f=factor, t=target: beta_constraint(x, f, t),
            }
        )

    bounds = tuple(weight_bounds for _ in range(self.n_assets))
    initial_weights = np.array([1 / self.n_assets] * self.n_assets)

    result = minimize(
        objective,
        initial_weights,
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
        options={"maxiter": 500},
    )

    optimal_weights = result.x

    return {
        "weights": dict(zip(self.tickers, optimal_weights)),
        "achieved_betas": self._portfolio_betas(optimal_weights),
        "target_betas": target_betas,
        "return": self._portfolio_return(optimal_weights),
        "volatility": self._portfolio_volatility(optimal_weights),
        "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
        "success": result.success,
    }

`optimize_factor_neutral(factors, weight_bounds=(0, 1), tolerance=0.05)` ¶

Optimize portfolio to be neutral to specified factors.

Parameters:

Name	Type	Description	Default
`factors`	`list of str`	Factors to neutralize (e.g., ['SMB', 'HML'])	required
`weight_bounds`	`tuple`	Min and max weight for each asset	`(0, 1)`
`tolerance`	`float`	Maximum allowed absolute beta for neutral factors	`0.05`

Returns:

Type	Description
`dict`	Optimal weights with near-zero exposure to specified factors

Source code in portfolio_analysis/factors/optimization.py

def optimize_factor_neutral(
    self,
    factors: list[str],
    weight_bounds: tuple[float, float] = (0, 1),
    tolerance: float = 0.05,
) -> dict:
    """
    Optimize portfolio to be neutral to specified factors.

    Parameters
    ----------
    factors : list of str
        Factors to neutralize (e.g., ['SMB', 'HML'])
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset
    tolerance : float, default 0.05
        Maximum allowed absolute beta for neutral factors

    Returns
    -------
    dict
        Optimal weights with near-zero exposure to specified factors
    """
    # Validate factors
    for factor in factors:
        if factor not in self._asset_betas.columns:
            raise ValueError(f"Factor '{factor}' not available")

    def objective(weights):
        return -self._portfolio_sharpe(weights)

    def neutrality_constraint(weights, factor):
        """Beta should be close to zero."""
        beta = np.dot(weights, self._asset_betas[factor])
        return tolerance - abs(beta)

    constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

    for factor in factors:
        constraints.append(
            {"type": "ineq", "fun": lambda x, f=factor: neutrality_constraint(x, f)}
        )

    bounds = tuple(weight_bounds for _ in range(self.n_assets))
    initial_weights = np.array([1 / self.n_assets] * self.n_assets)

    result = minimize(
        objective,
        initial_weights,
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
        options={"maxiter": 500},
    )

    optimal_weights = result.x

    return {
        "weights": dict(zip(self.tickers, optimal_weights)),
        "achieved_betas": self._portfolio_betas(optimal_weights),
        "neutralized_factors": factors,
        "return": self._portfolio_return(optimal_weights),
        "volatility": self._portfolio_volatility(optimal_weights),
        "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
        "success": result.success,
    }

`optimize_max_alpha(model='ff3', weight_bounds=(0, 1))` ¶

Optimize portfolio to maximize expected alpha.

Uses pre-computed asset alphas to find the highest-alpha portfolio.

Parameters:

Name	Type	Description	Default
`model`	`str or FactorModel`	Factor model for alpha calculation	`'ff3'`
`weight_bounds`	`tuple`	Min and max weight for each asset	`(0, 1)`

Returns:

Type	Description
`dict`	Portfolio weights maximizing expected alpha

Source code in portfolio_analysis/factors/optimization.py

def optimize_max_alpha(
    self,
    model: Union[str, FactorModel] = "ff3",
    weight_bounds: tuple[float, float] = (0, 1),
) -> dict:
    """
    Optimize portfolio to maximize expected alpha.

    Uses pre-computed asset alphas to find the highest-alpha portfolio.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model for alpha calculation
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset

    Returns
    -------
    dict
        Portfolio weights maximizing expected alpha
    """
    # Get model factors
    if isinstance(model, str):
        model_factors = {
            "capm": ["Mkt-RF"],
            "ff3": ["Mkt-RF", "SMB", "HML"],
            "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
            "carhart": ["Mkt-RF", "SMB", "HML", "MOM"],
        }.get(model.lower(), ["Mkt-RF", "SMB", "HML"])
    else:
        model_factors = model.value

    factors = [f for f in model_factors if f in self.factor_data.columns]

    # Compute alpha for each asset
    alphas = []
    for ticker in self.tickers:
        y = self.excess_returns[ticker].values
        X = self.factor_data[factors].values
        X_const = np.column_stack([np.ones(len(X)), X])
        coeffs = np.linalg.lstsq(X_const, y, rcond=None)[0]
        alphas.append(coeffs[0] * self.TRADING_DAYS)  # Annualized alpha

    alphas = np.array(alphas)

    def objective(weights):
        # Negative alpha (minimize)
        return -np.dot(weights, alphas)

    constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]
    bounds = tuple(weight_bounds for _ in range(self.n_assets))
    initial_weights = np.array([1 / self.n_assets] * self.n_assets)

    result = minimize(
        objective,
        initial_weights,
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
    )

    optimal_weights = result.x

    return {
        "weights": dict(zip(self.tickers, optimal_weights)),
        "expected_alpha": float(np.dot(optimal_weights, alphas)),
        "asset_alphas": dict(zip(self.tickers, alphas)),
        "achieved_betas": self._portfolio_betas(optimal_weights),
        "return": self._portfolio_return(optimal_weights),
        "volatility": self._portfolio_volatility(optimal_weights),
        "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
        "success": result.success,
    }

`generate_factor_frontier(factor, n_points=20, weight_bounds=(0, 1))` ¶

Generate efficient frontier varying one factor's exposure.

Parameters:

Name	Type	Description	Default
`factor`	`str`	Factor to vary (e.g., 'SMB', 'HML')	required
`n_points`	`int`	Number of points on the frontier	`20`
`weight_bounds`	`tuple`	Min and max weight for each asset	`(0, 1)`

Returns:

Type	Description
`DataFrame`	Frontier with columns: factor_beta, return, volatility, sharpe_ratio

Source code in portfolio_analysis/factors/optimization.py

def generate_factor_frontier(
    self,
    factor: str,
    n_points: int = 20,
    weight_bounds: tuple[float, float] = (0, 1),
) -> pd.DataFrame:
    """
    Generate efficient frontier varying one factor's exposure.

    Parameters
    ----------
    factor : str
        Factor to vary (e.g., 'SMB', 'HML')
    n_points : int, default 20
        Number of points on the frontier
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset

    Returns
    -------
    pd.DataFrame
        Frontier with columns: factor_beta, return, volatility, sharpe_ratio
    """
    if factor not in self._asset_betas.columns:
        raise ValueError(f"Factor '{factor}' not available")

    # Find beta range
    asset_betas = self._asset_betas[factor].values
    min_beta = asset_betas.min()
    max_beta = asset_betas.max()

    target_betas = np.linspace(min_beta, max_beta, n_points)

    frontier = []
    for target in target_betas:
        try:
            result = self.optimize_target_exposures(
                target_betas={factor: target},
                weight_bounds=weight_bounds,
                tolerance=0.05,
            )
            if result["success"]:
                frontier.append(
                    {
                        f"{factor}_beta": result["achieved_betas"][factor],
                        "return": result["return"],
                        "volatility": result["volatility"],
                        "sharpe_ratio": result["sharpe_ratio"],
                    }
                )
        except Exception:
            continue

    return pd.DataFrame(frontier)

`get_asset_betas()` ¶

Get factor betas for all individual assets.

Returns:

Type	Description
`DataFrame`	DataFrame with assets as rows and factors as columns

Source code in portfolio_analysis/factors/optimization.py

def get_asset_betas(self) -> pd.DataFrame:
    """
    Get factor betas for all individual assets.

    Returns
    -------
    pd.DataFrame
        DataFrame with assets as rows and factors as columns
    """
    return self._asset_betas.copy()

`summary()` ¶

Generate a summary of optimization capabilities and asset betas.

Source code in portfolio_analysis/factors/optimization.py

def summary(self) -> str:
    """Generate a summary of optimization capabilities and asset betas."""
    lines = [
        f"\n{'=' * 60}",
        "Factor Optimizer Summary",
        f"{'=' * 60}",
        f"Assets: {len(self.tickers)}",
        f"Observations: {len(self.returns)}",
        f"Available factors: {self._asset_betas.columns.tolist()}",
        "",
        "Asset Factor Betas:",
        f"{'-' * 60}",
    ]

    # Format asset betas table
    beta_str = self._asset_betas.to_string()
    lines.append(beta_str)

    lines.append("=" * 60)
    return "\n".join(lines)