Skip to content

Factor Analysis API

FactorDataLoader

Fetch Fama-French factor data from the Kenneth French Data Library.

Uses pandas-datareader to download factor data and provides local caching to avoid repeated downloads. Supports US and international regional factors.

Parameters:

Name Type Description Default
cache_dir str

Directory for caching factor data. If None, uses a temp directory.

None

Examples:

>>> loader = FactorDataLoader()
>>> ff3 = loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> print(ff3.columns.tolist())
['Mkt-RF', 'SMB', 'HML', 'RF']
>>> ff5_intl = loader.get_ff5_factors('2015-01-01', '2023-12-31',
...                                   region='developed_ex_us')
Source code in portfolio_analysis/factors/data.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
class FactorDataLoader:
    """
    Fetch Fama-French factor data from Kenneth French Data Library.

    Uses pandas-datareader to download factor data and provides local caching
    to avoid repeated downloads. Supports US and international regional factors.

    Parameters
    ----------
    cache_dir : str, optional
        Directory for caching factor data. If None, uses a temp directory.

    Examples
    --------
    >>> loader = FactorDataLoader()
    >>> ff3 = loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> print(ff3.columns.tolist())
    ['Mkt-RF', 'SMB', 'HML', 'RF']

    >>> ff5_intl = loader.get_ff5_factors('2015-01-01', '2023-12-31',
    ...                                   region='developed_ex_us')
    """

    # French data library dataset names (US — kept for backward compatibility)
    FF3_DAILY = "F-F_Research_Data_Factors_daily"
    FF3_MONTHLY = "F-F_Research_Data_Factors"
    FF5_DAILY = "F-F_Research_Data_5_Factors_2x3_daily"
    FF5_MONTHLY = "F-F_Research_Data_5_Factors_2x3"
    MOM_DAILY = "F-F_Momentum_Factor_daily"
    MOM_MONTHLY = "F-F_Momentum_Factor"

    # Regional dataset names from Kenneth French Data Library.
    # A value of None means that model/frequency combination is not published
    # for the region (e.g. no daily data and no FF3 monthly for 'emerging').
    REGIONS = {
        "us": {
            "ff3_daily": "F-F_Research_Data_Factors_daily",
            "ff3_monthly": "F-F_Research_Data_Factors",
            "ff5_daily": "F-F_Research_Data_5_Factors_2x3_daily",
            "ff5_monthly": "F-F_Research_Data_5_Factors_2x3",
            "mom_daily": "F-F_Momentum_Factor_daily",
            "mom_monthly": "F-F_Momentum_Factor",
        },
        "developed": {
            "ff3_daily": "Developed_3_Factors_Daily",
            "ff3_monthly": "Developed_3_Factors",
            "ff5_daily": "Developed_5_Factors_Daily",
            "ff5_monthly": "Developed_5_Factors",
            "mom_daily": "Developed_Mom_Factor_Daily",
            "mom_monthly": "Developed_Mom_Factor",
        },
        "developed_ex_us": {
            "ff3_daily": "Developed_ex_US_3_Factors_Daily",
            "ff3_monthly": "Developed_ex_US_3_Factors",
            "ff5_daily": "Developed_ex_US_5_Factors_Daily",
            "ff5_monthly": "Developed_ex_US_5_Factors",
            "mom_daily": "Developed_ex_US_Mom_Factor_Daily",
            "mom_monthly": "Developed_ex_US_Mom_Factor",
        },
        "emerging": {
            "ff3_daily": None,
            "ff3_monthly": None,
            "ff5_daily": None,
            "ff5_monthly": "Emerging_5_Factors",
            "mom_daily": None,
            "mom_monthly": "Emerging_MOM_Factor",
        },
        "europe": {
            "ff3_daily": "Europe_3_Factors_Daily",
            "ff3_monthly": "Europe_3_Factors",
            "ff5_daily": "Europe_5_Factors_Daily",
            "ff5_monthly": "Europe_5_Factors",
            "mom_daily": "Europe_Mom_Factor_Daily",
            "mom_monthly": "Europe_Mom_Factor",
        },
        "japan": {
            "ff3_daily": "Japan_3_Factors_Daily",
            "ff3_monthly": "Japan_3_Factors",
            "ff5_daily": "Japan_5_Factors_Daily",
            "ff5_monthly": "Japan_5_Factors",
            "mom_daily": "Japan_Mom_Factor_Daily",
            "mom_monthly": "Japan_Mom_Factor",
        },
        "asia_pacific_ex_japan": {
            "ff3_daily": "Asia_Pacific_ex_Japan_3_Factors_Daily",
            "ff3_monthly": "Asia_Pacific_ex_Japan_3_Factors",
            "ff5_daily": "Asia_Pacific_ex_Japan_5_Factors_Daily",
            "ff5_monthly": "Asia_Pacific_ex_Japan_5_Factors",
            "mom_daily": "Asia_Pacific_ex_Japan_MOM_Factor_Daily",
            "mom_monthly": "Asia_Pacific_ex_Japan_MOM_Factor",
        },
        "north_america": {
            "ff3_daily": "North_America_3_Factors_Daily",
            "ff3_monthly": "North_America_3_Factors",
            "ff5_daily": "North_America_5_Factors_Daily",
            "ff5_monthly": "North_America_5_Factors",
            "mom_daily": "North_America_Mom_Factor_Daily",
            "mom_monthly": "North_America_Mom_Factor",
        },
    }

    def __init__(self, cache_dir: Optional[str] = None):
        if cache_dir is None:
            import tempfile

            cache_dir = os.path.join(tempfile.gettempdir(), "ff_factors_cache")
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

    @classmethod
    def get_available_regions(cls) -> list:
        """
        Return list of supported region names.

        Returns
        -------
        list of str
            Supported region names for use with the ``region`` parameter.
        """
        return list(cls.REGIONS.keys())

    def _get_dataset_name(self, model: str, frequency: str, region: str) -> str:
        """
        Look up the French library dataset name for a model/frequency/region.

        Parameters
        ----------
        model : str
            Factor model key: 'ff3', 'ff5', or 'mom'.
        frequency : str
            'daily' or 'monthly'.
        region : str
            Region name (must be a key in ``REGIONS``).

        Returns
        -------
        str
            Dataset name string for ``pandas_datareader``.

        Raises
        ------
        ValueError
            If the region is unknown or the requested combination is unavailable.
        """
        if region not in self.REGIONS:
            available = ", ".join(sorted(self.REGIONS.keys()))
            raise ValueError(
                f"Unknown region '{region}'. Available regions: {available}"
            )

        key = f"{model}_{frequency}"
        region_datasets = self.REGIONS[region]

        if key not in region_datasets:
            raise ValueError(
                f"Invalid model/frequency combination: '{model}' / '{frequency}'."
            )

        dataset = region_datasets[key]
        if dataset is None:
            # Some regions publish only a subset of model/frequency
            # combinations (e.g. 'emerging' has no daily data and no FF3
            # monthly).  Report exactly which combination is missing instead
            # of assuming the daily variant was requested, and only suggest
            # monthly when a monthly variant actually exists.
            hint = ""
            if frequency == "daily" and region_datasets.get(f"{model}_monthly"):
                hint = " Use frequency='monthly'."
            raise ValueError(
                f"'{model}' factor data at '{frequency}' frequency is not "
                f"available for region '{region}'.{hint}"
            )

        return dataset

    def _get_cache_path(self, dataset: str) -> str:
        """Generate cache file path for a dataset."""
        # '-' is replaced so cache filenames stay shell- and glob-friendly.
        return os.path.join(self.cache_dir, f"{dataset.replace('-', '_')}.parquet")

    def _load_from_cache(
        self, dataset: str, max_age_days: int = 7
    ) -> Optional[pd.DataFrame]:
        """Load data from cache if available and not stale."""
        cache_path = self._get_cache_path(dataset)
        if not os.path.exists(cache_path):
            return None

        # Treat cache files older than max_age_days as stale so newly
        # published factor observations are eventually picked up.
        mtime = os.path.getmtime(cache_path)
        age_days = (datetime.now().timestamp() - mtime) / (24 * 3600)
        if age_days > max_age_days:
            return None

        try:
            return pd.read_parquet(cache_path)
        except Exception:
            # Corrupt/unreadable cache: fall through to a fresh download.
            return None

    def _save_to_cache(self, data: pd.DataFrame, dataset: str) -> None:
        """Save data to cache."""
        cache_path = self._get_cache_path(dataset)
        try:
            data.to_parquet(cache_path)
        except Exception:
            pass  # Silently fail if caching doesn't work

    def _fetch_french_data(self, dataset: str, use_cache: bool = True) -> pd.DataFrame:
        """
        Fetch data from Kenneth French Data Library.

        Parameters
        ----------
        dataset : str
            Dataset name from French library
        use_cache : bool, default True
            Whether to use local cache

        Returns
        -------
        pd.DataFrame
            Factor data with datetime index, in decimal (not percent) returns.
        """
        # Try cache first
        if use_cache:
            cached = self._load_from_cache(dataset)
            if cached is not None:
                return cached

        try:
            import pandas_datareader.data as web
        except ImportError:
            raise ImportError(
                "pandas-datareader is required for factor data. "
                "Install with: pip install pandas-datareader"
            )

        # Fetch the full history; date filtering happens downstream so the
        # cached file can serve any requested window.
        data = web.DataReader(dataset, "famafrench", start="1900-01-01")

        # web.DataReader returns a dict with multiple tables
        # First table (index 0) is typically the main data
        df = data[0]

        # Convert from percentage to decimal
        df = df / 100.0

        # Monthly tables come back with a PeriodIndex; normalize to datetime.
        if not isinstance(df.index, pd.DatetimeIndex):
            df.index = pd.to_datetime(df.index.astype(str))

        # Cache the result
        if use_cache:
            self._save_to_cache(df, dataset)

        return df

    def _filter_dates(
        self,
        data: pd.DataFrame,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
    ) -> pd.DataFrame:
        """Filter data to date range (inclusive on both ends)."""
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)
        return data[(data.index >= start) & (data.index <= end)]

    def get_ff3_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Fama-French 3-factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, RF
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("ff3", frequency, region)
        data = self._fetch_french_data(dataset)
        return self._filter_dates(data, start_date, end_date)

    def get_ff5_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Fama-French 5-factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, RMW, CMA, RF
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("ff5", frequency, region)
        data = self._fetch_french_data(dataset)
        return self._filter_dates(data, start_date, end_date)

    def get_momentum_factor(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.Series:
        """
        Get momentum factor data.

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.Series
            Momentum factor (MOM or WML)
        """
        if frequency not in ("daily", "monthly"):
            raise ValueError(
                f"Frequency must be 'daily' or 'monthly', got: {frequency}"
            )

        dataset = self._get_dataset_name("mom", frequency, region)
        data = self._fetch_french_data(dataset)
        filtered = self._filter_dates(data, start_date, end_date)

        # Column name varies by dataset: US uses 'Mom', international
        # datasets use 'WML'; fall back to the first column otherwise.
        if "Mom" in filtered.columns:
            return filtered["Mom"]
        elif "WML" in filtered.columns:
            return filtered["WML"]
        else:
            return filtered.iloc[:, 0]

    def get_carhart_factors(
        self,
        start_date: Union[str, datetime],
        end_date: Union[str, datetime],
        frequency: str = "daily",
        region: str = "us",
    ) -> pd.DataFrame:
        """
        Get Carhart 4-factor data (FF3 + Momentum).

        Parameters
        ----------
        start_date : str or datetime
            Start date for data
        end_date : str or datetime
            End date for data
        frequency : str, default 'daily'
            Data frequency: 'daily' or 'monthly'
        region : str, default 'us'
            Geographic region for factor data. Use
            ``FactorDataLoader.get_available_regions()`` for the full list.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Mkt-RF, SMB, HML, MOM, RF
        """
        ff3 = self.get_ff3_factors(start_date, end_date, frequency, region)
        mom = self.get_momentum_factor(start_date, end_date, frequency, region)

        # FF3 and momentum are published separately; keep only dates
        # present in both series.
        common_dates = ff3.index.intersection(mom.index)
        result = ff3.loc[common_dates].copy()
        result["MOM"] = mom.loc[common_dates]

        # Reorder columns to put MOM before RF
        cols = ["Mkt-RF", "SMB", "HML", "MOM", "RF"]
        return result[cols]

get_available_regions() classmethod

Return list of supported region names.

Returns:

Type Description
list of str

Supported region names for use with the region parameter.

Source code in portfolio_analysis/factors/data.py
@classmethod
def get_available_regions(cls) -> list:
    """
    List the region names this loader supports.

    Returns
    -------
    list of str
        Region names accepted by the ``region`` parameter.
    """
    return [*cls.REGIONS]

get_ff3_factors(start_date, end_date, frequency='daily', region='us')

Get Fama-French 3-factor data.

Parameters:

Name Type Description Default
start_date str or datetime

Start date for data

required
end_date str or datetime

End date for data

required
frequency str

Data frequency: 'daily' or 'monthly'

'daily'
region str

Geographic region for factor data. Use FactorDataLoader.get_available_regions() for the full list.

'us'

Returns:

Type Description
DataFrame

DataFrame with columns: Mkt-RF, SMB, HML, RF

Source code in portfolio_analysis/factors/data.py
def get_ff3_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Fetch Fama-French 3-factor returns for a date range.

    Parameters
    ----------
    start_date : str or datetime
        First date to include.
    end_date : str or datetime
        Last date to include.
    frequency : str, default 'daily'
        Either 'daily' or 'monthly'.
    region : str, default 'us'
        Geographic region; see
        ``FactorDataLoader.get_available_regions()`` for valid values.

    Returns
    -------
    pd.DataFrame
        Columns: Mkt-RF, SMB, HML, RF.
    """
    # Reject bad frequencies up front, before any lookup or download.
    if frequency not in ("daily", "monthly"):
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    name = self._get_dataset_name("ff3", frequency, region)
    return self._filter_dates(
        self._fetch_french_data(name), start_date, end_date
    )

get_ff5_factors(start_date, end_date, frequency='daily', region='us')

Get Fama-French 5-factor data.

Parameters:

Name Type Description Default
start_date str or datetime

Start date for data

required
end_date str or datetime

End date for data

required
frequency str

Data frequency: 'daily' or 'monthly'

'daily'
region str

Geographic region for factor data. Use FactorDataLoader.get_available_regions() for the full list.

'us'

Returns:

Type Description
DataFrame

DataFrame with columns: Mkt-RF, SMB, HML, RMW, CMA, RF

Source code in portfolio_analysis/factors/data.py
def get_ff5_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Fetch Fama-French 5-factor returns for a date range.

    Parameters
    ----------
    start_date : str or datetime
        First date to include.
    end_date : str or datetime
        Last date to include.
    frequency : str, default 'daily'
        Either 'daily' or 'monthly'.
    region : str, default 'us'
        Geographic region; see
        ``FactorDataLoader.get_available_regions()`` for valid values.

    Returns
    -------
    pd.DataFrame
        Columns: Mkt-RF, SMB, HML, RMW, CMA, RF.
    """
    # Validate frequency before touching the network or cache.
    if frequency not in {"daily", "monthly"}:
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    dataset_name = self._get_dataset_name("ff5", frequency, region)
    raw = self._fetch_french_data(dataset_name)
    return self._filter_dates(raw, start_date, end_date)

get_momentum_factor(start_date, end_date, frequency='daily', region='us')

Get momentum factor data.

Parameters:

Name Type Description Default
start_date str or datetime

Start date for data

required
end_date str or datetime

End date for data

required
frequency str

Data frequency: 'daily' or 'monthly'

'daily'
region str

Geographic region for factor data. Use FactorDataLoader.get_available_regions() for the full list.

'us'

Returns:

Type Description
Series

Momentum factor (MOM or WML)

Source code in portfolio_analysis/factors/data.py
def get_momentum_factor(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.Series:
    """
    Fetch the momentum factor for a date range.

    Parameters
    ----------
    start_date : str or datetime
        First date to include.
    end_date : str or datetime
        Last date to include.
    frequency : str, default 'daily'
        Either 'daily' or 'monthly'.
    region : str, default 'us'
        Geographic region; see
        ``FactorDataLoader.get_available_regions()`` for valid values.

    Returns
    -------
    pd.Series
        Momentum factor (MOM or WML)
    """
    # Fail fast on an unsupported frequency.
    if frequency not in ("daily", "monthly"):
        raise ValueError(
            f"Frequency must be 'daily' or 'monthly', got: {frequency}"
        )

    name = self._get_dataset_name("mom", frequency, region)
    window = self._filter_dates(
        self._fetch_french_data(name), start_date, end_date
    )

    # US datasets label the column 'Mom', international ones 'WML';
    # fall back to the first column for anything else.
    for label in ("Mom", "WML"):
        if label in window.columns:
            return window[label]
    return window.iloc[:, 0]

get_carhart_factors(start_date, end_date, frequency='daily', region='us')

Get Carhart 4-factor data (FF3 + Momentum).

Parameters:

Name Type Description Default
start_date str or datetime

Start date for data

required
end_date str or datetime

End date for data

required
frequency str

Data frequency: 'daily' or 'monthly'

'daily'
region str

Geographic region for factor data. Use FactorDataLoader.get_available_regions() for the full list.

'us'

Returns:

Type Description
DataFrame

DataFrame with columns: Mkt-RF, SMB, HML, MOM, RF

Source code in portfolio_analysis/factors/data.py
def get_carhart_factors(
    self,
    start_date: Union[str, datetime],
    end_date: Union[str, datetime],
    frequency: str = "daily",
    region: str = "us",
) -> pd.DataFrame:
    """
    Fetch Carhart 4-factor data (FF3 plus momentum) for a date range.

    Parameters
    ----------
    start_date : str or datetime
        First date to include.
    end_date : str or datetime
        Last date to include.
    frequency : str, default 'daily'
        Either 'daily' or 'monthly'.
    region : str, default 'us'
        Geographic region; see
        ``FactorDataLoader.get_available_regions()`` for valid values.

    Returns
    -------
    pd.DataFrame
        Columns: Mkt-RF, SMB, HML, MOM, RF.
    """
    three_factor = self.get_ff3_factors(start_date, end_date, frequency, region)
    momentum = self.get_momentum_factor(start_date, end_date, frequency, region)

    # The two datasets are published separately; keep only the dates
    # present in both before merging.
    shared = three_factor.index.intersection(momentum.index)
    merged = three_factor.loc[shared].copy()
    merged["MOM"] = momentum.loc[shared]

    # Fixed column order with MOM ahead of RF.
    return merged[["Mkt-RF", "SMB", "HML", "MOM", "RF"]]

FactorModel

Bases: Enum

Supported factor models.

Attributes:

Name Type Description
CAPM Single-factor market model
FF3 Fama-French 3-factor model (Mkt-RF, SMB, HML)
FF5 Fama-French 5-factor model (+ RMW, CMA)
CARHART Carhart 4-factor model (FF3 + MOM)
Source code in portfolio_analysis/factors/models.py
class FactorModel(Enum):
    """
    Supported factor models.

    Each member's value is the ordered list of factor column names the
    model regresses against (matching the column names produced by
    ``FactorDataLoader``).

    Attributes
    ----------
    CAPM : Single-factor market model
    FF3 : Fama-French 3-factor model (Mkt-RF, SMB, HML)
    FF5 : Fama-French 5-factor model (+ RMW, CMA)
    CARHART : Carhart 4-factor model (FF3 + MOM)
    """

    # Market excess return only.
    CAPM = ["Mkt-RF"]
    # Market, size, and value factors.
    FF3 = ["Mkt-RF", "SMB", "HML"]
    # FF3 plus profitability (RMW) and investment (CMA).
    FF5 = ["Mkt-RF", "SMB", "HML", "RMW", "CMA"]
    # FF3 plus momentum.
    CARHART = ["Mkt-RF", "SMB", "HML", "MOM"]

RegressionResults dataclass

Results from a factor regression.

Attributes:

Name Type Description
alpha float

Jensen's alpha (annualized intercept)

alpha_pvalue float

P-value for alpha significance test

betas dict

Factor loadings (sensitivities)

beta_pvalues dict

P-values for each beta

beta_tstats dict

T-statistics for each beta

r_squared float

R-squared (explained variance)

adj_r_squared float

Adjusted R-squared

residual_std float

Standard deviation of residuals (annualized)

n_observations int

Number of observations used

model str

Model name used for regression

factors list

Factor names used in the model

Source code in portfolio_analysis/factors/models.py
@dataclass
class RegressionResults:
    """
    Container for the output of a factor regression.

    Attributes
    ----------
    alpha : float
        Jensen's alpha (annualized intercept)
    alpha_pvalue : float
        P-value for alpha significance test
    betas : dict
        Factor loadings (sensitivities)
    beta_pvalues : dict
        P-values for each beta
    beta_tstats : dict
        T-statistics for each beta
    r_squared : float
        R-squared (explained variance)
    adj_r_squared : float
        Adjusted R-squared
    residual_std : float
        Standard deviation of residuals (annualized)
    n_observations : int
        Number of observations used
    model : str
        Model name used for regression
    factors : list
        Factor names used in the model
    """

    alpha: float
    alpha_pvalue: float
    betas: dict[str, float]
    beta_pvalues: dict[str, float]
    beta_tstats: dict[str, float]
    r_squared: float
    adj_r_squared: float
    residual_std: float
    n_observations: int
    model: str
    factors: list[str]

    def summary(self) -> str:
        """Generate a text summary of regression results."""
        bar = "=" * 60
        out = [
            f"\n{bar}",
            f"Factor Regression Results: {self.model}",
            bar,
            f"Observations: {self.n_observations}",
            f"R-squared: {self.r_squared:.4f}",
            f"Adj R-squared: {self.adj_r_squared:.4f}",
            f"Residual Std: {self.residual_std * 100:.2f}% (annualized)",
            f"\n{'Coefficient':<12} {'Value':>10} {'T-stat':>10} {'P-value':>10}",
            f"{'-' * 42}",
            f"{'Alpha':<12} {self.alpha * 100:>9.2f}% {self._alpha_tstat():>10.2f} {self.alpha_pvalue:>10.4f}",
        ]
        # One row per factor, aligned with the header above.
        out.extend(
            f"{name:<12} {self.betas[name]:>10.3f} "
            f"{self.beta_tstats[name]:>10.2f} {self.beta_pvalues[name]:>10.4f}"
            for name in self.factors
        )
        out.append(bar)
        return "\n".join(out)

    def _alpha_tstat(self) -> float:
        """Approximate t-stat for alpha from p-value."""
        from scipy import stats

        p = self.alpha_pvalue
        if p >= 1.0:
            return 0.0
        if p <= 0.0:
            return np.inf if self.alpha > 0 else -np.inf
        # Two-sided inversion of the normal CDF, signed like alpha.
        return np.sign(self.alpha) * stats.norm.ppf(1 - p / 2)

    def __repr__(self) -> str:
        return (
            "RegressionResults(model='{0}', alpha={1:.4f}, "
            "r_squared={2:.4f})".format(self.model, self.alpha, self.r_squared)
        )

summary()

Generate a text summary of regression results.

Source code in portfolio_analysis/factors/models.py
def summary(self) -> str:
    """Render the regression results as a formatted text report."""
    bar = "=" * 60
    report = [
        f"\n{bar}",
        f"Factor Regression Results: {self.model}",
        bar,
        f"Observations: {self.n_observations}",
        f"R-squared: {self.r_squared:.4f}",
        f"Adj R-squared: {self.adj_r_squared:.4f}",
        f"Residual Std: {self.residual_std * 100:.2f}% (annualized)",
        f"\n{'Coefficient':<12} {'Value':>10} {'T-stat':>10} {'P-value':>10}",
        f"{'-' * 42}",
        f"{'Alpha':<12} {self.alpha * 100:>9.2f}% {self._alpha_tstat():>10.2f} {self.alpha_pvalue:>10.4f}",
    ]
    # One aligned row per factor, matching the column header above.
    report.extend(
        f"{name:<12} {self.betas[name]:>10.3f} "
        f"{self.beta_tstats[name]:>10.2f} {self.beta_pvalues[name]:>10.4f}"
        for name in self.factors
    )
    report.append(bar)
    return "\n".join(report)

FactorRegression

Run factor regressions on portfolio or asset returns.

Parameters:

Name Type Description Default
returns Series

Portfolio or asset returns with datetime index

required
factor_data DataFrame

Factor data from FactorDataLoader

required
annualization_factor int

Number of periods per year (252 for daily, 12 for monthly)

252

Examples:

>>> from portfolio_analysis.factors import FactorDataLoader, FactorRegression
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> regression = FactorRegression(portfolio_returns, ff3)
>>> results = regression.run_regression('ff3')
>>> print(results.summary())
Source code in portfolio_analysis/factors/models.py
class FactorRegression:
    """
    Run factor regressions on portfolio or asset returns.

    Parameters
    ----------
    returns : pd.Series
        Portfolio or asset returns with datetime index
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader
    annualization_factor : int, optional
        Number of periods per year (252 for daily, 12 for monthly).
        Auto-detected from the index spacing when omitted.

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorDataLoader, FactorRegression
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> regression = FactorRegression(portfolio_returns, ff3)
    >>> results = regression.run_regression('ff3')
    >>> print(results.summary())
    """

    # Periods per year assumed for daily data.
    TRADING_DAYS = 252

    def __init__(
        self,
        returns: pd.Series,
        factor_data: pd.DataFrame,
        annualization_factor: Optional[int] = None,
    ):
        self.raw_returns = returns
        self.raw_factor_data = factor_data

        # Align return and factor indexes; returns become excess of RF.
        self.excess_returns, self.factor_data = align_returns_with_factors(
            returns, factor_data, compute_excess=True
        )

        # Auto-detect frequency if not specified
        if annualization_factor is None:
            if len(self.excess_returns) > 1:
                # Average calendar days between observations distinguishes
                # daily (~1-3 days) from monthly (~30 days) series.
                avg_days = (
                    self.excess_returns.index[-1] - self.excess_returns.index[0]
                ).days / len(self.excess_returns)
                if avg_days > 20:  # Monthly
                    annualization_factor = 12
                else:  # Daily
                    annualization_factor = self.TRADING_DAYS
            else:
                # A single observation carries no frequency info; assume daily.
                annualization_factor = self.TRADING_DAYS

        self.annualization_factor = annualization_factor

    def _get_model_factors(self, model: Union[str, FactorModel]) -> list[str]:
        """Resolve a model name or FactorModel enum to its list of factor names."""
        if isinstance(model, str):
            model = model.lower()
            # Dict dispatch instead of an if/elif chain.
            mapping = {
                "capm": FactorModel.CAPM,
                "ff3": FactorModel.FF3,
                "ff5": FactorModel.FF5,
                "carhart": FactorModel.CARHART,
            }
            try:
                return mapping[model].value
            except KeyError:
                raise ValueError(
                    f"Unknown model: {model}. Use 'capm', 'ff3', 'ff5', or 'carhart'"
                ) from None
        return model.value

    def _ols_regression(self, y: np.ndarray, X: np.ndarray) -> dict:
        """
        Run OLS regression of y on X (with intercept) and compute inference.

        Parameters
        ----------
        y : np.ndarray
            Dependent variable, shape (n,)
        X : np.ndarray
            Regressor matrix, shape (n, k); the intercept column is added here.

        Returns
        -------
        dict
            Keys: 'coeffs' (intercept first), 't_stats', 'p_values',
            'r_squared', 'adj_r_squared', 'residual_std', 'n'.

        Raises
        ------
        ValueError
            If there are not enough observations (n <= k + 1).
        """
        from scipy import stats

        n = len(y)
        k = X.shape[1]

        # Prepend a column of ones so coeffs[0] is the intercept (alpha).
        X_with_const = np.column_stack([np.ones(n), X])

        # OLS normal equations: beta = (X'X)^-1 X'y
        XtX = X_with_const.T @ X_with_const
        XtX_inv = np.linalg.inv(XtX)
        coeffs = XtX_inv @ X_with_const.T @ y

        # Residuals and variance
        y_hat = X_with_const @ coeffs
        residuals = y - y_hat
        sse = residuals @ residuals
        dof = n - k - 1  # degrees of freedom

        if dof <= 0:
            raise ValueError("Not enough observations for regression")

        mse = sse / dof
        residual_std = np.sqrt(mse)

        # Coefficient standard errors from diag((X'X)^-1) * sigma^2
        se = np.sqrt(np.diag(XtX_inv) * mse)

        # Two-sided p-values. stats.t.sf (survival function) is numerically
        # more accurate than 1 - cdf when p-values are very small.
        t_stats = coeffs / se
        p_values = 2 * stats.t.sf(np.abs(t_stats), dof)

        # R-squared (guard against constant y, where ss_total == 0)
        ss_total = ((y - y.mean()) ** 2).sum()
        r_squared = 1 - sse / ss_total if ss_total > 0 else 0

        # Adjusted R-squared penalizes the number of regressors
        adj_r_squared = 1 - (1 - r_squared) * (n - 1) / dof

        return {
            "coeffs": coeffs,
            "t_stats": t_stats,
            "p_values": p_values,
            "r_squared": r_squared,
            "adj_r_squared": adj_r_squared,
            "residual_std": residual_std,
            "n": n,
        }

    def run_regression(
        self, model: Union[str, FactorModel] = "ff3"
    ) -> RegressionResults:
        """
        Run a factor regression.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'

        Returns
        -------
        RegressionResults
            Regression results with alpha, betas, and statistics

        Raises
        ------
        ValueError
            If any required factor column is missing from the factor data.
        """
        factors = self._get_model_factors(model)

        # Validate factors exist in data
        missing = [f for f in factors if f not in self.factor_data.columns]
        if missing:
            raise ValueError(
                f"Factors not in data: {missing}. Available: {self.factor_data.columns.tolist()}"
            )

        # Prepare data
        y = self.excess_returns.values
        X = self.factor_data[factors].values

        # Run regression
        results = self._ols_regression(y, X)

        # Extract results: intercept is alpha, remaining coeffs are betas
        alpha = results["coeffs"][0]
        betas = dict(zip(factors, results["coeffs"][1:]))
        beta_tstats = dict(zip(factors, results["t_stats"][1:]))
        beta_pvalues = dict(zip(factors, results["p_values"][1:]))

        # Annualize: alpha scales linearly, volatility with sqrt(periods)
        alpha_annual = alpha * self.annualization_factor
        residual_std_annual = results["residual_std"] * np.sqrt(
            self.annualization_factor
        )

        model_name = model.name if isinstance(model, FactorModel) else model.upper()

        return RegressionResults(
            alpha=alpha_annual,
            alpha_pvalue=results["p_values"][0],
            betas=betas,
            beta_pvalues=beta_pvalues,
            beta_tstats=beta_tstats,
            r_squared=results["r_squared"],
            adj_r_squared=results["adj_r_squared"],
            residual_std=residual_std_annual,
            n_observations=results["n"],
            model=model_name,
            factors=factors,
        )

    def run_rolling_regression(
        self, model: Union[str, FactorModel] = "ff3", window: int = 60
    ) -> pd.DataFrame:
        """
        Run rolling factor regressions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use
        window : int, default 60
            Rolling window size (number of periods)

        Returns
        -------
        pd.DataFrame
            DataFrame with rolling alpha and betas, indexed by date
            (window end). Windows where the regression fails (e.g. a
            singular factor matrix) are skipped, best-effort.
        """
        factors = self._get_model_factors(model)
        y = self.excess_returns
        X = self.factor_data[factors]

        results = []
        dates = []

        for i in range(window, len(y) + 1):
            y_window = y.iloc[i - window : i].values
            X_window = X.iloc[i - window : i].values

            try:
                reg = self._ols_regression(y_window, X_window)
                result = {"alpha": reg["coeffs"][0] * self.annualization_factor}
                for j, factor in enumerate(factors):
                    result[factor] = reg["coeffs"][j + 1]
                result["r_squared"] = reg["r_squared"]
                results.append(result)
                dates.append(y.index[i - 1])
            except Exception:
                # Deliberate best-effort: drop unestimable windows.
                continue

        return pd.DataFrame(results, index=pd.DatetimeIndex(dates))

    def compare_models(self) -> pd.DataFrame:
        """
        Compare different factor models.

        Returns
        -------
        pd.DataFrame
            Comparison table with alpha, R-squared, and key betas for each
            model. Only models whose factors are present in the data are
            included; models that fail to estimate are skipped.
        """
        models = ["capm", "ff3"]

        # Add ff5 if factors available
        if all(f in self.factor_data.columns for f in FactorModel.FF5.value):
            models.append("ff5")

        # Add carhart if momentum available
        if "MOM" in self.factor_data.columns:
            models.append("carhart")

        results = []
        for model in models:
            try:
                reg = self.run_regression(model)
                result = {
                    "Model": reg.model,
                    "Alpha (%)": reg.alpha * 100,
                    "Alpha p-value": reg.alpha_pvalue,
                    "R-squared": reg.r_squared,
                    "Adj R-squared": reg.adj_r_squared,
                    "Mkt Beta": reg.betas.get("Mkt-RF", np.nan),
                }
                # Add other betas if available
                for factor in ["SMB", "HML", "RMW", "CMA", "MOM"]:
                    if factor in reg.betas:
                        result[factor] = reg.betas[factor]
                results.append(result)
            except Exception:
                # Deliberate best-effort: an unestimable model is omitted.
                continue

        return pd.DataFrame(results)

run_regression(model='ff3')

Run a factor regression.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'

'ff3'

Returns:

Type Description
RegressionResults

Regression results with alpha, betas, and statistics

Source code in portfolio_analysis/factors/models.py
def run_regression(
    self, model: Union[str, FactorModel] = "ff3"
) -> RegressionResults:
    """
    Run a single factor regression and package the results.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use: 'capm', 'ff3', 'ff5', or 'carhart'

    Returns
    -------
    RegressionResults
        Regression results with alpha, betas, and statistics

    Raises
    ------
    ValueError
        If any required factor column is missing from the factor data.
    """
    factor_names = self._get_model_factors(model)

    # Fail fast if the loaded factor data lacks any required column.
    unavailable = [name for name in factor_names if name not in self.factor_data.columns]
    if unavailable:
        raise ValueError(
            f"Factors not in data: {unavailable}. Available: {self.factor_data.columns.tolist()}"
        )

    # Fit OLS of excess returns on the selected factor columns.
    fit = self._ols_regression(
        self.excess_returns.values, self.factor_data[factor_names].values
    )

    coeffs = fit["coeffs"]

    # Annualize: alpha scales linearly, residual vol with sqrt(periods).
    annualized_alpha = coeffs[0] * self.annualization_factor
    annualized_resid_std = fit["residual_std"] * np.sqrt(self.annualization_factor)

    if isinstance(model, FactorModel):
        label = model.name
    else:
        label = model.upper()

    return RegressionResults(
        alpha=annualized_alpha,
        alpha_pvalue=fit["p_values"][0],
        betas={name: b for name, b in zip(factor_names, coeffs[1:])},
        beta_pvalues={name: p for name, p in zip(factor_names, fit["p_values"][1:])},
        beta_tstats={name: t for name, t in zip(factor_names, fit["t_stats"][1:])},
        r_squared=fit["r_squared"],
        adj_r_squared=fit["adj_r_squared"],
        residual_std=annualized_resid_std,
        n_observations=fit["n"],
        model=label,
        factors=factor_names,
    )

run_rolling_regression(model='ff3', window=60)

Run rolling factor regressions.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use

'ff3'
window int

Rolling window size (number of periods)

60

Returns:

Type Description
DataFrame

DataFrame with rolling alpha and betas, indexed by date

Source code in portfolio_analysis/factors/models.py
def run_rolling_regression(
    self, model: Union[str, FactorModel] = "ff3", window: int = 60
) -> pd.DataFrame:
    """
    Run factor regressions over a rolling window.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use
    window : int, default 60
        Rolling window size (number of periods)

    Returns
    -------
    pd.DataFrame
        Rolling annualized alpha, betas, and R-squared, indexed by the
        date at the end of each window. Windows where estimation fails
        are skipped.
    """
    factor_names = self._get_model_factors(model)
    series = self.excess_returns
    exog = self.factor_data[factor_names]

    rows = []
    idx = []

    # Slide a fixed-size window over the aligned sample.
    for end in range(window, len(series) + 1):
        start = end - window
        try:
            fit = self._ols_regression(
                series.iloc[start:end].values, exog.iloc[start:end].values
            )
        except Exception:
            # Best-effort: skip windows that cannot be estimated.
            continue
        row = {"alpha": fit["coeffs"][0] * self.annualization_factor}
        row.update(zip(factor_names, fit["coeffs"][1:]))
        row["r_squared"] = fit["r_squared"]
        rows.append(row)
        idx.append(series.index[end - 1])

    return pd.DataFrame(rows, index=pd.DatetimeIndex(idx))

compare_models()

Compare different factor models.

Returns:

Type Description
DataFrame

Comparison table with alpha, R-squared, and key betas for each model

Source code in portfolio_analysis/factors/models.py
def compare_models(self) -> pd.DataFrame:
    """
    Compare alternative factor models on the same return series.

    Returns
    -------
    pd.DataFrame
        One row per model with alpha, R-squared, and factor betas.
        Models whose factors are absent from the data are not attempted;
        models that fail to estimate are silently skipped.
    """
    candidates = ["capm", "ff3"]
    available = self.factor_data.columns

    # FF5 requires all five factor columns to be present.
    if all(name in available for name in FactorModel.FF5.value):
        candidates.append("ff5")

    # Carhart additionally needs the momentum factor.
    if "MOM" in available:
        candidates.append("carhart")

    rows = []
    for name in candidates:
        try:
            fit = self.run_regression(name)
            row = {
                "Model": fit.model,
                "Alpha (%)": fit.alpha * 100,
                "Alpha p-value": fit.alpha_pvalue,
                "R-squared": fit.r_squared,
                "Adj R-squared": fit.adj_r_squared,
                "Mkt Beta": fit.betas.get("Mkt-RF", np.nan),
            }
            # Append whichever non-market betas the model produced.
            for extra in ["SMB", "HML", "RMW", "CMA", "MOM"]:
                if extra in fit.betas:
                    row[extra] = fit.betas[extra]
            rows.append(row)
        except Exception:
            continue

    return pd.DataFrame(rows)

FactorExposures

Calculate characteristic-based factor exposures for a portfolio.

This class estimates factor tilts based on security characteristics (market cap, valuation ratios, momentum, etc.) rather than regression.

Parameters:

Name Type Description Default
tickers list of str

List of ticker symbols in the portfolio

required
weights list of float

Portfolio weights for each ticker (must sum to 1.0)

required

Examples:

>>> exposures = FactorExposures(['VTI', 'VBR', 'VTV'], [0.5, 0.25, 0.25])
>>> tilts = exposures.get_all_tilts()
>>> print(f"Size tilt: {tilts['size']:.2f}")
>>> print(f"Value tilt: {tilts['value']:.2f}")
Source code in portfolio_analysis/factors/exposures.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
class FactorExposures:
    """
    Calculate characteristic-based factor exposures for a portfolio.

    This class estimates factor tilts based on security characteristics
    (market cap, valuation ratios, momentum, etc.) rather than regression.

    Parameters
    ----------
    tickers : list of str
        List of ticker symbols in the portfolio
    weights : list of float
        Portfolio weights for each ticker (must sum to 1.0)

    Notes
    -----
    Characteristics and prices are fetched from Yahoo Finance via ``yf``
    at call time, so results depend on network availability and on which
    fields Yahoo returns; failed lookups fall back to neutral defaults.
    Each tilt is a weight-averaged per-holding score in [-1, +1].

    Examples
    --------
    >>> exposures = FactorExposures(['VTI', 'VBR', 'VTV'], [0.5, 0.25, 0.25])
    >>> tilts = exposures.get_all_tilts()
    >>> print(f"Size tilt: {tilts['size']:.2f}")
    >>> print(f"Value tilt: {tilts['value']:.2f}")
    """

    # Market cap thresholds (in billions) separating large/mid/small buckets
    LARGE_CAP_THRESHOLD = 10.0
    SMALL_CAP_THRESHOLD = 2.0

    def __init__(self, tickers: list[str], weights: list[float]):
        if len(tickers) != len(weights):
            raise ValueError("Number of tickers must match number of weights")

        # Allow 1% slack for rounding in user-supplied weights
        if abs(sum(weights) - 1.0) > 0.01:
            raise ValueError("Weights must sum to 1.0")

        self.tickers = tickers
        self.weights = np.array(weights)
        # Lazy cache filled by _fetch_characteristics on first use
        self._characteristics: Optional[pd.DataFrame] = None

    def _fetch_characteristics(self) -> pd.DataFrame:
        """Fetch fundamental characteristics for all tickers.

        Returns a DataFrame indexed by ticker; results are cached on the
        instance so repeated tilt calls hit the network only once.
        """
        if self._characteristics is not None:
            return self._characteristics

        data = []
        for ticker in self.tickers:
            try:
                # NOTE(review): field availability/units depend on Yahoo's
                # .info payload and may vary by security type — verify.
                info = yf.Ticker(ticker).info
                data.append(
                    {
                        "ticker": ticker,
                        "market_cap": info.get("marketCap", None),
                        "pe_ratio": info.get("trailingPE", info.get("forwardPE", None)),
                        "pb_ratio": info.get("priceToBook", None),
                        "dividend_yield": info.get("dividendYield", 0) or 0,
                        "beta": info.get("beta", None),
                        "profit_margin": info.get("profitMargins", None),
                        "roe": info.get("returnOnEquity", None),
                        "debt_to_equity": info.get("debtToEquity", None),
                        "revenue_growth": info.get("revenueGrowth", None),
                        "earnings_growth": info.get("earningsGrowth", None),
                    }
                )
            except Exception:
                # Use defaults for ETFs or failed lookups
                data.append(
                    {
                        "ticker": ticker,
                        "market_cap": None,
                        "pe_ratio": None,
                        "pb_ratio": None,
                        "dividend_yield": 0,
                        "beta": 1.0,
                        "profit_margin": None,
                        "roe": None,
                        "debt_to_equity": None,
                        "revenue_growth": None,
                        "earnings_growth": None,
                    }
                )

        self._characteristics = pd.DataFrame(data).set_index("ticker")
        return self._characteristics

    def _calculate_momentum(self, lookback_months: int = 12) -> pd.Series:
        """Calculate momentum based on historical returns.

        Returns a Series of total returns per ticker from the start of the
        lookback to ~1 month ago; falls back to zeros on any failure.
        """
        from datetime import datetime, timedelta

        end_date = datetime.now()
        # Extra 30 days of history so the skip-month window is covered
        start_date = end_date - timedelta(days=lookback_months * 30 + 30)

        try:
            prices = yf.download(
                self.tickers, start=start_date, end=end_date, progress=False
            )

            # Handle column format
            if isinstance(prices.columns, pd.MultiIndex):
                if "Adj Close" in prices.columns.get_level_values(0):
                    prices = prices["Adj Close"]
                else:
                    prices = prices["Close"]

            # Calculate momentum (skip most recent month)
            # NOTE(review): assumes ~21 trading days per month — confirm
            # for non-US calendars.
            if len(prices) > 21:
                momentum = (prices.iloc[-22] / prices.iloc[0]) - 1
            else:
                momentum = pd.Series(0, index=self.tickers)

            return momentum

        except Exception:
            # Best-effort: missing price data yields a neutral momentum
            return pd.Series(0, index=self.tickers)

    def calculate_size_tilt(self) -> float:
        """
        Calculate portfolio size tilt (SMB exposure).

        Returns
        -------
        float
            Size tilt from -1 (large cap) to +1 (small cap)
            0 indicates market-neutral size exposure
        """
        chars = self._fetch_characteristics()

        # Convert market cap to billions; missing caps take the median
        # (if ALL caps are missing, NaN propagates and scores as 0 below)
        market_caps = chars["market_cap"].fillna(chars["market_cap"].median())
        market_caps_b = market_caps / 1e9

        # Score each holding: -1 for large, 0 for mid, +1 for small
        scores = []
        for mc in market_caps_b:
            if mc is None or pd.isna(mc):
                scores.append(0)
            elif mc > self.LARGE_CAP_THRESHOLD:
                scores.append(-1)
            elif mc < self.SMALL_CAP_THRESHOLD:
                scores.append(1)
            else:
                # Linear interpolation for mid-cap: maps (SMALL, LARGE)
                # onto (+1, -1)
                scores.append(
                    (self.LARGE_CAP_THRESHOLD - mc)
                    / (self.LARGE_CAP_THRESHOLD - self.SMALL_CAP_THRESHOLD)
                    * 2
                    - 1
                )

        # Weight-average the per-holding scores
        return float(np.dot(scores, self.weights))

    def calculate_value_tilt(self) -> float:
        """
        Calculate portfolio value tilt (HML exposure).

        Returns
        -------
        float
            Value tilt from -1 (growth) to +1 (value)
            0 indicates market-neutral value exposure
        """
        chars = self._fetch_characteristics()

        # Use P/B ratio primarily, P/E as backup; median-fill missing values
        pb_ratios = chars["pb_ratio"].fillna(chars["pb_ratio"].median())
        pe_ratios = chars["pe_ratio"].fillna(chars["pe_ratio"].median())

        # Score based on valuation: low P/B = value (+1), high P/B = growth (-1)
        # Typical P/B ranges: <1 deep value, 1-3 neutral, >3 growth
        scores = []
        for i, (pb, pe) in enumerate(zip(pb_ratios, pe_ratios)):
            # Use P/B if available, else P/E
            if pd.notna(pb) and pb > 0:
                if pb < 1.5:
                    score = 1.0
                elif pb > 4.0:
                    score = -1.0
                else:
                    # Linear interpolation between P/B 1.5 and 4.0
                    score = 1.0 - (pb - 1.5) / 2.5 * 2
            elif pd.notna(pe) and pe > 0:
                # P/E based scoring: <15 value, >25 growth
                if pe < 15:
                    score = 1.0
                elif pe > 25:
                    score = -1.0
                else:
                    score = 1.0 - (pe - 15) / 10 * 2
            else:
                # No usable valuation ratio: neutral
                score = 0.0
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def calculate_momentum_tilt(self) -> float:
        """
        Calculate portfolio momentum tilt (MOM exposure).

        Returns
        -------
        float
            Momentum tilt from -1 (low momentum) to +1 (high momentum)
        """
        momentum = self._calculate_momentum(lookback_months=12)

        # Normalize momentum scores
        # Market average is roughly 10% annual, winners >20%, losers <0%
        scores = []
        for ticker in self.tickers:
            mom = momentum.get(ticker, 0)
            if mom > 0.2:
                score = 1.0
            elif mom < 0.0:
                score = -1.0
            else:
                # Linear interpolation: maps [0, 0.2] onto [-1, +1]
                score = mom / 0.2 * 2 - 1
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def calculate_quality_tilt(self) -> float:
        """
        Calculate portfolio quality tilt (RMW-like exposure).

        Quality is based on profitability and financial health.

        Returns
        -------
        float
            Quality tilt from -1 (low quality) to +1 (high quality)
        """
        chars = self._fetch_characteristics()

        scores = []
        for i, ticker in enumerate(self.tickers):
            row = chars.loc[ticker]

            # Score components; 0 doubles as "no data" (see filter below)
            profit_score = 0
            roe_score = 0
            debt_score = 0

            # Profit margin (>15% good, <5% poor)
            pm = row.get("profit_margin")
            if pd.notna(pm):
                if pm > 0.15:
                    profit_score = 1
                elif pm < 0.05:
                    profit_score = -1
                else:
                    profit_score = (pm - 0.05) / 0.10 * 2 - 1

            # ROE (>15% good, <8% poor)
            roe = row.get("roe")
            if pd.notna(roe):
                if roe > 0.15:
                    roe_score = 1
                elif roe < 0.08:
                    roe_score = -1
                else:
                    roe_score = (roe - 0.08) / 0.07 * 2 - 1

            # Debt/Equity (low is better: <50% good, >150% poor)
            de = row.get("debt_to_equity")
            if pd.notna(de):
                de_ratio = de / 100  # Often reported as percentage
                if de_ratio < 0.5:
                    debt_score = 1
                elif de_ratio > 1.5:
                    debt_score = -1
                else:
                    debt_score = 1 - (de_ratio - 0.5) / 1.0 * 2

            # Average available scores
            # NOTE(review): a component that computes to exactly 0 (the
            # interpolation midpoint) is excluded here just like missing
            # data — confirm this conflation is intended.
            available = [s for s in [profit_score, roe_score, debt_score] if s != 0]
            scores.append(np.mean(available) if available else 0)

        return float(np.dot(scores, self.weights))

    def calculate_investment_tilt(self) -> float:
        """
        Calculate portfolio investment tilt (CMA-like exposure).

        Conservative investment (low asset growth) vs aggressive.

        Returns
        -------
        float
            Investment tilt from -1 (aggressive) to +1 (conservative)
        """
        chars = self._fetch_characteristics()

        scores = []
        for ticker in self.tickers:
            row = chars.loc[ticker]

            # Use revenue growth as proxy for investment aggressiveness
            rev_growth = row.get("revenue_growth")
            if pd.notna(rev_growth):
                # High growth = aggressive (-1), low growth = conservative (+1)
                if rev_growth > 0.20:
                    score = -1.0
                elif rev_growth < 0.05:
                    score = 1.0
                else:
                    # Linear interpolation between 5% and 20% growth
                    score = 1.0 - (rev_growth - 0.05) / 0.15 * 2
            else:
                score = 0.0
            scores.append(score)

        return float(np.dot(scores, self.weights))

    def get_all_tilts(self) -> dict[str, float]:
        """
        Calculate all factor tilts for the portfolio.

        Returns
        -------
        dict
            Dictionary with all factor tilts:
            - size: SMB-like exposure
            - value: HML-like exposure
            - momentum: MOM-like exposure
            - quality: RMW-like exposure
            - investment: CMA-like exposure
        """
        return {
            "size": self.calculate_size_tilt(),
            "value": self.calculate_value_tilt(),
            "momentum": self.calculate_momentum_tilt(),
            "quality": self.calculate_quality_tilt(),
            "investment": self.calculate_investment_tilt(),
        }

    def get_characteristics_table(self) -> pd.DataFrame:
        """
        Get a table of fundamental characteristics for all holdings.

        Returns
        -------
        pd.DataFrame
            Characteristics for each holding with portfolio weight
        """
        # Copy so the cached frame is not mutated by the weight column
        chars = self._fetch_characteristics().copy()
        chars["weight"] = self.weights

        # Reorder columns
        cols = [
            "weight",
            "market_cap",
            "pe_ratio",
            "pb_ratio",
            "dividend_yield",
            "beta",
            "profit_margin",
            "roe",
            "debt_to_equity",
            "revenue_growth",
            "earnings_growth",
        ]
        available_cols = [c for c in cols if c in chars.columns]

        return chars[available_cols]

    def summary(self) -> str:
        """Generate a text summary of factor exposures."""
        tilts = self.get_all_tilts()

        lines = [
            f"\n{'=' * 50}",
            "Portfolio Factor Exposures (Characteristic-Based)",
            f"{'=' * 50}",
            f"{'Factor':<15} {'Tilt':>10} {'Interpretation':<25}",
            f"{'-' * 50}",
        ]

        # |tilt| <= 0.3 is labeled neutral/blend for every factor
        interpretations = {
            "size": lambda x: (
                "Small Cap" if x > 0.3 else ("Large Cap" if x < -0.3 else "Neutral")
            ),
            "value": lambda x: (
                "Value" if x > 0.3 else ("Growth" if x < -0.3 else "Blend")
            ),
            "momentum": lambda x: (
                "High Mom" if x > 0.3 else ("Low Mom" if x < -0.3 else "Neutral")
            ),
            "quality": lambda x: (
                "High Quality"
                if x > 0.3
                else ("Low Quality" if x < -0.3 else "Neutral")
            ),
            "investment": lambda x: (
                "Conservative" if x > 0.3 else ("Aggressive" if x < -0.3 else "Neutral")
            ),
        }

        for factor, tilt in tilts.items():
            interp = interpretations[factor](tilt)
            lines.append(f"{factor.capitalize():<15} {tilt:>10.2f} {interp:<25}")

        lines.append("=" * 50)
        return "\n".join(lines)

calculate_size_tilt()

Calculate portfolio size tilt (SMB exposure).

Returns:

Type Description
float

Size tilt from -1 (large cap) to +1 (small cap) 0 indicates market-neutral size exposure

Source code in portfolio_analysis/factors/exposures.py
def calculate_size_tilt(self) -> float:
    """
    Estimate the portfolio's size (SMB-like) tilt from holdings' market caps.

    Returns
    -------
    float
        Size tilt from -1 (large cap) to +1 (small cap);
        0 indicates market-neutral size exposure.
    """
    characteristics = self._fetch_characteristics()

    # Median-fill missing caps, then scale from dollars to billions.
    caps_billions = (
        characteristics["market_cap"].fillna(characteristics["market_cap"].median())
        / 1e9
    )

    def _score(cap):
        # No data anywhere (NaN survives the median fill) -> neutral.
        if cap is None or pd.isna(cap):
            return 0
        if cap > self.LARGE_CAP_THRESHOLD:
            return -1
        if cap < self.SMALL_CAP_THRESHOLD:
            return 1
        # Mid-cap: linearly map (SMALL, LARGE) onto (+1, -1).
        span = self.LARGE_CAP_THRESHOLD - self.SMALL_CAP_THRESHOLD
        return (self.LARGE_CAP_THRESHOLD - cap) / span * 2 - 1

    scores = [_score(cap) for cap in caps_billions]

    # Weight-average the per-holding scores.
    return float(np.dot(scores, self.weights))

calculate_value_tilt()

Calculate portfolio value tilt (HML exposure).

Returns:

Type Description
float

Value tilt from -1 (growth) to +1 (value) 0 indicates market-neutral value exposure

Source code in portfolio_analysis/factors/exposures.py
def calculate_value_tilt(self) -> float:
    """
    Calculate portfolio value tilt (HML exposure).

    Returns
    -------
    float
        Value tilt from -1 (growth) to +1 (value);
        0 indicates market-neutral value exposure.
    """
    chars = self._fetch_characteristics()

    # Use P/B ratio primarily, P/E as backup; fill gaps with the median.
    pb_ratios = chars["pb_ratio"].fillna(chars["pb_ratio"].median())
    pe_ratios = chars["pe_ratio"].fillna(chars["pe_ratio"].median())

    # Score based on valuation: low P/B = value (+1), high P/B = growth (-1)
    # Typical P/B ranges: <1 deep value, 1-3 neutral, >3 growth
    scores = []
    # NOTE: the index from the original enumerate() was unused, so iterate
    # the zipped pairs directly.
    for pb, pe in zip(pb_ratios, pe_ratios):
        # Use P/B if available and positive, else fall back to P/E.
        if pd.notna(pb) and pb > 0:
            if pb < 1.5:
                score = 1.0
            elif pb > 4.0:
                score = -1.0
            else:
                # Linear interpolation between the value/growth thresholds
                score = 1.0 - (pb - 1.5) / 2.5 * 2
        elif pd.notna(pe) and pe > 0:
            # P/E based scoring: <15 value, >25 growth
            if pe < 15:
                score = 1.0
            elif pe > 25:
                score = -1.0
            else:
                score = 1.0 - (pe - 15) / 10 * 2
        else:
            # No usable valuation metric -> neutral
            score = 0.0
        scores.append(score)

    return float(np.dot(scores, self.weights))

calculate_momentum_tilt()

Calculate portfolio momentum tilt (MOM exposure).

Returns:

Type Description
float

Momentum tilt from -1 (low momentum) to +1 (high momentum)

Source code in portfolio_analysis/factors/exposures.py
def calculate_momentum_tilt(self) -> float:
    """
    Calculate portfolio momentum tilt (MOM exposure).

    Returns
    -------
    float
        Momentum tilt from -1 (low momentum) to +1 (high momentum)
    """
    momentum = self._calculate_momentum(lookback_months=12)

    # Normalize trailing 12-month returns: winners (>20%) score +1,
    # losers (<0%) score -1, everything between is interpolated linearly.
    # Market average is roughly 10% annual.
    def score_one(trailing: float) -> float:
        if trailing > 0.2:
            return 1.0
        if trailing < 0.0:
            return -1.0
        return trailing / 0.2 * 2 - 1

    scores = [score_one(momentum.get(ticker, 0)) for ticker in self.tickers]
    return float(np.dot(scores, self.weights))

calculate_quality_tilt()

Calculate portfolio quality tilt (RMW-like exposure).

Quality is based on profitability and financial health.

Returns:

Type Description
float

Quality tilt from -1 (low quality) to +1 (high quality)

Source code in portfolio_analysis/factors/exposures.py
def calculate_quality_tilt(self) -> float:
    """
    Calculate portfolio quality tilt (RMW-like exposure).

    Quality is based on profitability and financial health.

    Returns
    -------
    float
        Quality tilt from -1 (low quality) to +1 (high quality)
    """
    chars = self._fetch_characteristics()

    scores = []
    for ticker in self.tickers:
        row = chars.loc[ticker]

        # Components start as None (metric missing).  This fixes a bug in
        # the previous version, which filtered with `s != 0` and therefore
        # dropped legitimate scores of exactly 0 (e.g. a profit margin of
        # exactly 10% interpolates to 0), skewing the average.
        profit_score = None
        roe_score = None
        debt_score = None

        # Profit margin (>15% good, <5% poor)
        pm = row.get("profit_margin")
        if pd.notna(pm):
            if pm > 0.15:
                profit_score = 1
            elif pm < 0.05:
                profit_score = -1
            else:
                profit_score = (pm - 0.05) / 0.10 * 2 - 1

        # ROE (>15% good, <8% poor)
        roe = row.get("roe")
        if pd.notna(roe):
            if roe > 0.15:
                roe_score = 1
            elif roe < 0.08:
                roe_score = -1
            else:
                roe_score = (roe - 0.08) / 0.07 * 2 - 1

        # Debt/Equity (low is better: <50% good, >150% poor)
        de = row.get("debt_to_equity")
        if pd.notna(de):
            de_ratio = de / 100  # Often reported as percentage
            if de_ratio < 0.5:
                debt_score = 1
            elif de_ratio > 1.5:
                debt_score = -1
            else:
                debt_score = 1 - (de_ratio - 0.5) / 1.0 * 2

        # Average only the components whose metric was actually present
        available = [
            s for s in (profit_score, roe_score, debt_score) if s is not None
        ]
        scores.append(np.mean(available) if available else 0)

    return float(np.dot(scores, self.weights))

calculate_investment_tilt()

Calculate portfolio investment tilt (CMA-like exposure).

Conservative investment (low asset growth) vs aggressive.

Returns:

Type Description
float

Investment tilt from -1 (aggressive) to +1 (conservative)

Source code in portfolio_analysis/factors/exposures.py
def calculate_investment_tilt(self) -> float:
    """
    Calculate portfolio investment tilt (CMA-like exposure).

    Conservative investment (low asset growth) vs aggressive.

    Returns
    -------
    float
        Investment tilt from -1 (aggressive) to +1 (conservative)
    """
    chars = self._fetch_characteristics()

    # Revenue growth proxies investment aggressiveness:
    # >20% growth = aggressive (-1), <5% = conservative (+1),
    # in between interpolated linearly; missing metric = neutral (0).
    def score_one(growth) -> float:
        if pd.isna(growth):
            return 0.0
        if growth > 0.20:
            return -1.0
        if growth < 0.05:
            return 1.0
        return 1.0 - (growth - 0.05) / 0.15 * 2

    scores = [
        score_one(chars.loc[ticker].get("revenue_growth"))
        for ticker in self.tickers
    ]
    return float(np.dot(scores, self.weights))

get_all_tilts()

Calculate all factor tilts for the portfolio.

Returns:

Type Description
dict

Dictionary with all factor tilts: size (SMB-like exposure), value (HML-like exposure), momentum (MOM-like exposure), quality (RMW-like exposure), and investment (CMA-like exposure).

Source code in portfolio_analysis/factors/exposures.py
def get_all_tilts(self) -> dict[str, float]:
    """
    Calculate all factor tilts for the portfolio.

    Returns
    -------
    dict
        Dictionary with all factor tilts:
        - size: SMB-like exposure
        - value: HML-like exposure
        - momentum: MOM-like exposure
        - quality: RMW-like exposure
        - investment: CMA-like exposure
    """
    # Map each tilt name to its calculator, then evaluate all of them.
    tilt_methods = {
        "size": self.calculate_size_tilt,
        "value": self.calculate_value_tilt,
        "momentum": self.calculate_momentum_tilt,
        "quality": self.calculate_quality_tilt,
        "investment": self.calculate_investment_tilt,
    }
    return {name: method() for name, method in tilt_methods.items()}

get_characteristics_table()

Get a table of fundamental characteristics for all holdings.

Returns:

Type Description
DataFrame

Characteristics for each holding with portfolio weight

Source code in portfolio_analysis/factors/exposures.py
def get_characteristics_table(self) -> pd.DataFrame:
    """
    Get a table of fundamental characteristics for all holdings.

    Returns
    -------
    pd.DataFrame
        Characteristics for each holding with portfolio weight
    """
    table = self._fetch_characteristics().copy()
    table["weight"] = self.weights

    # Canonical column order; columns not present are silently dropped.
    preferred_order = (
        "weight",
        "market_cap",
        "pe_ratio",
        "pb_ratio",
        "dividend_yield",
        "beta",
        "profit_margin",
        "roe",
        "debt_to_equity",
        "revenue_growth",
        "earnings_growth",
    )
    present = [column for column in preferred_order if column in table.columns]

    return table[present]

summary()

Generate a text summary of factor exposures.

Source code in portfolio_analysis/factors/exposures.py
def summary(self) -> str:
    """Generate a text summary of factor exposures."""
    tilts = self.get_all_tilts()

    # (positive label, negative label, neutral label) per factor; a tilt
    # beyond +/-0.3 earns the directional label, otherwise the neutral one.
    labels = {
        "size": ("Small Cap", "Large Cap", "Neutral"),
        "value": ("Value", "Growth", "Blend"),
        "momentum": ("High Mom", "Low Mom", "Neutral"),
        "quality": ("High Quality", "Low Quality", "Neutral"),
        "investment": ("Conservative", "Aggressive", "Neutral"),
    }

    def interpret(factor: str, tilt: float) -> str:
        positive, negative, neutral = labels[factor]
        if tilt > 0.3:
            return positive
        if tilt < -0.3:
            return negative
        return neutral

    divider = "=" * 50
    lines = [
        f"\n{divider}",
        "Portfolio Factor Exposures (Characteristic-Based)",
        divider,
        f"{'Factor':<15} {'Tilt':>10} {'Interpretation':<25}",
        f"{'-' * 50}",
    ]

    for factor, tilt in tilts.items():
        lines.append(
            f"{factor.capitalize():<15} {tilt:>10.2f} {interpret(factor, tilt):<25}"
        )

    lines.append(divider)
    return "\n".join(lines)

FactorAttribution

Decompose portfolio returns and risk into factor contributions.

This class uses factor regression to attribute portfolio performance to systematic factors and idiosyncratic (alpha) components.

Parameters:

Name Type Description Default
returns Series

Portfolio returns with datetime index

required
factor_data DataFrame

Factor data from FactorDataLoader

required
annualization_factor int

Number of periods per year (252 for daily, 12 for monthly)

252

Examples:

>>> from portfolio_analysis.factors import FactorAttribution, FactorDataLoader
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> attribution = FactorAttribution(portfolio_returns, ff3)
>>> decomp = attribution.decompose_returns()
>>> print(f"Market contribution: {decomp['Mkt-RF']:.2%}")
>>> print(f"Alpha: {decomp['alpha']:.2%}")
Source code in portfolio_analysis/factors/attribution.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
class FactorAttribution:
    """
    Decompose portfolio returns and risk into factor contributions.

    This class uses factor regression to attribute portfolio performance
    to systematic factors and idiosyncratic (alpha) components.

    Parameters
    ----------
    returns : pd.Series
        Portfolio returns with datetime index
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader
    annualization_factor : int, default 252
        Number of periods per year (252 for daily, 12 for monthly)

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorAttribution, FactorDataLoader
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> attribution = FactorAttribution(portfolio_returns, ff3)
    >>> decomp = attribution.decompose_returns()
    >>> print(f"Market contribution: {decomp['Mkt-RF']:.2%}")
    >>> print(f"Alpha: {decomp['alpha']:.2%}")
    """

    TRADING_DAYS = 252

    def __init__(
        self,
        returns: pd.Series,
        factor_data: pd.DataFrame,
        annualization_factor: int | None = None,
    ):
        self.raw_returns = returns
        self.raw_factor_data = factor_data

        # Align data
        self.excess_returns, self.factor_data = align_returns_with_factors(
            returns, factor_data, compute_excess=True
        )

        # Auto-detect frequency: average spacing over 20 calendar days
        # between observations is treated as monthly data.
        if annualization_factor is None:
            if len(self.excess_returns) > 1:
                avg_days = (
                    self.excess_returns.index[-1] - self.excess_returns.index[0]
                ).days / len(self.excess_returns)
                annualization_factor = 12 if avg_days > 20 else self.TRADING_DAYS
            else:
                annualization_factor = self.TRADING_DAYS

        self.annualization_factor = annualization_factor

        # Create regression object for analysis
        self._regression = FactorRegression(returns, factor_data, annualization_factor)

    def decompose_returns(self, model: str | FactorModel = "ff3") -> dict[str, float]:
        """
        Decompose total returns into factor contributions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use for decomposition

        Returns
        -------
        dict
            Dictionary with:
            - 'total': Total annualized return
            - 'risk_free': Risk-free contribution
            - One key per factor with its return contribution
            - 'alpha': Idiosyncratic return (Jensen's alpha)
        """
        # Run regression to get betas
        reg_results = self._regression.run_regression(model)
        factors = reg_results.factors

        # Calculate average factor returns (annualized)
        avg_factor_returns = (
            self.factor_data[factors].mean() * self.annualization_factor
        )

        # Risk-free rate contribution
        rf_return = self.factor_data["RF"].mean() * self.annualization_factor

        # Total return
        total_return = (
            self.raw_returns.loc[self.excess_returns.index].mean()
            * self.annualization_factor
        )

        # Factor contributions = beta * average factor return
        contributions = {}
        contributions["total"] = total_return
        contributions["risk_free"] = rf_return

        for factor in factors:
            contributions[factor] = (
                reg_results.betas[factor] * avg_factor_returns[factor]
            )

        # Alpha is the residual
        contributions["alpha"] = reg_results.alpha

        return contributions

    def decompose_risk(self, model: str | FactorModel = "ff3") -> dict[str, float]:
        """
        Decompose portfolio variance into factor contributions.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use for decomposition

        Returns
        -------
        dict
            Dictionary with:
            - 'total': Total annualized variance
            - One key per factor with its variance contribution
            - 'idiosyncratic': Residual (unexplained) variance
            - 'r_squared': Fraction explained by factors
        """
        # Run regression
        reg_results = self._regression.run_regression(model)
        factors = reg_results.factors

        # Factor covariance matrix (annualized)
        factor_cov = self.factor_data[factors].cov() * self.annualization_factor

        # Total variance (annualized)
        total_variance = self.excess_returns.var() * self.annualization_factor

        # Systematic variance = beta' * Cov(factors) * beta
        betas = np.array([reg_results.betas[f] for f in factors])
        systematic_variance = betas @ factor_cov.values @ betas

        # Individual factor contributions (marginal)
        contributions = {"total": total_variance}

        for factor in factors:
            # Factor contribution = beta_i^2 * var(factor_i).
            # NOTE: these marginal terms ignore cross-factor covariances,
            # so they need not sum to the systematic variance above.
            factor_var = self.factor_data[factor].var() * self.annualization_factor
            contributions[factor] = reg_results.betas[factor] ** 2 * factor_var

        # Idiosyncratic variance
        contributions["idiosyncratic"] = total_variance - systematic_variance
        contributions["r_squared"] = reg_results.r_squared

        return contributions

    def get_rolling_attribution(
        self, model: str | FactorModel = "ff3", window: int = 60
    ) -> pd.DataFrame:
        """
        Calculate rolling return attribution over time.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use
        window : int, default 60
            Rolling window size (number of periods)

        Returns
        -------
        pd.DataFrame
            DataFrame with rolling factor contributions, indexed by date
        """
        # Get factors for the model; unknown strings fall back to FF3
        if isinstance(model, str):
            model_enum = {
                "capm": FactorModel.CAPM,
                "ff3": FactorModel.FF3,
                "ff5": FactorModel.FF5,
                "carhart": FactorModel.CARHART,
            }.get(model.lower())
            factors = model_enum.value if model_enum else FactorModel.FF3.value
        else:
            factors = model.value

        # Get rolling betas
        rolling_betas = self._regression.run_rolling_regression(model, window)

        # Calculate rolling factor returns
        results = []
        for date in rolling_betas.index:
            # Get trailing window of factor data ending at `date`
            loc = self.factor_data.index.get_loc(date)
            start_loc = max(0, loc - window + 1)
            factor_window = self.factor_data.iloc[start_loc : loc + 1]

            # Average factor returns (annualized)
            avg_returns = factor_window[factors].mean() * self.annualization_factor

            # Contributions
            row = {"date": date}
            for factor in factors:
                beta = rolling_betas.loc[date, factor]
                row[f"{factor}_contrib"] = beta * avg_returns[factor]
            row["alpha"] = rolling_betas.loc[date, "alpha"]

            results.append(row)

        df = pd.DataFrame(results)
        if "date" in df.columns:
            df = df.set_index("date")

        return df

    def get_attribution_summary(self, model: str | FactorModel = "ff3") -> pd.DataFrame:
        """
        Get a summary table of return and risk attribution.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model to use

        Returns
        -------
        pd.DataFrame
            Summary table with return and risk contributions
        """
        return_decomp = self.decompose_returns(model)
        risk_decomp = self.decompose_risk(model)

        # Build summary
        rows = []

        # Total
        rows.append(
            {
                "Component": "Total",
                "Return (%)": return_decomp["total"] * 100,
                "Variance": risk_decomp["total"],
                "Std Dev (%)": np.sqrt(risk_decomp["total"]) * 100,
            }
        )

        # Risk-free
        rows.append(
            {
                "Component": "Risk-Free",
                "Return (%)": return_decomp["risk_free"] * 100,
                "Variance": 0,
                "Std Dev (%)": 0,
            }
        )

        # Factors
        reg_results = self._regression.run_regression(model)
        for factor in reg_results.factors:
            rows.append(
                {
                    "Component": factor,
                    "Return (%)": return_decomp[factor] * 100,
                    "Variance": risk_decomp.get(factor, 0),
                    "Std Dev (%)": np.sqrt(risk_decomp.get(factor, 0)) * 100,
                }
            )

        # Alpha / Idiosyncratic (variance clamped at 0 before the sqrt,
        # since the residual can be slightly negative numerically)
        rows.append(
            {
                "Component": "Alpha (Idiosyncratic)",
                "Return (%)": return_decomp["alpha"] * 100,
                "Variance": risk_decomp["idiosyncratic"],
                "Std Dev (%)": np.sqrt(max(0, risk_decomp["idiosyncratic"])) * 100,
            }
        )

        return pd.DataFrame(rows)

    @classmethod
    def from_composite(
        cls,
        composite_results: CompositeRegressionResults,
        returns_dict: dict[str, pd.Series],
        factor_data_dict: dict[str, pd.DataFrame],
    ) -> dict[str, float]:
        """
        Decompose composite portfolio returns into factor contributions.

        Computes a weighted-average return attribution across all constituents,
        each using its own regional factor data.

        Parameters
        ----------
        composite_results : CompositeRegressionResults
            Results from ``CompositeFactorRegression.run_composite_regression()``.
        returns_dict : dict[str, pd.Series]
            Per-ticker return series ({ticker: returns}).
        factor_data_dict : dict[str, pd.DataFrame]
            Per-region factor data ({region: factor_df}).

        Returns
        -------
        dict[str, float]
            Decomposition with keys: total, risk_free, each factor name, alpha.

        Raises
        ------
        ValueError
            If the composite results have zero coverage.
        """
        total_weight = composite_results.coverage
        if total_weight == 0:
            raise ValueError("Composite results have zero coverage.")

        scale = 1.0 / total_weight
        contributions: dict[str, float] = {}

        for ticker, result in composite_results.constituent_results.items():
            weight = composite_results.portfolio_weights.get(ticker, 0)
            region = composite_results.region_map.get(ticker, "us")
            factor_data = factor_data_dict.get(region)

            # Skip constituents with no factor data or no return series
            if factor_data is None or ticker not in returns_dict:
                continue

            ret = returns_dict[ticker]

            # Detect annualization factor (same >20-day heuristic as __init__)
            if len(ret) > 1:
                avg_days = (ret.index[-1] - ret.index[0]).days / len(ret)
                ann_factor = 12 if avg_days > 20 else 252
            else:
                ann_factor = 252

            # Average factor returns (annualized) over the overlapping dates
            common = ret.index.intersection(factor_data.index)
            if len(common) == 0:
                continue
            aligned_factors = factor_data.loc[common]

            for factor in result.factors:
                if factor in aligned_factors.columns:
                    avg_ret = aligned_factors[factor].mean() * ann_factor
                    contrib = result.betas[factor] * avg_ret * weight
                    contributions[factor] = contributions.get(factor, 0) + contrib

            # Risk-free
            if "RF" in aligned_factors.columns:
                rf = aligned_factors["RF"].mean() * ann_factor * weight
                contributions["risk_free"] = contributions.get("risk_free", 0) + rf

            # Total return
            total_ret = ret.loc[common].mean() * ann_factor * weight
            contributions["total"] = contributions.get("total", 0) + total_ret

        # Scale all by coverage
        contributions = {k: v * scale for k, v in contributions.items()}

        # Alpha from composite results
        contributions["alpha"] = composite_results.weighted_alpha

        return contributions

    def summary(self, model: str | FactorModel = "ff3") -> str:
        """Generate a text summary of factor attribution."""
        return_decomp = self.decompose_returns(model)
        risk_decomp = self.decompose_risk(model)

        model_name = model.name if isinstance(model, FactorModel) else model.upper()

        lines = [
            f"\n{'=' * 60}",
            f"Factor Attribution Summary: {model_name}",
            f"{'=' * 60}",
            "",
            "RETURN ATTRIBUTION",
            f"{'-' * 40}",
            f"{'Component':<20} {'Return':>12}",
            f"{'-' * 40}",
            f"{'Total':<20} {return_decomp['total']*100:>11.2f}%",
            f"{'Risk-Free':<20} {return_decomp['risk_free']*100:>11.2f}%",
        ]

        # Factor contributions
        reg_results = self._regression.run_regression(model)
        for factor in reg_results.factors:
            lines.append(f"{factor:<20} {return_decomp[factor]*100:>11.2f}%")
        lines.append(f"{'Alpha':<20} {return_decomp['alpha']*100:>11.2f}%")

        lines.extend(
            [
                "",
                "RISK ATTRIBUTION",
                f"{'-' * 40}",
                f"{'Component':<20} {'Variance':>12} {'% of Total':>12}",
                f"{'-' * 40}",
            ]
        )

        total_var = risk_decomp["total"]
        for factor in reg_results.factors:
            pct = risk_decomp[factor] / total_var * 100 if total_var > 0 else 0
            lines.append(f"{factor:<20} {risk_decomp[factor]:>12.6f} {pct:>11.1f}%")

        idio_pct = (
            risk_decomp["idiosyncratic"] / total_var * 100 if total_var > 0 else 0
        )
        lines.append(
            f"{'Idiosyncratic':<20} {risk_decomp['idiosyncratic']:>12.6f} {idio_pct:>11.1f}%"
        )
        lines.append(f"\nR-squared: {risk_decomp['r_squared']:.4f}")
        lines.append("=" * 60)

        return "\n".join(lines)

decompose_returns(model='ff3')

Decompose total returns into factor contributions.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use for decomposition

'ff3'

Returns:

Type Description
dict

Dictionary with: 'total' (total annualized return), 'risk_free' (risk-free contribution), one key per factor with its return contribution, and 'alpha' (idiosyncratic return, Jensen's alpha).

Source code in portfolio_analysis/factors/attribution.py
def decompose_returns(self, model: str | FactorModel = "ff3") -> dict[str, float]:
    """
    Decompose total returns into factor contributions.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use for decomposition

    Returns
    -------
    dict
        Dictionary with:
        - 'total': Total annualized return
        - 'risk_free': Risk-free contribution
        - One key per factor with its return contribution
        - 'alpha': Idiosyncratic return (Jensen's alpha)
    """
    # Regression supplies the betas for the chosen model
    reg_results = self._regression.run_regression(model)
    factors = reg_results.factors

    ann = self.annualization_factor

    # Average factor premia over the sample, annualized
    avg_factor_returns = self.factor_data[factors].mean() * ann

    contributions = {
        # Total annualized portfolio return over the aligned sample
        "total": self.raw_returns.loc[self.excess_returns.index].mean() * ann,
        # Annualized risk-free contribution
        "risk_free": self.factor_data["RF"].mean() * ann,
    }

    # Each factor contributes beta * its average annualized return
    for factor in factors:
        contributions[factor] = (
            reg_results.betas[factor] * avg_factor_returns[factor]
        )

    # Whatever the factors do not explain is alpha
    contributions["alpha"] = reg_results.alpha

    return contributions

decompose_risk(model='ff3')

Decompose portfolio variance into factor contributions.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use for decomposition

'ff3'

Returns:

Type Description
dict

Dictionary with: 'total' (total annualized variance), one key per factor with its variance contribution, 'idiosyncratic' (residual, unexplained variance), and 'r_squared' (fraction explained by the factors).

Source code in portfolio_analysis/factors/attribution.py
def decompose_risk(self, model: str | FactorModel = "ff3") -> dict[str, float]:
    """
    Decompose portfolio variance into factor contributions.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use for decomposition

    Returns
    -------
    dict
        Dictionary with:
        - 'total': Total annualized variance
        - One key per factor with its variance contribution
        - 'idiosyncratic': Residual (unexplained) variance
        - 'r_squared': Fraction explained by factors
    """
    # Run regression
    reg_results = self._regression.run_regression(model)
    factors = reg_results.factors

    # Factor covariance matrix (annualized)
    factor_cov = self.factor_data[factors].cov() * self.annualization_factor

    # Total variance (annualized)
    total_variance = self.excess_returns.var() * self.annualization_factor

    # Systematic variance = beta' * Cov(factors) * beta
    betas = np.array([reg_results.betas[f] for f in factors])
    systematic_variance = betas @ factor_cov.values @ betas

    # Individual factor contributions (marginal)
    contributions = {"total": total_variance}

    # The enumerate() index in the previous version was unused.
    for factor in factors:
        # Factor contribution = beta_i^2 * var(factor_i).
        # NOTE: these marginal terms ignore cross-factor covariances,
        # so they need not sum to the systematic variance above.
        factor_var = self.factor_data[factor].var() * self.annualization_factor
        contributions[factor] = reg_results.betas[factor] ** 2 * factor_var

    # Idiosyncratic variance
    contributions["idiosyncratic"] = total_variance - systematic_variance
    contributions["r_squared"] = reg_results.r_squared

    return contributions

get_rolling_attribution(model='ff3', window=60)

Calculate rolling return attribution over time.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use

'ff3'
window int

Rolling window size (number of periods)

60

Returns:

Type Description
DataFrame

DataFrame with rolling factor contributions, indexed by date

Source code in portfolio_analysis/factors/attribution.py
def get_rolling_attribution(
    self, model: str | FactorModel = "ff3", window: int = 60
) -> pd.DataFrame:
    """
    Calculate rolling return attribution over time.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use
    window : int, default 60
        Rolling window size (number of periods)

    Returns
    -------
    pd.DataFrame
        DataFrame with rolling factor contributions, indexed by date
    """
    # Resolve the factor list for the requested model; unknown strings
    # fall back to FF3.
    if isinstance(model, str):
        name_to_model = {
            "capm": FactorModel.CAPM,
            "ff3": FactorModel.FF3,
            "ff5": FactorModel.FF5,
            "carhart": FactorModel.CARHART,
        }
        factors = name_to_model.get(model.lower(), FactorModel.FF3).value
    else:
        factors = model.value

    # Rolling betas per date from the underlying regression
    rolling_betas = self._regression.run_rolling_regression(model, window)

    rows = []
    for date in rolling_betas.index:
        # Trailing window of factor data ending at `date`
        end = self.factor_data.index.get_loc(date)
        begin = max(0, end - window + 1)
        window_slice = self.factor_data.iloc[begin : end + 1]

        # Average factor returns over the window, annualized
        avg_returns = window_slice[factors].mean() * self.annualization_factor

        # Contribution of each factor = rolling beta * average return
        entry = {"date": date}
        for factor in factors:
            entry[f"{factor}_contrib"] = (
                rolling_betas.loc[date, factor] * avg_returns[factor]
            )
        entry["alpha"] = rolling_betas.loc[date, "alpha"]
        rows.append(entry)

    frame = pd.DataFrame(rows)
    if "date" in frame.columns:
        frame = frame.set_index("date")

    return frame

get_attribution_summary(model='ff3')

Get a summary table of return and risk attribution.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model to use

'ff3'

Returns:

Type Description
DataFrame

Summary table with return and risk contributions

Source code in portfolio_analysis/factors/attribution.py
def get_attribution_summary(self, model: str | FactorModel = "ff3") -> pd.DataFrame:
    """
    Get a summary table of return and risk attribution.

    One row per component: Total, Risk-Free, each model factor, and
    Alpha (Idiosyncratic), with return, variance, and standard-deviation
    columns.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model to use

    Returns
    -------
    pd.DataFrame
        Summary table with return and risk contributions
    """
    returns = self.decompose_returns(model)
    risks = self.decompose_risk(model)

    records = [
        {
            "Component": "Total",
            "Return (%)": 100 * returns["total"],
            "Variance": risks["total"],
            "Std Dev (%)": 100 * np.sqrt(risks["total"]),
        },
        {
            "Component": "Risk-Free",
            "Return (%)": 100 * returns["risk_free"],
            "Variance": 0,
            "Std Dev (%)": 0,
        },
    ]

    # One row per factor in the fitted model.
    for name in self._regression.run_regression(model).factors:
        variance = risks.get(name, 0)
        records.append(
            {
                "Component": name,
                "Return (%)": 100 * returns[name],
                "Variance": variance,
                "Std Dev (%)": 100 * np.sqrt(variance),
            }
        )

    # Residual (alpha) component; variance clamped at zero for the sqrt.
    idio = risks["idiosyncratic"]
    records.append(
        {
            "Component": "Alpha (Idiosyncratic)",
            "Return (%)": 100 * returns["alpha"],
            "Variance": idio,
            "Std Dev (%)": 100 * np.sqrt(max(0, idio)),
        }
    )

    return pd.DataFrame(records)

from_composite(composite_results, returns_dict, factor_data_dict) classmethod

Decompose composite portfolio returns into factor contributions.

Computes a weighted-average return attribution across all constituents, each using its own regional factor data.

Parameters:

Name Type Description Default
composite_results CompositeRegressionResults

Results from CompositeFactorRegression.run_composite_regression().

required
returns_dict dict[str, Series]

Per-ticker return series ({ticker: returns}).

required
factor_data_dict dict[str, DataFrame]

Per-region factor data ({region: factor_df}).

required

Returns:

Type Description
dict[str, float]

Decomposition with keys: total, risk_free, each factor name, alpha.

Source code in portfolio_analysis/factors/attribution.py
@classmethod
def from_composite(
    cls,
    composite_results: CompositeRegressionResults,
    returns_dict: dict[str, pd.Series],
    factor_data_dict: dict[str, pd.DataFrame],
) -> dict[str, float]:
    """
    Decompose composite portfolio returns into factor contributions.

    Each constituent is attributed against its own regional factor data,
    and the per-ticker contributions are combined as a weighted average.

    Parameters
    ----------
    composite_results : CompositeRegressionResults
        Results from ``CompositeFactorRegression.run_composite_regression()``.
    returns_dict : dict[str, pd.Series]
        Per-ticker return series ({ticker: returns}).
    factor_data_dict : dict[str, pd.DataFrame]
        Per-region factor data ({region: factor_df}).

    Returns
    -------
    dict[str, float]
        Decomposition with keys: total, risk_free, each factor name, alpha.
    """
    coverage = composite_results.coverage
    if coverage == 0:
        raise ValueError("Composite results have zero coverage.")

    scale = 1.0 / coverage
    totals: dict[str, float] = {}

    def _accumulate(key: str, amount: float) -> None:
        # Sum contributions across constituents under the given key.
        totals[key] = totals.get(key, 0) + amount

    for ticker, result in composite_results.constituent_results.items():
        weight = composite_results.portfolio_weights.get(ticker, 0)
        region = composite_results.region_map.get(ticker, "us")
        factor_data = factor_data_dict.get(region)

        # Skip tickers lacking either factor data or a return series.
        if factor_data is None or ticker not in returns_dict:
            continue

        ret = returns_dict[ticker]

        # Infer periodicity: >20 days between observations implies monthly.
        if len(ret) > 1:
            avg_days = (ret.index[-1] - ret.index[0]).days / len(ret)
            ann_factor = 12 if avg_days > 20 else 252
        else:
            ann_factor = 252

        # Restrict to dates shared by the returns and factor data.
        common = ret.index.intersection(factor_data.index)
        if len(common) == 0:
            continue
        aligned_factors = factor_data.loc[common]

        for factor in result.factors:
            if factor in aligned_factors.columns:
                avg_ret = aligned_factors[factor].mean() * ann_factor
                _accumulate(factor, result.betas[factor] * avg_ret * weight)

        if "RF" in aligned_factors.columns:
            _accumulate("risk_free", aligned_factors["RF"].mean() * ann_factor * weight)

        _accumulate("total", ret.loc[common].mean() * ann_factor * weight)

    # Normalize by the covered weight, then attach the composite alpha.
    contributions = {k: v * scale for k, v in totals.items()}
    contributions["alpha"] = composite_results.weighted_alpha

    return contributions

summary(model='ff3')

Generate a text summary of factor attribution.

Source code in portfolio_analysis/factors/attribution.py
def summary(self, model: str | FactorModel = "ff3") -> str:
    """Generate a text summary of factor attribution."""
    returns = self.decompose_returns(model)
    risks = self.decompose_risk(model)

    model_name = model.name if isinstance(model, FactorModel) else model.upper()

    bar = "=" * 60
    rule = "-" * 40

    lines = [
        f"\n{bar}",
        f"Factor Attribution Summary: {model_name}",
        bar,
        "",
        "RETURN ATTRIBUTION",
        rule,
        f"{'Component':<20} {'Return':>12}",
        rule,
        f"{'Total':<20} {returns['total'] * 100:>11.2f}%",
        f"{'Risk-Free':<20} {returns['risk_free'] * 100:>11.2f}%",
    ]

    # Per-factor return contributions, then the residual alpha.
    factor_names = self._regression.run_regression(model).factors
    lines.extend(f"{name:<20} {returns[name] * 100:>11.2f}%" for name in factor_names)
    lines.append(f"{'Alpha':<20} {returns['alpha'] * 100:>11.2f}%")

    lines += [
        "",
        "RISK ATTRIBUTION",
        rule,
        f"{'Component':<20} {'Variance':>12} {'% of Total':>12}",
        rule,
    ]

    total_var = risks["total"]

    def share(value: float) -> float:
        # Percentage of total variance; zero when total is non-positive.
        return value / total_var * 100 if total_var > 0 else 0

    lines.extend(
        f"{name:<20} {risks[name]:>12.6f} {share(risks[name]):>11.1f}%"
        for name in factor_names
    )

    idio = risks["idiosyncratic"]
    lines.append(f"{'Idiosyncratic':<20} {idio:>12.6f} {share(idio):>11.1f}%")
    lines.append(f"\nR-squared: {risks['r_squared']:.4f}")
    lines.append(bar)

    return "\n".join(lines)

FactorOptimizer

Factor-aware portfolio optimization.

Optimize portfolios to achieve target factor exposures, minimize factor exposure, or generate factor-efficient frontiers.

Parameters:

Name Type Description Default
price_data DataFrame

Historical price data with datetime index and tickers as columns

required
factor_data DataFrame

Factor data from FactorDataLoader

required
risk_free_rate float

Annual risk-free rate for Sharpe calculations

0.02

Examples:

>>> from portfolio_analysis.factors import FactorOptimizer, FactorDataLoader
>>> factor_loader = FactorDataLoader()
>>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
>>> optimizer = FactorOptimizer(price_data, ff3)
>>> result = optimizer.optimize_target_exposures(
...     target_betas={'Mkt-RF': 1.0, 'SMB': 0.3, 'HML': 0.2}
... )
>>> print(result['weights'])
Source code in portfolio_analysis/factors/optimization.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
class FactorOptimizer:
    """
    Factor-aware portfolio optimization.

    Optimize portfolios to achieve target factor exposures, minimize
    factor exposure, or generate factor-efficient frontiers.

    Parameters
    ----------
    price_data : pd.DataFrame
        Historical price data with datetime index and tickers as columns
    factor_data : pd.DataFrame
        Factor data from FactorDataLoader (must include an 'RF' column)
    risk_free_rate : float, default 0.02
        Annual risk-free rate for Sharpe calculations

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorOptimizer, FactorDataLoader
    >>> factor_loader = FactorDataLoader()
    >>> ff3 = factor_loader.get_ff3_factors('2015-01-01', '2023-12-31')
    >>> optimizer = FactorOptimizer(price_data, ff3)
    >>> result = optimizer.optimize_target_exposures(
    ...     target_betas={'Mkt-RF': 1.0, 'SMB': 0.3, 'HML': 0.2}
    ... )
    >>> print(result['weights'])
    """

    # Periods per year used to annualize per-period statistics.
    TRADING_DAYS = 252

    def __init__(
        self,
        price_data: pd.DataFrame,
        factor_data: pd.DataFrame,
        risk_free_rate: float = 0.02,
    ):
        self.price_data = price_data
        self.tickers = list(price_data.columns)
        self.n_assets = len(self.tickers)
        self.risk_free_rate = risk_free_rate

        # Simple (not log) returns from prices; first row is dropped.
        self.returns = price_data.pct_change().dropna()

        # Align returns with factor data on their common dates.
        common_dates = self.returns.index.intersection(factor_data.index)
        if len(common_dates) == 0:
            raise ValueError("No overlapping dates between price data and factor data")

        self.returns = self.returns.loc[common_dates]
        self.factor_data = factor_data.loc[common_dates]

        # Excess returns over the per-period risk-free rate.
        self.excess_returns = self.returns.sub(self.factor_data["RF"], axis=0)

        # Pre-compute individual asset betas for all available factors.
        self._asset_betas = self._compute_asset_betas()

        # Annualized statistics.
        self.mean_returns = self.returns.mean() * self.TRADING_DAYS
        self.cov_matrix = self.returns.cov() * self.TRADING_DAYS

    def _ols_coefficients(self, ticker: str, factors: list[str]) -> np.ndarray:
        """
        OLS regression of one asset's excess returns on the given factors.

        Returns the coefficient vector ``[alpha, beta_1, ..., beta_k]``
        (per-period, not annualized). Shared by beta and alpha estimation
        so the regression logic lives in exactly one place.
        """
        y = self.excess_returns[ticker].values
        X = self.factor_data[factors].values

        # Prepend a constant column so the intercept (alpha) is estimated.
        X_const = np.column_stack([np.ones(len(X)), X])

        return np.linalg.lstsq(X_const, y, rcond=None)[0]

    def _compute_asset_betas(self) -> pd.DataFrame:
        """Compute factor betas for each individual asset."""
        # Only regress on factors actually present in the loaded data.
        factors = [
            f
            for f in ["Mkt-RF", "SMB", "HML", "RMW", "CMA", "MOM"]
            if f in self.factor_data.columns
        ]

        betas = {}
        for ticker in self.tickers:
            coeffs = self._ols_coefficients(ticker, factors)
            # coeffs[0] is alpha; slopes start at index 1.
            betas[ticker] = dict(zip(factors, coeffs[1:]))

        return pd.DataFrame(betas).T

    def _portfolio_betas(self, weights: np.ndarray) -> dict[str, float]:
        """Calculate portfolio-level factor betas given weights."""
        portfolio_betas = {}
        for factor in self._asset_betas.columns:
            portfolio_betas[factor] = float(np.dot(weights, self._asset_betas[factor]))
        return portfolio_betas

    def _portfolio_return(self, weights: np.ndarray) -> float:
        """Calculate expected (annualized) portfolio return."""
        return float(np.dot(weights, self.mean_returns))

    def _portfolio_volatility(self, weights: np.ndarray) -> float:
        """Calculate (annualized) portfolio volatility."""
        return float(np.sqrt(np.dot(weights.T, np.dot(self.cov_matrix, weights))))

    def _portfolio_sharpe(self, weights: np.ndarray) -> float:
        """Calculate portfolio Sharpe ratio (0.0 when volatility is zero)."""
        ret = self._portfolio_return(weights)
        vol = self._portfolio_volatility(weights)
        # Return a float in both branches to honor the annotation.
        return (ret - self.risk_free_rate) / vol if vol > 0 else 0.0

    def optimize_target_exposures(
        self,
        target_betas: dict[str, float],
        weight_bounds: tuple[float, float] = (0, 1),
        tolerance: float = 0.1,
    ) -> dict:
        """
        Optimize portfolio to achieve target factor exposures.

        Minimizes tracking error to target betas while maximizing Sharpe ratio.

        Parameters
        ----------
        target_betas : dict
            Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset
        tolerance : float, default 0.1
            Allowed deviation from target betas

        Returns
        -------
        dict
            Optimal weights, achieved betas, return, volatility, and Sharpe ratio

        Raises
        ------
        ValueError
            If a requested factor is not in the loaded factor data.
        """
        # Validate factors exist
        for factor in target_betas:
            if factor not in self._asset_betas.columns:
                raise ValueError(
                    f"Factor '{factor}' not available. "
                    f"Available: {self._asset_betas.columns.tolist()}"
                )

        def objective(weights):
            # Maximize Sharpe (minimize negative Sharpe)
            return -self._portfolio_sharpe(weights)

        def beta_constraint(weights, factor, target):
            """Constraint: achieved beta should be close to target."""
            achieved = np.dot(weights, self._asset_betas[factor])
            # Non-negative (feasible) when |achieved - target| <= tolerance.
            return tolerance - abs(achieved - target)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

        # Add beta constraints; defaults bind the loop variables per-iteration.
        for factor, target in target_betas.items():
            constraints.append(
                {
                    "type": "ineq",
                    "fun": lambda x, f=factor, t=target: beta_constraint(x, f, t),
                }
            )

        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
            options={"maxiter": 500},
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "target_betas": target_betas,
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def optimize_factor_neutral(
        self,
        factors: list[str],
        weight_bounds: tuple[float, float] = (0, 1),
        tolerance: float = 0.05,
    ) -> dict:
        """
        Optimize portfolio to be neutral to specified factors.

        Parameters
        ----------
        factors : list of str
            Factors to neutralize (e.g., ['SMB', 'HML'])
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset
        tolerance : float, default 0.05
            Maximum allowed absolute beta for neutral factors

        Returns
        -------
        dict
            Optimal weights with near-zero exposure to specified factors

        Raises
        ------
        ValueError
            If a requested factor is not in the loaded factor data.
        """
        # Validate factors
        for factor in factors:
            if factor not in self._asset_betas.columns:
                raise ValueError(f"Factor '{factor}' not available")

        def objective(weights):
            return -self._portfolio_sharpe(weights)

        def neutrality_constraint(weights, factor):
            """Beta should be close to zero."""
            beta = np.dot(weights, self._asset_betas[factor])
            return tolerance - abs(beta)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

        for factor in factors:
            constraints.append(
                {"type": "ineq", "fun": lambda x, f=factor: neutrality_constraint(x, f)}
            )

        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
            options={"maxiter": 500},
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "neutralized_factors": factors,
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def optimize_max_alpha(
        self,
        model: str | FactorModel = "ff3",
        weight_bounds: tuple[float, float] = (0, 1),
    ) -> dict:
        """
        Optimize portfolio to maximize expected alpha.

        Uses pre-computed asset alphas to find the highest-alpha portfolio.

        Parameters
        ----------
        model : str or FactorModel, default 'ff3'
            Factor model for alpha calculation. Unknown strings fall back to FF3.
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset

        Returns
        -------
        dict
            Portfolio weights maximizing expected alpha
        """
        # Resolve the factor list for the requested model.
        if isinstance(model, str):
            model_factors = {
                "capm": ["Mkt-RF"],
                "ff3": ["Mkt-RF", "SMB", "HML"],
                "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
                "carhart": ["Mkt-RF", "SMB", "HML", "MOM"],
            }.get(model.lower(), ["Mkt-RF", "SMB", "HML"])
        else:
            model_factors = model.value

        factors = [f for f in model_factors if f in self.factor_data.columns]

        # Per-asset annualized alpha: intercept of excess returns on factors.
        alphas = []
        for ticker in self.tickers:
            coeffs = self._ols_coefficients(ticker, factors)
            alphas.append(coeffs[0] * self.TRADING_DAYS)  # Annualized alpha

        alphas = np.array(alphas)

        def objective(weights):
            # Negative alpha (minimize)
            return -np.dot(weights, alphas)

        constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]
        bounds = tuple(weight_bounds for _ in range(self.n_assets))
        initial_weights = np.array([1 / self.n_assets] * self.n_assets)

        result = minimize(
            objective,
            initial_weights,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
        )

        optimal_weights = result.x

        return {
            "weights": dict(zip(self.tickers, optimal_weights)),
            "expected_alpha": float(np.dot(optimal_weights, alphas)),
            "asset_alphas": dict(zip(self.tickers, alphas)),
            "achieved_betas": self._portfolio_betas(optimal_weights),
            "return": self._portfolio_return(optimal_weights),
            "volatility": self._portfolio_volatility(optimal_weights),
            "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
            "success": result.success,
        }

    def generate_factor_frontier(
        self,
        factor: str,
        n_points: int = 20,
        weight_bounds: tuple[float, float] = (0, 1),
    ) -> pd.DataFrame:
        """
        Generate efficient frontier varying one factor's exposure.

        Parameters
        ----------
        factor : str
            Factor to vary (e.g., 'SMB', 'HML')
        n_points : int, default 20
            Number of points on the frontier
        weight_bounds : tuple, default (0, 1)
            Min and max weight for each asset

        Returns
        -------
        pd.DataFrame
            Frontier with columns: factor_beta, return, volatility, sharpe_ratio

        Raises
        ------
        ValueError
            If the factor is not in the loaded factor data.
        """
        if factor not in self._asset_betas.columns:
            raise ValueError(f"Factor '{factor}' not available")

        # Sweep between the extreme single-asset betas; long-only portfolios
        # cannot reach beyond that range.
        asset_betas = self._asset_betas[factor].values
        min_beta = asset_betas.min()
        max_beta = asset_betas.max()

        target_betas = np.linspace(min_beta, max_beta, n_points)

        frontier = []
        for target in target_betas:
            try:
                result = self.optimize_target_exposures(
                    target_betas={factor: target},
                    weight_bounds=weight_bounds,
                    tolerance=0.05,
                )
                if result["success"]:
                    frontier.append(
                        {
                            f"{factor}_beta": result["achieved_betas"][factor],
                            "return": result["return"],
                            "volatility": result["volatility"],
                            "sharpe_ratio": result["sharpe_ratio"],
                        }
                    )
            except Exception:
                # Best-effort sweep: skip infeasible targets.
                continue

        return pd.DataFrame(frontier)

    def get_asset_betas(self) -> pd.DataFrame:
        """
        Get factor betas for all individual assets.

        Returns
        -------
        pd.DataFrame
            DataFrame with assets as rows and factors as columns
        """
        return self._asset_betas.copy()

    def summary(self) -> str:
        """Generate a summary of optimization capabilities and asset betas."""
        lines = [
            f"\n{'=' * 60}",
            "Factor Optimizer Summary",
            f"{'=' * 60}",
            f"Assets: {len(self.tickers)}",
            f"Observations: {len(self.returns)}",
            f"Available factors: {self._asset_betas.columns.tolist()}",
            "",
            "Asset Factor Betas:",
            f"{'-' * 60}",
        ]

        # Format asset betas table
        beta_str = self._asset_betas.to_string()
        lines.append(beta_str)

        lines.append("=" * 60)
        return "\n".join(lines)

optimize_target_exposures(target_betas, weight_bounds=(0, 1), tolerance=0.1)

Optimize portfolio to achieve target factor exposures.

Minimizes tracking error to target betas while maximizing Sharpe ratio.

Parameters:

Name Type Description Default
target_betas dict

Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})

required
weight_bounds tuple

Min and max weight for each asset

(0, 1)
tolerance float

Allowed deviation from target betas

0.1

Returns:

Type Description
dict

Optimal weights, achieved betas, return, volatility, and Sharpe ratio

Source code in portfolio_analysis/factors/optimization.py
def optimize_target_exposures(
    self,
    target_betas: dict[str, float],
    weight_bounds: tuple[float, float] = (0, 1),
    tolerance: float = 0.1,
) -> dict:
    """
    Optimize portfolio to achieve target factor exposures.

    The Sharpe ratio is maximized subject to each achieved factor beta
    lying within ``tolerance`` of its target, with weights summing to one.

    Parameters
    ----------
    target_betas : dict
        Target factor exposures (e.g., {'Mkt-RF': 1.0, 'SMB': 0.3})
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset
    tolerance : float, default 0.1
        Allowed deviation from target betas

    Returns
    -------
    dict
        Optimal weights, achieved betas, return, volatility, and Sharpe ratio
    """
    # Reject factors absent from the pre-computed beta table.
    for name in target_betas:
        if name not in self._asset_betas.columns:
            raise ValueError(
                f"Factor '{name}' not available. "
                f"Available: {self._asset_betas.columns.tolist()}"
            )

    def neg_sharpe(w):
        # SLSQP minimizes, so maximize Sharpe via its negative.
        return -self._portfolio_sharpe(w)

    def beta_slack(w, name, target):
        # Non-negative (feasible) when the achieved beta is within
        # tolerance of the target.
        achieved = np.dot(w, self._asset_betas[name])
        return tolerance - abs(achieved - target)

    # Fully-invested constraint plus one band constraint per factor.
    # Default-argument binding freezes each loop value in its lambda.
    constraints = [{"type": "eq", "fun": lambda w: np.sum(w) - 1}]
    constraints.extend(
        {"type": "ineq", "fun": lambda w, f=name, t=target: beta_slack(w, f, t)}
        for name, target in target_betas.items()
    )

    n = self.n_assets
    outcome = minimize(
        neg_sharpe,
        np.array([1 / n] * n),
        method="SLSQP",
        bounds=tuple(weight_bounds for _ in range(n)),
        constraints=constraints,
        options={"maxiter": 500},
    )

    w_opt = outcome.x
    return {
        "weights": dict(zip(self.tickers, w_opt)),
        "achieved_betas": self._portfolio_betas(w_opt),
        "target_betas": target_betas,
        "return": self._portfolio_return(w_opt),
        "volatility": self._portfolio_volatility(w_opt),
        "sharpe_ratio": self._portfolio_sharpe(w_opt),
        "success": outcome.success,
    }

optimize_factor_neutral(factors, weight_bounds=(0, 1), tolerance=0.05)

Optimize portfolio to be neutral to specified factors.

Parameters:

Name Type Description Default
factors list of str

Factors to neutralize (e.g., ['SMB', 'HML'])

required
weight_bounds tuple

Min and max weight for each asset

(0, 1)
tolerance float

Maximum allowed absolute beta for neutral factors

0.05

Returns:

Type Description
dict

Optimal weights with near-zero exposure to specified factors

Source code in portfolio_analysis/factors/optimization.py
def optimize_factor_neutral(
    self,
    factors: list[str],
    weight_bounds: tuple[float, float] = (0, 1),
    tolerance: float = 0.05,
) -> dict:
    """
    Optimize portfolio to be neutral to specified factors.

    Maximizes the Sharpe ratio while forcing each listed factor's
    portfolio beta into the band ``[-tolerance, +tolerance]``.

    Parameters
    ----------
    factors : list of str
        Factors to neutralize (e.g., ['SMB', 'HML'])
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset
    tolerance : float, default 0.05
        Maximum allowed absolute beta for neutral factors

    Returns
    -------
    dict
        Optimal weights with near-zero exposure to specified factors
    """
    # Reject factors absent from the pre-computed beta table.
    for name in factors:
        if name not in self._asset_betas.columns:
            raise ValueError(f"Factor '{name}' not available")

    def neg_sharpe(w):
        # SLSQP minimizes, so maximize Sharpe via its negative.
        return -self._portfolio_sharpe(w)

    def neutral_slack(w, name):
        # Non-negative (feasible) when |beta| is within the tolerance band.
        beta = np.dot(w, self._asset_betas[name])
        return tolerance - abs(beta)

    # Fully-invested constraint plus one neutrality band per factor.
    # Default-argument binding freezes each loop value in its lambda.
    constraints = [{"type": "eq", "fun": lambda w: np.sum(w) - 1}]
    constraints.extend(
        {"type": "ineq", "fun": lambda w, f=name: neutral_slack(w, f)}
        for name in factors
    )

    n = self.n_assets
    outcome = minimize(
        neg_sharpe,
        np.array([1 / n] * n),
        method="SLSQP",
        bounds=tuple(weight_bounds for _ in range(n)),
        constraints=constraints,
        options={"maxiter": 500},
    )

    w_opt = outcome.x
    return {
        "weights": dict(zip(self.tickers, w_opt)),
        "achieved_betas": self._portfolio_betas(w_opt),
        "neutralized_factors": factors,
        "return": self._portfolio_return(w_opt),
        "volatility": self._portfolio_volatility(w_opt),
        "sharpe_ratio": self._portfolio_sharpe(w_opt),
        "success": outcome.success,
    }

optimize_max_alpha(model='ff3', weight_bounds=(0, 1))

Optimize portfolio to maximize expected alpha.

Uses pre-computed asset alphas to find the highest-alpha portfolio.

Parameters:

Name Type Description Default
model str or FactorModel

Factor model for alpha calculation

'ff3'
weight_bounds tuple

Min and max weight for each asset

(0, 1)

Returns:

Type Description
dict

Portfolio weights maximizing expected alpha

Source code in portfolio_analysis/factors/optimization.py
def optimize_max_alpha(
    self,
    model: str | FactorModel = "ff3",
    weight_bounds: tuple[float, float] = (0, 1),
) -> dict:
    """
    Optimize portfolio to maximize expected alpha.

    Runs an OLS regression per asset to estimate its annualized alpha
    under the chosen factor model, then finds the fully-invested
    portfolio with the highest weighted alpha.

    Parameters
    ----------
    model : str or FactorModel, default 'ff3'
        Factor model for alpha calculation. Unknown strings fall back to FF3.
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset

    Returns
    -------
    dict
        Portfolio weights maximizing expected alpha
    """
    # Resolve the factor list for the requested model.
    if isinstance(model, str):
        model_factors = {
            "capm": ["Mkt-RF"],
            "ff3": ["Mkt-RF", "SMB", "HML"],
            "ff5": ["Mkt-RF", "SMB", "HML", "RMW", "CMA"],
            "carhart": ["Mkt-RF", "SMB", "HML", "MOM"],
        }.get(model.lower(), ["Mkt-RF", "SMB", "HML"])
    else:
        model_factors = model.value

    # Keep only factors actually present in the loaded factor data.
    factors = [f for f in model_factors if f in self.factor_data.columns]

    # Per-asset annualized alpha: intercept of excess returns on factors.
    alphas = []
    for ticker in self.tickers:
        y = self.excess_returns[ticker].values
        X = self.factor_data[factors].values
        # Constant column so the intercept (alpha) is estimated.
        X_const = np.column_stack([np.ones(len(X)), X])
        coeffs = np.linalg.lstsq(X_const, y, rcond=None)[0]
        alphas.append(coeffs[0] * self.TRADING_DAYS)  # Annualized alpha

    alphas = np.array(alphas)

    def objective(weights):
        # Negative alpha (minimize) == maximize weighted alpha.
        return -np.dot(weights, alphas)

    # Fully invested: weights sum to one.
    constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]
    bounds = tuple(weight_bounds for _ in range(self.n_assets))
    initial_weights = np.array([1 / self.n_assets] * self.n_assets)

    result = minimize(
        objective,
        initial_weights,
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
    )

    optimal_weights = result.x

    return {
        "weights": dict(zip(self.tickers, optimal_weights)),
        "expected_alpha": float(np.dot(optimal_weights, alphas)),
        "asset_alphas": dict(zip(self.tickers, alphas)),
        "achieved_betas": self._portfolio_betas(optimal_weights),
        "return": self._portfolio_return(optimal_weights),
        "volatility": self._portfolio_volatility(optimal_weights),
        "sharpe_ratio": self._portfolio_sharpe(optimal_weights),
        "success": result.success,
    }

generate_factor_frontier(factor, n_points=20, weight_bounds=(0, 1))

Generate efficient frontier varying one factor's exposure.

Parameters:

Name Type Description Default
factor str

Factor to vary (e.g., 'SMB', 'HML')

required
n_points int

Number of points on the frontier

20
weight_bounds tuple

Min and max weight for each asset

(0, 1)

Returns:

Type Description
DataFrame

Frontier with columns: factor_beta, return, volatility, sharpe_ratio

Source code in portfolio_analysis/factors/optimization.py
def generate_factor_frontier(
    self,
    factor: str,
    n_points: int = 20,
    weight_bounds: tuple[float, float] = (0, 1),
) -> pd.DataFrame:
    """
    Trace an efficient frontier by sweeping one factor's target exposure.

    Target betas are spaced evenly between the smallest and largest beta
    observed among the individual assets; targets the optimizer cannot
    satisfy are skipped silently.

    Parameters
    ----------
    factor : str
        Factor whose exposure is varied (e.g. 'SMB', 'HML')
    n_points : int, default 20
        Number of target exposures to attempt
    weight_bounds : tuple, default (0, 1)
        Min and max weight for each asset

    Returns
    -------
    pd.DataFrame
        One row per feasible target, with columns: {factor}_beta, return,
        volatility, sharpe_ratio
    """
    if factor not in self._asset_betas.columns:
        raise ValueError(f"Factor '{factor}' not available")

    # Any achievable long-only portfolio beta lies between the individual
    # assets' extremes, so sweep that interval.
    per_asset = self._asset_betas[factor].values
    sweep = np.linspace(per_asset.min(), per_asset.max(), n_points)

    points: list[dict] = []
    for target_beta in sweep:
        try:
            solution = self.optimize_target_exposures(
                target_betas={factor: target_beta},
                weight_bounds=weight_bounds,
                tolerance=0.05,
            )
            if solution["success"]:
                points.append(
                    {
                        f"{factor}_beta": solution["achieved_betas"][factor],
                        "return": solution["return"],
                        "volatility": solution["volatility"],
                        "sharpe_ratio": solution["sharpe_ratio"],
                    }
                )
        except Exception:
            # Infeasible target or solver failure: drop this point.
            continue

    return pd.DataFrame(points)

get_asset_betas()

Get factor betas for all individual assets.

Returns:

Type Description
DataFrame

DataFrame with assets as rows and factors as columns

Source code in portfolio_analysis/factors/optimization.py
def get_asset_betas(self) -> pd.DataFrame:
    """
    Return the factor betas for all individual assets.

    Returns
    -------
    pd.DataFrame
        Assets as rows, factors as columns. A defensive copy is returned
        so callers cannot mutate the optimizer's internal state.
    """
    betas_snapshot = self._asset_betas.copy()
    return betas_snapshot

summary()

Generate a summary of optimization capabilities and asset betas.

Source code in portfolio_analysis/factors/optimization.py
def summary(self) -> str:
    """Return a formatted text summary of assets, factors and their betas."""
    rule = "=" * 60
    report = [
        f"\n{rule}",
        "Factor Optimizer Summary",
        rule,
        f"Assets: {len(self.tickers)}",
        f"Observations: {len(self.returns)}",
        f"Available factors: {self._asset_betas.columns.tolist()}",
        "",
        "Asset Factor Betas:",
        f"{'-' * 60}",
    ]

    # Render the per-asset beta matrix as a plain-text table.
    report.append(self._asset_betas.to_string())
    report.append(rule)
    return "\n".join(report)

FactorVisualization

Static methods for visualizing factor analysis results.

Examples:

>>> from portfolio_analysis.factors import FactorVisualization
>>> FactorVisualization.plot_factor_exposures(regression_results)
>>> FactorVisualization.plot_rolling_betas(rolling_data)
>>> FactorVisualization.plot_return_attribution(attribution_dict)
Source code in portfolio_analysis/factors/visualization.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
class FactorVisualization:
    """
    Static methods for visualizing factor analysis results.

    Examples
    --------
    >>> from portfolio_analysis.factors import FactorVisualization
    >>> FactorVisualization.plot_factor_exposures(regression_results)
    >>> FactorVisualization.plot_rolling_betas(rolling_data)
    >>> FactorVisualization.plot_return_attribution(attribution_dict)
    """

    @staticmethod
    def plot_factor_exposures(
        results: RegressionResults,
        figsize: tuple = (10, 6),
        show_significance: bool = True,
    ) -> None:
        """
        Plot factor exposures (betas) as a bar chart.

        Parameters
        ----------
        results : RegressionResults
            Results from FactorRegression
        figsize : tuple, default (10, 6)
            Figure size
        show_significance : bool, default True
            Color bars by statistical significance (green = p < 0.05)
        """
        factors = results.factors
        betas = [results.betas[f] for f in factors]
        pvalues = [results.beta_pvalues[f] for f in factors]

        # Color by significance
        if show_significance:
            colors = ["green" if p < 0.05 else "gray" for p in pvalues]
        else:
            colors = ["steelblue"] * len(factors)

        plt.figure(figsize=figsize)
        bars = plt.bar(factors, betas, color=colors, edgecolor="black", alpha=0.8)

        # Add horizontal line at zero
        plt.axhline(y=0, color="black", linestyle="-", linewidth=0.5)

        # Add value labels on bars (below the bar for negative betas)
        for bar, beta in zip(bars, betas):
            height = bar.get_height()
            plt.annotate(
                f"{beta:.3f}",
                xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3 if height >= 0 else -10),
                textcoords="offset points",
                ha="center",
                va="bottom" if height >= 0 else "top",
                fontsize=10,
            )

        plt.xlabel("Factor")
        plt.ylabel("Beta (Factor Loading)")
        plt.title(
            f"Factor Exposures - {results.model} Model\n"
            f"(R² = {results.r_squared:.3f}, Alpha = {results.alpha*100:.2f}%)"
        )

        if show_significance:
            # BUGFIX: handles and labels must both be passed as keywords (or
            # both positionally); passing labels positionally alongside the
            # handles= keyword is rejected by matplotlib >= 3.4.
            plt.legend(
                handles=[
                    plt.Rectangle((0, 0), 1, 1, fc="green", alpha=0.8),
                    plt.Rectangle((0, 0), 1, 1, fc="gray", alpha=0.8),
                ],
                labels=["p < 0.05 (significant)", "p >= 0.05"],
                loc="best",
            )

        plt.grid(True, alpha=0.3, axis="y")
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_rolling_betas(
        rolling_data: pd.DataFrame,
        figsize: tuple = (12, 8),
        factors: list[str] | None = None,
    ) -> None:
        """
        Plot rolling factor betas over time: one panel per factor plus a
        final panel for the rolling alpha.

        Parameters
        ----------
        rolling_data : pd.DataFrame
            Output from FactorRegression.run_rolling_regression()
        figsize : tuple, default (12, 8)
            Figure size
        factors : list of str, optional
            Specific factors to plot. If None, plots all.
        """
        if factors is None:
            # Get all columns except 'alpha' and 'r_squared'
            factors = [
                c for c in rolling_data.columns if c not in ["alpha", "r_squared"]
            ]

        n_factors = len(factors)
        fig, axes = plt.subplots(n_factors + 1, 1, figsize=figsize, sharex=True)
        # BUGFIX: with a single subplot (no factor columns) plt.subplots
        # returns a bare Axes, which is not indexable; normalize to an array.
        axes = np.atleast_1d(axes)

        # Plot each factor beta with its sample mean as a reference line
        for i, factor in enumerate(factors):
            ax = axes[i]
            ax.plot(rolling_data.index, rolling_data[factor], linewidth=1.5)
            ax.axhline(
                y=rolling_data[factor].mean(),
                color="red",
                linestyle="--",
                alpha=0.7,
                label="Mean",
            )
            ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
            ax.set_ylabel(factor)
            ax.legend(loc="upper right", fontsize=8)
            ax.grid(True, alpha=0.3)

        # Plot rolling alpha (in percent) in the bottom panel
        ax = axes[-1]
        ax.plot(
            rolling_data.index,
            rolling_data["alpha"] * 100,
            linewidth=1.5,
            color="green",
        )
        ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
        ax.set_ylabel("Alpha (%)")
        ax.set_xlabel("Date")
        ax.grid(True, alpha=0.3)

        fig.suptitle("Rolling Factor Exposures", fontsize=14, y=1.02)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_return_attribution(
        attribution: dict[str, float], figsize: tuple = (10, 6)
    ) -> None:
        """
        Plot return attribution as a waterfall chart.

        Components are stacked left to right (risk-free, then each factor,
        then alpha), each bar starting where the previous one ended, with a
        final standalone "Total" bar.

        Parameters
        ----------
        attribution : dict
            Output from FactorAttribution.decompose_returns()
        figsize : tuple, default (10, 6)
            Figure size
        """
        # Order: risk-free, factors, alpha -> total
        components = []
        values = []

        # Start with risk-free
        if "risk_free" in attribution:
            components.append("Risk-Free")
            values.append(attribution["risk_free"] * 100)

        # Add factors (exclude 'total', 'risk_free', 'alpha')
        factor_keys = [
            k for k in attribution.keys() if k not in ["total", "risk_free", "alpha"]
        ]
        for factor in factor_keys:
            components.append(factor)
            values.append(attribution[factor] * 100)

        # Add alpha
        if "alpha" in attribution:
            components.append("Alpha")
            values.append(attribution["alpha"] * 100)

        # Calculate cumulative for waterfall: running sum of all prior
        # contributions gives each bar's bottom edge.
        cumulative = np.cumsum([0] + values[:-1])
        total = sum(values)

        plt.figure(figsize=figsize)

        # Color bars based on positive/negative
        colors = ["green" if v >= 0 else "red" for v in values]

        # Create waterfall bars
        _bars = plt.bar(  # noqa: F841
            components,
            values,
            bottom=cumulative,
            color=colors,
            edgecolor="black",
            alpha=0.8,
        )

        # Add total bar (extra categorical x position after the components)
        plt.bar(["Total"], [total], color="steelblue", edgecolor="black", alpha=0.8)

        # Add value labels at the running top (or bottom) of each bar
        for i, (comp, val) in enumerate(zip(components, values)):
            height = cumulative[i] + val
            plt.annotate(
                f"{val:.2f}%",
                xy=(i, height),
                xytext=(0, 3 if val >= 0 else -10),
                textcoords="offset points",
                ha="center",
                va="bottom" if val >= 0 else "top",
                fontsize=9,
            )

        # Total label: the "Total" category sits at index len(components)
        plt.annotate(
            f"{total:.2f}%",
            xy=(len(components), total),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=9,
            fontweight="bold",
        )

        plt.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
        plt.ylabel("Return Contribution (%)")
        plt.title("Return Attribution by Factor")
        plt.xticks(rotation=45, ha="right")
        plt.grid(True, alpha=0.3, axis="y")
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_factor_tilts(tilts: dict[str, float], figsize: tuple = (8, 8)) -> None:
        """
        Plot characteristic-based factor tilts as a radar chart.

        Parameters
        ----------
        tilts : dict
            Output from FactorExposures.get_all_tilts()
        figsize : tuple, default (8, 8)
            Figure size
        """
        factors = list(tilts.keys())
        values = list(tilts.values())

        # Close the radar chart by repeating the first point at the end.
        # NOTE(review): assumes `tilts` is non-empty — values[0] would raise
        # IndexError on an empty dict; confirm callers guarantee this.
        angles = np.linspace(0, 2 * np.pi, len(factors), endpoint=False).tolist()
        values = values + [values[0]]
        angles = angles + [angles[0]]
        factors = factors + [factors[0]]

        fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(projection="polar"))

        # Plot the tilt values
        ax.plot(angles, values, "o-", linewidth=2, markersize=8)
        ax.fill(angles, values, alpha=0.25)

        # Add reference circles at fixed tilt levels
        for val in [-1, -0.5, 0, 0.5, 1]:
            ax.plot(
                angles,
                [val] * len(angles),
                "--",
                color="gray",
                alpha=0.3,
                linewidth=0.5,
            )

        # Label the spokes with factor names (drop the duplicated closing point)
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels([f.capitalize() for f in factors[:-1]], fontsize=11)
        ax.set_ylim(-1.2, 1.2)

        # Add gridlines at factor positions
        ax.set_thetagrids(
            np.degrees(angles[:-1]), [f.capitalize() for f in factors[:-1]]
        )

        plt.title("Portfolio Factor Tilts\n(Characteristic-Based)", fontsize=14, y=1.1)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_model_comparison(
        comparison_df: pd.DataFrame, figsize: tuple = (12, 5)
    ) -> None:
        """
        Plot comparison of different factor models.

        Three panels: alpha (starred when significant at the 5% level),
        R², and market beta for each model in ``comparison_df``.

        Parameters
        ----------
        comparison_df : pd.DataFrame
            Output from FactorRegression.compare_models()
        figsize : tuple, default (12, 5)
            Figure size
        """
        fig, axes = plt.subplots(1, 3, figsize=figsize)

        models = comparison_df["Model"].tolist()
        x = np.arange(len(models))
        width = 0.6

        # Alpha comparison
        ax = axes[0]
        alphas = comparison_df["Alpha (%)"].values
        colors = ["green" if a > 0 else "red" for a in alphas]
        bars = ax.bar(x, alphas, width, color=colors, edgecolor="black", alpha=0.8)
        ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
        ax.set_xticks(x)
        ax.set_xticklabels(models)
        ax.set_ylabel("Alpha (%)")
        ax.set_title("Alpha by Model")
        ax.grid(True, alpha=0.3, axis="y")

        # Star alphas that are statistically significant at 5%
        # (the enumerate index was unused; iterate the pairs directly)
        for bar, pval in zip(bars, comparison_df["Alpha p-value"]):
            if pval < 0.05:
                ax.annotate(
                    "*",
                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                    ha="center",
                    fontsize=14,
                )

        # R-squared comparison
        ax = axes[1]
        r2 = comparison_df["R-squared"].values
        ax.bar(x, r2, width, color="steelblue", edgecolor="black", alpha=0.8)
        ax.set_xticks(x)
        ax.set_xticklabels(models)
        ax.set_ylabel("R-squared")
        ax.set_title("Model Fit (R²)")
        ax.set_ylim(0, 1)
        ax.grid(True, alpha=0.3, axis="y")

        # Market beta comparison, with the market itself (beta = 1) marked
        ax = axes[2]
        mkt_betas = comparison_df["Mkt Beta"].values
        ax.bar(x, mkt_betas, width, color="purple", edgecolor="black", alpha=0.8)
        ax.axhline(y=1, color="red", linestyle="--", alpha=0.7, label="Market (β=1)")
        ax.set_xticks(x)
        ax.set_xticklabels(models)
        ax.set_ylabel("Market Beta")
        ax.set_title("Market Exposure")
        ax.legend(loc="best", fontsize=8)
        ax.grid(True, alpha=0.3, axis="y")

        plt.suptitle("Factor Model Comparison", fontsize=14, y=1.02)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_factor_frontier(
        frontier_df: pd.DataFrame, factor: str, figsize: tuple = (10, 6)
    ) -> None:
        """
        Plot factor-efficient frontier.

        Left panel: expected return vs factor beta, colored by Sharpe ratio.
        Right panel: Sharpe ratio vs factor beta, with the max-Sharpe point
        starred.

        Parameters
        ----------
        frontier_df : pd.DataFrame
            Output from FactorOptimizer.generate_factor_frontier()
        factor : str
            Name of the factor
        figsize : tuple, default (10, 6)
            Figure size
        """
        # Frontier column holding the achieved beta for this factor
        beta_col = f"{factor}_beta"

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        # Plot: Return vs Factor Beta
        ax = axes[0]
        scatter = ax.scatter(
            frontier_df[beta_col],
            frontier_df["return"] * 100,
            c=frontier_df["sharpe_ratio"],
            cmap="RdYlGn",
            s=60,
            edgecolors="black",
            alpha=0.8,
        )
        ax.set_xlabel(f"{factor} Beta")
        ax.set_ylabel("Expected Return (%)")
        ax.set_title(f"Return vs {factor} Exposure")
        ax.grid(True, alpha=0.3)
        plt.colorbar(scatter, ax=ax, label="Sharpe Ratio")

        # Plot: Sharpe vs Factor Beta
        ax = axes[1]
        ax.plot(
            frontier_df[beta_col],
            frontier_df["sharpe_ratio"],
            "o-",
            linewidth=2,
            markersize=6,
        )
        ax.set_xlabel(f"{factor} Beta")
        ax.set_ylabel("Sharpe Ratio")
        ax.set_title(f"Risk-Adjusted Return vs {factor} Exposure")
        ax.grid(True, alpha=0.3)

        # Mark optimal point (frontier row with the maximum Sharpe ratio)
        max_sharpe_idx = frontier_df["sharpe_ratio"].idxmax()
        opt_beta = frontier_df.loc[max_sharpe_idx, beta_col]
        opt_sharpe = frontier_df.loc[max_sharpe_idx, "sharpe_ratio"]
        ax.scatter(
            [opt_beta],
            [opt_sharpe],
            s=150,
            c="red",
            marker="*",
            zorder=5,
            label=f"Optimal (β={opt_beta:.2f})",
        )
        ax.legend(loc="best")

        plt.suptitle(f"{factor} Factor Efficient Frontier", fontsize=14, y=1.02)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_risk_attribution(
        risk_decomp: dict[str, float], figsize: tuple = (10, 6)
    ) -> None:
        """
        Plot risk (variance) attribution as a pie chart plus a bar chart.

        The pie uses absolute contributions (negative ones flagged "(-)" in
        the label); the bar chart shows the signed variance values.

        Parameters
        ----------
        risk_decomp : dict
            Output from FactorAttribution.decompose_risk()
        figsize : tuple, default (10, 6)
            Figure size
        """
        # Extract components (exclude 'total' and 'r_squared')
        components = [k for k in risk_decomp.keys() if k not in ["total", "r_squared"]]
        values = [risk_decomp[k] for k in components]

        # Convert to percentages (use absolute values for pie chart);
        # compute the sum once and guard against an all-zero decomposition.
        abs_values = [abs(v) for v in values]
        abs_sum = sum(abs_values)
        abs_total = abs_sum if abs_sum > 0 else 1
        percentages = [v / abs_total * 100 for v in abs_values]

        # Create labels with +/- indicators for negative contributions
        pie_labels = [
            f"{c} (-)" if values[i] < 0 else c for i, c in enumerate(components)
        ]

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        # Pie chart (using absolute values); the artists are not needed
        ax = axes[0]
        colors = plt.cm.Set3(np.linspace(0, 1, len(components)))
        ax.pie(
            percentages,
            labels=pie_labels,
            autopct="%1.1f%%",
            startangle=90,
            colors=colors,
            explode=[0.02] * len(components),
        )
        ax.set_title("Variance Attribution (absolute %)")

        # Bar chart with signed variance values
        ax = axes[1]
        bars = ax.barh(components, values, color=colors, edgecolor="black", alpha=0.8)
        ax.set_xlabel("Variance Contribution")
        ax.set_title("Variance Attribution (Absolute)")
        ax.grid(True, alpha=0.3, axis="x")

        # Add value labels
        for bar, val in zip(bars, values):
            ax.annotate(
                f"{val:.6f}",
                xy=(bar.get_width(), bar.get_y() + bar.get_height() / 2),
                xytext=(5, 0),
                textcoords="offset points",
                ha="left",
                va="center",
                fontsize=9,
            )

        plt.suptitle(
            f'Risk Attribution (R² = {risk_decomp.get("r_squared", 0):.3f})',
            fontsize=14,
            y=1.02,
        )
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_regional_comparison(
        comparison_df: pd.DataFrame, figsize: tuple = (14, 5)
    ) -> None:
        """
        Plot side-by-side R² and alpha comparison of US vs regional factors.

        Parameters
        ----------
        comparison_df : pd.DataFrame
            Output from ``CompositeFactorRegression.compare_us_vs_regional()``.
            Expected columns: US R², Regional R², US Alpha (%), Regional Alpha (%).
        figsize : tuple, default (14, 5)
            Figure size.
        """
        # Nothing to plot — return silently rather than draw empty axes.
        if comparison_df.empty:
            return

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        tickers = comparison_df.index.tolist()
        x = np.arange(len(tickers))
        # Grouped bars: US and Regional offset by half a bar width each.
        width = 0.35

        # R² comparison
        ax = axes[0]
        ax.bar(
            x - width / 2,
            comparison_df["US R²"],
            width,
            label="US Factors",
            color="#e74c3c",
            edgecolor="black",
            alpha=0.85,
        )
        ax.bar(
            x + width / 2,
            comparison_df["Regional R²"],
            width,
            label="Regional Factors",
            color="#2ecc71",
            edgecolor="black",
            alpha=0.85,
        )
        ax.set_xticks(x)
        ax.set_xticklabels(tickers, fontsize=11)
        ax.set_ylabel("R²", fontsize=12)
        ax.set_title("R² Improvement with Regional Factors", fontsize=13)
        ax.legend(fontsize=10)
        ax.set_ylim(0, 1.05)
        ax.grid(True, alpha=0.3, axis="y")

        # Alpha comparison
        ax = axes[1]
        ax.bar(
            x - width / 2,
            comparison_df["US Alpha (%)"],
            width,
            label="US Factors",
            color="#e74c3c",
            edgecolor="black",
            alpha=0.85,
        )
        ax.bar(
            x + width / 2,
            comparison_df["Regional Alpha (%)"],
            width,
            label="Regional Factors",
            color="#2ecc71",
            edgecolor="black",
            alpha=0.85,
        )
        ax.set_xticks(x)
        ax.set_xticklabels(tickers, fontsize=11)
        ax.set_ylabel("Alpha (% annualized)", fontsize=12)
        ax.set_title("Alpha Estimate: US vs Regional Factors", fontsize=13)
        ax.axhline(y=0, color="black", linewidth=0.5)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, axis="y")

        plt.suptitle(
            "Impact of Using Regional Factor Data for International ETFs",
            fontsize=14,
            y=1.03,
        )
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_composite_exposures(
        composite_results: CompositeRegressionResults,
        baseline_results: RegressionResults | None = None,
        figsize: tuple = (12, 6),
    ) -> None:
        """
        Plot composite weighted-average factor betas, optionally vs a baseline.

        With a baseline, draws grouped bars (baseline vs composite) per
        factor; otherwise a single bar per factor. Each bar is labeled with
        its beta value.

        Parameters
        ----------
        composite_results : CompositeRegressionResults
            Output from ``CompositeFactorRegression.run_composite_regression()``.
        baseline_results : RegressionResults, optional
            Single-region baseline (e.g. US-only regression) for comparison.
        figsize : tuple, default (12, 6)
            Figure size.
        """
        factors = list(composite_results.weighted_betas.keys())
        composite_betas = [composite_results.weighted_betas[f] for f in factors]

        x = np.arange(len(factors))
        fig, ax = plt.subplots(figsize=figsize)

        if baseline_results is not None:
            width = 0.35
            # Factors missing from the baseline default to a beta of 0.
            baseline_betas = [baseline_results.betas.get(f, 0) for f in factors]
            ax.bar(
                x - width / 2,
                baseline_betas,
                width,
                label="Baseline (Single-Region)",
                color="#3498db",
                edgecolor="black",
                alpha=0.85,
            )
            ax.bar(
                x + width / 2,
                composite_betas,
                width,
                label="Composite (Regional)",
                color="#e67e22",
                edgecolor="black",
                alpha=0.85,
            )
            # Value labels just above each grouped bar
            for i, (bl, co) in enumerate(zip(baseline_betas, composite_betas)):
                ax.text(
                    i - width / 2,
                    bl + 0.01,
                    f"{bl:.3f}",
                    ha="center",
                    va="bottom",
                    fontsize=9,
                )
                ax.text(
                    i + width / 2,
                    co + 0.01,
                    f"{co:.3f}",
                    ha="center",
                    va="bottom",
                    fontsize=9,
                )
            ax.legend(fontsize=11, loc="upper right")
        else:
            bars = ax.bar(
                x,
                composite_betas,
                color="#e67e22",
                edgecolor="black",
                alpha=0.85,
            )
            # Value labels just above each bar
            for bar, beta in zip(bars, composite_betas):
                h = bar.get_height()
                ax.text(
                    bar.get_x() + bar.get_width() / 2,
                    h + 0.01,
                    f"{beta:.3f}",
                    ha="center",
                    va="bottom",
                    fontsize=10,
                )

        ax.set_xticks(x)
        ax.set_xticklabels(factors, fontsize=12)
        # Reference lines at beta = 0 and beta = 1 (full market exposure)
        ax.axhline(y=0, color="black", linewidth=0.5)
        ax.axhline(y=1, color="gray", linewidth=0.5, linestyle="--", alpha=0.5)
        ax.set_ylabel("Factor Loading (Beta)", fontsize=12)
        ax.set_title(
            "Composite Portfolio Factor Loadings\n"
            f"(Coverage: {composite_results.coverage:.1%}, "
            f"Alpha: {composite_results.weighted_alpha * 100:.2f}%)",
            fontsize=14,
        )
        ax.grid(True, alpha=0.3, axis="y")
        plt.tight_layout()
        plt.show()

plot_factor_exposures(results, figsize=(10, 6), show_significance=True) staticmethod

Plot factor exposures (betas) as a bar chart.

Parameters:

Name Type Description Default
results RegressionResults

Results from FactorRegression

required
figsize tuple

Figure size

(10, 6)
show_significance bool

Color bars by statistical significance

True
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_factor_exposures(
    results: RegressionResults,
    figsize: tuple = (10, 6),
    show_significance: bool = True,
) -> None:
    """
    Plot factor exposures (betas) as a bar chart.

    Parameters
    ----------
    results : RegressionResults
        Results from FactorRegression
    figsize : tuple, default (10, 6)
        Figure size
    show_significance : bool, default True
        Color bars by statistical significance (green = p < 0.05)
    """
    factors = results.factors
    betas = [results.betas[f] for f in factors]
    pvalues = [results.beta_pvalues[f] for f in factors]

    # Color by significance
    if show_significance:
        colors = ["green" if p < 0.05 else "gray" for p in pvalues]
    else:
        colors = ["steelblue"] * len(factors)

    plt.figure(figsize=figsize)
    bars = plt.bar(factors, betas, color=colors, edgecolor="black", alpha=0.8)

    # Add horizontal line at zero
    plt.axhline(y=0, color="black", linestyle="-", linewidth=0.5)

    # Add value labels on bars (below the bar for negative betas)
    for bar, beta in zip(bars, betas):
        height = bar.get_height()
        plt.annotate(
            f"{beta:.3f}",
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3 if height >= 0 else -10),
            textcoords="offset points",
            ha="center",
            va="bottom" if height >= 0 else "top",
            fontsize=10,
        )

    plt.xlabel("Factor")
    plt.ylabel("Beta (Factor Loading)")
    plt.title(
        f"Factor Exposures - {results.model} Model\n"
        f"(R² = {results.r_squared:.3f}, Alpha = {results.alpha*100:.2f}%)"
    )

    if show_significance:
        # BUGFIX: handles and labels must both be passed as keywords (or
        # both positionally); passing labels positionally alongside the
        # handles= keyword is rejected by matplotlib >= 3.4.
        plt.legend(
            handles=[
                plt.Rectangle((0, 0), 1, 1, fc="green", alpha=0.8),
                plt.Rectangle((0, 0), 1, 1, fc="gray", alpha=0.8),
            ],
            labels=["p < 0.05 (significant)", "p >= 0.05"],
            loc="best",
        )

    plt.grid(True, alpha=0.3, axis="y")
    plt.tight_layout()
    plt.show()

plot_rolling_betas(rolling_data, figsize=(12, 8), factors=None) staticmethod

Plot rolling factor betas over time.

Parameters:

Name Type Description Default
rolling_data DataFrame

Output from FactorRegression.run_rolling_regression()

required
figsize tuple

Figure size

(12, 8)
factors list of str

Specific factors to plot. If None, plots all.

None
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_rolling_betas(
    rolling_data: pd.DataFrame,
    figsize: tuple = (12, 8),
    factors: list[str] | None = None,
) -> None:
    """
    Plot rolling factor betas over time: one panel per factor plus a
    final panel for the rolling alpha.

    Parameters
    ----------
    rolling_data : pd.DataFrame
        Output from FactorRegression.run_rolling_regression()
    figsize : tuple, default (12, 8)
        Figure size
    factors : list of str, optional
        Specific factors to plot. If None, plots all.
    """
    if factors is None:
        # Get all columns except 'alpha' and 'r_squared'
        factors = [
            c for c in rolling_data.columns if c not in ["alpha", "r_squared"]
        ]

    n_factors = len(factors)
    fig, axes = plt.subplots(n_factors + 1, 1, figsize=figsize, sharex=True)
    # BUGFIX: with a single subplot (no factor columns) plt.subplots
    # returns a bare Axes, which is not indexable; normalize to an array.
    axes = np.atleast_1d(axes)

    # Plot each factor beta with its sample mean as a reference line
    for i, factor in enumerate(factors):
        ax = axes[i]
        ax.plot(rolling_data.index, rolling_data[factor], linewidth=1.5)
        ax.axhline(
            y=rolling_data[factor].mean(),
            color="red",
            linestyle="--",
            alpha=0.7,
            label="Mean",
        )
        ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
        ax.set_ylabel(factor)
        ax.legend(loc="upper right", fontsize=8)
        ax.grid(True, alpha=0.3)

    # Plot rolling alpha (in percent) in the bottom panel
    ax = axes[-1]
    ax.plot(
        rolling_data.index,
        rolling_data["alpha"] * 100,
        linewidth=1.5,
        color="green",
    )
    ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
    ax.set_ylabel("Alpha (%)")
    ax.set_xlabel("Date")
    ax.grid(True, alpha=0.3)

    fig.suptitle("Rolling Factor Exposures", fontsize=14, y=1.02)
    plt.tight_layout()
    plt.show()

plot_return_attribution(attribution, figsize=(10, 6)) staticmethod

Plot return attribution as a waterfall chart.

Parameters:

Name Type Description Default
attribution dict

Output from FactorAttribution.decompose_returns()

required
figsize tuple

Figure size

(10, 6)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_return_attribution(
    attribution: dict[str, float], figsize: tuple = (10, 6)
) -> None:
    """
    Plot return attribution as a waterfall chart.

    Components are stacked in order (risk-free, factors, alpha), each
    bar starting where the previous ones end, followed by a single
    "Total" bar for the combined return.

    Parameters
    ----------
    attribution : dict
        Output from FactorAttribution.decompose_returns()
    figsize : tuple, default (10, 6)
        Figure size
    """
    # Assemble components in display order: risk-free, factors, alpha.
    labels: list[str] = []
    contribs: list[float] = []

    if "risk_free" in attribution:
        labels.append("Risk-Free")
        contribs.append(attribution["risk_free"] * 100)

    # Factors are every key that is not a summary/special entry.
    for key, val in attribution.items():
        if key in ("total", "risk_free", "alpha"):
            continue
        labels.append(key)
        contribs.append(val * 100)

    if "alpha" in attribution:
        labels.append("Alpha")
        contribs.append(attribution["alpha"] * 100)

    # Waterfall layout: each bar sits on the running sum of the others.
    bottoms = np.cumsum([0] + contribs[:-1])
    grand_total = sum(contribs)

    plt.figure(figsize=figsize)

    # Green for gains, red for losses.
    bar_colors = ["green" if c >= 0 else "red" for c in contribs]

    plt.bar(
        labels,
        contribs,
        bottom=bottoms,
        color=bar_colors,
        edgecolor="black",
        alpha=0.8,
    )

    # Closing bar with the combined total.
    plt.bar(["Total"], [grand_total], color="steelblue", edgecolor="black", alpha=0.8)

    # Value labels: above positive bars, below negative ones.
    for idx, (base, contrib) in enumerate(zip(bottoms, contribs)):
        plt.annotate(
            f"{contrib:.2f}%",
            xy=(idx, base + contrib),
            xytext=(0, 3 if contrib >= 0 else -10),
            textcoords="offset points",
            ha="center",
            va="bottom" if contrib >= 0 else "top",
            fontsize=9,
        )

    # Bold label on the total bar.
    plt.annotate(
        f"{grand_total:.2f}%",
        xy=(len(labels), grand_total),
        xytext=(0, 3),
        textcoords="offset points",
        ha="center",
        va="bottom",
        fontsize=9,
        fontweight="bold",
    )

    plt.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
    plt.ylabel("Return Contribution (%)")
    plt.title("Return Attribution by Factor")
    plt.xticks(rotation=45, ha="right")
    plt.grid(True, alpha=0.3, axis="y")
    plt.tight_layout()
    plt.show()

plot_factor_tilts(tilts, figsize=(8, 8)) staticmethod

Plot characteristic-based factor tilts as a radar chart.

Parameters:

Name Type Description Default
tilts dict

Output from FactorExposures.get_all_tilts()

required
figsize tuple

Figure size

(8, 8)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_factor_tilts(tilts: dict[str, float], figsize: tuple = (8, 8)) -> None:
    """
    Plot characteristic-based factor tilts as a radar chart.

    Parameters
    ----------
    tilts : dict
        Output from FactorExposures.get_all_tilts()
    figsize : tuple, default (8, 8)
        Figure size
    """
    factors = list(tilts.keys())
    values = list(tilts.values())

    # Close the radar chart
    angles = np.linspace(0, 2 * np.pi, len(factors), endpoint=False).tolist()
    values = values + [values[0]]
    angles = angles + [angles[0]]
    factors = factors + [factors[0]]

    fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(projection="polar"))

    # Plot the tilt values
    ax.plot(angles, values, "o-", linewidth=2, markersize=8)
    ax.fill(angles, values, alpha=0.25)

    # Add reference circles
    for val in [-1, -0.5, 0, 0.5, 1]:
        ax.plot(
            angles,
            [val] * len(angles),
            "--",
            color="gray",
            alpha=0.3,
            linewidth=0.5,
        )

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels([f.capitalize() for f in factors[:-1]], fontsize=11)
    ax.set_ylim(-1.2, 1.2)

    # Add gridlines at factor positions
    ax.set_thetagrids(
        np.degrees(angles[:-1]), [f.capitalize() for f in factors[:-1]]
    )

    plt.title("Portfolio Factor Tilts\n(Characteristic-Based)", fontsize=14, y=1.1)
    plt.tight_layout()
    plt.show()

plot_model_comparison(comparison_df, figsize=(12, 5)) staticmethod

Plot comparison of different factor models.

Parameters:

Name Type Description Default
comparison_df DataFrame

Output from FactorRegression.compare_models()

required
figsize tuple

Figure size

(12, 5)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_model_comparison(
    comparison_df: pd.DataFrame, figsize: tuple = (12, 5)
) -> None:
    """
    Plot comparison of different factor models.

    Draws three panels: alpha by model (with significance stars),
    model fit (R-squared), and market beta.

    Parameters
    ----------
    comparison_df : pd.DataFrame
        Output from FactorRegression.compare_models()
    figsize : tuple, default (12, 5)
        Figure size
    """
    fig, axes = plt.subplots(1, 3, figsize=figsize)

    models = comparison_df["Model"].tolist()
    x = np.arange(len(models))
    width = 0.6

    # Alpha comparison
    ax = axes[0]
    alphas = comparison_df["Alpha (%)"].values
    colors = ["green" if a > 0 else "red" for a in alphas]
    bars = ax.bar(x, alphas, width, color=colors, edgecolor="black", alpha=0.8)
    ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.set_ylabel("Alpha (%)")
    ax.set_title("Alpha by Model")
    ax.grid(True, alpha=0.3, axis="y")

    # Star bars whose alpha is significant at the 5% level
    # (enumerate index was unused; plain zip suffices)
    for bar, pval in zip(bars, comparison_df["Alpha p-value"]):
        if pval < 0.05:
            ax.annotate(
                "*",
                xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                ha="center",
                fontsize=14,
            )

    # R-squared comparison
    ax = axes[1]
    r2 = comparison_df["R-squared"].values
    ax.bar(x, r2, width, color="steelblue", edgecolor="black", alpha=0.8)
    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.set_ylabel("R-squared")
    ax.set_title("Model Fit (R²)")
    ax.set_ylim(0, 1)
    ax.grid(True, alpha=0.3, axis="y")

    # Market beta comparison, with the β=1 market line for reference
    ax = axes[2]
    mkt_betas = comparison_df["Mkt Beta"].values
    ax.bar(x, mkt_betas, width, color="purple", edgecolor="black", alpha=0.8)
    ax.axhline(y=1, color="red", linestyle="--", alpha=0.7, label="Market (β=1)")
    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.set_ylabel("Market Beta")
    ax.set_title("Market Exposure")
    ax.legend(loc="best", fontsize=8)
    ax.grid(True, alpha=0.3, axis="y")

    plt.suptitle("Factor Model Comparison", fontsize=14, y=1.02)
    plt.tight_layout()
    plt.show()

plot_factor_frontier(frontier_df, factor, figsize=(10, 6)) staticmethod

Plot factor-efficient frontier.

Parameters:

Name Type Description Default
frontier_df DataFrame

Output from FactorOptimizer.generate_factor_frontier()

required
factor str

Name of the factor

required
figsize tuple

Figure size

(10, 6)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_factor_frontier(
    frontier_df: pd.DataFrame, factor: str, figsize: tuple = (10, 6)
) -> None:
    """
    Plot the factor-efficient frontier for a single factor.

    Left panel: expected return vs factor beta, colored by Sharpe ratio.
    Right panel: Sharpe ratio vs factor beta, with the maximum-Sharpe
    portfolio highlighted.

    Parameters
    ----------
    frontier_df : pd.DataFrame
        Output from FactorOptimizer.generate_factor_frontier()
    factor : str
        Name of the factor
    figsize : tuple, default (10, 6)
        Figure size
    """
    beta_col = f"{factor}_beta"
    betas = frontier_df[beta_col]
    sharpes = frontier_df["sharpe_ratio"]

    fig, (ax_ret, ax_sharpe) = plt.subplots(1, 2, figsize=figsize)

    # Return vs factor beta, colored by Sharpe ratio
    points = ax_ret.scatter(
        betas,
        frontier_df["return"] * 100,
        c=sharpes,
        cmap="RdYlGn",
        s=60,
        edgecolors="black",
        alpha=0.8,
    )
    ax_ret.set_xlabel(f"{factor} Beta")
    ax_ret.set_ylabel("Expected Return (%)")
    ax_ret.set_title(f"Return vs {factor} Exposure")
    ax_ret.grid(True, alpha=0.3)
    plt.colorbar(points, ax=ax_ret, label="Sharpe Ratio")

    # Sharpe ratio vs factor beta
    ax_sharpe.plot(betas, sharpes, "o-", linewidth=2, markersize=6)
    ax_sharpe.set_xlabel(f"{factor} Beta")
    ax_sharpe.set_ylabel("Sharpe Ratio")
    ax_sharpe.set_title(f"Risk-Adjusted Return vs {factor} Exposure")
    ax_sharpe.grid(True, alpha=0.3)

    # Highlight the maximum-Sharpe portfolio with a star marker
    best_idx = sharpes.idxmax()
    best_beta = frontier_df.loc[best_idx, beta_col]
    best_sharpe = frontier_df.loc[best_idx, "sharpe_ratio"]
    ax_sharpe.scatter(
        [best_beta],
        [best_sharpe],
        s=150,
        c="red",
        marker="*",
        zorder=5,
        label=f"Optimal (β={best_beta:.2f})",
    )
    ax_sharpe.legend(loc="best")

    plt.suptitle(f"{factor} Factor Efficient Frontier", fontsize=14, y=1.02)
    plt.tight_layout()
    plt.show()

plot_risk_attribution(risk_decomp, figsize=(10, 6)) staticmethod

Plot risk (variance) attribution as a pie chart alongside a horizontal bar chart.

Parameters:

Name Type Description Default
risk_decomp dict

Output from FactorAttribution.decompose_risk()

required
figsize tuple

Figure size

(10, 6)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_risk_attribution(
    risk_decomp: dict[str, float], figsize: tuple = (10, 6)
) -> None:
    """
    Plot risk (variance) attribution as a pie chart and a bar chart.

    Parameters
    ----------
    risk_decomp : dict
        Output from FactorAttribution.decompose_risk()
    figsize : tuple, default (10, 6)
        Figure size
    """
    # Drop the summary keys; everything else is a variance component.
    components = [k for k in risk_decomp if k not in ["total", "r_squared"]]
    values = [risk_decomp[c] for c in components]

    # Pie slices must be non-negative, so chart magnitudes and flag
    # negative contributions in the label instead.
    magnitudes = [abs(v) for v in values]
    denom = sum(magnitudes) if sum(magnitudes) > 0 else 1
    shares = [m / denom * 100 for m in magnitudes]

    pie_labels = [
        name if val >= 0 else f"{name} (-)"
        for name, val in zip(components, values)
    ]

    fig, (ax_pie, ax_bar) = plt.subplots(1, 2, figsize=figsize)

    palette = plt.cm.Set3(np.linspace(0, 1, len(components)))

    # Pie chart of absolute variance shares
    ax_pie.pie(
        shares,
        labels=pie_labels,
        autopct="%1.1f%%",
        startangle=90,
        colors=palette,
        explode=[0.02] * len(components),
    )
    ax_pie.set_title("Variance Attribution (absolute %)")

    # Horizontal bars with the signed variance values
    rects = ax_bar.barh(components, values, color=palette, edgecolor="black", alpha=0.8)
    ax_bar.set_xlabel("Variance Contribution")
    ax_bar.set_title("Variance Attribution (Absolute)")
    ax_bar.grid(True, alpha=0.3, axis="x")

    # Numeric label at the end of each bar
    for rect, val in zip(rects, values):
        ax_bar.annotate(
            f"{val:.6f}",
            xy=(rect.get_width(), rect.get_y() + rect.get_height() / 2),
            xytext=(5, 0),
            textcoords="offset points",
            ha="left",
            va="center",
            fontsize=9,
        )

    plt.suptitle(
        f'Risk Attribution (R² = {risk_decomp.get("r_squared", 0):.3f})',
        fontsize=14,
        y=1.02,
    )
    plt.tight_layout()
    plt.show()

plot_regional_comparison(comparison_df, figsize=(14, 5)) staticmethod

Plot side-by-side R² and alpha comparison of US vs regional factors.

Parameters:

Name Type Description Default
comparison_df DataFrame

Output from CompositeFactorRegression.compare_us_vs_regional(). Expected columns: US R², Regional R², US Alpha (%), Regional Alpha (%).

required
figsize tuple

Figure size.

(14, 5)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_regional_comparison(
    comparison_df: pd.DataFrame, figsize: tuple = (14, 5)
) -> None:
    """
    Plot side-by-side R² and alpha comparison of US vs regional factors.

    Parameters
    ----------
    comparison_df : pd.DataFrame
        Output from ``CompositeFactorRegression.compare_us_vs_regional()``.
        Expected columns: US R², Regional R², US Alpha (%), Regional Alpha (%).
    figsize : tuple, default (14, 5)
        Figure size.
    """
    if comparison_df.empty:
        return

    fig, axes = plt.subplots(1, 2, figsize=figsize)

    tickers = comparison_df.index.tolist()
    x = np.arange(len(tickers))
    width = 0.35

    # R² comparison
    ax = axes[0]
    ax.bar(
        x - width / 2,
        comparison_df["US R²"],
        width,
        label="US Factors",
        color="#e74c3c",
        edgecolor="black",
        alpha=0.85,
    )
    ax.bar(
        x + width / 2,
        comparison_df["Regional R²"],
        width,
        label="Regional Factors",
        color="#2ecc71",
        edgecolor="black",
        alpha=0.85,
    )
    ax.set_xticks(x)
    ax.set_xticklabels(tickers, fontsize=11)
    ax.set_ylabel("R²", fontsize=12)
    ax.set_title("R² Improvement with Regional Factors", fontsize=13)
    ax.legend(fontsize=10)
    ax.set_ylim(0, 1.05)
    ax.grid(True, alpha=0.3, axis="y")

    # Alpha comparison
    ax = axes[1]
    ax.bar(
        x - width / 2,
        comparison_df["US Alpha (%)"],
        width,
        label="US Factors",
        color="#e74c3c",
        edgecolor="black",
        alpha=0.85,
    )
    ax.bar(
        x + width / 2,
        comparison_df["Regional Alpha (%)"],
        width,
        label="Regional Factors",
        color="#2ecc71",
        edgecolor="black",
        alpha=0.85,
    )
    ax.set_xticks(x)
    ax.set_xticklabels(tickers, fontsize=11)
    ax.set_ylabel("Alpha (% annualized)", fontsize=12)
    ax.set_title("Alpha Estimate: US vs Regional Factors", fontsize=13)
    ax.axhline(y=0, color="black", linewidth=0.5)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis="y")

    plt.suptitle(
        "Impact of Using Regional Factor Data for International ETFs",
        fontsize=14,
        y=1.03,
    )
    plt.tight_layout()
    plt.show()

plot_composite_exposures(composite_results, baseline_results=None, figsize=(12, 6)) staticmethod

Plot composite weighted-average factor betas, optionally vs a baseline.

Parameters:

Name Type Description Default
composite_results CompositeRegressionResults

Output from CompositeFactorRegression.run_composite_regression().

required
baseline_results RegressionResults

Single-region baseline (e.g. US-only regression) for comparison.

None
figsize tuple

Figure size.

(12, 6)
Source code in portfolio_analysis/factors/visualization.py
@staticmethod
def plot_composite_exposures(
    composite_results: CompositeRegressionResults,
    baseline_results: RegressionResults | None = None,
    figsize: tuple = (12, 6),
) -> None:
    """
    Plot composite weighted-average factor betas, optionally vs a baseline.

    Parameters
    ----------
    composite_results : CompositeRegressionResults
        Output from ``CompositeFactorRegression.run_composite_regression()``.
    baseline_results : RegressionResults, optional
        Single-region baseline (e.g. US-only regression) for comparison.
    figsize : tuple, default (12, 6)
        Figure size.
    """
    factor_names = list(composite_results.weighted_betas.keys())
    composite_vals = [composite_results.weighted_betas[f] for f in factor_names]

    positions = np.arange(len(factor_names))
    fig, ax = plt.subplots(figsize=figsize)

    if baseline_results is None:
        # Single series: composite betas only, labelled above each bar.
        rects = ax.bar(
            positions,
            composite_vals,
            color="#e67e22",
            edgecolor="black",
            alpha=0.85,
        )
        for rect, beta in zip(rects, composite_vals):
            ax.text(
                rect.get_x() + rect.get_width() / 2,
                rect.get_height() + 0.01,
                f"{beta:.3f}",
                ha="center",
                va="bottom",
                fontsize=10,
            )
    else:
        # Paired series: baseline bars on the left, composite on the right.
        width = 0.35
        baseline_vals = [baseline_results.betas.get(f, 0) for f in factor_names]
        ax.bar(
            positions - width / 2,
            baseline_vals,
            width,
            label="Baseline (Single-Region)",
            color="#3498db",
            edgecolor="black",
            alpha=0.85,
        )
        ax.bar(
            positions + width / 2,
            composite_vals,
            width,
            label="Composite (Regional)",
            color="#e67e22",
            edgecolor="black",
            alpha=0.85,
        )
        # Value labels just above each bar in the pair.
        for idx, pair in enumerate(zip(baseline_vals, composite_vals)):
            for offset, beta in zip((-width / 2, width / 2), pair):
                ax.text(
                    idx + offset,
                    beta + 0.01,
                    f"{beta:.3f}",
                    ha="center",
                    va="bottom",
                    fontsize=9,
                )
        ax.legend(fontsize=11, loc="upper right")

    ax.set_xticks(positions)
    ax.set_xticklabels(factor_names, fontsize=12)
    ax.axhline(y=0, color="black", linewidth=0.5)
    ax.axhline(y=1, color="gray", linewidth=0.5, linestyle="--", alpha=0.5)
    ax.set_ylabel("Factor Loading (Beta)", fontsize=12)
    ax.set_title(
        "Composite Portfolio Factor Loadings\n"
        f"(Coverage: {composite_results.coverage:.1%}, "
        f"Alpha: {composite_results.weighted_alpha * 100:.2f}%)",
        fontsize=14,
    )
    ax.grid(True, alpha=0.3, axis="y")
    plt.tight_layout()
    plt.show()