Skip to content

API Reference

The public API of pyhrp. Import the main entry points directly from the top-level package:

from pyhrp.hrp import hrp, build_tree, Dendrogram
from pyhrp.algos import risk_parity, one_over_n
from pyhrp.cluster import Cluster, Portfolio

Hierarchical Risk Parity (HRP) algorithm implementation.

This module implements the core HRP algorithm and related functions:

- hrp: Main function to compute HRP portfolio weights
- schur_hrp: Function to compute Schur Complementary Allocation portfolio weights
- build_tree: Function to build a hierarchical cluster tree from a correlation matrix
- compute_cov: Function to compute a covariance matrix from returns
- compute_corr: Function to compute a correlation matrix from returns
- Dendrogram: Class to store and visualize hierarchical clustering results

Dendrogram dataclass

Container for hierarchical clustering dendrogram data and visualization.

This class stores the results of hierarchical clustering and provides methods for accessing and visualizing the dendrogram structure.

Attributes:

Name Type Description
root Cluster

The root node of the hierarchical clustering tree

assets list[str]

Names of assets included in the clustering

linkage ndarray | None

Linkage matrix in scipy format for plotting

distance DataFrame | None

Distance matrix used for clustering

method str | None

Linkage method used for clustering

Source code in src/pyhrp/hrp.py
@dataclass(frozen=True)
class Dendrogram:
    """Container for hierarchical clustering dendrogram data and visualization.

    Stores the outcome of a hierarchical clustering run and offers helpers to
    inspect the left-to-right leaf order and to draw the dendrogram with plotly.

    Attributes:
        root (Cluster): The root node of the hierarchical clustering tree
        assets (list[str]): Names of assets included in the clustering
        distance (pl.DataFrame | None): Distance matrix used for clustering
        linkage (np.ndarray | None): Linkage matrix in scipy format for plotting
        method (str | None): Linkage method used for clustering
    """

    root: Cluster
    assets: list[str]
    distance: pl.DataFrame | None = None
    linkage: np.ndarray | None = None
    method: str | None = None

    def __post_init__(self) -> None:
        """Validate dataclass fields after initialization.

        Ensures that the optional distance matrix, when provided, is a polars
        DataFrame with columns aligned to the asset list, and verifies that the
        number of leaves in the cluster tree matches the number of assets.

        Raises:
            TypeError: If ``distance`` is given but is not a polars DataFrame.
            ValueError: If the distance columns differ from ``assets`` or the
                tree's leaf count differs from the number of assets.
        """
        if self.distance is not None:
            if not isinstance(self.distance, pl.DataFrame):
                raise TypeError("distance must be a polars DataFrame.")  # noqa: TRY003

            if self.distance.columns != list(self.assets):
                raise ValueError("Distance matrix index/columns must align with assets.")  # noqa: TRY003

        if len(self.root.leaves) != len(self.assets):
            raise ValueError("Number of leaves does not match number of assets.")  # noqa: TRY003

    def plot(self, **kwargs: object) -> go.Figure:
        """Build and return a plotly dendrogram figure.

        Args:
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``scipy.cluster.hierarchy.dendrogram`` (for example
                ``truncate_mode`` and ``p``).

        Returns:
            go.Figure: A figure with one line trace per dendrogram link and
            x-axis ticks labelled with the leaf labels reported by scipy.

        Raises:
            ValueError: If the dendrogram has no linkage matrix.
        """
        if self.linkage is None:
            msg = "Dendrogram has no linkage matrix to plot."
            raise ValueError(msg)
        ddata = sch.dendrogram(self.linkage, labels=self.assets, no_plot=True, **kwargs)
        fig = go.Figure()
        for xs, ys in zip(ddata["icoord"], ddata["dcoord"], strict=False):
            fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", line={"color": "steelblue"}, showlegend=False))
        # Size the tick positions from the labels scipy actually returned rather
        # than from len(self.assets): truncation kwargs can reduce the number of
        # displayed leaves, and tick positions must stay paired with their labels.
        labels = ddata["ivl"]
        fig.update_layout(
            xaxis={
                "tickmode": "array",
                "tickvals": [5 + 10 * i for i in range(len(labels))],
                "ticktext": labels,
                "tickangle": -90,
            },
        )
        return fig

    @property
    def ids(self) -> list[int]:
        """Node values in the order left -> right as they appear in the dendrogram."""
        return [node.value for node in self.root.leaves]

    @property
    def names(self) -> list[str]:
        """The asset names as induced by the order of ids."""
        return [self.assets[i] for i in self.ids]

ids property

Node values in the order left -> right as they appear in the dendrogram.

names property

The asset names as induced by the order of ids.

__post_init__()

Validate dataclass fields after initialization.

Ensures that the optional distance matrix, when provided, is a polars DataFrame with columns aligned to the asset list, and verifies that the number of leaves in the cluster tree matches the number of assets.

Source code in src/pyhrp/hrp.py
def __post_init__(self) -> None:
    """Check that the stored fields are mutually consistent.

    When a distance matrix is supplied it must be a polars DataFrame whose
    column names equal the asset list. Independently of that, the cluster
    tree must expose exactly one leaf per asset.

    Raises:
        TypeError: If ``distance`` is given but is not a polars DataFrame.
        ValueError: If the distance columns differ from ``assets`` or the
            tree's leaf count differs from the number of assets.
    """
    distance = self.distance
    if distance is not None:
        if not isinstance(distance, pl.DataFrame):
            raise TypeError("distance must be a polars DataFrame.")  # noqa: TRY003
        if distance.columns != list(self.assets):
            raise ValueError("Distance matrix index/columns must align with assets.")  # noqa: TRY003

    leaf_total = len(self.root.leaves)
    if leaf_total != len(self.assets):
        raise ValueError("Number of leaves does not match number of assets.")  # noqa: TRY003

plot(**kwargs)

Build and return a plotly dendrogram figure.

Source code in src/pyhrp/hrp.py
def plot(self, **kwargs: object) -> go.Figure:
    """Build and return a plotly dendrogram figure.

    Args:
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``scipy.cluster.hierarchy.dendrogram`` (for example
            ``truncate_mode`` and ``p``).

    Returns:
        go.Figure: A figure with one line trace per dendrogram link and
        x-axis ticks labelled with the leaf labels reported by scipy.

    Raises:
        ValueError: If the dendrogram has no linkage matrix.
    """
    if self.linkage is None:
        msg = "Dendrogram has no linkage matrix to plot."
        raise ValueError(msg)
    ddata = sch.dendrogram(self.linkage, labels=self.assets, no_plot=True, **kwargs)
    fig = go.Figure()
    for xs, ys in zip(ddata["icoord"], ddata["dcoord"], strict=False):
        fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", line={"color": "steelblue"}, showlegend=False))
    # Size the tick positions from the labels scipy actually returned rather
    # than from len(self.assets): truncation kwargs can reduce the number of
    # displayed leaves, and tick positions must stay paired with their labels.
    labels = ddata["ivl"]
    fig.update_layout(
        xaxis={
            "tickmode": "array",
            "tickvals": [5 + 10 * i for i in range(len(labels))],
            "ticktext": labels,
            "tickangle": -90,
        },
    )
    return fig

build_tree(cor, method='ward', bisection=False)

Build hierarchical cluster tree from correlation matrix.

This function converts a correlation matrix to a distance matrix, performs hierarchical clustering, and returns a Dendrogram object containing the resulting tree structure.

Parameters:

Name Type Description Default
cor DataFrame

Correlation matrix of asset returns (columns are assets)

required
method Literal['single', 'complete', 'average', 'ward']

Linkage method for hierarchical clustering - "single": minimum distance between points (nearest neighbor) - "complete": maximum distance between points (furthest neighbor) - "average": average distance between all points - "ward": Ward variance minimization

'ward'
bisection bool

Whether to use bisection method for tree construction

False

Returns:

Name Type Description
Dendrogram Dendrogram

Object containing the hierarchical clustering tree, with: - root: Root cluster node - linkage: Linkage matrix for plotting - assets: List of assets - method: Clustering method used - distance: Distance matrix

Examples:

>>> import polars as pl
>>> from pyhrp.hrp import build_tree
>>> cor = pl.DataFrame({"A": [1.0, 0.5], "B": [0.5, 1.0]})
>>> dg = build_tree(cor, method="ward")
>>> dg.root.leaf_count
2
Source code in src/pyhrp/hrp.py
def build_tree(
    cor: pl.DataFrame, method: Literal["single", "complete", "average", "ward"] = "ward", bisection: bool = False
) -> Dendrogram:
    """Build hierarchical cluster tree from correlation matrix.

    This function converts a correlation matrix to a distance matrix, performs
    hierarchical clustering, and returns a Dendrogram object containing the
    resulting tree structure.

    Args:
        cor (pl.DataFrame): Correlation matrix of asset returns (columns are assets)
        method (Literal["single", "complete", "average", "ward"]): Linkage method for hierarchical clustering
            - "single": minimum distance between points (nearest neighbor)
            - "complete": maximum distance between points (furthest neighbor)
            - "average": average distance between all points
            - "ward": Ward variance minimization
        bisection (bool): Whether to use bisection method for tree construction

    Returns:
        Dendrogram: Object containing the hierarchical clustering tree, with:
            - root: Root cluster node
            - linkage: Linkage matrix for plotting
            - assets: List of assets
            - method: Clustering method used
            - distance: Distance matrix

    Raises:
        TypeError: If ``cor`` is not a polars DataFrame.
        ValueError: If ``cor`` has fewer than two columns, is not square, or
            contains non-finite values.

    Examples:
        >>> import polars as pl
        >>> from pyhrp.hrp import build_tree
        >>> cor = pl.DataFrame({"A": [1.0, 0.5], "B": [0.5, 1.0]})
        >>> dg = build_tree(cor, method="ward")
        >>> dg.root.leaf_count
        2
    """
    if not isinstance(cor, pl.DataFrame):
        raise TypeError("Correlation matrix must be a polars DataFrame.")  # noqa: TRY003
    if len(cor.columns) < 2:
        msg = "Correlation matrix must contain at least two assets."
        raise ValueError(msg)
    c = cor.to_numpy()
    # A correlation matrix has one row per column. Reject anything else up front
    # so the diagonal check and the clustering below never see a malformed input.
    if c.shape[0] != c.shape[1]:
        msg = f"Correlation matrix must be square, got shape {c.shape}."
        raise ValueError(msg)
    # Report offending assets by name when the diagonal itself is non-finite.
    bad = [col for col, diag in zip(cor.columns, np.diagonal(c), strict=True) if not np.isfinite(diag)]
    if bad:
        msg = (
            f"Correlation matrix contains non-finite values for assets {bad}; "
            "constant (zero-variance) price series produce NaN correlations."
        )
        raise ValueError(msg)
    if not np.isfinite(c).all():
        msg = "Correlation matrix contains non-finite values."
        raise ValueError(msg)
    dist = _compute_distance_matrix(cor)
    links = sch.linkage(ssd.squareform(dist.to_numpy(), checks=False), method=method)

    # Convert scipy tree to our Cluster format
    def to_cluster(node: sch.ClusterNode) -> Cluster:
        """Convert a scipy ClusterNode to our Cluster format.

        Args:
            node (sch.ClusterNode): A node from scipy's hierarchical clustering

        Returns:
            Cluster: Equivalent node in our Cluster format
        """
        if node.left is not None and node.right is not None:
            left = to_cluster(node.left)
            right = to_cluster(node.right)
            return Cluster(value=node.id, left=left, right=right)
        return Cluster(value=node.id)

    root = to_cluster(sch.to_tree(links, rd=False))

    # Apply bisection if requested
    if bisection:
        # Rebuild the tree from the clustered leaf order and derive a matching
        # linkage matrix, replacing the one produced by scipy above.
        leaf_ids: list[int] = [int(node.value) for node in root.leaves]
        root, _ = _bisect_tree(ids=leaf_ids, next_id=max(leaf_ids))
        links = np.array(_get_linkage(root))

    return Dendrogram(root=root, linkage=links, method=method, distance=dist, assets=cor.columns)

compute_corr(df)

Compute correlation matrix from a DataFrame of returns.

Source code in src/pyhrp/hrp.py
def compute_corr(df: pl.DataFrame) -> pl.DataFrame:
    """Return the correlation matrix of the columns of ``df``.

    Args:
        df (pl.DataFrame): Returns, one column per asset.

    Returns:
        pl.DataFrame: Correlation matrix whose columns carry the names of
        the columns of ``df``.
    """
    # Transpose so each asset is a row, which is what np.corrcoef treats as a
    # variable; atleast_2d keeps a single-asset result two-dimensional.
    matrix = np.atleast_2d(np.corrcoef(df.to_numpy().T))
    by_asset = dict(zip(df.columns, matrix, strict=True))
    return pl.DataFrame(by_asset)

compute_cov(df)

Compute covariance matrix from a DataFrame of returns.

Source code in src/pyhrp/hrp.py
def compute_cov(df: pl.DataFrame) -> pl.DataFrame:
    """Return the covariance matrix of the columns of ``df``.

    Args:
        df (pl.DataFrame): Returns, one column per asset.

    Returns:
        pl.DataFrame: Covariance matrix whose columns carry the names of
        the columns of ``df``.
    """
    # Transpose so each asset is a row, which is what np.cov treats as a
    # variable; atleast_2d keeps a single-asset result two-dimensional.
    matrix = np.atleast_2d(np.cov(df.to_numpy().T))
    by_asset = dict(zip(df.columns, matrix, strict=True))
    return pl.DataFrame(by_asset)

hrp(prices, node=None, method='ward', bisection=False)

Compute the hierarchical risk parity portfolio weights.

This is the main entry point for the HRP algorithm. It calculates returns from prices, builds a hierarchical clustering tree if not provided, and applies risk parity weights.

Parameters:

Name Type Description Default
prices DataFrame

Asset price time series (columns are assets, rows are dates)

required
node Cluster

Root node of the hierarchical clustering tree. If None, a tree will be built from the correlation matrix.

None
method Literal['single', 'complete', 'average', 'ward']

Linkage method to use for distance calculation - "single": minimum distance between points (nearest neighbor) - "complete": maximum distance between points (furthest neighbor) - "average": average distance between all points - "ward": Ward variance minimization

'ward'
bisection bool

Whether to use bisection method for tree construction

False

Returns:

Name Type Description
Cluster Cluster

The root cluster with portfolio weights assigned according to HRP

Examples:

>>> import polars as pl
>>> from pyhrp.hrp import hrp
>>> prices = pl.DataFrame({"A": [100.0, 101.0, 99.0, 102.0], "B": [50.0, 51.0, 49.0, 52.0]})
>>> root = hrp(prices, method="ward")
>>> round(sum(root.portfolio.weights.values()), 6)
1.0
Source code in src/pyhrp/hrp.py
def hrp(
    prices: pl.DataFrame,
    node: Cluster | None = None,
    method: Literal["single", "complete", "average", "ward"] = "ward",
    bisection: bool = False,
) -> Cluster:
    """Compute the hierarchical risk parity portfolio weights.

    This is the main entry point for the HRP algorithm. It calculates returns from prices,
    builds a hierarchical clustering tree if not provided, and applies risk parity weights.

    Args:
        prices (pl.DataFrame): Asset price time series (columns are assets, rows are dates)
        node (Cluster, optional): Root node of the hierarchical clustering tree.
            If None, a tree will be built from the correlation matrix. When a tree
            is supplied, ``method`` and ``bisection`` are not used.
        method (Literal["single", "complete", "average", "ward"]): Linkage method to use for distance calculation
            - "single": minimum distance between points (nearest neighbor)
            - "complete": maximum distance between points (furthest neighbor)
            - "average": average distance between all points
            - "ward": Ward variance minimization
        bisection (bool): Whether to use bisection method for tree construction

    Returns:
        Cluster: The root cluster with portfolio weights assigned according to HRP

    Examples:
        >>> import polars as pl
        >>> from pyhrp.hrp import hrp
        >>> prices = pl.DataFrame({"A": [100.0, 101.0, 99.0, 102.0], "B": [50.0, 51.0, 49.0, 52.0]})
        >>> root = hrp(prices, method="ward")
        >>> round(sum(root.portfolio.weights.values()), 6)
        1.0
    """
    returns = _returns(prices)
    cov = compute_cov(returns)

    # Test against None explicitly so a caller-supplied tree is never discarded
    # because of its truthiness, and only pay for the correlation matrix when a
    # tree actually has to be built.
    if node is None:
        cor = compute_corr(returns)
        node = build_tree(cor, method=method, bisection=bisection).root

    return risk_parity(root=node, cov=cov)

schur_hrp(prices, node=None, method='ward', bisection=False, gamma=0.5)

Compute Schur Complementary Allocation portfolio weights.

Extends HRP by augmenting each sub-covariance block with off-diagonal information via Schur complements before splitting risk between clusters. Introduced by Peter Cotton (arXiv:2411.05807). At gamma=0 this is identical to HRP; at gamma=1 it recovers the global minimum-variance portfolio through the same recursive hierarchy.

Parameters:

Name Type Description Default
prices DataFrame

Asset price time series (columns are assets, rows are dates)

required
node Cluster

Root node of the hierarchical clustering tree. If None, a tree will be built from the correlation matrix.

None
method Literal['single', 'complete', 'average', 'ward']

Linkage method for clustering

'ward'
bisection bool

Whether to use bisection method for tree construction

False
gamma float

Schur interpolation parameter in [0, 1]. 0 recovers standard HRP; 1 recovers minimum-variance portfolio.

0.5

Returns:

Name Type Description
Cluster Cluster

The root cluster with portfolio weights assigned

Examples:

>>> import polars as pl
>>> from pyhrp.hrp import schur_hrp
>>> prices = pl.DataFrame({"A": [100.0, 101.0, 99.0, 102.0], "B": [50.0, 51.0, 49.0, 52.0]})
>>> root = schur_hrp(prices, method="ward", gamma=0.5)
>>> round(sum(root.portfolio.weights.values()), 6)
1.0
Source code in src/pyhrp/hrp.py
def schur_hrp(
    prices: pl.DataFrame,
    node: Cluster | None = None,
    method: Literal["single", "complete", "average", "ward"] = "ward",
    bisection: bool = False,
    gamma: float = 0.5,
) -> Cluster:
    """Compute Schur Complementary Allocation portfolio weights.

    Extends HRP by augmenting each sub-covariance block with off-diagonal information
    via Schur complements before splitting risk between clusters. Introduced by Peter Cotton
    (arXiv:2411.05807). At gamma=0 this is identical to HRP; at gamma=1 it recovers the
    global minimum-variance portfolio through the same recursive hierarchy.

    Args:
        prices (pl.DataFrame): Asset price time series (columns are assets, rows are dates)
        node (Cluster, optional): Root node of the hierarchical clustering tree.
            If None, a tree will be built from the correlation matrix. When a tree
            is supplied, ``method`` and ``bisection`` are not used.
        method (Literal["single", "complete", "average", "ward"]): Linkage method for clustering
        bisection (bool): Whether to use bisection method for tree construction
        gamma (float): Schur interpolation parameter in [0, 1].
            0 recovers standard HRP; 1 recovers minimum-variance portfolio.

    Returns:
        Cluster: The root cluster with portfolio weights assigned

    Examples:
        >>> import polars as pl
        >>> from pyhrp.hrp import schur_hrp
        >>> prices = pl.DataFrame({"A": [100.0, 101.0, 99.0, 102.0], "B": [50.0, 51.0, 49.0, 52.0]})
        >>> root = schur_hrp(prices, method="ward", gamma=0.5)
        >>> round(sum(root.portfolio.weights.values()), 6)
        1.0
    """
    returns = _returns(prices)
    cov = compute_cov(returns)

    # Test against None explicitly so a caller-supplied tree is never discarded
    # because of its truthiness, and only pay for the correlation matrix when a
    # tree actually has to be built.
    if node is None:
        cor = compute_corr(returns)
        node = build_tree(cor, method=method, bisection=bisection).root

    return schur_risk_parity(root=node, cov=cov, gamma=gamma)

Portfolio optimization algorithms for hierarchical risk parity.

This module implements various portfolio optimization algorithms:

- risk_parity: The main hierarchical risk parity algorithm
- schur_risk_parity: Schur Complementary Allocation (Cotton, arXiv:2411.05807)
- one_over_n: A simple equal-weight allocation strategy

one_over_n(dendrogram)

Generate portfolios using the 1/N (equal weight) strategy at each tree level.

This function implements a hierarchical 1/N strategy where weights are distributed equally among assets within each cluster at each level of the tree. The weight assigned to each cluster decreases by half at each level.

Parameters:

Name Type Description Default
dendrogram Dendrogram

A dendrogram object containing the hierarchical clustering tree and the list of assets

required

Yields:

Type Description
Generator[tuple[int, Portfolio]]

tuple[int, Portfolio]: A tuple containing the level number and the portfolio at that level

Examples:

>>> import polars as pl
>>> from pyhrp.hrp import build_tree
>>> from pyhrp.algos import one_over_n
>>> cor = pl.DataFrame({"A": [1.0, 0.3], "B": [0.3, 1.0]})
>>> dg = build_tree(cor, method="ward")
>>> levels = list(one_over_n(dg))
>>> len(levels) > 0
True
Source code in src/pyhrp/algos.py
def one_over_n(dendrogram: Dendrogram) -> Generator[tuple[int, Portfolio]]:
    """Yield equal-weight (1/N) portfolios, one per level of the cluster tree.

    Walks the tree level by level. At each level every node spreads the
    current weight budget evenly over its own leaves, and the budget is
    halved before moving on to the next level.

    Note:
        Weights are written into the portfolio of the dendrogram's root
        node; each yielded portfolio is a deep copy of it taken after the
        level has been processed.

    Args:
        dendrogram: A dendrogram object containing the hierarchical clustering tree
                   and the list of assets

    Yields:
        tuple[int, Portfolio]: A tuple containing the level number and the portfolio
                              at that level

    Examples:
        >>> import polars as pl
        >>> from pyhrp.hrp import build_tree
        >>> from pyhrp.algos import one_over_n
        >>> cor = pl.DataFrame({"A": [1.0, 0.3], "B": [0.3, 1.0]})
        >>> dg = build_tree(cor, method="ward")
        >>> levels = list(one_over_n(dg))
        >>> len(levels) > 0
        True
    """
    tree = dendrogram.root
    names = dendrogram.assets

    # Weight budget handed to every node of the level being processed
    budget: float = 1.0

    for depth, nodes in enumerate(tree.levels):
        for cluster in nodes:
            # Every leaf below this node receives an equal share of the budget
            for leaf in cluster.leaves:
                tree.portfolio[names[leaf.value]] = budget / cluster.leaf_count

        # The next (deeper) level works with half of the budget
        budget *= 0.5

        # Hand out a snapshot so later levels cannot alter what was yielded
        yield depth, deepcopy(tree.portfolio)

risk_parity(root, cov)

Compute hierarchical risk parity weights for a cluster tree.

This is the main algorithm for hierarchical risk parity. It recursively traverses the cluster tree and assigns weights to each node based on the risk parity principle.

Note

The tree is modified in place: the portfolio of every node is rebuilt from scratch, so the function is idempotent and a tree can be reused with a different covariance matrix.

Parameters:

Name Type Description Default
root Cluster

The root node of the cluster tree

required
cov DataFrame

Covariance matrix of asset returns

required

Returns:

Name Type Description
Cluster Cluster

The root node with portfolio weights assigned

Examples:

>>> import polars as pl
>>> from pyhrp.cluster import Cluster
>>> from pyhrp.algos import risk_parity
>>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
>>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
>>> cluster = risk_parity(root=root, cov=cov)
>>> round(cluster.portfolio["B"], 1)
0.8
Source code in src/pyhrp/algos.py
def risk_parity(root: Cluster, cov: pl.DataFrame) -> Cluster:
    """Assign hierarchical risk parity weights to every node of a cluster tree.

    The tree is processed via ``_allocate``; for each non-leaf cluster the
    portfolios of its two children are merged using an inverse-variance
    split based on each child's portfolio variance under ``cov``.

    Note:
        The tree is modified in place: the portfolio of every node is rebuilt
        from scratch, so the function is idempotent and a tree can be reused
        with a different covariance matrix.

    Args:
        root (Cluster): The root node of the cluster tree
        cov (pl.DataFrame): Covariance matrix of asset returns

    Returns:
        Cluster: The root node with portfolio weights assigned

    Examples:
        >>> import polars as pl
        >>> from pyhrp.cluster import Cluster
        >>> from pyhrp.algos import risk_parity
        >>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
        >>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
        >>> cluster = risk_parity(root=root, cov=cov)
        >>> round(cluster.portfolio["B"], 1)
        0.8
    """
    columns = cov.columns
    matrix = cov.to_numpy()
    # Map each asset name to its row/column position in the covariance matrix
    position = dict(zip(columns, range(len(columns)), strict=True))

    def merge(cluster: Cluster) -> Cluster:
        """Split weight between the two children in proportion to inverse variance."""
        first, second = _children(cluster)
        var_first = _block_variance(first.portfolio, matrix, position)
        var_second = _block_variance(second.portfolio, matrix, position)
        return _split(cluster, var_first, var_second)

    return _allocate(root, columns, merge)

schur_risk_parity(root, cov, gamma=0.5)

Compute Schur Complementary Allocation weights for a cluster tree.

An extension of HRP introduced by Peter Cotton (arXiv:2411.05807) that augments sub-covariance matrices with off-diagonal block information via Schur complements. At gamma=0 this recovers standard HRP; at gamma=1 it recovers the minimum-variance portfolio through the same recursive structure.

Note

The tree is modified in place: the portfolio of every node is rebuilt from scratch, so the function is idempotent and a tree can be reused with a different covariance matrix or gamma.

Parameters:

Name Type Description Default
root Cluster

The root node of the cluster tree

required
cov DataFrame

Covariance matrix of asset returns

required
gamma float

Interpolation parameter in [0, 1]. 0 = HRP, 1 = minimum variance.

0.5

Returns:

Name Type Description
Cluster Cluster

The root node with portfolio weights assigned

Raises:

Type Description
ValueError

If gamma is outside the interval [0, 1].

Examples:

>>> import polars as pl
>>> from pyhrp.cluster import Cluster
>>> from pyhrp.algos import schur_risk_parity
>>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
>>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
>>> cluster = schur_risk_parity(root=root, cov=cov, gamma=0.5)
>>> round(cluster.portfolio["B"], 1)
0.8
Source code in src/pyhrp/algos.py
def schur_risk_parity(root: Cluster, cov: pl.DataFrame, gamma: float = 0.5) -> Cluster:
    """Assign Schur Complementary Allocation weights to a cluster tree.

    An extension of HRP introduced by Peter Cotton (arXiv:2411.05807). Before
    the risk split between two sibling clusters, each sibling's covariance
    block is reduced by ``gamma`` times its Schur-complement correction with
    respect to the other sibling. At gamma=0 this recovers standard HRP; at
    gamma=1 it recovers the minimum-variance portfolio through the same
    recursive structure.

    Note:
        The tree is modified in place: the portfolio of every node is rebuilt
        from scratch, so the function is idempotent and a tree can be reused
        with a different covariance matrix or gamma.

    Args:
        root (Cluster): The root node of the cluster tree
        cov (pl.DataFrame): Covariance matrix of asset returns
        gamma (float): Interpolation parameter in [0, 1]. 0 = HRP, 1 = minimum variance.

    Returns:
        Cluster: The root node with portfolio weights assigned

    Raises:
        ValueError: If gamma is outside the interval [0, 1].

    Examples:
        >>> import polars as pl
        >>> from pyhrp.cluster import Cluster
        >>> from pyhrp.algos import schur_risk_parity
        >>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
        >>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
        >>> cluster = schur_risk_parity(root=root, cov=cov, gamma=0.5)
        >>> round(cluster.portfolio["B"], 1)
        0.8
    """
    # Written as a chained comparison on purpose: a NaN gamma fails it and is
    # therefore rejected as well.
    if not 0.0 <= gamma <= 1.0:
        msg = f"gamma must be in [0, 1], got {gamma}"
        raise ValueError(msg)

    columns = cov.columns
    matrix = cov.to_numpy()
    # Map each asset name to its row/column position in the covariance matrix
    position = dict(zip(columns, range(len(columns)), strict=True))

    def merge(cluster: Cluster) -> Cluster:
        """Split weight between the two children using Schur-augmented variances."""
        first, second = _children(cluster)
        first_assets = first.portfolio.assets
        second_assets = second.portfolio.assets

        rows_first = [position[name] for name in first_assets]
        rows_second = [position[name] for name in second_assets]

        # Partition the covariance into own blocks and the cross block
        block_first = matrix[np.ix_(rows_first, rows_first)]
        block_cross = matrix[np.ix_(rows_first, rows_second)]
        block_second = matrix[np.ix_(rows_second, rows_second)]

        weights_first = np.array([first.portfolio[name] for name in first_assets])
        weights_second = np.array([second.portfolio[name] for name in second_assets])

        # Condition each group on the other, scaled by gamma
        aug_first = block_first - gamma * (block_cross @ _solve(block_second, block_cross.T))
        aug_second = block_second - gamma * (block_cross.T @ _solve(block_first, block_cross))

        var_first = float(weights_first @ aug_first @ weights_first)
        var_second = float(weights_second @ aug_second @ weights_second)
        return _split(cluster, var_first, var_second)

    return _allocate(root, columns, merge)

Data structures for hierarchical risk parity portfolio optimization.

This module defines the core data structures used in the hierarchical risk parity algorithm:

- Portfolio: Manages a collection of asset weights (strings identify assets)
- Cluster: Represents a node in the hierarchical clustering tree

Cluster

Bases: Node[int]

Represents a cluster in the hierarchical clustering tree.

Clusters are the nodes of the graphs we build. Each cluster is aware of the left and the right cluster it is connecting to. Each cluster also has an associated portfolio.

Attributes:

Name Type Description
portfolio Portfolio

The portfolio associated with this cluster

Source code in src/pyhrp/cluster.py
class Cluster(Node[int]):
    """A node of the hierarchical clustering tree.

    Every cluster knows the left and right cluster it joins (both absent
    for a leaf) and owns a portfolio of its own.

    Attributes:
        portfolio (Portfolio): The portfolio associated with this cluster
    """

    def __init__(self, value: int, left: Cluster | None = None, right: Cluster | None = None) -> None:
        """Create a cluster with an empty portfolio.

        Args:
            value (int): The identifier for this cluster
            left (Cluster, optional): The left child cluster
            right (Cluster, optional): The right child cluster
        """
        super().__init__(value=value, left=left, right=right)
        self.portfolio = Portfolio()

    # Overrides Node.leaves to narrow the return type to list[Cluster] and to
    # validate tree integrity; the left-to-right order is unchanged.
    @property
    def leaves(self) -> list[Cluster]:
        """Collect every leaf below this cluster in left-to-right dendrogram order.

        Returns:
            list[Cluster]: List of all leaf nodes reachable from this cluster

        Raises:
            ValueError: If a non-leaf cluster is missing its left or right child.
            TypeError: If a child of a non-leaf cluster is not a Cluster.
        """
        if self.is_leaf:
            return [self]

        first, second = self.left, self.right
        if first is None:
            raise ValueError("Expected left child to exist for non-leaf cluster")  # noqa: TRY003
        if second is None:
            raise ValueError("Expected right child to exist for non-leaf cluster")  # noqa: TRY003
        if not isinstance(first, Cluster):
            raise TypeError(f"Expected left child to be a Cluster for node {self.value}")  # noqa: TRY003
        if not isinstance(second, Cluster):
            raise TypeError(f"Expected right child to be a Cluster for node {self.value}")  # noqa: TRY003
        return first.leaves + second.leaves

leaves property

Get all reachable leaf nodes in left-to-right dendrogram order.

Returns:

Type Description
list[Cluster]

list[Cluster]: List of all leaf nodes reachable from this cluster

__init__(value, left=None, right=None)

Initialize a new Cluster.

Parameters:

Name Type Description Default
value int

The identifier for this cluster

required
left Cluster

The left child cluster

None
right Cluster

The right child cluster

None
Source code in src/pyhrp/cluster.py
def __init__(self, value: int, left: Cluster | None = None, right: Cluster | None = None) -> None:
    """Initialize a new Cluster with an empty portfolio.

    Args:
        value (int): The identifier for this cluster
        left (Cluster, optional): The left child cluster
        right (Cluster, optional): The right child cluster
    """
    # Tree wiring (value and children) is delegated to the Node base class.
    super().__init__(value=value, left=left, right=right)
    # Every cluster starts with its own, initially empty, portfolio.
    self.portfolio = Portfolio()

Portfolio dataclass

Container for portfolio asset weights.

This lightweight class stores and manipulates a mapping from asset names to their portfolio weights, and provides convenience helpers for analysis and visualization.

Attributes:

Name Type Description
_weights dict[str, float]

Internal mapping from asset symbol to weight.

Source code in src/pyhrp/cluster.py
@dataclass
class Portfolio:
    """Mapping of asset names to portfolio weights with small analysis helpers.

    Weights are kept in a plain dictionary; the class adds item access,
    a sorted view of the weights, a variance calculation and a bar-chart plot.

    Attributes:
        _weights (dict[str, float]): Internal mapping from asset symbol to weight.
    """

    _weights: dict[str, float] = field(default_factory=dict)

    @property
    def assets(self) -> list[str]:
        """Names of the assets currently held.

        Returns:
            list[str]: Asset identifiers in the order they were inserted.
        """
        return [*self._weights]

    def variance(self, cov: pl.DataFrame) -> float:
        """Compute the portfolio variance ``w' C w``.

        Args:
            cov (pl.DataFrame): Covariance matrix. Each asset is located by its
                column name, and the same position is used for its row.

        Returns:
            float: Portfolio variance

        Raises:
            KeyError: If a held asset is not among the columns of ``cov``.
        """
        # Map every column name to its position in the covariance matrix.
        position = {column: pos for pos, column in enumerate(cov.columns)}
        held = self.assets
        picks = [position[name] for name in held]
        # Restrict the full matrix to the rows/columns of the held assets.
        sub_cov = cov.to_numpy()[np.ix_(picks, picks)]
        weight_vec = np.array([self._weights[name] for name in held])
        return float(weight_vec @ sub_cov @ weight_vec)

    def __getitem__(self, item: str) -> float:
        """Look up the weight stored for one asset.

        Args:
            item (str): Asset name/symbol.

        Returns:
            float: Weight of that asset.

        Raises:
            KeyError: If the asset is not present in the portfolio.
        """
        weight = self._weights[item]
        return weight

    def __setitem__(self, key: str, value: float) -> None:
        """Store (or overwrite) the weight of an asset.

        Args:
            key (str): Asset name/symbol.
            value (float): Portfolio weight for the asset.
        """
        store = self._weights
        store[key] = value

    @property
    def weights(self) -> dict[str, float]:
        """Return a new dict of all weights ordered alphabetically by asset name.

        Returns:
            dict[str, float]: Mapping from asset name to weight, sorted by name.
        """
        return {name: self._weights[name] for name in sorted(self._weights)}

    def plot(self, names: list[str]) -> go.Figure:
        """Draw the weights of the requested assets as a bar chart.

        Args:
            names (list[str]): Asset names to show, in plotting order

        Returns:
            go.Figure: The plotly figure
        """
        lookup = self.weights
        # Heights are resolved first, so an unknown name raises KeyError before any figure is built.
        heights = [lookup[name] for name in names]
        bars = go.Bar(x=names, y=heights, marker_color="steelblue")
        figure = go.Figure(bars)
        figure.update_layout(xaxis={"tickangle": -90})
        return figure

assets property

List of asset names present in the portfolio.

Returns:

Type Description
list[str]

list[str]: Asset identifiers in insertion order (Python 3.7+ dict order).

weights property

Get all weights as a dict sorted alphabetically by asset name.

Returns:

Type Description
dict[str, float]

dict[str, float]: Mapping from asset name to weight, sorted by name.

__getitem__(item)

Return the weight for a given asset.

Parameters:

Name Type Description Default
item str

Asset name/symbol.

required

Returns:

Name Type Description
float float

The weight associated with the asset.

Raises:

Type Description
KeyError

If the asset is not present in the portfolio.

Source code in src/pyhrp/cluster.py
def __getitem__(self, item: str) -> float:
    """Look up the weight stored for one asset.

    Args:
        item (str): Asset name/symbol.

    Returns:
        float: Weight of that asset.

    Raises:
        KeyError: If the asset is not present in the portfolio.
    """
    weight = self._weights[item]
    return weight

__setitem__(key, value)

Set or update the weight for an asset.

Parameters:

Name Type Description Default
key str

Asset name/symbol.

required
value float

Portfolio weight for the asset.

required
Source code in src/pyhrp/cluster.py
def __setitem__(self, key: str, value: float) -> None:
    """Store (or overwrite) the weight of an asset.

    Args:
        key (str): Asset name/symbol.
        value (float): Portfolio weight for the asset.
    """
    store = self._weights
    store[key] = value

plot(names)

Plot the portfolio weights as a bar chart.

Parameters:

Name Type Description Default
names list[str]

List of asset names to include in the plot

required

Returns:

Type Description
Figure

go.Figure: The plotly figure

Source code in src/pyhrp/cluster.py
def plot(self, names: list[str]) -> go.Figure:
    """Draw the weights of the requested assets as a bar chart.

    Args:
        names (list[str]): Asset names to show, in plotting order

    Returns:
        go.Figure: The plotly figure
    """
    lookup = self.weights
    # Heights are resolved first, so an unknown name raises KeyError before any figure is built.
    heights = [lookup[name] for name in names]
    bars = go.Bar(x=names, y=heights, marker_color="steelblue")
    figure = go.Figure(bars)
    figure.update_layout(xaxis={"tickangle": -90})
    return figure

variance(cov)

Calculate the variance of the portfolio.

Parameters:

Name Type Description Default
cov DataFrame

Covariance matrix whose rows are in the same asset order as its columns; each asset is located by its column name.

required

Returns:

Name Type Description
float float

Portfolio variance

Source code in src/pyhrp/cluster.py
def variance(self, cov: pl.DataFrame) -> float:
    """Compute the portfolio variance ``w' C w``.

    Args:
        cov (pl.DataFrame): Covariance matrix. Each asset is located by its
            column name, and the same position is used for its row.

    Returns:
        float: Portfolio variance

    Raises:
        KeyError: If a held asset is not among the columns of ``cov``.
    """
    held = self.assets
    # Map every column name to its position in the covariance matrix.
    position = {column: pos for pos, column in enumerate(cov.columns)}
    picks = [position[name] for name in held]
    # Restrict the full matrix to the rows/columns of the held assets.
    sub_cov = cov.to_numpy()[np.ix_(picks, picks)]
    weight_vec = np.array([self._weights[name] for name in held])
    return float(weight_vec @ sub_cov @ weight_vec)

A lightweight binary tree implementation to replace the binarytree dependency.

This module provides a simple Node class that can be used to create binary trees. It implements only the functionality needed by the pyhrp package.

Node

A binary tree node with left and right children.

This class implements the minimal functionality needed from the binarytree.Node class that is used in the pyhrp package.

Attributes:

Name Type Description
value

The value of the node

left

The left child node

right

The right child node

Source code in src/pyhrp/treelib.py
class Node[T: NodeValue]:
    """A binary tree node with left and right children.

    This class implements the minimal functionality needed from the binarytree.Node class
    that is used in the pyhrp package.

    Attributes:
        value: The value of the node
        left: The left child node
        right: The right child node
    """

    def __init__(self, value: T, left: Node[T] | None = None, right: Node[T] | None = None) -> None:
        """Initialize a new Node.

        Args:
            value: The value of the node
            left: The left child node
            right: The right child node
        """
        self.value = value
        self.left = left
        self.right = right

    @property
    def is_leaf(self) -> bool:
        """Check if this node is a leaf node (has no children).

        Returns:
            bool: True if this is a leaf node, False otherwise
        """
        return self.left is None and self.right is None

    @property
    def leaves(self) -> Sequence[Node[T]]:
        """Get all leaf nodes in the tree rooted at this node.

        Returns:
            List[Node]: List of all leaf nodes
        """
        if self.is_leaf:
            return [self]

        result: list[Node[T]] = []
        if self.left:
            result.extend(self.left.leaves)
        if self.right:
            result.extend(self.right.leaves)

        return result

    @property
    def levels(self) -> list[list[Node[T]]]:
        """Get nodes by level in the tree.

        Returns:
            List[List[Node]]: List of lists of nodes at each level
        """
        result: list[list[Node[T]]] = []
        current_level: list[Node[T]] = [self]

        while current_level:
            result.append(current_level)
            next_level = []

            for node in current_level:
                if node.left:
                    next_level.append(node.left)
                if node.right:
                    next_level.append(node.right)

            current_level = next_level

        return result

    @property
    def leaf_count(self) -> int:
        """Count the number of leaf nodes in the tree.

        Returns:
            int: Number of leaf nodes
        """
        return len(self.leaves)

    @property
    def size(self) -> int:
        """Count the total number of nodes in the tree.

        Returns:
            int: Total number of nodes
        """
        size = 1  # Count this node
        if self.left:
            size += self.left.size
        if self.right:
            size += self.right.size
        return size

    def __iter__(self) -> Iterator[Node[T]]:
        """Iterate through all nodes in the tree in level-order.

        Returns:
            Iterator[Node]: Iterator over all nodes
        """
        queue: deque[Node[T]] = deque([self])
        while queue:
            node = queue.popleft()
            yield node
            if node.left:
                queue.append(node.left)
            if node.right:
                queue.append(node.right)

is_leaf property

Check if this node is a leaf node (has no children).

Returns:

Name Type Description
bool bool

True if this is a leaf node, False otherwise

leaf_count property

Count the number of leaf nodes in the tree.

Returns:

Name Type Description
int int

Number of leaf nodes

leaves property

Get all leaf nodes in the tree rooted at this node.

Returns:

Type Description
Sequence[Node[T]]

List[Node]: List of all leaf nodes

levels property

Get nodes by level in the tree.

Returns:

Type Description
list[list[Node[T]]]

List[List[Node]]: List of lists of nodes at each level

size property

Count the total number of nodes in the tree.

Returns:

Name Type Description
int int

Total number of nodes

__init__(value, left=None, right=None)

Initialize a new Node.

Parameters:

Name Type Description Default
value T

The value of the node

required
left Node[T] | None

The left child node

None
right Node[T] | None

The right child node

None
Source code in src/pyhrp/treelib.py
def __init__(self, value: T, left: Node[T] | None = None, right: Node[T] | None = None) -> None:
    """Set up a node from its value and optional children.

    Args:
        value: The value of the node
        left: The left child node, None when there is none
        right: The right child node, None when there is none
    """
    # Store all three attributes in one tuple assignment (value, then left, then right).
    self.value, self.left, self.right = value, left, right

__iter__()

Iterate through all nodes in the tree in level-order.

Returns:

Type Description
Iterator[Node[T]]

Iterator[Node]: Iterator over all nodes

Source code in src/pyhrp/treelib.py
def __iter__(self) -> Iterator[Node[T]]:
    """Yield every node of the tree in level-order (breadth-first).

    Returns:
        Iterator[Node]: Iterator over all nodes, starting with this node
    """
    pending: deque[Node[T]] = deque((self,))
    while pending:
        current = pending.popleft()
        yield current
        # Queue the children left-to-right; absent children are skipped.
        for child in (current.left, current.right):
            if child:
                pending.append(child)