Source code for lymph.models.bilateral

"""Module for bilateral lymphatic tumor progression models."""

from __future__ import annotations

import logging
import warnings
from collections.abc import Iterable, Sequence
from typing import Any, Literal

import numpy as np
import pandas as pd

from lymph import diagnosis_times, matrix, modalities, models, types, utils

warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
logger = logging.getLogger(__name__)


[docs] class Bilateral( diagnosis_times.Composite, modalities.Composite, types.Model, ): """Class that models metastatic progression in a bilateral lymphatic system. This is achieved by creating two instances of the :py:class:`~lymph.models.Unilateral` model, one for the ipsi- and one for the contralateral side of the neck. The two sides are assumed to be independent of each other, given the diagnosis time over which we marginalize. .. seealso:: :py:class:`~lymph.models.Unilateral` Two instances of this class are created as attributes. One for the ipsi- and one for the contralateral side of the neck. """
[docs] def __init__( self, graph_dict: types.GraphDictType, is_symmetric: dict[str, bool] | None = None, named_params: Sequence[str] | None = None, uni_kwargs: dict[str, Any] | None = None, ipsi_kwargs: dict[str, Any] | None = None, contra_kwargs: dict[str, Any] | None = None, **_kwargs, ) -> None: """Initialize both sides of the neck as :py:class:`.models.Unilateral`. The ``graph_dict`` is a dictionary of tuples as keys and lists of strings as values. It is passed to both :py:class:`.models.Unilateral` instances, which in turn pass it to the :py:class:`.graph.Representation` class that stores the graph. With the dictionary ``is_symmetric`` the user can specify which aspects of the model are symmetric. Valid keys are ``"tumor_spread"`` and ``"lnl_spread"``. The values are booleans, with ``True`` meaning that the aspect is symmetric. .. note:: The symmetries of tumor and LNL spread are only guaranteed if the respective parameters are set via the :py:meth:`.set_params()` method of this bilateral model. It is still possible to set different parameters for the ipsi- and contralateral side by using their respective :py:meth:`.Unilateral.set_params()` method. The ``uni_kwargs`` are passed to both instances of the unilateral model, while the ``ipsi_kwargs`` and ``contra_kwargs`` are passed to the ipsi- and contralateral side, respectively. The ipsi- and contralateral kwargs override the unilateral kwargs and may also override the ``graph_dict``. This allows the user to specify different graphs for the two sides of the neck. """ self._init_models( graph_dict=graph_dict, uni_kwargs=uni_kwargs, ipsi_kwargs=ipsi_kwargs, contra_kwargs=contra_kwargs, ) is_symmetric = is_symmetric or {} is_symmetric["tumor_spread"] = is_symmetric.get("tumor_spread", False) is_symmetric["lnl_spread"] = is_symmetric.get("lnl_spread", True) self.is_symmetric = is_symmetric diagnosis_times.Composite.__init__( self, distribution_children={"ipsi": self.ipsi, "contra": self.contra}, is_distribution_leaf=False, ) modalities.Composite.__init__( self, modality_children={"ipsi": self.ipsi, "contra": self.contra}, is_modality_leaf=False, ) if named_params is not None: self.named_params = named_params
def _init_models( self, graph_dict: dict[tuple[str], list[str]], uni_kwargs: dict[str, Any] | None = None, ipsi_kwargs: dict[str, Any] | None = None, contra_kwargs: dict[str, Any] | None = None, ): """Initialize the two unilateral models.""" if uni_kwargs is None: uni_kwargs = {} _ipsi_kwargs = uni_kwargs.copy() _ipsi_kwargs["graph_dict"] = graph_dict _ipsi_kwargs.update(ipsi_kwargs or {}) _contra_kwargs = uni_kwargs.copy() _contra_kwargs["graph_dict"] = graph_dict _contra_kwargs.update(contra_kwargs or {}) self.ipsi = models.Unilateral(**_ipsi_kwargs) self.contra = models.Unilateral(**_contra_kwargs)
[docs] @classmethod def binary(cls, *args, **kwargs) -> Bilateral: """Initialize a binary bilateral model. This is a convenience method that sets the ``allowed_states`` of the ``uni_kwargs`` to ``[0, 1]``. All other ``args`` and ``kwargs`` are passed to the :py:meth:`.__init__` method. """ uni_kwargs = kwargs.pop("uni_kwargs", {}) uni_kwargs["allowed_states"] = [0, 1] return cls(*args, uni_kwargs=uni_kwargs, **kwargs)
[docs] @classmethod def trinary(cls, *args, **kwargs) -> Bilateral: """Initialize a trinary bilateral model. This is a convenience method that sets the ``allowed_states`` of the ``uni_kwargs`` to ``[0, 1, 2]``. All other ``args`` and ``kwargs`` are passed to the :py:meth:`.__init__` method. """ uni_kwargs = kwargs.pop("uni_kwargs", {}) uni_kwargs["allowed_states"] = [0, 1, 2] return cls(*args, uni_kwargs=uni_kwargs, **kwargs)
@property def is_trinary(self) -> bool: """Return whether the model is trinary.""" if self.ipsi.is_trinary != self.contra.is_trinary: raise ValueError("Both sides must be of the same 'narity'.") return self.ipsi.is_trinary @property def is_binary(self) -> bool: """Return whether the model is binary.""" if self.ipsi.is_binary != self.contra.is_binary: raise ValueError("Both sides must be of the same 'narity'.") return self.ipsi.is_binary
[docs] def get_tumor_spread_params( self, as_dict: bool = True, as_flat: bool = True, ) -> types.ParamsType: """Return the parameters of the model's spread from tumor to LNLs. If the attribute dictionary :py:attr:`.is_symmetric` stores the key-value pair ``"tumor_spread": True``, the parameters are returned as a single dictionary, since they are the same ipsi- and contralaterally. Otherwise, the parameters are returned as a dictionary with two keys, ``"ipsi"`` and ``"contra"``. """ params = { "ipsi": self.ipsi.get_tumor_spread_params(as_flat=as_flat), "contra": self.contra.get_tumor_spread_params(as_flat=as_flat), } if self.is_symmetric["tumor_spread"]: if params["ipsi"] != params["contra"]: warnings.warn( "The tumor spread parameters are not symmetric. " "Returning the ipsilateral parameters.", ) params = params["ipsi"] if as_flat or not as_dict: params = utils.flatten(params) return params if as_dict else params.values()
[docs] def get_lnl_spread_params( self, as_dict: bool = True, as_flat: bool = True, ) -> types.ParamsType: """Return the parameters of the model's spread from LNLs to tumor. Similarly to the :py:meth:`.get_tumor_spread_params` method, this returns only one dictionary if the attribute dictionary :py:attr:`.is_symmetric` stores the key-value pair ``"lnl_spread": True``. Otherwise, the parameters are returned as a dictionary with two keys, ``"ipsi"`` and ``"contra"``. """ params = { "ipsi": self.ipsi.get_lnl_spread_params(as_flat=as_flat), "contra": self.contra.get_lnl_spread_params(as_flat=as_flat), } if self.is_symmetric["lnl_spread"]: if params["ipsi"] != params["contra"]: warnings.warn( "The LNL spread parameters are not symmetric. " "Returning the ipsilateral parameters.", ) params = params["ipsi"] if as_flat or not as_dict: params = utils.flatten(params) return params if as_dict else params.values()
[docs] def get_spread_params( self, as_dict: bool = True, as_flat: bool = True, ) -> types.ParamsType: """Return the parameters of the model's spread edges. Depending on the symmetries (i.e. the ``is_symmetric`` attribute), this returns different results: If ``is_symmetric["tumor_spread"] = False``, the flattened (``as_flat=True``) dictionary (``as_dict=True``) will contain keys of the form ``ipsi_Tto<lnl>_spread`` and ``contra_Tto<lnl>_spread``, where ``<lnl>`` is the name of the lymph node level. However, if the tumor spread is set to be symmetric, the leading ``ipsi_`` or ``contra_`` is omitted, since it's valid for both sides. This is consistent with how the :py:meth:`.set_params` method expects the keyword arguments in case of the symmetry configurations. >>> model = Bilateral(graph_dict={ ... ("tumor", "T"): ["II", "III"], ... ("lnl", "II"): ["III"], ... ("lnl", "III"): [], ... }) >>> num_dims = model.get_num_dims() >>> num_dims 5 >>> model.set_spread_params( ... *np.round(np.linspace(0., 1., num_dims+1), 2), ... ) # doctest: +SKIP (1.0,) >>> model.get_spread_params(as_flat=False) # doctest: +SKIP {'ipsi': {'TtoII': {'spread': 0.0}, 'TtoIII': {'spread': 0.2}}, 'contra': {'TtoII': {'spread': 0.4}, 'TtoIII': {'spread': 0.6}}, 'IItoIII': {'spread': 0.8}} >>> model.get_spread_params(as_flat=True) # doctest: +SKIP {'ipsi_TtoII_spread': 0.0, 'ipsi_TtoIII_spread': 0.2, 'contra_TtoII_spread': 0.4, 'contra_TtoIII_spread': 0.6, 'IItoIII_spread': 0.8} """ params = self.get_tumor_spread_params(as_flat=False) if ( not self.is_symmetric["tumor_spread"] and not self.is_symmetric["lnl_spread"] ): params["ipsi"].update(self.get_lnl_spread_params(as_flat=False)["ipsi"]) params["contra"].update(self.get_lnl_spread_params(as_flat=False)["contra"]) else: params.update(self.get_lnl_spread_params(as_flat=as_flat)) if as_flat or not as_dict: params = utils.flatten(params) return params if as_dict else params.values()
[docs] def get_params( self, as_dict: bool = True, as_flat: bool = True, ) -> types.ParamsType: """Return the parameters of the model. It returns the combination of the call to the :py:meth:`.Unilateral.get_params` of the ipsi- and contralateral side. For the use of the ``as_dict`` and ``as_flat`` arguments, see the documentation of the :py:meth:`.types.Model.get_params` method. Also see the :py:meth:`.get_spread_params` method to understand how the symmetry settings affect the return value. """ params = self.get_spread_params(as_flat=as_flat) params.update(self.get_distribution_params(as_flat=as_flat)) if as_flat or not as_dict: params = utils.flatten(params) return params if as_dict else params.values()
[docs] def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set the parameters of the model's spread from tumor to LNLs.""" kwargs, global_kwargs = utils.unflatten_and_split( kwargs, expected_keys=["ipsi", "contra"], ) ipsi_kwargs = global_kwargs.copy() ipsi_kwargs.update(kwargs.get("ipsi", {})) contra_kwargs = global_kwargs.copy() contra_kwargs.update(kwargs.get("contra", {})) args = self.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) if self.is_symmetric["tumor_spread"]: utils.synchronize_params( get_from=self.ipsi.graph.tumor_edges, set_to=self.contra.graph.tumor_edges, ) else: args = self.contra.set_tumor_spread_params(*args, **contra_kwargs) return args
[docs] def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set the parameters of the model's spread from LNLs to tumor.""" kwargs, global_kwargs = utils.unflatten_and_split( kwargs, expected_keys=["ipsi", "contra"], ) ipsi_kwargs = global_kwargs.copy() ipsi_kwargs.update(kwargs.get("ipsi", {})) contra_kwargs = global_kwargs.copy() contra_kwargs.update(kwargs.get("contra", {})) args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) if self.is_symmetric["lnl_spread"]: utils.synchronize_params( get_from=self.ipsi.graph.lnl_edges, set_to=self.contra.graph.lnl_edges, ) else: args = self.contra.set_lnl_spread_params(*args, **contra_kwargs) return args
[docs] def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set the parameters of the model's spread edges.""" args = self.set_tumor_spread_params(*args, **kwargs) return self.set_lnl_spread_params(*args, **kwargs)
[docs] def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set new parameters to the model. This works almost exactly as the unilateral model's :py:meth:`.Unilateral.set_params` method. However, this one allows the user to set the parameters of individual sides of the neck by prefixing the keyword arguments' names with ``"ipsi_"`` or ``"contra_"``. Anything not prefixed by ``"ipsi_"`` or ``"contra_"`` is passed to both sides of the neck. This does obviously not work with positional arguments. When setting the parameters via positional arguments, the order is important: 1. The parameters of the edges from tumor to LNLs: 1. first the ipsilateral parameters, 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral parameters. Otherwise, the ipsilateral parameters are used for both sides. 2. The parameters of the edges from LNLs to tumor: 1. again, first the ipsilateral parameters, 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral parameters. Otherwise, the ipsilateral parameters are used for both sides. 3. The parameters of the parametric distributions for marginalizing over diagnosis times. When still some positional arguments remain after that, they are returned in a tuple. """ args = self.set_spread_params(*args, **kwargs) return self.set_distribution_params(*args, **kwargs)
[docs] def load_patient_data( self, patient_data: pd.DataFrame, mapping: callable | dict[int, Any] = utils.early_late_mapping, ) -> None: """Load patient data into the model. Amounts to calling the :py:meth:`~lymph.models.Unilateral.load_patient_data` method of both sides of the neck. """ self.ipsi.load_patient_data(patient_data, "ipsi", mapping) self.contra.load_patient_data(patient_data, "contra", mapping) # Keep all columns except '_model' # From '_model' only keep those with first subheader '#' or 'core' cols = [col for col in self.ipsi.patient_data.columns if col[0] != "_model"] cols += [ col for col in self.ipsi.patient_data.columns if col[0] == "_model" and (col[1] == "#" or col[1] == "core") ] self.patient_data = self.ipsi.patient_data[cols]
[docs] def state_dist( self, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the joint distribution over the ipsi- & contralateral hidden states. This computes the state distributions of both sides and returns their outer product. In case ``mode`` is ``"HMM"`` (default), the state distributions are first marginalized over the diagnosis time distributions of the respective ``t_stage``. .. seealso:: :py:meth:`.Unilateral.state_dist` The corresponding unilateral function. Note that this method returns a 2D array, because it computes the probability of any possible combination of ipsi- and contralateral states. """ if mode == "HMM": ipsi_state_evo = self.ipsi.state_dist_evo() contra_state_evo = self.contra.state_dist_evo() time_marg_matrix = np.diag(self.get_distribution(t_stage).pmf) result = ipsi_state_evo.T @ time_marg_matrix @ contra_state_evo elif mode == "BN": ipsi_state_dist = self.ipsi.state_dist(mode=mode) contra_state_dist = self.contra.state_dist(mode=mode) result = np.outer(ipsi_state_dist, contra_state_dist) else: raise ValueError(f"Unknown mode '{mode}'.") return result
[docs] def obs_dist( self, given_state_dist: np.ndarray | None = None, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the joint distribution over the ipsi- & contralateral observations. .. seealso:: :py:meth:`.Unilateral.obs_dist` The corresponding unilateral function. Note that this method returns a 2D array, because it computes the probability of any possible combination of ipsi- and contralateral observations. """ if given_state_dist is None: given_state_dist = self.state_dist(t_stage=t_stage, mode=mode) return ( self.ipsi.observation_matrix().T
[docs] @ given_state_dist @ self.contra.observation_matrix() )
def patient_likelihoods( self, t_stage: str, mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the likelihood of each patient individually.""" if mode == "HMM": joint_state_dist = self.state_dist(t_stage=t_stage, mode=mode) return matrix.fast_trace( self.ipsi.diagnosis_matrix(t_stage), joint_state_dist @ self.contra.diagnosis_matrix(t_stage).T, ) raise NotImplementedError( f"Mode '{mode}' not implemented for patient likelihoods. " "Only 'HMM? is supported.", )
def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the BN likelihood of data, using the stored params.""" joint_state_dist = self.state_dist(mode="BN") patient_llhs = matrix.fast_trace( self.ipsi.diagnosis_matrix(t_stage), joint_state_dist @ self.contra.diagnosis_matrix(t_stage).T, ) return np.sum(np.log(patient_llhs)) if log else np.prod(patient_llhs) def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the HMM likelihood of data, using the stored params.""" llh = 0.0 if log else 1.0 ipsi_dist_evo = self.ipsi.state_dist_evo() contra_dist_evo = self.contra.state_dist_evo() if t_stage is None: t_stages = self.t_stages else: t_stages = [t_stage] for stage in t_stages: diag_time_matrix = np.diag(self.get_distribution(stage).pmf) # Note that I am not using the `comp_joint_state_dist` method here, since # that would recompute the state dist evolution for each T-stage. joint_state_dist = ipsi_dist_evo.T @ diag_time_matrix @ contra_dist_evo patient_llhs = matrix.fast_trace( self.ipsi.diagnosis_matrix(stage), joint_state_dist @ self.contra.diagnosis_matrix(stage).T, ) llh = utils.add_or_mult(llh, patient_llhs, log) return llh
[docs] def likelihood( self, given_params: types.ParamsType | None = None, log: bool = True, t_stage: str | None = None, mode: Literal["HMM", "BN"] = "HMM", ): """Compute the (log-)likelihood of the stored data given the model (and params). See the documentation of :py:meth:`.types.Model.likelihood` for more information on how to use the ``given_params`` parameter. Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter determines whether the likelihood is computed for the hidden Markov model (``"HMM"``) or the Bayesian network (``"BN"``). .. note:: The computation is much faster if no parameters are given, since then the transition matrix does not need to be recomputed. .. seealso:: :py:meth:`.Unilateral.likelihood` The corresponding unilateral function. """ try: # all functions and methods called here should raise a ValueError if the # given parameters are invalid... utils.safe_set_params(self, given_params) except ValueError: return -np.inf if log else 0.0 if mode == "HMM": return self._hmm_likelihood(log, t_stage) if mode == "BN": return self._bn_likelihood(log, t_stage) raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.")
[docs] def posterior_state_dist( self, given_params: types.ParamsType | None = None, given_state_dist: np.ndarray | None = None, given_diagnosis: dict[str, types.DiagnosisType] | None = None, t_stage: str | int = "early", mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute joint post. dist. over ipsi & contra states, ``given_diagnosis``. ``given_diagnosis`` is a dictionary storing one :py:obj:`.types.DiagnosisType` each for the ``"ipsi"`` and ``"contra"`` side of the neck. Essentially, this is the risk for any possible combination of ipsi- and contralateral involvement, given the provided diagnosis. .. warning:: As in the :py:meth:`.Unilateral.posterior_state_dist` method, one may provide a precomputed (joint) state dist via the ``given_state_dist`` argument (should be a square matrix). In this case, the ``given_params`` are ignored and the model does not need to recompute e.g. the :py:meth:`.transition_matrix` or :py:meth:`.state_dist`, making the computation much faster. However, this will mean that ``t_stage`` and ``mode`` are also ignored, since these are only used to compute the state distribution. """ if given_state_dist is None: utils.safe_set_params(self, given_params) given_state_dist = self.state_dist(t_stage=t_stage, mode=mode) if given_diagnosis is None: given_diagnosis = {} diagnosis_given_state = {} for side in ["ipsi", "contra"]: if side not in given_diagnosis: warnings.warn(f"No diagnosis given for {side}lateral side.") diagnosis_encoding = getattr(self, side).compute_encoding( given_diagnosis.get(side, {}), ) observation_matrix = getattr(self, side).observation_matrix() # vector with P(Z=z|X) for each state X. A data matrix for one "patient" diagnosis_given_state[side] = diagnosis_encoding @ observation_matrix.T # matrix with P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc. joint_diagnosis_and_state = ( np.outer( diagnosis_given_state["ipsi"], diagnosis_given_state["contra"], ) * given_state_dist ) # Following Bayes' theorem, this is P(Xi,Xc|Zi=zi,Zc=zc) which is given by # P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) / P(Zi=zi,Zc=zc) return joint_diagnosis_and_state / np.sum(joint_diagnosis_and_state)
[docs] def marginalize( self, involvement: dict[str, types.PatternType], given_state_dist: np.ndarray | None = None, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM", ) -> float: """Marginalize ``given_state_dist`` over matching ``involvement`` patterns. Any state that matches the provided ``involvement`` pattern is marginalized over. For this, the :py:func:`.matrix.compute_encoding` function is used. If ``given_state_dist`` is ``None``, it will be computed by calling :py:meth:`.state_dist` with the given ``t_stage`` and ``mode``. These arguments are ignored if ``given_state_dist`` is provided. """ if given_state_dist is None: given_state_dist = self.state_dist(t_stage=t_stage, mode=mode) marginalize_over_states = {} for side in ["ipsi", "contra"]: side_graph = getattr(self, side).graph marginalize_over_states[side] = matrix.compute_encoding( lnls=side_graph.lnls.keys(), pattern=involvement.get(side, {}), base=3 if self.is_trinary else 2, ) return ( marginalize_over_states["ipsi"]
[docs] @ given_state_dist @ marginalize_over_states["contra"] )
def risk( self, involvement: dict[str, types.PatternType], given_params: types.ParamsType | None = None, given_state_dist: np.ndarray | None = None, given_diagnosis: dict[str, types.DiagnosisType] | None = None, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM", ) -> float: """Compute risk of the ``involvement`` patterns, given parameters and diagnosis. The ``involvement`` of interest is expected to be a :py:obj:`.PatternType` for each side of the neck (``"ipsi"`` and ``"contra"``). This method then marginalizes over those posterior state probabilities that match the ``involvement`` patterns. If ``involvement`` is not provided, the method returns the posterior state distribution as computed by the :py:meth:`.posterior_state_dist` method. See its docstring for more details on the remaining arguments. """ # TODO: test this method posterior_state_dist = self.posterior_state_dist( given_params=given_params, given_state_dist=given_state_dist, given_diagnosis=given_diagnosis, t_stage=t_stage, mode=mode, ) return self.marginalize(involvement, posterior_state_dist)
[docs] def draw_patients( self, num: int, stage_dist: Iterable[float], rng: np.random.Generator | None = None, seed: int = 42, **_kwargs, ) -> pd.DataFrame: """Draw ``num`` random patients from the parametrized model. .. seealso:: :py:meth:`.diagnosis_times.Distribution.draw_diag_times` Method to draw diagnosis times from a distribution. :py:meth:`.Unilateral.draw_diagnosis` Method to draw individual diagnosis from a unilateral model. :py:meth:`.Unilateral.draw_patients` The unilateral method to draw a synthetic dataset. """ if rng is None: rng = np.random.default_rng(seed) if sum(stage_dist) != 1.0: warnings.warn("Sum of stage distribution is not 1. Renormalizing.") stage_dist = np.array(stage_dist) / sum(stage_dist) drawn_t_stages = rng.choice( a=self.t_stages, p=stage_dist, size=num, ) drawn_diag_times = [ self.get_distribution(t_stage).draw_diag_times(rng=rng) for t_stage in drawn_t_stages ] drawn_obs_ipsi = self.ipsi.draw_diagnosis(drawn_diag_times, rng=rng) drawn_obs_contra = self.contra.draw_diagnosis(drawn_diag_times, rng=rng) drawn_obs = np.concatenate([drawn_obs_ipsi, drawn_obs_contra], axis=1) # construct MultiIndex with "ipsi" and "contra" at top level to allow # concatenation of the two separate drawn diagnosis sides = ["ipsi", "contra"] modality_names = list(self.get_all_modalities().keys()) lnl_names = list(self.ipsi.graph.lnls.keys()) multi_cols = pd.MultiIndex.from_product([sides, modality_names, lnl_names]) # reorder the column levels and thus also the individual columns to match the # LyProX format without mixing up the data dataset = pd.DataFrame(drawn_obs, columns=multi_cols) dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns") dataset = dataset.sort_index(axis="columns", level=0) dataset[("tumor", "core", "t_stage")] = drawn_t_stages return dataset