Skip to content

Trainers

hgp_lib.trainers.gp_trainer.GPTrainer

High-level trainer for Boolean Genetic Programming. Accepts a TrainerConfig containing a BooleanGPConfig and training options. Runs the training loop and optionally validates every val_every epochs. Returns a PopulationHistory with GenerationMetrics per epoch.

Parameters:

Name Type Description Default
config TrainerConfig

Configuration with gp_config (BooleanGPConfig), num_epochs, optional val_data/val_labels, val_every, progress options.

required

Examples:

>>> import numpy as np
>>> from hgp_lib.configs import BooleanGPConfig, TrainerConfig
>>> from hgp_lib.trainers import GPTrainer
>>>
>>> def accuracy(predictions, labels):
...     return np.mean(predictions == labels)
>>>
>>> train_data = np.array([[True, False, True, False], [False, True, False, True]])
>>> train_labels = np.array([1, 0])
>>> val_data = np.array([[True, True, False, False]])
>>> val_labels = np.array([1])
>>> gp_config = BooleanGPConfig(
...     score_fn=accuracy,
...     train_data=train_data,
...     train_labels=train_labels,
...     optimize_scorer=False,
... )
>>> config = TrainerConfig(
...     gp_config=gp_config,
...     num_epochs=10,
...     val_data=val_data,
...     val_labels=val_labels,
...     val_every=5,
...     progress_bar=False,
... )
>>> trainer = GPTrainer(config)
>>> trainer_result_history = trainer.fit()
Source code in hgp_lib\trainers\gp_trainer.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
class GPTrainer:
    """
    High-level trainer for Boolean Genetic Programming.

    Accepts a TrainerConfig containing a BooleanGPConfig and training options.
    Runs the training loop and optionally validates every ``val_every`` epochs.
    ``fit`` returns a PopulationHistory with one GenerationMetrics per epoch,
    plus final confusion-matrix counts for the globally best rule.

    Args:
        config (TrainerConfig): Configuration with gp_config (BooleanGPConfig),
            num_epochs, optional val_data/val_labels, val_every, progress options.

    Examples:
        >>> import numpy as np
        >>> from hgp_lib.configs import BooleanGPConfig, TrainerConfig
        >>> from hgp_lib.trainers import GPTrainer
        >>>
        >>> def accuracy(predictions, labels):
        ...     return np.mean(predictions == labels)
        >>>
        >>> train_data = np.array([[True, False, True, False], [False, True, False, True]])
        >>> train_labels = np.array([1, 0])
        >>> val_data = np.array([[True, True, False, False]])
        >>> val_labels = np.array([1])
        >>> gp_config = BooleanGPConfig(
        ...     score_fn=accuracy,
        ...     train_data=train_data,
        ...     train_labels=train_labels,
        ...     optimize_scorer=False,
        ... )
        >>> config = TrainerConfig(
        ...     gp_config=gp_config,
        ...     num_epochs=10,
        ...     val_data=val_data,
        ...     val_labels=val_labels,
        ...     val_every=5,
        ...     progress_bar=False,
        ... )
        >>> trainer = GPTrainer(config)
        >>> trainer_result_history = trainer.fit()
    """

    def __init__(self, config: TrainerConfig):
        # Fail fast on an inconsistent configuration before building the GP.
        validate_trainer_config(config)

        self.config = config
        self.gp_algo = BooleanGP(config.gp_config)
        self.num_epochs = config.num_epochs
        self.val_every = config.val_every
        self.progress_bar = config.progress_bar
        self.leave_progress_bar = config.leave_progress_bar
        self.progress_callback = config.progress_callback

        self.score_fn = self.gp_algo.score_fn  # Maybe optimized
        # Mirror BooleanGP's scorer optimization on the validation split so
        # training and validation scores remain comparable.
        if config.val_data is not None and config.gp_config.optimize_scorer:
            self.val_score_fn, self.val_cm, self.val_data, self.val_labels = (
                optimize_scorers_for_data(
                    config.gp_config.score_fn,
                    confusion_matrix,
                    data=config.val_data,
                    labels=config.val_labels,
                )
            )
        else:
            # No optimization requested (or no validation split): use the raw
            # score function and plain confusion matrix.
            self.val_score_fn = config.gp_config.score_fn
            self.val_cm = confusion_matrix
            self.val_data = config.val_data
            self.val_labels = config.val_labels

    def fit(self) -> PopulationHistory:
        """
        Trains the Boolean GP model for the specified number of epochs.

        Returns:
            PopulationHistory: Per-epoch generation metrics, train/validation
                confusion-matrix counts for the best rule, and the globally
                best rule itself.
        """
        parent_generations: List[GenerationMetrics] = []
        # Displayed in the progress bar even before the first validation pass.
        val_score = 0.0

        with tqdm(
            range(self.num_epochs),
            desc="Epochs",
            disable=not self.progress_bar,
            leave=self.leave_progress_bar,
        ) as tbar:
            for epoch in tbar:
                gen_metrics = self.gp_algo.step()

                # Validate every `val_every` epochs and always on the final
                # epoch, when validation data is available.
                if self.val_data is not None and (
                    (epoch + 1) % self.val_every == 0 or epoch == self.num_epochs - 1
                ):
                    val_score = self.gp_algo.evaluate_best(
                        self.val_data,
                        self.val_labels,
                        self.val_score_fn,
                    )

                    gen_metrics.val_score = val_score

                parent_generations.append(gen_metrics)

                # Report progress in fixed-size batches of epochs.
                if (
                    self.progress_callback is not None
                    and (epoch + 1) % self.config.progress_update_interval == 0
                ):
                    self.progress_callback(self.config.progress_update_interval)

                tbar.set_postfix(
                    {
                        "train_best": f"{gen_metrics.best_train_score:.4f}",
                        # NOTE: shows the most recent validation score, which
                        # may be stale between validation epochs.
                        "val_best": f"{val_score:.4f}",
                    }
                )

        # Send remaining epochs not covered by progress_update_interval
        remaining_epochs = self.num_epochs % self.config.progress_update_interval
        if remaining_epochs > 0 and self.progress_callback is not None:
            self.progress_callback(remaining_epochs)

        # Final confusion matrix of the globally best rule on the train split.
        tp, fp, fn, tn = self.gp_algo.train_cm(
            self.gp_algo.global_best_rule.evaluate(self.gp_algo.train_data),
            self.gp_algo.train_labels,
        )
        # Validation counts stay None when no validation split was provided.
        val_tp, val_fp, val_fn, val_tn = None, None, None, None
        if self.val_data is not None:
            val_tp, val_fp, val_fn, val_tn = self.val_cm(
                self.gp_algo.global_best_rule.evaluate(self.val_data), self.val_labels
            )
        return PopulationHistory(
            generations=parent_generations,
            tp=tp,
            fp=fp,
            fn=fn,
            tn=tn,
            val_tp=val_tp,
            val_fp=val_fp,
            val_fn=val_fn,
            val_tn=val_tn,
            global_best_rule=self.gp_algo.global_best_rule,
        )

fit()

Trains the Boolean GP model for the specified number of epochs. Returns: PopulationHistory: Per-epoch generation metrics, confusion-matrix counts, and the globally best rule.

Source code in hgp_lib\trainers\gp_trainer.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def fit(self) -> PopulationHistory:
    """
    Trains the Boolean GP model for the specified number of epochs.

    Returns:
        PopulationHistory: Per-epoch generation metrics, train/validation
            confusion-matrix counts for the best rule, and the globally
            best rule itself.
    """
    parent_generations: List[GenerationMetrics] = []
    # Displayed in the progress bar even before the first validation pass.
    val_score = 0.0

    with tqdm(
        range(self.num_epochs),
        desc="Epochs",
        disable=not self.progress_bar,
        leave=self.leave_progress_bar,
    ) as tbar:
        for epoch in tbar:
            gen_metrics = self.gp_algo.step()

            # Validate every `val_every` epochs and always on the final
            # epoch, when validation data is available.
            if self.val_data is not None and (
                (epoch + 1) % self.val_every == 0 or epoch == self.num_epochs - 1
            ):
                val_score = self.gp_algo.evaluate_best(
                    self.val_data,
                    self.val_labels,
                    self.val_score_fn,
                )

                gen_metrics.val_score = val_score

            parent_generations.append(gen_metrics)

            # Report progress in fixed-size batches of epochs.
            if (
                self.progress_callback is not None
                and (epoch + 1) % self.config.progress_update_interval == 0
            ):
                self.progress_callback(self.config.progress_update_interval)

            tbar.set_postfix(
                {
                    "train_best": f"{gen_metrics.best_train_score:.4f}",
                    # NOTE: shows the most recent validation score, which
                    # may be stale between validation epochs.
                    "val_best": f"{val_score:.4f}",
                }
            )

    # Send remaining epochs not covered by progress_update_interval
    remaining_epochs = self.num_epochs % self.config.progress_update_interval
    if remaining_epochs > 0 and self.progress_callback is not None:
        self.progress_callback(remaining_epochs)

    # Final confusion matrix of the globally best rule on the train split.
    tp, fp, fn, tn = self.gp_algo.train_cm(
        self.gp_algo.global_best_rule.evaluate(self.gp_algo.train_data),
        self.gp_algo.train_labels,
    )
    # Validation counts stay None when no validation split was provided.
    val_tp, val_fp, val_fn, val_tn = None, None, None, None
    if self.val_data is not None:
        val_tp, val_fp, val_fn, val_tn = self.val_cm(
            self.gp_algo.global_best_rule.evaluate(self.val_data), self.val_labels
        )
    return PopulationHistory(
        generations=parent_generations,
        tp=tp,
        fp=fp,
        fn=fn,
        tn=tn,
        val_tp=val_tp,
        val_fp=val_fp,
        val_fn=val_fn,
        val_tn=val_tn,
        global_best_rule=self.gp_algo.global_best_rule,
    )