Skip to content

Trainers

hgp_lib.trainers.gp_trainer.GPTrainer

High-level trainer for Boolean Genetic Programming. Accepts a TrainerConfig containing a BooleanGPConfig and training options. Runs the training loop and optionally validates every val_every epochs. Returns a PopulationHistory with GenerationMetrics per epoch.

Parameters:

Name Type Description Default
config TrainerConfig

Configuration with gp_config (BooleanGPConfig), num_epochs, optional val_data/val_labels, val_every, progress options.

required

Examples:

>>> import numpy as np
>>> from hgp_lib.configs import BooleanGPConfig, TrainerConfig
>>> from hgp_lib.trainers import GPTrainer
>>>
>>> def accuracy(predictions, labels):
...     return np.mean(predictions == labels)
>>>
>>> train_data = np.array([[True, False, True, False], [False, True, False, True]])
>>> train_labels = np.array([1, 0])
>>> val_data = np.array([[True, True, False, False]])
>>> val_labels = np.array([1])
>>> gp_config = BooleanGPConfig(
...     score_fn=accuracy,
...     train_data=train_data,
...     train_labels=train_labels,
...     optimize_scorer=False,
... )
>>> config = TrainerConfig(
...     gp_config=gp_config,
...     num_epochs=10,
...     val_data=val_data,
...     val_labels=val_labels,
...     val_every=5,
...     progress_bar=False,
... )
>>> trainer = GPTrainer(config)
>>> trainer_result_history = trainer.fit()
Source code in hgp_lib\trainers\gp_trainer.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
class GPTrainer:
    """
    High-level trainer for Boolean Genetic Programming.

    Accepts a TrainerConfig containing a BooleanGPConfig and training options.
    Runs the training loop and optionally validates every ``val_every`` epochs.
    ``fit`` returns a PopulationHistory with one GenerationMetrics per epoch,
    plus final confusion-matrix counts for the globally best rule.

    Args:
        config (TrainerConfig): Configuration with gp_config (BooleanGPConfig),
            num_epochs, optional val_data/val_labels, val_every, progress options.

    Examples:
        >>> import numpy as np
        >>> from hgp_lib.configs import BooleanGPConfig, TrainerConfig
        >>> from hgp_lib.trainers import GPTrainer
        >>>
        >>> def accuracy(predictions, labels):
        ...     return np.mean(predictions == labels)
        >>>
        >>> train_data = np.array([[True, False, True, False], [False, True, False, True]])
        >>> train_labels = np.array([1, 0])
        >>> val_data = np.array([[True, True, False, False]])
        >>> val_labels = np.array([1])
        >>> gp_config = BooleanGPConfig(
        ...     score_fn=accuracy,
        ...     train_data=train_data,
        ...     train_labels=train_labels,
        ...     optimize_scorer=False,
        ... )
        >>> config = TrainerConfig(
        ...     gp_config=gp_config,
        ...     num_epochs=10,
        ...     val_data=val_data,
        ...     val_labels=val_labels,
        ...     val_every=5,
        ...     progress_bar=False,
        ... )
        >>> trainer = GPTrainer(config)
        >>> trainer_result_history = trainer.fit()
    """

    def __init__(self, config: TrainerConfig):
        # Fail fast on an inconsistent configuration before building the GP.
        validate_trainer_config(config)

        self.config = config
        self.gp_algo = BooleanGP(config.gp_config)
        self.num_epochs = config.num_epochs
        self.val_every = config.val_every
        self.progress_bar = config.progress_bar
        self.leave_progress_bar = config.leave_progress_bar
        self.progress_callback = config.progress_callback

        self.score_fn = self.gp_algo.score_fn  # Maybe optimized
        # Mirror BooleanGP's scorer optimization on the validation split so
        # training and validation scores remain comparable.
        if config.val_data is not None and config.gp_config.optimize_scorer:
            self.val_score_fn, self.val_cm, self.val_data, self.val_labels = (
                optimize_scorers_for_data(
                    config.gp_config.score_fn,
                    confusion_matrix,
                    data=config.val_data,
                    labels=config.val_labels,
                )
            )
        else:
            # No optimization requested (or no validation split): use the raw
            # score function and plain confusion matrix.
            self.val_score_fn = config.gp_config.score_fn
            self.val_cm = confusion_matrix
            self.val_data = config.val_data
            self.val_labels = config.val_labels

    def fit(self) -> PopulationHistory:
        """
        Trains the Boolean GP model for the specified number of epochs.

        Returns:
            PopulationHistory: Per-epoch generation metrics, train/validation
                confusion-matrix counts for the best rule, and the globally
                best rule itself.
        """
        parent_generations: List[GenerationMetrics] = []
        # Displayed in the progress bar even before the first validation pass.
        val_score = 0.0

        with tqdm(
            range(self.num_epochs),
            desc="Epochs",
            disable=not self.progress_bar,
            leave=self.leave_progress_bar,
        ) as tbar:
            for epoch in tbar:
                gen_metrics = self.gp_algo.step()

                # Validate every `val_every` epochs and always on the final
                # epoch, when validation data is available.
                if self.val_data is not None and (
                    (epoch + 1) % self.val_every == 0 or epoch == self.num_epochs - 1
                ):
                    val_score = self.gp_algo.evaluate_best(
                        self.val_data,
                        self.val_labels,
                        self.val_score_fn,
                    )

                    gen_metrics.val_score = val_score

                parent_generations.append(gen_metrics)

                # Report progress in fixed-size batches of epochs.
                if (
                    self.progress_callback is not None
                    and (epoch + 1) % self.config.progress_update_interval == 0
                ):
                    self.progress_callback(self.config.progress_update_interval)

                tbar.set_postfix(
                    {
                        "train_best": f"{gen_metrics.best_train_score:.4f}",
                        # NOTE: shows the most recent validation score, which
                        # may be stale between validation epochs.
                        "val_best": f"{val_score:.4f}",
                    }
                )

        # Send remaining epochs not covered by progress_update_interval
        remaining_epochs = self.num_epochs % self.config.progress_update_interval
        if remaining_epochs > 0 and self.progress_callback is not None:
            self.progress_callback(remaining_epochs)

        # Final confusion matrix of the globally best rule on the train split.
        tp, fp, fn, tn = self.gp_algo.train_cm(
            self.gp_algo.global_best_rule.evaluate(self.gp_algo.train_data),
            self.gp_algo.train_labels,
        )
        # Validation counts stay None when no validation split was provided.
        val_tp, val_fp, val_fn, val_tn = None, None, None, None
        if self.val_data is not None:
            val_tp, val_fp, val_fn, val_tn = self.val_cm(
                self.gp_algo.global_best_rule.evaluate(self.val_data), self.val_labels
            )
        return PopulationHistory(
            generations=parent_generations,
            tp=tp,
            fp=fp,
            fn=fn,
            tn=tn,
            val_tp=val_tp,
            val_fp=val_fp,
            val_fn=val_fn,
            val_tn=val_tn,
            global_best_rule=self.gp_algo.global_best_rule,
        )

fit()

Trains the Boolean GP model for the specified number of epochs. Returns: PopulationHistory: Per-epoch generation metrics, confusion-matrix counts, and the globally best rule.

Source code in hgp_lib\trainers\gp_trainer.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def fit(self) -> PopulationHistory:
    """
    Trains the Boolean GP model for the specified number of epochs.

    Returns:
        PopulationHistory: Per-epoch generation metrics, train/validation
            confusion-matrix counts for the best rule, and the globally
            best rule itself.
    """
    parent_generations: List[GenerationMetrics] = []
    # Displayed in the progress bar even before the first validation pass.
    val_score = 0.0

    with tqdm(
        range(self.num_epochs),
        desc="Epochs",
        disable=not self.progress_bar,
        leave=self.leave_progress_bar,
    ) as tbar:
        for epoch in tbar:
            gen_metrics = self.gp_algo.step()

            # Validate every `val_every` epochs and always on the final
            # epoch, when validation data is available.
            if self.val_data is not None and (
                (epoch + 1) % self.val_every == 0 or epoch == self.num_epochs - 1
            ):
                val_score = self.gp_algo.evaluate_best(
                    self.val_data,
                    self.val_labels,
                    self.val_score_fn,
                )

                gen_metrics.val_score = val_score

            parent_generations.append(gen_metrics)

            # Report progress in fixed-size batches of epochs.
            if (
                self.progress_callback is not None
                and (epoch + 1) % self.config.progress_update_interval == 0
            ):
                self.progress_callback(self.config.progress_update_interval)

            tbar.set_postfix(
                {
                    "train_best": f"{gen_metrics.best_train_score:.4f}",
                    # NOTE: shows the most recent validation score, which
                    # may be stale between validation epochs.
                    "val_best": f"{val_score:.4f}",
                }
            )

    # Send remaining epochs not covered by progress_update_interval
    remaining_epochs = self.num_epochs % self.config.progress_update_interval
    if remaining_epochs > 0 and self.progress_callback is not None:
        self.progress_callback(remaining_epochs)

    # Final confusion matrix of the globally best rule on the train split.
    tp, fp, fn, tn = self.gp_algo.train_cm(
        self.gp_algo.global_best_rule.evaluate(self.gp_algo.train_data),
        self.gp_algo.train_labels,
    )
    # Validation counts stay None when no validation split was provided.
    val_tp, val_fp, val_fn, val_tn = None, None, None, None
    if self.val_data is not None:
        val_tp, val_fp, val_fn, val_tn = self.val_cm(
            self.gp_algo.global_best_rule.evaluate(self.val_data), self.val_labels
        )
    return PopulationHistory(
        generations=parent_generations,
        tp=tp,
        fp=fp,
        fn=fn,
        tn=tn,
        val_tp=val_tp,
        val_fp=val_fp,
        val_fn=val_fn,
        val_tn=val_tn,
        global_best_rule=self.gp_algo.global_best_rule,
    )