forked from ahmedfgad/GeneticAlgorithmPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparent_selection.py
More file actions
523 lines (421 loc) · 28.1 KB
/
parent_selection.py
File metadata and controls
523 lines (421 loc) · 28.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
"""
The pygad.utils.parent_selection module has all the built-in parent selection operators.
"""
import numpy
class ParentSelection:
    """
    Mixin class holding all the built-in parent selection operators.

    The methods read attributes (population, gene_type, gene_type_single,
    logger, ...) supplied by the class they are mixed into; this class is
    not meant to be used standalone.
    """

    def __init__(self):
        # Fixed: the original signature was ``def __init__():`` (no self),
        # which made direct instantiation raise TypeError.
        pass
def steady_state_selection(self, fitness, num_parents):
    """
    Select parents using the steady-state selection technique.

    The solutions are ordered by quality (via NSGA-II-compatible sorting,
    which handles both single- and multi-objective problems) and the top
    ``num_parents`` solutions become the parents that later mate to
    produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions in the population, ordered by quality.
    ranked_indices = self.sort_solutions_nsga2(fitness=fitness)

    # The parents array dtype depends on whether all genes share one type.
    elem_dtype = self.gene_type[0] if self.gene_type_single == True else object
    parents = numpy.empty((num_parents, self.population.shape[1]), dtype=elem_dtype)

    # Copy the top-ranked solutions into the parents array.
    for row, sol_idx in enumerate(ranked_indices[:num_parents]):
        parents[row, :] = self.population[sol_idx, :].copy()

    return parents, numpy.array(ranked_indices[:num_parents])
def rank_selection(self, fitness, num_parents):
    """
    Select parents using the rank selection technique.

    Every solution receives a rank from 1 to N (N = number of solutions)
    and each parent is drawn by spinning a wheel whose slot sizes are
    proportional to the ranks. The selected parents later mate to produce
    the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions ordered by quality (works for both single-
    # and multi-objective problems).
    ranked_indices = self.sort_solutions_nsga2(fitness=fitness)

    # Rank-based probabilities: position r gets probability r / sum(1..N).
    # NOTE(review): slot r maps back to ranked_indices[r]; whether position 0
    # holds the best or the worst solution depends on the order returned by
    # sort_solutions_nsga2 — confirm it matches the comment convention that
    # the best solution receives the largest rank.
    ranks = numpy.arange(1, self.sol_per_pop + 1)
    probs = ranks / numpy.sum(ranks)

    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    selected = []
    for row in range(num_parents):
        spin = numpy.random.rand()
        for slot in range(probs.shape[0]):
            if probs_start[slot] <= spin < probs_end[slot]:
                # ``slot`` is a rank position, not a population index;
                # translate it back to the actual solution index.
                pop_idx = ranked_indices[slot]
                parents[row, :] = self.population[pop_idx, :].copy()
                selected.append(pop_idx)
                break
    return parents, numpy.array(selected)
def random_selection(self, fitness, num_parents):
    """
    Select parents uniformly at random (with replacement). The selected
    parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # The parents array dtype depends on whether all genes share one type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Draw the parent indices uniformly with replacement.
    # Fixed: ``low`` was previously the float 0.0; randint expects an int.
    rand_indices = numpy.random.randint(low=0, high=fitness.shape[0], size=num_parents)

    for parent_num in range(num_parents):
        parents[parent_num, :] = self.population[rand_indices[parent_num], :].copy()

    return parents, rand_indices
def tournament_selection(self, fitness, num_parents):
    """
    Select parents using the tournament selection technique.

    For each parent, ``K_tournament`` candidate solutions are drawn at
    random and the candidate with the best (lowest) rank wins the
    tournament. The selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions ordered by quality (works for both single-
    # and multi-objective problems).
    fitness_sorted = self.sort_solutions_nsga2(fitness=fitness)

    # Precompute each solution's rank once. The original looked ranks up
    # with list.index() inside the loop, which is O(n) per candidate.
    rank_of = {int(sol_idx): rank for rank, sol_idx in enumerate(fitness_sorted)}

    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    parents_indices = []
    for parent_num in range(num_parents):
        # Generate random indices for the candidate solutions.
        # Fixed: ``low`` was previously the float 0.0; randint expects an int.
        rand_indices = numpy.random.randint(low=0, high=len(fitness), size=self.K_tournament)
        # The lower the rank, the better the solution.
        rand_indices_rank = [rank_of[int(rand_idx)] for rand_idx in rand_indices]
        # The first candidate with the lowest rank wins (ties broken by
        # candidate order, matching the original behavior).
        selected_parent_idx = rand_indices_rank.index(min(rand_indices_rank))
        # Append the index of the selected parent.
        parents_indices.append(rand_indices[selected_parent_idx])
        # Insert the selected parent.
        parents[parent_num, :] = self.population[rand_indices[selected_parent_idx], :].copy()
    return parents, numpy.array(parents_indices)
def roulette_wheel_selection(self, fitness, num_parents):
    """
    Select parents using the roulette wheel selection technique. The
    selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Multi-objective support: collapse an M-D fitness array to 1D by
    # summing the objectives of each solution so a single wheel can be
    # built. (Using only one objective would be the alternative.)
    if len(fitness.shape) > 1:
        fitness = numpy.sum(fitness, axis=1)

    # Build selection probabilities proportional to fitness.
    fitness_sum = numpy.sum(fitness)
    if fitness_sum == 0:
        self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
        raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
    # NOTE(review): negative fitness values would yield negative
    # probabilities and a malformed wheel — confirm fitness is
    # non-negative when this operator is used.
    probs = fitness / fitness_sum

    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    chosen = []
    for row in range(num_parents):
        spin = numpy.random.rand()
        for sol_idx in range(probs.shape[0]):
            if probs_start[sol_idx] <= spin < probs_end[sol_idx]:
                parents[row, :] = self.population[sol_idx, :].copy()
                chosen.append(sol_idx)
                break
    return parents, numpy.array(chosen)
def wheel_cumulative_probs(self, probs, num_parents):
    """
    Helper that builds the cumulative probability ranges of the wheel used by:
        1) roulette_wheel_selection
        2) rank_selection

    It accepts a single 1D array representing the probabilities of selecting
    each solution.

    It returns 2 1D arrays:
        1) probs_start has the start of each range.
        2) probs_end has the end of each range.
    It also returns an empty array pre-allocated for the parents.
    """
    probs_start = numpy.zeros(probs.shape, dtype=float)  # Start values of the probability ranges.
    probs_end = numpy.zeros(probs.shape, dtype=float)    # End values of the probability ranges.

    # Assign wheel slots in ascending order of probability. A stable
    # argsort visits equal probabilities first-index-first, exactly like
    # the original repeated-minimum scan, but in O(n log n) instead of
    # O(n^2). The running sum is accumulated sequentially so the float
    # results are bit-identical to the original implementation. Unlike the
    # original, ``probs`` is no longer mutated (callers pass a copy anyway).
    curr = 0.0
    for sol_idx in numpy.argsort(probs, kind="stable"):
        probs_start[sol_idx] = curr
        curr = curr + probs[sol_idx]
        probs_end[sol_idx] = curr

    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)
    return probs_start, probs_end, parents
def stochastic_universal_selection(self, fitness, num_parents):
    """
    Select parents using stochastic universal sampling (SUS).

    A single random offset places equally spaced pointers on a
    fitness-proportional wheel; each pointer selects one parent. The
    selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Multi-objective support: collapse an M-D fitness array to 1D by
    # summing the objectives of each solution.
    if len(fitness.shape) > 1:
        fitness = numpy.sum(fitness, axis=1)

    # Reaching this step means fitness is a 1D array.
    fitness_sum = numpy.sum(fitness)
    if fitness_sum == 0:
        self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
        raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
    probs = fitness / fitness_sum

    # Build the wheel ranges (and the empty parents array) with the shared
    # helper instead of duplicating its loop here. This matches
    # roulette_wheel_selection and rank_selection and produces identical
    # results to the previously inlined code.
    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    # NOTE(review): the pointer spacing uses self.num_parents_mating, not
    # the num_parents argument; these are normally equal — confirm.
    pointers_distance = 1.0 / self.num_parents_mating  # Distance between different pointers.
    first_pointer = numpy.random.uniform(low=0.0,
                                         high=pointers_distance,
                                         size=1)[0]  # Location of the first pointer.

    parents_indices = []
    for parent_num in range(num_parents):
        rand_pointer = first_pointer + parent_num * pointers_distance
        for idx in range(probs.shape[0]):
            if probs_start[idx] <= rand_pointer < probs_end[idx]:
                parents[parent_num, :] = self.population[idx, :].copy()
                parents_indices.append(idx)
                break
    return parents, numpy.array(parents_indices)
def tournament_selection_nsga2(self,
                               fitness,
                               num_parents
                               ):
    """
    Select the parents using the tournament selection technique for NSGA-II.

    The traditional tournament selection uses the fitness values. But the tournament selection for NSGA-II uses non-dominated sorting and crowding distance.
    Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions.
    To select the parents solutions, 2 solutions are selected randomly. If the 2 solutions are in different pareto fronts, then the solution coming from a pareto front with lower index is selected.
    If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected.
    If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected.
    Later, the selected parents will mate to produce the offspring.

    It accepts 2 parameters:
        -fitness: The fitness values for the current population.
        -num_parents: The number of parents to be selected.
    (The pareto fronts and the per-solution front indices are computed
    internally via self.non_dominated_sorting(); they are not parameters.)

    It returns:
        -An array of the selected parents.
        -The indices of the selected solutions.
    """
    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Verify that the problem is multi-objective optimization as the tournament NSGA-II selection is only applied to multi-objective problems.
    if type(fitness[0]) in [list, tuple, numpy.ndarray]:
        pass
    elif type(fitness[0]) in self.supported_int_float_types:
        raise ValueError('The tournament NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray')

    # The indices of the selected parents.
    parents_indices = []

    # If there is only a single objective, each pareto front is expected to have only 1 solution.
    # TODO Make a test to check for that behaviour and add it to the GitHub actions tests.
    pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness)
    self.pareto_fronts = pareto_fronts.copy()

    # Randomly generate the candidate indices for each tournament.
    # NOTE(review): K_tournament candidates are drawn per parent, but only
    # the first two (indices [0] and [1]) are compared below; extras are
    # ignored and K_tournament < 2 would raise an IndexError — confirm
    # this is intended.
    rand_indices = numpy.random.randint(low=0.0,
                                        high=len(solutions_fronts_indices),
                                        size=(num_parents, self.K_tournament))

    for parent_num in range(num_parents):
        # The candidate solution indices for this tournament.
        current_indices = rand_indices[parent_num]
        # The pareto front index of each compared solution.
        parent_fronts_indices = solutions_fronts_indices[current_indices]
        if parent_fronts_indices[0] < parent_fronts_indices[1]:
            # If the first solution is in a lower pareto front than the second, then select it.
            selected_parent_idx = current_indices[0]
        elif parent_fronts_indices[0] > parent_fronts_indices[1]:
            # If the second solution is in a lower pareto front than the first, then select it.
            selected_parent_idx = current_indices[1]
        else:
            # The 2 solutions are in the same pareto front.
            # The selection is made using the crowding distance.

            # A list holding the crowding distance of the current 2 solutions. It is initialized to -1.
            solutions_crowding_distance = [-1, -1]

            # Fetch the current pareto front.
            pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used.

            # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless).
            if pareto_front.shape[0] == 1:
                selected_parent_idx = current_indices[0] # Index 1 can also be used.
            else:
                # Reaching here means the pareto front has more than 1 solution.
                # Calculate the crowding distance of the solutions of the pareto front.
                obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=pareto_front.copy(),
                                                                                                                                                                fitness=fitness)
                # This list has the sorted front-based indices for the solutions in the current pareto front.
                crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices)
                # This list has the sorted population-based indices for the solutions in the current pareto front.
                crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices)

                # Positions of the 2 candidates within the sorted front list.
                solution1_idx = crowding_dist_pop_sorted_indices.index(current_indices[0])
                solution2_idx = crowding_dist_pop_sorted_indices.index(current_indices[1])

                # Fetch the crowding distance using the indices.
                # NOTE(review): crowding_distance_sum is indexed as [i][1],
                # i.e. it appears to be a sequence of (solution, distance)
                # pairs — confirm against the crowding_distance() helper.
                solutions_crowding_distance[0] = crowding_distance_sum[solution1_idx][1]
                solutions_crowding_distance[1] = crowding_distance_sum[solution2_idx][1]

                if solutions_crowding_distance[0] > solutions_crowding_distance[1]:
                    # Select the first solution if it has higher crowding distance.
                    selected_parent_idx = current_indices[0]
                elif solutions_crowding_distance[1] > solutions_crowding_distance[0]:
                    # Select the second solution if it has higher crowding distance.
                    selected_parent_idx = current_indices[1]
                else:
                    # If the crowding distance is equal, select the parent randomly.
                    rand_num = numpy.random.uniform()
                    if rand_num < 0.5:
                        # If the random number is < 0.5, then select the first solution.
                        selected_parent_idx = current_indices[0]
                    else:
                        # If the random number is >= 0.5, then select the second solution.
                        selected_parent_idx = current_indices[1]

        # Insert the selected parent index.
        parents_indices.append(selected_parent_idx)
        # Insert the selected parent.
        parents[parent_num, :] = self.population[selected_parent_idx, :].copy()

    # Make sure the parents indices is returned as a NumPy array.
    return parents, numpy.array(parents_indices)
def nsga2_selection(self,
                    fitness,
                    num_parents
                    ):
    """
    Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II).

    The selection is done using non-dominated sorting and crowding distance.
    Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions.
    The parents are selected from the lower pareto fronts and moving up until selecting the number of desired parents.
    A solution from a pareto front X cannot be taken as a parent until all solutions in pareto front Y is selected given that Y < X.
    For a pareto front X, if only a subset of its solutions is needed, then the crowding distance is used to determine which solutions to be selected from the front. The solution with the higher crowding distance is selected.
    If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected.
    Later, the selected parents will mate to produce the offspring.

    It accepts 2 parameters:
        -fitness: The fitness values for the current population.
        -num_parents: The number of parents to be selected.
    (The pareto fronts and the per-solution front indices are computed
    internally via self.non_dominated_sorting(); they are not parameters.)

    It returns:
        -An array of the selected parents.
        -The indices of the selected solutions.
    """
    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Verify that the problem is multi-objective optimization as the NSGA-II selection is only applied to multi-objective problems.
    if type(fitness[0]) in [list, tuple, numpy.ndarray]:
        pass
    elif type(fitness[0]) in self.supported_int_float_types:
        raise ValueError('The NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray')

    # The indices of the selected parents.
    parents_indices = []

    # If there is only a single objective, each pareto front is expected to have only 1 solution.
    # TODO Make a test to check for that behaviour.
    pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness)
    self.pareto_fronts = pareto_fronts.copy()

    # The number of remaining parents to be selected.
    num_remaining_parents = num_parents
    # Index of the current parent.
    current_parent_idx = 0
    # A loop variable holding the index of the current pareto front.
    pareto_front_idx = 0
    # Walk the fronts from best (index 0) upwards until enough parents
    # are selected or the fronts are exhausted.
    while num_remaining_parents != 0 and pareto_front_idx < len(pareto_fronts):
        # Return the current pareto front.
        current_pareto_front = pareto_fronts[pareto_front_idx]
        # Check if the entire front fits into the parents array.
        # If so, then insert all the solutions in the current front into the parents array.
        if num_remaining_parents >= len(current_pareto_front):
            for sol_idx in range(len(current_pareto_front)):
                # Each front is indexed as a 2D array here: column 0 is
                # assumed to hold the solution's population index —
                # confirm against non_dominated_sorting().
                selected_solution_idx = current_pareto_front[sol_idx, 0]
                # Insert the parent into the parents array.
                parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy()
                # Insert the index of the selected parent.
                parents_indices.append(selected_solution_idx)
                # Increase the parent index.
                current_parent_idx += 1

            # Decrement the number of remaining parents by the length of the pareto front.
            num_remaining_parents -= len(current_pareto_front)
        else:
            # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed.
            # Calculate the crowding distance of the solutions of the pareto front.
            obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=current_pareto_front.copy(),
                                                                                                                                                            fitness=fitness)

            # Take the first num_remaining_parents entries.
            # NOTE(review): this assumes crowding_dist_pop_sorted_indices is
            # ordered with the highest crowding distance first — confirm
            # against the crowding_distance() helper.
            for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]:
                # Insert the parent into the parents array.
                parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy()
                # Insert the index of the selected parent.
                parents_indices.append(selected_solution_idx)
                # Increase the parent index.
                current_parent_idx += 1

            # Decrement the number of remaining parents by the number of selected parents.
            # (This sets num_remaining_parents to 0, ending the loop.)
            num_remaining_parents -= num_remaining_parents

        # Increase the pareto front index to take parents from the next front.
        pareto_front_idx += 1

    # Make sure the parents indices is returned as a NumPy array.
    return parents, numpy.array(parents_indices)