forked from ahmedfgad/GeneticAlgorithmPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparent_selection.py
More file actions
523 lines (421 loc) · 28.1 KB
/
parent_selection.py
File metadata and controls
523 lines (421 loc) · 28.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
"""
The pygad.utils.parent_selection module has all the built-in parent selection operators.
"""
import numpy
class ParentSelection:
    """
    Mixin class holding all the built-in parent selection operators.

    The methods read attributes (population, gene_type, gene_type_single,
    logger, ...) supplied by the class they are mixed into; this class is
    not meant to be used standalone.
    """

    def __init__(self):
        # Fixed: the original signature was ``def __init__():`` (no self),
        # which made direct instantiation raise TypeError.
        pass
def steady_state_selection(self, fitness, num_parents):
    """
    Select parents using the steady-state selection technique.

    The solutions are ordered by quality (via NSGA-II-compatible sorting,
    which handles both single- and multi-objective problems) and the top
    ``num_parents`` solutions become the parents that later mate to
    produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions in the population, ordered by quality.
    ranked_indices = self.sort_solutions_nsga2(fitness=fitness)

    # The parents array dtype depends on whether all genes share one type.
    elem_dtype = self.gene_type[0] if self.gene_type_single == True else object
    parents = numpy.empty((num_parents, self.population.shape[1]), dtype=elem_dtype)

    # Copy the top-ranked solutions into the parents array.
    for row, sol_idx in enumerate(ranked_indices[:num_parents]):
        parents[row, :] = self.population[sol_idx, :].copy()

    return parents, numpy.array(ranked_indices[:num_parents])
def rank_selection(self, fitness, num_parents):
    """
    Select parents using the rank selection technique.

    Every solution receives a rank from 1 to N (N = number of solutions)
    and each parent is drawn by spinning a wheel whose slot sizes are
    proportional to the ranks. The selected parents later mate to produce
    the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions ordered by quality (works for both single-
    # and multi-objective problems).
    ranked_indices = self.sort_solutions_nsga2(fitness=fitness)

    # Rank-based probabilities: position r gets probability r / sum(1..N).
    # NOTE(review): slot r maps back to ranked_indices[r]; whether position 0
    # holds the best or the worst solution depends on the order returned by
    # sort_solutions_nsga2 — confirm it matches the comment convention that
    # the best solution receives the largest rank.
    ranks = numpy.arange(1, self.sol_per_pop + 1)
    probs = ranks / numpy.sum(ranks)

    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    selected = []
    for row in range(num_parents):
        spin = numpy.random.rand()
        for slot in range(probs.shape[0]):
            if probs_start[slot] <= spin < probs_end[slot]:
                # ``slot`` is a rank position, not a population index;
                # translate it back to the actual solution index.
                pop_idx = ranked_indices[slot]
                parents[row, :] = self.population[pop_idx, :].copy()
                selected.append(pop_idx)
                break
    return parents, numpy.array(selected)
def random_selection(self, fitness, num_parents):
    """
    Select parents uniformly at random (with replacement). The selected
    parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # The parents array dtype depends on whether all genes share one type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Draw the parent indices uniformly with replacement.
    # Fixed: ``low`` was previously the float 0.0; randint expects an int.
    rand_indices = numpy.random.randint(low=0, high=fitness.shape[0], size=num_parents)

    for parent_num in range(num_parents):
        parents[parent_num, :] = self.population[rand_indices[parent_num], :].copy()

    return parents, rand_indices
def tournament_selection(self, fitness, num_parents):
    """
    Select parents using the tournament selection technique.

    For each parent, ``K_tournament`` candidate solutions are drawn at
    random and the candidate with the best (lowest) rank wins the
    tournament. The selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Indices of all solutions ordered by quality (works for both single-
    # and multi-objective problems).
    fitness_sorted = self.sort_solutions_nsga2(fitness=fitness)

    # Precompute each solution's rank once. The original looked ranks up
    # with list.index() inside the loop, which is O(n) per candidate.
    rank_of = {int(sol_idx): rank for rank, sol_idx in enumerate(fitness_sorted)}

    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    parents_indices = []
    for parent_num in range(num_parents):
        # Generate random indices for the candidate solutions.
        # Fixed: ``low`` was previously the float 0.0; randint expects an int.
        rand_indices = numpy.random.randint(low=0, high=len(fitness), size=self.K_tournament)
        # The lower the rank, the better the solution.
        rand_indices_rank = [rank_of[int(rand_idx)] for rand_idx in rand_indices]
        # The first candidate with the lowest rank wins (ties broken by
        # candidate order, matching the original behavior).
        selected_parent_idx = rand_indices_rank.index(min(rand_indices_rank))
        # Append the index of the selected parent.
        parents_indices.append(rand_indices[selected_parent_idx])
        # Insert the selected parent.
        parents[parent_num, :] = self.population[rand_indices[selected_parent_idx], :].copy()
    return parents, numpy.array(parents_indices)
def roulette_wheel_selection(self, fitness, num_parents):
    """
    Select parents using the roulette wheel selection technique. The
    selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Multi-objective support: collapse an M-D fitness array to 1D by
    # summing the objectives of each solution so a single wheel can be
    # built. (Using only one objective would be the alternative.)
    if len(fitness.shape) > 1:
        fitness = numpy.sum(fitness, axis=1)

    # Build selection probabilities proportional to fitness.
    fitness_sum = numpy.sum(fitness)
    if fitness_sum == 0:
        self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
        raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
    # NOTE(review): negative fitness values would yield negative
    # probabilities and a malformed wheel — confirm fitness is
    # non-negative when this operator is used.
    probs = fitness / fitness_sum

    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    chosen = []
    for row in range(num_parents):
        spin = numpy.random.rand()
        for sol_idx in range(probs.shape[0]):
            if probs_start[sol_idx] <= spin < probs_end[sol_idx]:
                parents[row, :] = self.population[sol_idx, :].copy()
                chosen.append(sol_idx)
                break
    return parents, numpy.array(chosen)
def wheel_cumulative_probs(self, probs, num_parents):
    """
    Helper that builds the cumulative probability ranges of the wheel used by:
        1) roulette_wheel_selection
        2) rank_selection

    It accepts a single 1D array representing the probabilities of selecting
    each solution.

    It returns 2 1D arrays:
        1) probs_start has the start of each range.
        2) probs_end has the end of each range.
    It also returns an empty array pre-allocated for the parents.
    """
    probs_start = numpy.zeros(probs.shape, dtype=float)  # Start values of the probability ranges.
    probs_end = numpy.zeros(probs.shape, dtype=float)    # End values of the probability ranges.

    # Assign wheel slots in ascending order of probability. A stable
    # argsort visits equal probabilities first-index-first, exactly like
    # the original repeated-minimum scan, but in O(n log n) instead of
    # O(n^2). The running sum is accumulated sequentially so the float
    # results are bit-identical to the original implementation. Unlike the
    # original, ``probs`` is no longer mutated (callers pass a copy anyway).
    curr = 0.0
    for sol_idx in numpy.argsort(probs, kind="stable"):
        probs_start[sol_idx] = curr
        curr = curr + probs[sol_idx]
        probs_end[sol_idx] = curr

    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)
    return probs_start, probs_end, parents
def stochastic_universal_selection(self, fitness, num_parents):
    """
    Select parents using stochastic universal sampling (SUS).

    A single random offset places equally spaced pointers on a
    fitness-proportional wheel; each pointer selects one parent. The
    selected parents later mate to produce the offspring.

    Parameters:
        fitness: The fitness values of the solutions in the current population.
        num_parents: The number of parents to be selected.

    Returns:
        A tuple of (array of the selected parents, indices of the selected solutions).
    """
    # Multi-objective support: collapse an M-D fitness array to 1D by
    # summing the objectives of each solution.
    if len(fitness.shape) > 1:
        fitness = numpy.sum(fitness, axis=1)

    # Reaching this step means fitness is a 1D array.
    fitness_sum = numpy.sum(fitness)
    if fitness_sum == 0:
        self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
        raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
    probs = fitness / fitness_sum

    # Build the wheel ranges (and the empty parents array) with the shared
    # helper instead of duplicating its loop here. This matches
    # roulette_wheel_selection and rank_selection and produces identical
    # results to the previously inlined code.
    probs_start, probs_end, parents = self.wheel_cumulative_probs(probs=probs.copy(),
                                                                  num_parents=num_parents)

    # NOTE(review): the pointer spacing uses self.num_parents_mating, not
    # the num_parents argument; these are normally equal — confirm.
    pointers_distance = 1.0 / self.num_parents_mating  # Distance between different pointers.
    first_pointer = numpy.random.uniform(low=0.0,
                                         high=pointers_distance,
                                         size=1)[0]  # Location of the first pointer.

    parents_indices = []
    for parent_num in range(num_parents):
        rand_pointer = first_pointer + parent_num * pointers_distance
        for idx in range(probs.shape[0]):
            if probs_start[idx] <= rand_pointer < probs_end[idx]:
                parents[parent_num, :] = self.population[idx, :].copy()
                parents_indices.append(idx)
                break
    return parents, numpy.array(parents_indices)
def tournament_selection_nsga2(self,
                               fitness,
                               num_parents
                               ):
    """
    Select the parents using the tournament selection technique for NSGA-II.

    The traditional tournament selection uses the fitness values. But the tournament selection for NSGA-II uses non-dominated sorting and crowding distance.
    Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions.
    To select the parents solutions, 2 solutions are selected randomly. If the 2 solutions are in different pareto fronts, then the solution coming from a pareto front with lower index is selected.
    If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected.
    If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected.
    Later, the selected parents will mate to produce the offspring.

    It accepts 2 parameters:
        -fitness: The fitness values for the current population.
        -num_parents: The number of parents to be selected.
    (The pareto fronts and the per-solution front indices are computed
    internally via self.non_dominated_sorting(); they are not parameters.)

    It returns:
        -An array of the selected parents.
        -The indices of the selected solutions.
    """
    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Verify that the problem is multi-objective optimization as the tournament NSGA-II selection is only applied to multi-objective problems.
    if type(fitness[0]) in [list, tuple, numpy.ndarray]:
        pass
    elif type(fitness[0]) in self.supported_int_float_types:
        raise ValueError('The tournament NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray')

    # The indices of the selected parents.
    parents_indices = []

    # If there is only a single objective, each pareto front is expected to have only 1 solution.
    # TODO Make a test to check for that behaviour and add it to the GitHub actions tests.
    pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness)
    self.pareto_fronts = pareto_fronts.copy()

    # Randomly generate the candidate indices for each tournament.
    # NOTE(review): K_tournament candidates are drawn per parent, but only
    # the first two (indices [0] and [1]) are compared below; extras are
    # ignored and K_tournament < 2 would raise an IndexError — confirm
    # this is intended.
    rand_indices = numpy.random.randint(low=0.0,
                                        high=len(solutions_fronts_indices),
                                        size=(num_parents, self.K_tournament))

    for parent_num in range(num_parents):
        # The candidate solution indices for this tournament.
        current_indices = rand_indices[parent_num]
        # The pareto front index of each compared solution.
        parent_fronts_indices = solutions_fronts_indices[current_indices]
        if parent_fronts_indices[0] < parent_fronts_indices[1]:
            # If the first solution is in a lower pareto front than the second, then select it.
            selected_parent_idx = current_indices[0]
        elif parent_fronts_indices[0] > parent_fronts_indices[1]:
            # If the second solution is in a lower pareto front than the first, then select it.
            selected_parent_idx = current_indices[1]
        else:
            # The 2 solutions are in the same pareto front.
            # The selection is made using the crowding distance.

            # A list holding the crowding distance of the current 2 solutions. It is initialized to -1.
            solutions_crowding_distance = [-1, -1]

            # Fetch the current pareto front.
            pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used.

            # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless).
            if pareto_front.shape[0] == 1:
                selected_parent_idx = current_indices[0] # Index 1 can also be used.
            else:
                # Reaching here means the pareto front has more than 1 solution.
                # Calculate the crowding distance of the solutions of the pareto front.
                obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=pareto_front.copy(),
                                                                                                                                                                fitness=fitness)
                # This list has the sorted front-based indices for the solutions in the current pareto front.
                crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices)
                # This list has the sorted population-based indices for the solutions in the current pareto front.
                crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices)

                # Positions of the 2 candidates within the sorted front list.
                solution1_idx = crowding_dist_pop_sorted_indices.index(current_indices[0])
                solution2_idx = crowding_dist_pop_sorted_indices.index(current_indices[1])

                # Fetch the crowding distance using the indices.
                # NOTE(review): crowding_distance_sum is indexed as [i][1],
                # i.e. it appears to be a sequence of (solution, distance)
                # pairs — confirm against the crowding_distance() helper.
                solutions_crowding_distance[0] = crowding_distance_sum[solution1_idx][1]
                solutions_crowding_distance[1] = crowding_distance_sum[solution2_idx][1]

                if solutions_crowding_distance[0] > solutions_crowding_distance[1]:
                    # Select the first solution if it has higher crowding distance.
                    selected_parent_idx = current_indices[0]
                elif solutions_crowding_distance[1] > solutions_crowding_distance[0]:
                    # Select the second solution if it has higher crowding distance.
                    selected_parent_idx = current_indices[1]
                else:
                    # If the crowding distance is equal, select the parent randomly.
                    rand_num = numpy.random.uniform()
                    if rand_num < 0.5:
                        # If the random number is < 0.5, then select the first solution.
                        selected_parent_idx = current_indices[0]
                    else:
                        # If the random number is >= 0.5, then select the second solution.
                        selected_parent_idx = current_indices[1]

        # Insert the selected parent index.
        parents_indices.append(selected_parent_idx)
        # Insert the selected parent.
        parents[parent_num, :] = self.population[selected_parent_idx, :].copy()

    # Make sure the parents indices is returned as a NumPy array.
    return parents, numpy.array(parents_indices)
def nsga2_selection(self,
                    fitness,
                    num_parents
                    ):
    """
    Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II).

    The selection is done using non-dominated sorting and crowding distance.
    Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions.
    The parents are selected from the lower pareto fronts and moving up until selecting the number of desired parents.
    A solution from a pareto front X cannot be taken as a parent until all solutions in pareto front Y is selected given that Y < X.
    For a pareto front X, if only a subset of its solutions is needed, then the crowding distance is used to determine which solutions to be selected from the front. The solution with the higher crowding distance is selected.
    If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected.
    Later, the selected parents will mate to produce the offspring.

    It accepts 2 parameters:
        -fitness: The fitness values for the current population.
        -num_parents: The number of parents to be selected.
    (The pareto fronts and the per-solution front indices are computed
    internally via self.non_dominated_sorting(); they are not parameters.)

    It returns:
        -An array of the selected parents.
        -The indices of the selected solutions.
    """
    # Pre-allocate the parents array; dtype depends on whether all genes
    # share a single type.
    if self.gene_type_single == True:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
    else:
        parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    # Verify that the problem is multi-objective optimization as the NSGA-II selection is only applied to multi-objective problems.
    if type(fitness[0]) in [list, tuple, numpy.ndarray]:
        pass
    elif type(fitness[0]) in self.supported_int_float_types:
        raise ValueError('The NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray')

    # The indices of the selected parents.
    parents_indices = []

    # If there is only a single objective, each pareto front is expected to have only 1 solution.
    # TODO Make a test to check for that behaviour.
    pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness)
    self.pareto_fronts = pareto_fronts.copy()

    # The number of remaining parents to be selected.
    num_remaining_parents = num_parents
    # Index of the current parent.
    current_parent_idx = 0
    # A loop variable holding the index of the current pareto front.
    pareto_front_idx = 0
    # Walk the fronts from best (index 0) upwards until enough parents
    # are selected or the fronts are exhausted.
    while num_remaining_parents != 0 and pareto_front_idx < len(pareto_fronts):
        # Return the current pareto front.
        current_pareto_front = pareto_fronts[pareto_front_idx]
        # Check if the entire front fits into the parents array.
        # If so, then insert all the solutions in the current front into the parents array.
        if num_remaining_parents >= len(current_pareto_front):
            for sol_idx in range(len(current_pareto_front)):
                # Each front is indexed as a 2D array here: column 0 is
                # assumed to hold the solution's population index —
                # confirm against non_dominated_sorting().
                selected_solution_idx = current_pareto_front[sol_idx, 0]
                # Insert the parent into the parents array.
                parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy()
                # Insert the index of the selected parent.
                parents_indices.append(selected_solution_idx)
                # Increase the parent index.
                current_parent_idx += 1

            # Decrement the number of remaining parents by the length of the pareto front.
            num_remaining_parents -= len(current_pareto_front)
        else:
            # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed.
            # Calculate the crowding distance of the solutions of the pareto front.
            obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=current_pareto_front.copy(),
                                                                                                                                                            fitness=fitness)

            # Take the first num_remaining_parents entries.
            # NOTE(review): this assumes crowding_dist_pop_sorted_indices is
            # ordered with the highest crowding distance first — confirm
            # against the crowding_distance() helper.
            for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]:
                # Insert the parent into the parents array.
                parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy()
                # Insert the index of the selected parent.
                parents_indices.append(selected_solution_idx)
                # Increase the parent index.
                current_parent_idx += 1

            # Decrement the number of remaining parents by the number of selected parents.
            # (This sets num_remaining_parents to 0, ending the loop.)
            num_remaining_parents -= num_remaining_parents

        # Increase the pareto front index to take parents from the next front.
        pareto_front_idx += 1

    # Make sure the parents indices is returned as a NumPy array.
    return parents, numpy.array(parents_indices)