-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcontext_encoder.cpp
More file actions
122 lines (100 loc) · 4.27 KB
/
context_encoder.cpp
File metadata and controls
122 lines (100 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include "bcod/context_encoder.hpp"
#include "bcod/logging.hpp"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
namespace bcod {
/// Private state behind ContextEncoder: the flat float weight buffer read from
/// disk plus the fixed network hyper-parameters.
struct ContextEncoder::Impl {
    /// Reads the weights from `weights_path`. A failed load is logged by
    /// load_weights() and leaves `weights_` empty; nothing is thrown here.
    explicit Impl(const std::string& weights_path) { load_weights(weights_path); }

    /// Replaces `weights_` with the contents of the binary file at `path`,
    /// read as a packed array of native `float` values.
    ///
    /// On any failure (file cannot be opened, its size cannot be determined,
    /// or fewer bytes than expected are read) an error is logged and
    /// `weights_` keeps its previous contents.
    void load_weights(const std::string& path) {
        std::ifstream file(path, std::ios::binary);
        if (!file) {
            BCOD_ERROR("Failed to open weights file: ", path);
            return;
        }

        file.seekg(0, std::ios::end);
        // tellg() reports failure as -1; converting that straight to size_t
        // would request an enormous allocation.
        const std::streamoff byte_count = file.tellg();
        file.seekg(0, std::ios::beg);
        if (!file || byte_count < 0) {
            BCOD_ERROR("Failed to determine size of weights file: ", path);
            return;
        }

        // Only whole floats are read. Reading the raw byte count into a buffer
        // of (size / sizeof(float)) elements would write past its end whenever
        // the file length is not a multiple of sizeof(float); any trailing
        // partial float is ignored instead.
        const std::size_t float_count =
            static_cast<std::size_t>(byte_count) / sizeof(float);
        const std::streamsize bytes_to_read =
            static_cast<std::streamsize>(float_count * sizeof(float));

        // Read into a scratch buffer so a short read never leaves weights_
        // half-populated.
        std::vector<float> loaded(float_count);
        file.read(reinterpret_cast<char*>(loaded.data()), bytes_to_read);
        if (file.gcount() != bytes_to_read) {
            BCOD_ERROR("Failed to read weights file: ", path);
            return;
        }
        weights_.swap(loaded);
    }

    std::vector<float> weights_;   // flat weight buffer; empty if loading failed
    int hidden_dim_ = 128;         // length of the context vector produced by encode()
    int num_layers_ = 3;           // number of apply_film() passes run by encode()
    float dropout_rate_ = 0.1f;    // not read anywhere in this file
    bool use_layer_norm_ = true;   // selects the normalising branch in apply_film()
};
ContextEncoder::ContextEncoder(const std::string& weights_path)
: pimpl_(std::make_unique<Impl>(weights_path)) {}
// Defaulted here, after Impl's definition earlier in this file, at a point
// where Impl is a complete type.
ContextEncoder::~ContextEncoder() = default;
/// Encodes a belief raster and a goal position into a context vector.
///
/// The raster is flattened in (y, x, channel) order, the goal's x and y are
/// appended, and the result is pushed through `num_layers_` calls to
/// apply_film(). `context` is resized to `hidden_dim_` and overwritten.
///
/// If the loaded weight buffer is smaller than the range apply_film() reads
/// (this includes an empty buffer after a failed load), an error is logged and
/// `context` is returned zero-filled instead of reading out of bounds.
///
/// @param raster   Belief raster, sampled through raster.at(x, y, c).
/// @param goal     Goal position; x and y are narrowed to float.
/// @param context  Output; holds `hidden_dim_` floats on return.
void ContextEncoder::encode(const BeliefRaster& raster,
                            const Eigen::Vector2d& goal,
                            std::vector<float>& context) const {
    const int input_size = BeliefRaster::WIDTH * BeliefRaster::HEIGHT *
                           BeliefRaster::CHANNELS + 2;
    const int output_size = pimpl_->hidden_dim_;
    const int num_layers = pimpl_->num_layers_;
    const std::vector<float>& weights = pimpl_->weights_;

    // Flatten the raster, then append the goal as the last two features.
    std::vector<float> input(input_size);
    int idx = 0;
    for (int y = 0; y < BeliefRaster::HEIGHT; ++y) {
        for (int x = 0; x < BeliefRaster::WIDTH; ++x) {
            for (int c = 0; c < BeliefRaster::CHANNELS; ++c) {
                input[idx++] = raster.at(x, y, c);
            }
        }
    }
    input[idx++] = static_cast<float>(goal.x());
    input[idx++] = static_cast<float>(goal.y());

    context.assign(static_cast<std::size_t>(output_size), 0.0f);

    // All offsets are computed in size_t so large rasters cannot overflow int.
    const std::size_t in_floats = static_cast<std::size_t>(input_size);
    const std::size_t out_floats = static_cast<std::size_t>(output_size);
    const std::size_t matrix_floats = in_floats * out_floats;
    const std::size_t layer_stride = matrix_floats + 2 * out_floats;

    if (num_layers > 0) {
        // apply_film() indexes its `gamma` argument over
        // [0, input_size * channels) and its `beta` argument over
        // [0, channels), so measured from `gamma` it touches
        // max(matrix_floats, 2 * out_floats) floats.
        // NOTE(review): because `gamma` is read as the weight matrix, the
        // last layer's footprint extends past num_layers * layer_stride.
        // Confirm the intended on-disk layout against the exporter.
        const std::size_t film_span = std::max(matrix_floats, 2 * out_floats);
        const std::size_t required =
            static_cast<std::size_t>(num_layers - 1) * layer_stride +
            matrix_floats + film_span;
        if (weights.size() < required) {
            BCOD_ERROR("Weights buffer too small for ContextEncoder::encode; floats available: ",
                       std::to_string(weights.size()));
            return;
        }
    }

    for (int layer = 0; layer < num_layers; ++layer) {
        const float* layer_weights =
            weights.data() + static_cast<std::size_t>(layer) * layer_stride;
        const float* gamma = layer_weights + matrix_floats;
        const float* beta = gamma + out_floats;
        apply_film(input.data(), gamma, beta, context.data(), 1, output_size);

        // Feed this layer's output to the next one. apply_film() always reads
        // input_size floats, so a shorter context is zero-padded rather than
        // letting the next pass read beyond the end of the vector.
        // NOTE(review): zero-padding is the conservative choice; confirm it
        // matches how the weights were trained.
        input = context;
        if (input.size() < in_floats) {
            input.resize(in_floats, 0.0f);
        }
    }
}
/// Projects each input row onto `channels` outputs and applies a per-channel
/// affine step.
///
/// With features = WIDTH * HEIGHT * CHANNELS + 2, for every batch row b and
/// channel c:
///   dot = sum_i input[b * features + i] * gamma[c * features + i]
/// When `use_layer_norm_` is false the result is `dot + beta[c]`. Otherwise
///   mean = dot / features
///   var  = (1 / features) * sum_i (input[b * features + i] - mean)^2
///   out  = (dot - mean) / sqrt(var + 1e-5) * gamma[c] + beta[c]
///
/// NOTE(review): `gamma` is indexed both as a row-major channels x features
/// matrix and as a per-channel scale (`gamma[c]`), and "mean" is derived from
/// the dot product rather than from the inputs. Confirm both are intended.
///
/// @param input       batch_size * features floats, row-major.
/// @param gamma       Read at indices [0, channels * features).
/// @param beta        Read at indices [0, channels).
/// @param output      Receives batch_size * channels floats, row-major.
/// @param batch_size  Number of input rows.
/// @param channels    Number of outputs per row.
void ContextEncoder::apply_film(const float* input,
                                const float* gamma,
                                const float* beta,
                                float* output,
                                int batch_size,
                                int channels) const {
    // Number of floats in one input row; also the row length used for gamma.
    const auto features = BeliefRaster::WIDTH * BeliefRaster::HEIGHT *
                          BeliefRaster::CHANNELS + 2;
    const bool normalize = pimpl_->use_layer_norm_;

    for (int b = 0; b < batch_size; ++b) {
        const float* row = input + b * features;
        float* out_row = output + b * channels;

        for (int c = 0; c < channels; ++c) {
            const float* coeffs = gamma + c * features;

            float dot = 0.0f;
            for (int k = 0; k < features; ++k) {
                dot += row[k] * coeffs[k];
            }

            if (!normalize) {
                out_row[c] = dot + beta[c];
                continue;
            }

            const float mean = dot / features;
            float spread = 0.0f;
            for (int k = 0; k < features; ++k) {
                const float delta = row[k] - mean;
                spread += delta * delta;
            }
            spread /= features;

            out_row[c] = (dot - mean) / std::sqrt(spread + 1e-5f) *
                         gamma[c] + beta[c];
        }
    }
}
} // namespace bcod