X Tutup
Skip to content

Commit 6cd777b

Browse files
jshaRoland Bracewell Shoemaker
authored andcommitted
Fix up stats after letsencrypt#3167 (letsencrypt#3185)
There were two bugs in letsencrypt#3167: All process-level stats got prefixed with "boulder", which broke dashboards. All request_time stats got dropped, because measured_http was using the prometheus DefaultRegisterer. To fix, this PR plumbs through a scope object to measured_http, and uses an empty prefix when calling NewProcessCollector().
1 parent 06d348c commit 6cd777b

File tree

8 files changed

+36
-25
lines changed

8 files changed

+36
-25
lines changed

cmd/boulder-wfe/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,10 @@ func main() {
133133
wfe.BaseURL = c.Common.BaseURL
134134

135135
logger.Info(fmt.Sprintf("Server running, listening on %s...\n", c.WFE.ListenAddress))
136+
handler := wfe.Handler()
136137
srv := &http.Server{
137138
Addr: c.WFE.ListenAddress,
138-
Handler: wfe.Handler(),
139+
Handler: handler,
139140
}
140141

141142
go func() {
@@ -149,7 +150,7 @@ func main() {
149150
if c.WFE.TLSListenAddress != "" {
150151
tlsSrv = &http.Server{
151152
Addr: c.WFE.TLSListenAddress,
152-
Handler: wfe.Handler(),
153+
Handler: handler,
153154
}
154155
go func() {
155156
err := tlsSrv.ListenAndServeTLS(c.WFE.ServerCertificatePath, c.WFE.ServerKeyPath)

cmd/boulder-wfe2/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,10 @@ func main() {
133133
wfe.BaseURL = c.Common.BaseURL
134134

135135
logger.Info(fmt.Sprintf("Server running, listening on %s...\n", c.WFE.ListenAddress))
136+
handler := wfe.Handler()
136137
srv := &http.Server{
137138
Addr: c.WFE.ListenAddress,
138-
Handler: wfe.Handler(),
139+
Handler: handler,
139140
}
140141

141142
go func() {
@@ -149,7 +150,7 @@ func main() {
149150
if c.WFE.TLSListenAddress != "" {
150151
tlsSrv = &http.Server{
151152
Addr: c.WFE.TLSListenAddress,
152-
Handler: wfe.Handler(),
153+
Handler: handler,
153154
}
154155
go func() {
155156
err := tlsSrv.ListenAndServeTLS(c.WFE.ServerCertificatePath, c.WFE.ServerKeyPath)

cmd/ocsp-responder/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,5 +253,5 @@ func mux(scope metrics.Scope, responderPath string, source cfocsp.Source) http.H
253253
}
254254
stripPrefix.ServeHTTP(w, r)
255255
})
256-
return measured_http.New(&ocspMux{h}, cmd.Clock())
256+
return measured_http.New(&ocspMux{h}, cmd.Clock(), scope)
257257
}

cmd/shell.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func NewLogger(logConf SyslogConfig) blog.Logger {
153153
func newScope(addr string, logger blog.Logger) metrics.Scope {
154154
registry := prometheus.NewRegistry()
155155
registry.MustRegister(prometheus.NewGoCollector())
156-
registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), "boulder"))
156+
registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), ""))
157157

158158
mux := http.NewServeMux()
159159
// Register each of the available pprof handlers. These are all registered on

metrics/measured_http/http.go

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,10 @@ import (
55
"net/http"
66

77
"github.com/jmhodges/clock"
8+
"github.com/letsencrypt/boulder/metrics"
89
"github.com/prometheus/client_golang/prometheus"
910
)
1011

11-
var (
12-
responseTime = prometheus.NewHistogramVec(
13-
prometheus.HistogramOpts{
14-
Name: "response_time",
15-
Help: "Time taken to respond to a request",
16-
},
17-
[]string{"endpoint", "method", "code"})
18-
)
19-
20-
func init() {
21-
prometheus.MustRegister(responseTime)
22-
}
23-
2412
// responseWriterWithStatus satisfies http.ResponseWriter, but keeps track of the
2513
// status code for gathering stats.
2614
type responseWriterWithStatus struct {
@@ -50,7 +38,14 @@ type MeasuredHandler struct {
5038
stat *prometheus.HistogramVec
5139
}
5240

53-
func New(m serveMux, clk clock.Clock) *MeasuredHandler {
41+
func New(m serveMux, clk clock.Clock, scope metrics.Scope) *MeasuredHandler {
42+
responseTime := prometheus.NewHistogramVec(
43+
prometheus.HistogramOpts{
44+
Name: "response_time",
45+
Help: "Time taken to respond to a request",
46+
},
47+
[]string{"endpoint", "method", "code"})
48+
scope.MustRegister(responseTime)
5449
return &MeasuredHandler{
5550
serveMux: m,
5651
clk: clk,

test/integration-test.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,17 @@ def test_admin_revoker_authz():
417417
if data['status'] != "revoked":
418418
raise Exception("Authorization wasn't revoked")
419419

420+
def test_stats():
421+
def expect_stat(port, stat):
422+
url = "http://localhost:%d/metrics" % port
423+
response = requests.get(url)
424+
if not stat in response.content:
425+
print(response.content)
426+
raise Exception("%s not present in %s" % (stat, url))
427+
expect_stat(8000, "\nresponse_time_count{")
428+
expect_stat(8000, "\ngo_goroutines ")
429+
expect_stat(8001, "\ngo_goroutines ")
430+
420431
exit_status = 1
421432
tempdir = tempfile.mkdtemp()
422433

@@ -483,6 +494,7 @@ def run_chisel():
483494
test_renewal_exemption()
484495
test_expired_authzs_404()
485496
test_account_update()
497+
test_stats()
486498

487499
if __name__ == "__main__":
488500
try:

wfe/wfe.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ func (wfe *WebFrontEndImpl) Handler() http.Handler {
323323
// meaning we can wind up returning 405 when we mean to return 404. See
324324
// https://github.com/letsencrypt/boulder/issues/717
325325
m.Handle("/", web.NewTopHandler(wfe.log, web.WFEHandlerFunc(wfe.Index)))
326-
return measured_http.New(m, wfe.clk)
326+
return measured_http.New(m, wfe.clk, wfe.stats)
327327
}
328328

329329
// Method implementations

wfe2/wfe.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ type WebFrontEndImpl struct {
6565
log blog.Logger
6666
clk clock.Clock
6767
stats wfe2Stats
68+
scope metrics.Scope
6869

6970
// URL configuration parameters
7071
BaseURL string
@@ -99,12 +100,12 @@ type WebFrontEndImpl struct {
99100

100101
// NewWebFrontEndImpl constructs a web service for Boulder
101102
func NewWebFrontEndImpl(
102-
stats metrics.Scope,
103+
scope metrics.Scope,
103104
clk clock.Clock,
104105
keyPolicy goodkey.KeyPolicy,
105106
logger blog.Logger,
106107
) (WebFrontEndImpl, error) {
107-
nonceService, err := nonce.NewNonceService(stats)
108+
nonceService, err := nonce.NewNonceService(scope)
108109
if err != nil {
109110
return WebFrontEndImpl{}, err
110111
}
@@ -114,7 +115,8 @@ func NewWebFrontEndImpl(
114115
clk: clk,
115116
nonceService: nonceService,
116117
keyPolicy: keyPolicy,
117-
stats: initStats(stats),
118+
stats: initStats(scope),
119+
scope: scope,
118120
}, nil
119121
}
120122

@@ -318,7 +320,7 @@ func (wfe *WebFrontEndImpl) Handler() http.Handler {
318320
// meaning we can wind up returning 405 when we mean to return 404. See
319321
// https://github.com/letsencrypt/boulder/issues/717
320322
m.Handle("/", web.NewTopHandler(wfe.log, web.WFEHandlerFunc(wfe.Index)))
321-
return measured_http.New(m, wfe.clk)
323+
return measured_http.New(m, wfe.clk, wfe.scope)
322324
}
323325

324326
// Method implementations

0 commit comments

Comments
 (0)
X Tutup