摘要:
目前将 raft 引入 influxdb 以组建高可用的 raft 集群, 但是在日志复制过程中 follower 节点会宕机, 本文记录该问题.
follower 节点宕机时的信息:
崩溃的堆栈:
(gdb) bt
#0 0x0000000000620a70 in bufio.(*Reader).fill (b=0xc00009d140) at /usr/lib/golang/src/bufio/bufio.go:101
#1 0x0000000000621db5 in bufio.(*Reader).ReadRune (b=0xc00009d140, r=0, size=0, err=...) at /usr/lib/golang/src/bufio/bufio.go:288
#2 0x0000000000a495ff in influxdb.cluster/services/influxql.(*reader).read (r=0xc0000f3300, ch=0, pos=...) at /root/work/influxdb-1.8.4/services/influxql/scanner.go:458
#3 0x0000000000a46246 in influxdb.cluster/services/influxql.(*Scanner).Scan (s=0xc000010c38, tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/scanner.go:26
#4 0x0000000000a4ebc5 in influxdb.cluster/services/influxql.(*Scanner).Scan-fm (tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/scanner.go:24
#5 0x0000000000a49144 in influxdb.cluster/services/influxql.(*bufScanner).scanFunc (s=0xc0003f03f0, scan=
{void (influxdb.cluster/services/influxql.Token *, influxdb.cluster/services/influxql.Pos *, string *)} 0xc00037ef98, tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/scanner.go:402
#6 0x0000000000a48d2b in influxdb.cluster/services/influxql.(*bufScanner).Scan (s=0xc0003f03f0, tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/scanner.go:383
#7 0x0000000000a43685 in influxdb.cluster/services/influxql.(*Parser).Scan (p=0xc00042f650, tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/parser.go:2897
#8 0x0000000000a43786 in influxdb.cluster/services/influxql.(*Parser).ScanIgnoreWhitespace (p=0xc00042f650, tok=0, pos=..., lit=...) at /root/work/influxdb-1.8.4/services/influxql/parser.go:2902
#9 0x0000000000a24f7a in influxdb.cluster/services/influxql.(*Parser).ParseQuery (p=0xc00042f650, ~r0=0x0, ~r1=...) at /root/work/influxdb-1.8.4/services/influxql/parser.go:74
#10 0x00000000014c90af in influxdb.cluster/services/httpd.(*Handler).ServeQueryApply (h=0xc0000f5a00, qr=..., uid=..., opts=..., ~r3=...) at /root/work/influxdb-1.8.4/services/httpd/handler.go:432
#11 0x00000000014c59fe in influxdb.cluster/services/httpd.ServeQueryApply (qr=..., uid=..., opts=..., ~r3=...) at /root/work/influxdb-1.8.4/services/httpd/handler.go:145
#12 0x00000000011c2319 in influxdb.cluster/services/haraft.(*wordTracker).ApplyQuery (f=0xc000343400, b=..., ~r1=...) at /root/work/influxdb-1.8.4/services/haraft/wordtracker.go:67
#13 0x00000000011c28f3 in influxdb.cluster/services/haraft.(*wordTracker).Apply (f=0xc000343400, l=0xc00009ce40, ~r1=...) at /root/work/influxdb-1.8.4/services/haraft/wordtracker.go:93
#14 0x0000000000e0060f in /hashicorp/raft.(*Raft).runFSM.func1 (req=0xc00042f550) at /root/work/influxdb-1.8.4/vendor//hashicorp/raft/fsm.go:90
#15 0x0000000000dffe18 in /hashicorp/raft.(*Raft).runFSM.func2 (reqs=...) at /root/work/influxdb-1.8.4/vendor//hashicorp/raft/fsm.go:113
#16 0x0000000000dff1ca in /hashicorp/raft.(*Raft).runFSM (r=0xc0002a2a00) at /root/work/influxdb-1.8.4/vendor//hashicorp/raft/fsm.go:216
#17 0x0000000000e2214b in /hashicorp/raft.(*Raft).runFSM-fm () at /root/work/influxdb-1.8.4/vendor//hashicorp/raft/fsm.go:69
#18 0x0000000000e20a3c in /hashicorp/raft.(*raftState).goFunc.func1 () at /root/work/influxdb-1.8.4/vendor//hashicorp/raft/state.go:146
#19 0x0000000000473921 in runtime.goexit () at /usr/lib/golang/src/runtime/asm_amd64.s:1581
#20 0x0000000000000000 in ?? ()
崩溃的原因 (bufio.Reader 的底层 io.Reader, 即 b.rd, 是 nil 接口, 因此 fill 中通过 b.rd 读取时触发空指针):
(gdb) p b.rd
$6 = {tab = 0x0, data = 0x0}
(gdb)
核心函数:
services/influxql: (*reader).read
// read returns the next rune from the input together with the position at
// which it was found.
//
// Runes that were previously pushed back (tracked by r.n) are replayed first.
// Otherwise a rune is pulled from the underlying reader: any read error,
// including io.EOF, is reported as the eof rune, and both a lone '\r' and a
// "\r\n" pair are collapsed into a single '\n'. The rune and its position are
// stored in the ring buffer r.buf before the position is advanced.
//
// NOTE(review): r.r.ReadRune is called unconditionally; if r.r wraps a nil
// io.Reader the call panics inside bufio — confirm callers never construct
// the reader over a nil source.
func (r *reader) read() (ch rune, pos Pos) {
	// Replay pushed-back runes before touching the underlying reader.
	if r.n > 0 {
		r.n--
		return r.curr()
	}

	next, _, readErr := r.r.ReadRune()
	switch {
	case readErr != nil:
		// Every failure is surfaced to the scanner as end of input.
		next = eof
	case next == '\r':
		// Peek at the following rune: swallow it when it completes a
		// "\r\n" pair, otherwise put it back. A failed peek is ignored
		// here and will be observed by the next call.
		following, _, peekErr := r.r.ReadRune()
		if peekErr == nil && following != '\n' {
			_ = r.r.UnreadRune()
		}
		next = '\n'
	}

	// Advance the ring buffer and record the rune with its starting position.
	r.i = (r.i + 1) % len(r.buf)
	slot := &r.buf[r.i]
	slot.ch = next
	slot.pos = r.pos

	// Move the position forward; once EOF has been seen the column stops
	// advancing so repeated EOF reads are only counted once.
	switch {
	case next == '\n':
		r.pos.Line++
		r.pos.Char = 0
	case !r.eof:
		r.pos.Char++
	}

	// Remember that EOF was reached so it is not counted again.
	if next == eof {
		r.eof = true
	}

	return r.curr()
}