Skip to content

Commit c586f14

Browse files
AliSQLAliSQL
authored and committed
[Feature] Issue #46 Semisync optimization and fix invalid packet error
Description ----------- Originally, semisync used select() to listen on all slave sockets. However, select() has several restrictions (for example, it cannot handle socket fds beyond FD_SETSIZE), so it is replaced by poll(). In addition, Ack_receiver may read from an invalid socket fd and report network packet errors. The socket should be cleared when a read fails with ER_NET_READ_ERROR; otherwise, Ack_receiver will keep reporting network packet errors until the dump thread is killed.
1 parent 4f246a8 commit c586f14

3 files changed

Lines changed: 151 additions & 42 deletions

File tree

sql/semisync_master_ack_receiver.cc

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "semisync_master.h"
1717
#include "semisync_master_ack_receiver.h"
18+
#include "semisync_master_socket_listener.h"
1819

1920
extern ReplSemiSyncMaster repl_semisync;
2021

@@ -183,22 +184,6 @@ inline void Ack_receiver::wait_for_slave_connection()
183184
mysql_cond_wait(&m_cond, &m_mutex);
184185
}
185186

186-
my_socket Ack_receiver::get_slave_sockets(fd_set *fds)
187-
{
188-
my_socket max_fd= INVALID_SOCKET;
189-
unsigned int i;
190-
191-
FD_ZERO(fds);
192-
for (i= 0; i < m_slaves.size(); i++)
193-
{
194-
my_socket fd= m_slaves[i].sock_fd();
195-
max_fd= (fd > max_fd ? fd : max_fd);
196-
FD_SET(fd, fds);
197-
}
198-
199-
return max_fd;
200-
}
201-
202187
/* Auxiliary function to initialize a NET object with given net buffer. */
203188
static void init_net(NET *net, unsigned char *buff, unsigned int buff_len)
204189
{
@@ -214,10 +199,14 @@ void Ack_receiver::run()
214199
NET net;
215200
unsigned char net_buff[REPLY_MESSAGE_MAX_LENGTH];
216201

217-
fd_set read_fds;
218-
my_socket max_fd= INVALID_SOCKET;
219202
uint i;
220203

204+
#ifdef HAVE_POLL
205+
Poll_socket_listener listener(m_slaves);
206+
#else
207+
Select_socket_listener listener(m_slaves);
208+
#endif //HAVE_POLL
209+
221210
sql_print_information("Starting ack receiver thread");
222211

223212
init_net(&net, net_buff, REPLY_MESSAGE_MAX_LENGTH);
@@ -228,7 +217,6 @@ void Ack_receiver::run()
228217

229218
while (1)
230219
{
231-
fd_set fds;
232220
Slave_vector_it it;
233221
int ret;
234222

@@ -246,24 +234,20 @@ void Ack_receiver::run()
246234
continue;
247235
}
248236

249-
max_fd= get_slave_sockets(&read_fds);
237+
if (!listener.init_slave_sockets())
238+
goto end;
250239
m_slaves_changed= false;
251-
DBUG_PRINT("info", ("fd count %lu, max_fd %d", (ulong)m_slaves.size(),
252-
max_fd));
253240
}
254241

255-
struct timeval tv= {1, 0};
256-
fds= read_fds;
257-
/* select requires max fd + 1 for the first argument */
258-
ret= select(max_fd+1, &fds, NULL, NULL, &tv);
242+
ret= listener.listen_on_sockets();
259243
if (ret <= 0)
260244
{
261245
mysql_mutex_unlock(&m_mutex);
262246

263247
ret= DBUG_EVALUATE_IF("rpl_semisync_simulate_select_error", -1, ret);
264248

265-
if (ret == -1)
266-
sql_print_information("Failed to select() on semi-sync dump sockets, "
249+
if (ret == -1 && errno != EINTR)
250+
sql_print_information("Failed to wait on semi-sync dump sockets, "
267251
"error: errno=%d", socket_errno);
268252
/* Sleep 1us, so other threads can catch the m_mutex easily. */
269253
my_sleep(1);
@@ -274,7 +258,7 @@ void Ack_receiver::run()
274258
i= 0;
275259
while (i < m_slaves.size())
276260
{
277-
if (FD_ISSET(m_slaves[i].sock_fd(), &fds))
261+
if (listener.is_socket_active(i))
278262
{
279263
ulong len;
280264

@@ -286,7 +270,7 @@ void Ack_receiver::run()
286270
repl_semisync_master.reportReplyPacket(m_slaves[i].server_id(),
287271
net.read_pos, len);
288272
else if (net.last_errno == ER_NET_READ_ERROR)
289-
FD_CLR(m_slaves[i].sock_fd(), &read_fds);
273+
listener.clear_socket_info(i);
290274
}
291275
i++;
292276
}

sql/semisync_master_ack_receiver.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,19 @@
2121
#include "my_pthread.h"
2222
#include "sql_class.h"
2323
#include "semisync.h"
24+
25+
/*
  Per-slave record kept in a Slave_vector. It pairs the THD the slave is
  associated with and the Vio whose socket is read from. Both accessors are
  const so they can be called through a const Slave_vector reference.
*/
struct Slave
26+
{
27+
/* Not owned here as far as this struct shows; only server_id is read. */
THD *thd;
28+
/* Stored by value; sock_fd() exposes its underlying socket descriptor. */
Vio vio;
29+
30+
/* Returns the raw socket descriptor held in vio.mysql_socket. */
my_socket sock_fd() const { return vio.mysql_socket.fd; }
31+
/* Returns thd->server_id; thd must be non-NULL when this is called. */
uint server_id() const { return thd->server_id; }
32+
};
33+
34+
typedef std::vector<Slave> Slave_vector;
35+
typedef Slave_vector::iterator Slave_vector_it;
36+
2437
/**
2538
Ack_receiver is responsible to control ack receive thread and maintain
2639
slave information used by ack receive thread.
@@ -93,17 +106,6 @@ class Ack_receiver : public ReplSemiSyncBase
93106
/* If slave list is updated(add or remove). */
94107
bool m_slaves_changed;
95108

96-
struct Slave
97-
{
98-
THD *thd;
99-
Vio vio;
100-
101-
my_socket sock_fd() { return vio.mysql_socket.fd; }
102-
uint server_id() { return thd->server_id; }
103-
};
104-
105-
typedef std::vector<Slave> Slave_vector;
106-
typedef Slave_vector::iterator Slave_vector_it;
107109
Slave_vector m_slaves;
108110

109111
pthread_t m_pid;
@@ -114,7 +116,6 @@ class Ack_receiver : public ReplSemiSyncBase
114116

115117
void set_stage_info(const PSI_stage_info &stage);
116118
void wait_for_slave_connection();
117-
my_socket get_slave_sockets(fd_set *fds);
118119
};
119120

120121
extern Ack_receiver ack_receiver;
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/* Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
2+
3+
This program is free software; you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License as published by
5+
the Free Software Foundation; version 2 of the License.
6+
7+
This program is distributed in the hope that it will be useful,
8+
but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10+
GNU General Public License for more details.
11+
12+
You should have received a copy of the GNU General Public License
13+
along with this program; if not, write to the Free Software
14+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15+
16+
#ifndef SEMISYNC_MASTER_SOCKET_LISTENER
17+
#define SEMISYNC_MASTER_SOCKET_LISTENER
18+
#include "semisync_master_ack_receiver.h"
19+
20+
#ifdef HAVE_POLL
21+
#include <sys/poll.h>
22+
#include <vector>
23+
24+
class Poll_socket_listener
25+
{
26+
public:
27+
Poll_socket_listener(const Slave_vector &slaves)
28+
:m_slaves(slaves)
29+
{
30+
}
31+
32+
bool listen_on_sockets()
33+
{
34+
return poll(m_fds.data(), m_fds.size(), 1000 /*1 Second timeout*/);
35+
}
36+
37+
bool is_socket_active(int index)
38+
{
39+
return m_fds[index].revents & POLLIN;
40+
}
41+
42+
void clear_socket_info(int index)
43+
{
44+
m_fds[index].fd= -1;
45+
m_fds[index].events= 0;
46+
}
47+
48+
bool init_slave_sockets()
49+
{
50+
m_fds.clear();
51+
for (uint i= 0; i < m_slaves.size(); i++)
52+
{
53+
pollfd poll_fd;
54+
poll_fd.fd= m_slaves[i].sock_fd();
55+
poll_fd.events= POLLIN;
56+
m_fds.push_back(poll_fd);
57+
}
58+
return true;
59+
}
60+
61+
private:
62+
const Slave_vector &m_slaves;
63+
std::vector<pollfd> m_fds;
64+
};
65+
66+
#else //NO POLL
67+
68+
class Select_socket_listener
69+
{
70+
public:
71+
Select_socket_listener(const Slave_vector &slaves)
72+
:m_slaves(slaves), m_max_fd(INVALID_SOCKET)
73+
{
74+
}
75+
76+
bool listen_on_sockets()
77+
{
78+
/* Reinitialze the fds with active fds before calling select */
79+
m_fds= m_init_fds;
80+
struct timeval tv= {1,0};
81+
/* select requires max fd + 1 for the first argument */
82+
return select(m_max_fd+1, &m_fds, NULL, NULL, &tv);
83+
}
84+
85+
bool is_socket_active(int index)
86+
{
87+
return FD_ISSET(m_slaves[index].sock_fd(), &m_fds);
88+
}
89+
90+
void clear_socket_info(int index)
91+
{
92+
FD_CLR(m_slaves[index].sock_fd(), &m_init_fds);
93+
}
94+
95+
bool init_slave_sockets()
96+
{
97+
FD_ZERO(&m_init_fds);
98+
for (uint i= 0; i < m_slaves.size(); i++)
99+
{
100+
my_socket socket_id= m_slaves[i].sock_fd();
101+
m_max_fd= (socket_id > m_max_fd ? socket_id : m_max_fd);
102+
#ifndef WINDOWS
103+
if (socket_id > FD_SETSIZE)
104+
{
105+
sql_print_error("Semisync slave socket fd is %u. "
106+
"select() cannot handle if the socket fd is "
107+
"bigger than %u (FD_SETSIZE).", socket_id, FD_SETSIZE);
108+
return false;
109+
}
110+
#endif //WINDOWS
111+
FD_SET(socket_id, &m_init_fds);
112+
}
113+
return true;
114+
}
115+
116+
private:
117+
const Slave_vector &m_slaves;
118+
my_socket m_max_fd;
119+
fd_set m_init_fds;
120+
fd_set m_fds;
121+
};
122+
123+
#endif //HAVE_POLL
124+
#endif //SEMISYNC_MASTER_SOCKET_LISTENER

0 commit comments

Comments
 (0)