TAS
TCP Acceleration as an OS Service
kernel.c
1 /*
2  * Copyright 2019 University of Washington, Max Planck Institute for
3  * Software Systems, and The University of Texas at Austin
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be
14  * included in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <sys/un.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
31 
32 #include <kernel_appif.h>
33 #include <utils_timeout.h>
34 #include "internal.h"
35 
/* Receive queue length requested for every new context: sent to the kernel
 * as rxq_len and recorded in ctx->rxq_len by flextcp_kernel_newctx().
 * NOTE(review): presumably a size in bytes -- confirm against the kernel. */
36 #define NIC_RXQ_LEN (64 * 32 * 1024)
/* Transmit queue length requested for every new context: sent to the kernel
 * as txq_len, recorded in ctx->txq_len and used as the initial txq_avail of
 * each queue.
 * NOTE(review): presumably a size in bytes -- confirm against the kernel. */
37 #define NIC_TXQ_LEN (64 * 8192)
38 
/* Unix socket connected to the kernel; stays -1 until
 * flextcp_kernel_connect() has completed successfully. */
39 static int ksock_fd = -1;
/* File descriptor taken from the ancillary data of the kernel's welcome
 * message; flextcp_kernel_kick() writes to it. 0 is used as the "not yet
 * received" marker (the kick path asserts that it is non-zero). */
40 static int kernel_evfd = 0;
41 
42 void flextcp_kernel_kick(void)
43 {
44  static uint64_t __thread last_ts = 0;
45  uint64_t now = util_rdtsc();
46 
47  /* fprintf(stderr, "kicking kernel?\n"); */
48 
49  if(now - last_ts > flexnic_info->poll_cycle_tas) {
50  // Kick kernel
51  /* fprintf(stderr, "kicking kernel\n"); */
52  assert(kernel_evfd != 0);
53  uint64_t val = 1;
54  int r = write(kernel_evfd, &val, sizeof(uint64_t));
55  assert(r == sizeof(uint64_t));
56  }
57 
58  last_ts = now;
59 }
60 
61 int flextcp_kernel_connect(void)
62 {
63  int fd, *pfd;
64  uint8_t b;
65  ssize_t r;
66  uint32_t num_fds, off, i, n;
67  struct sockaddr_un saun;
68  struct cmsghdr *cmsg;
69 
70  /* prepare socket address */
71  memset(&saun, 0, sizeof(saun));
72  saun.sun_family = AF_UNIX;
73  memcpy(saun.sun_path, KERNEL_SOCKET_PATH, sizeof(KERNEL_SOCKET_PATH));
74 
75  if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)) == -1) {
76  perror("flextcp_kernel_connect: socket failed");
77  return -1;
78  }
79 
80  if (connect(fd, (struct sockaddr *) &saun, sizeof(saun)) != 0) {
81  perror("flextcp_kernel_connect: connect failed");
82  return -1;
83  }
84 
85  struct iovec iov = {
86  .iov_base = &num_fds,
87  .iov_len = sizeof(uint32_t),
88  };
89  union {
90  char buf[CMSG_SPACE(sizeof(int) * 4)];
91  struct cmsghdr align;
92  } u;
93  struct msghdr msg = {
94  .msg_name = NULL,
95  .msg_namelen = 0,
96  .msg_iov = &iov,
97  .msg_iovlen = 1,
98  .msg_control = u.buf,
99  .msg_controllen = sizeof(u.buf),
100  .msg_flags = 0,
101  };
102 
103  /* receive welcome message:
104  * contains the fd for the kernel, and the count of flexnic fds */
105  if ((r = recvmsg(fd, &msg, 0)) != sizeof(uint32_t)) {
106  fprintf(stderr, "flextcp_kernel_connect: recvmsg failed (%zd)\n", r);
107  abort();
108  }
109 
110  /* get kernel fd from welcome message */
111  cmsg = CMSG_FIRSTHDR(&msg);
112  pfd = (int *) CMSG_DATA(cmsg);
113  if (msg.msg_controllen <= 0 || cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
114  fprintf(stderr, "flextcp_kernel_connect: accessing ancillary data "
115  "failed\n");
116  abort();
117  }
118  kernel_evfd = *pfd;
119 
120  /* receive fast path fds in batches of 4 */
121  off = 0;
122  for (off = 0 ; off < num_fds; ) {
123  iov.iov_base = &b;
124  iov.iov_len = 1;
125 
126  memset(&msg, 0, sizeof(msg));
127  msg.msg_iov = &iov;
128  msg.msg_iovlen = 1;
129  msg.msg_control = u.buf;
130  msg.msg_controllen = sizeof(u);
131 
132  /* receive fd message (up to 4 fds at once) */
133  if ((r = recvmsg(fd, &msg, 0)) != 1) {
134  fprintf(stderr, "flextcp_kernel_connect: recvmsg fd failed (%zd)\n", r);
135  abort();
136  }
137 
138  n = (num_fds - off >= 4 ? 4 : num_fds - off);
139 
140  /* get kernel fd from welcome message */
141  cmsg = CMSG_FIRSTHDR(&msg);
142  pfd = (int *) CMSG_DATA(cmsg);
143  if (msg.msg_controllen <= 0 || cmsg->cmsg_len != CMSG_LEN(sizeof(int) * n)) {
144  fprintf(stderr, "flextcp_kernel_connect: accessing ancillary data fds "
145  "failed\n");
146  abort();
147  }
148 
149  for (i = 0; i < n; i++) {
150  flexnic_evfd[off++] = pfd[i];
151  }
152  }
153 
154  ksock_fd = fd;
155  return 0;
156 }
157 
158 int flextcp_kernel_newctx(struct flextcp_context *ctx)
159 {
160  ssize_t sz, off, total_sz;
161  struct kernel_uxsock_response *resp;
162  uint8_t resp_buf[sizeof(*resp) +
163  FLEXTCP_MAX_FTCPCORES * sizeof(resp->flexnic_qs[0])];
164  struct kernel_uxsock_request req = {
165  .rxq_len = NIC_RXQ_LEN,
166  .txq_len = NIC_TXQ_LEN,
167  };
168  uint16_t i;
169 
170  /* send request on kernel socket */
171  struct iovec iov = {
172  .iov_base = &req,
173  .iov_len = sizeof(req),
174  };
175  union {
176  char buf[CMSG_SPACE(sizeof(int))];
177  struct cmsghdr align;
178  } u;
179  struct msghdr msg = {
180  .msg_name = NULL,
181  .msg_namelen = 0,
182  .msg_iov = &iov,
183  .msg_iovlen = 1,
184  .msg_control = u.buf,
185  .msg_controllen = sizeof(u.buf),
186  .msg_flags = 0,
187  };
188  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
189  cmsg->cmsg_level = SOL_SOCKET;
190  cmsg->cmsg_type = SCM_RIGHTS;
191  cmsg->cmsg_len = CMSG_LEN(sizeof(int));
192  int *myfd = (int *)CMSG_DATA(cmsg);
193  *myfd = ctx->evfd;
194  sz = sendmsg(ksock_fd, &msg, 0);
195  assert(sz == sizeof(req));
196 
197  /* receive response on kernel socket */
198  resp = (struct kernel_uxsock_response *) resp_buf;
199  off = 0;
200  while (off < sizeof(*resp)) {
201  sz = read(ksock_fd, (uint8_t *) resp + off, sizeof(*resp) - off);
202  if (sz < 0) {
203  perror("flextcp_kernel_newctx: read failed");
204  return -1;
205  }
206  off += sz;
207  }
208 
209  if (resp->flexnic_qs_num > FLEXTCP_MAX_FTCPCORES) {
210  fprintf(stderr, "flextcp_kernel_newctx: stack only supports up to %u "
211  "queues, got %u\n", FLEXTCP_MAX_FTCPCORES, resp->flexnic_qs_num);
212  abort();
213  }
214  /* receive queues in response */
215  total_sz = sizeof(*resp) + resp->flexnic_qs_num * sizeof(resp->flexnic_qs[0]);
216  while (off < total_sz) {
217  sz = read(ksock_fd, (uint8_t *) resp + off, total_sz - off);
218  if (sz < 0) {
219  perror("flextcp_kernel_newctx: read failed");
220  return -1;
221  }
222  off += sz;
223  }
224 
225  if (resp->status != 0) {
226  fprintf(stderr, "flextcp_kernel_newctx: request failed\n");
227  return -1;
228  }
229 
230  /* fill in ctx struct */
231  ctx->kin_base = (uint8_t *) flexnic_mem + resp->app_out_off;
232  ctx->kin_len = resp->app_out_len / sizeof(struct kernel_appout);
233  ctx->kin_head = 0;
234 
235  ctx->kout_base = (uint8_t *) flexnic_mem + resp->app_in_off;
236  ctx->kout_len = resp->app_in_len / sizeof(struct kernel_appin);
237  ctx->kout_head = 0;
238 
239  ctx->db_id = resp->flexnic_db_id;
240  ctx->num_queues = resp->flexnic_qs_num;
241  ctx->next_queue = 0;
242 
243  ctx->rxq_len = NIC_RXQ_LEN;
244  ctx->txq_len = NIC_TXQ_LEN;
245 
246  for (i = 0; i < resp->flexnic_qs_num; i++) {
247  ctx->queues[i].rxq_base =
248  (uint8_t *) flexnic_mem + resp->flexnic_qs[i].rxq_off;
249  ctx->queues[i].txq_base =
250  (uint8_t *) flexnic_mem + resp->flexnic_qs[i].txq_off;
251 
252  ctx->queues[i].rxq_head = 0;
253  ctx->queues[i].txq_tail = 0;
254  ctx->queues[i].txq_avail = ctx->txq_len;
255  ctx->queues[i].last_ts = 0;
256  }
257 
258  return 0;
259 }
260 
261 int flextcp_kernel_reqscale(struct flextcp_context *ctx, uint32_t cores)
262 {
263  uint32_t pos = ctx->kin_head;
264  struct kernel_appout *kin = ctx->kin_base;
265 
266  kin += pos;
267 
268  if (kin->type != KERNEL_APPOUT_INVALID) {
269  fprintf(stderr, "flextcp_kernel_reqscale: no queue space\n");
270  return -1;
271  }
272 
273  kin->data.req_scale.num_cores = cores;
274  MEM_BARRIER();
275  kin->type = KERNEL_APPOUT_REQ_SCALE;
276  flextcp_kernel_kick();
277 
278  pos = pos + 1;
279  if (pos >= ctx->kin_len) {
280  pos = 0;
281  }
282  ctx->kin_head = pos;
283 
284  return 0;
285 }
uint64_t poll_cycle_tas
Definition: tas_memif.h:68