#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>

#include <rte_config.h>
#include <rte_memcpy.h>
#include <rte_malloc.h>
#include <rte_lcore.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_spinlock.h>

#include <utils_rng.h>
#include <tas_memif.h>

#define PERTHREAD_MBUFS 2048
#define MBUF_SIZE (BUFFER_SIZE + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define RX_DESCRIPTORS 256
#define TX_DESCRIPTORS 128

uint8_t net_port_id = 0;
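/* Static port configuration template: receive-side scaling (RSS) over the
 * TCP/IPv4 hash on receive, default tx queueing. Checksum offloads are
 * requested in network_init() before the port is configured. */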
static struct rte_eth_conf port_conf = {
  .rxmode = {
    .mq_mode = ETH_MQ_RX_RSS,
#if RTE_VER_YEAR < 18
    .ignore_offload_bitfield = 1,
#endif
  },
  .txmode = {
    .mq_mode = ETH_MQ_TX_NONE,
  },
  .rx_adv_conf = {
    .rss_conf = {
      .rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
    },
  },
  .intr_conf = {
    /* rx queue interrupts; may be cleared again in network_init() */
    .rxq = 1,
  },
};
static unsigned num_threads;
static struct network_rx_thread **net_threads;

static struct rte_eth_dev_info eth_devinfo;

/* MAC address of the port, read during network_init() */
struct ether_addr eth_addr;

uint16_t rss_reta_size;
static struct rte_eth_rss_reta_entry64 *rss_reta = NULL;
static uint16_t *rss_core_buckets = NULL;
static struct rte_mempool *mempool_alloc(void);
static int reta_setup(void);
static int reta_mlx5_resize(void);

static rte_spinlock_t initlock = RTE_SPINLOCK_INITIALIZER;
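/* Initialize the network device: check that exactly one ethernet device is
 * present and that it offers one rx/tx hardware queue pair per thread, then
 * configure the port with n_threads queue pairs. The queues themselves are
 * set up later by each thread in network_thread_init(). */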
int network_init(unsigned n_threads)
{
  int ret;
  uint16_t count;
  uint16_t p;

  num_threads = n_threads;

  /* allocate thread pointer array */
  net_threads = rte_calloc("net thread ptrs", n_threads, sizeof(*net_threads), 0);
  if (net_threads == NULL) {
    goto error_exit;
  }

  /* make sure there is exactly one ethernet device */
#if RTE_VER_YEAR < 18
  count = rte_eth_dev_count();
#else
  count = rte_eth_dev_count_avail();
#endif
  if (count == 0) {
    fprintf(stderr, "No ethernet devices\n");
    goto error_exit;
  } else if (count > 1) {
    fprintf(stderr, "Multiple ethernet devices\n");
    goto error_exit;
  }
  /* use the only available port */
  RTE_ETH_FOREACH_DEV(p) {
    net_port_id = p;
    break;
  }

  rte_eth_macaddr_get(net_port_id, &eth_addr);
  rte_eth_dev_info_get(net_port_id, &eth_devinfo);

  /* one hardware queue pair is needed per core */
  if (eth_devinfo.max_rx_queues < n_threads ||
      eth_devinfo.max_tx_queues < n_threads)
  {
    fprintf(stderr, "Error: NIC does not support enough hw queues (rx=%u tx=%u)"
        " for the requested number of cores (%u)\n", eth_devinfo.max_rx_queues,
        eth_devinfo.max_tx_queues, n_threads);
    goto error_exit;
  }
  /* mask off RSS hash functions the NIC does not support */
  if ((port_conf.rx_adv_conf.rss_conf.rss_hf &
        eth_devinfo.flow_type_rss_offloads) !=
      port_conf.rx_adv_conf.rss_conf.rss_hf)
  {
    fprintf(stderr, "Warning: NIC does not support all requested RSS "
        "hash functions.\n");
    port_conf.rx_adv_conf.rss_conf.rss_hf &= eth_devinfo.flow_type_rss_offloads;
  }
  port_conf.txmode.offloads =
      DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM;

  port_conf.intr_conf.rxq = 0;

  ret = rte_eth_dev_configure(net_port_id, n_threads, n_threads, &port_conf);
  if (ret < 0) {
    fprintf(stderr, "rte_eth_dev_configure failed\n");
    goto error_exit;
  }

  /* workaround for mlx5 NICs */
  if (reta_mlx5_resize() != 0) {
    goto error_exit;
  }
  /* reset inherited default queue configs, then request checksum offloads */
#if RTE_VER_YEAR < 18
  eth_devinfo.default_txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
#endif
  eth_devinfo.default_rxconf.offloads = 0;
  eth_devinfo.default_txconf.offloads = 0;
  eth_devinfo.default_txconf.offloads =
      DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM;
  return 0;

error_exit:
  rte_free(net_threads);
  return -1;
}
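/* Stop the ethernet device and free the per-thread pointer array. */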
void network_cleanup(void)
{
  rte_eth_dev_stop(net_port_id);
  rte_free(net_threads);
}
void network_dump_stats(void)
{
  struct rte_eth_stats stats;

  if (rte_eth_stats_get(net_port_id, &stats) == 0) {
    fprintf(stderr, "network stats: ipackets=%"PRIu64" opackets=%"PRIu64
        " ibytes=%"PRIu64" obytes=%"PRIu64" imissed=%"PRIu64" ierrors=%"PRIu64
        " oerrors=%"PRIu64" rx_nombuf=%"PRIu64"\n", stats.ipackets,
        stats.opackets, stats.ibytes, stats.obytes, stats.imissed,
        stats.ierrors, stats.oerrors, stats.rx_nombuf);
  } else {
    fprintf(stderr, "failed to get stats\n");
  }
}
static volatile uint32_t tx_init_done = 0;
static volatile uint32_t rx_init_done = 0;
static volatile uint32_t start_done = 0;
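/* Per-thread initialization is synchronized with simple spin barriers: each
 * thread bumps tx_init_done/rx_init_done after setting up its queues and
 * waits for the others; start_done signals that the device has been started. */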
/* note: the exact context struct is not part of this excerpt; it is assumed
 * to carry the thread id and the per-thread struct network_thread */
int network_thread_init(struct dataplane_context *ctx)
{
  struct network_thread *t = &ctx->net;
  int ret;

  /* allocate per-thread mbuf pool */
  if ((t->pool = mempool_alloc()) == NULL) {
    return -1;
  }

  /* setup tx queue (serialized, queue setup is not thread-safe) */
  t->queue_id = ctx->id;
  rte_spinlock_lock(&initlock);
  ret = rte_eth_tx_queue_setup(net_port_id, t->queue_id, TX_DESCRIPTORS,
      rte_socket_id(), &eth_devinfo.default_txconf);
  rte_spinlock_unlock(&initlock);
  if (ret != 0) {
    fprintf(stderr, "network_thread_init: rte_eth_tx_queue_setup failed\n");
    return -1;
  }

  /* wait until all threads have set up their tx queue */
  __sync_add_and_fetch(&tx_init_done, 1);
  while (tx_init_done < num_threads);

  /* setup rx queue (serialized) */
  t->queue_id = ctx->id;
  rte_spinlock_lock(&initlock);
  ret = rte_eth_rx_queue_setup(net_port_id, t->queue_id, RX_DESCRIPTORS,
      rte_socket_id(), &eth_devinfo.default_rxconf, t->pool);
  rte_spinlock_unlock(&initlock);
  if (ret != 0) {
    fprintf(stderr, "network_thread_init: rte_eth_rx_queue_setup failed\n");
    return -1;
  }

  /* wait until all threads have set up their rx queue */
  __sync_add_and_fetch(&rx_init_done, 1);
  while (rx_init_done < num_threads);
  /* the first thread starts the device and performs one-time setup */
  if (ctx->id == 0) {
    if (rte_eth_dev_start(net_port_id) != 0) {
      fprintf(stderr, "rte_eth_dev_start failed\n");
      return -1;
    }

    /* enable vlan stripping */
    ret = rte_eth_dev_get_vlan_offload(net_port_id);
    ret |= ETH_VLAN_STRIP_OFFLOAD;
    if (rte_eth_dev_set_vlan_offload(net_port_id, ret)) {
      fprintf(stderr, "network_thread_init: vlan off set failed\n");
      return -1;
    }

    /* initialize the RSS redirection table */
    if (reta_setup() != 0) {
      fprintf(stderr, "RETA setup failed\n");
      return -1;
    }

    start_done = 1;
  }

  /* other threads wait for the device to be started */
  while (!start_done);
  /* register the rx queue interrupt with this thread's epoll instance */
  rte_spinlock_lock(&initlock);
  ret = rte_eth_dev_rx_intr_ctl_q(net_port_id, t->queue_id,
      RTE_EPOLL_PER_THREAD, RTE_INTR_EVENT_ADD, NULL);
  rte_spinlock_unlock(&initlock);
  if (ret != 0) {
    fprintf(stderr, "network_thread_init: rte_eth_dev_rx_intr_ctl_q failed "
        "(%d)\n", rte_errno);
    goto error_int_queue;
  }

  return 0;

error_int_queue:
  return -1;
}
/* enable or disable rx interrupts for this thread's queue */
int network_rx_interrupt_ctl(struct network_thread *t, int turnon)
{
  if (turnon) {
    return rte_eth_dev_rx_intr_enable(net_port_id, t->queue_id);
  } else {
    return rte_eth_dev_rx_intr_disable(net_port_id, t->queue_id);
  }
}
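/* Allocate a per-thread mbuf pool; each pool gets a unique name derived from
 * an atomically incremented counter. */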
static struct rte_mempool *mempool_alloc(void)
{
  static unsigned pool_id = 0;
  unsigned n;
  char name[32];

  n = __sync_fetch_and_add(&pool_id, 1);
  snprintf(name, sizeof(name), "mbuf_pool_%u", n);
  return rte_mempool_create(name, PERTHREAD_MBUFS, MBUF_SIZE, 32,
      sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL,
      rte_pktmbuf_init, NULL, rte_socket_id(), 0);
}
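/* rss_core_buckets[i] tracks how many RETA buckets are currently steered to
 * core i. core_min()/core_max() return the least/most loaded of the first
 * `num` cores and are used when rebalancing on scale up/down. */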
static inline uint16_t core_min(uint16_t num)
{
  uint16_t i, i_min = 0, v_min = UINT16_MAX;
  for (i = 0; i < num; i++) {
    if (rss_core_buckets[i] < v_min) {
      v_min = rss_core_buckets[i];
      i_min = i;
    }
  }
  return i_min;
}

static inline uint16_t core_max(uint16_t num)
{
  uint16_t i, i_max = 0, v_max = 0;
  for (i = 0; i < num; i++) {
    if (rss_core_buckets[i] >= v_max) {
      v_max = rss_core_buckets[i];
      i_max = i;
    }
  }
  return i_max;
}
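/* Scale up from `old` to `new` active cores: move roughly rss_reta_size / new
 * buckets onto each newly activated core by taking them from the currently
 * most loaded cores, then push the updated redirection table to the NIC. */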
int network_scale_up(uint16_t old, uint16_t new)
{
  uint16_t i, j, k, c, share = rss_reta_size / new;
  uint16_t outer, inner;

  /* clear update masks */
  for (k = 0; k < rss_reta_size; k += RTE_RETA_GROUP_SIZE) {
    rss_reta[k / RTE_RETA_GROUP_SIZE].mask = 0;
  }

  /* move `share` buckets onto each newly activated core */
  k = 0;
  for (j = old; j < new; j++) {
    for (i = 0; i < share; i++) {
      /* donor: the most loaded core so far (selection heuristic assumed) */
      c = core_max(j);
      for (; ; k = (k + 1) % rss_reta_size) {
        outer = k / RTE_RETA_GROUP_SIZE;
        inner = k % RTE_RETA_GROUP_SIZE;
        if (rss_reta[outer].reta[inner] == c) {
          rss_reta[outer].mask |= 1ULL << inner;
          rss_reta[outer].reta[inner] = j;
          fp_state->flow_group_steering[k] = j;
          break;
        }
      }
      rss_core_buckets[c]--;
      rss_core_buckets[j]++;
    }
  }

  if (rte_eth_dev_rss_reta_update(net_port_id, rss_reta, rss_reta_size) != 0) {
    fprintf(stderr, "network_scale_up: rte_eth_dev_rss_reta_update failed\n");
    return -1;
  }
  return 0;
}
/* Scale down from `old` to `new` active cores: every bucket steered to a
 * deactivated core (id >= new) is reassigned to the least loaded remaining
 * core, then the updated table is pushed to the NIC. */
int network_scale_down(uint16_t old, uint16_t new)
{
  uint16_t i, o_c, n_c, outer, inner;

  /* clear update masks */
  for (i = 0; i < rss_reta_size; i += RTE_RETA_GROUP_SIZE) {
    rss_reta[i / RTE_RETA_GROUP_SIZE].mask = 0;
  }

  for (i = 0; i < rss_reta_size; i++) {
    outer = i / RTE_RETA_GROUP_SIZE;
    inner = i % RTE_RETA_GROUP_SIZE;

    o_c = rss_reta[outer].reta[inner];
    if (o_c >= new) {
      n_c = core_min(new);

      rss_reta[outer].reta[inner] = n_c;
      rss_reta[outer].mask |= 1ULL << inner;
      fp_state->flow_group_steering[i] = n_c;

      rss_core_buckets[o_c]--;
      rss_core_buckets[n_c]++;
    }
  }

  if (rte_eth_dev_rss_reta_update(net_port_id, rss_reta, rss_reta_size) != 0) {
    fprintf(stderr, "network_scale_down: rte_eth_dev_rss_reta_update failed\n");
    return -1;
  }
  return 0;
}
/* Initial RSS redirection table setup: allocate the table and per-core bucket
 * counters, then spread all buckets round-robin over the active fast-path
 * cores and mirror the steering into fp_state->flow_group_steering. */
static int reta_setup(void)
{
  uint16_t i, c;

  rss_reta_size = eth_devinfo.reta_size;
  rss_reta = rte_calloc("rss reta", ((rss_reta_size + RTE_RETA_GROUP_SIZE - 1) /
        RTE_RETA_GROUP_SIZE), sizeof(*rss_reta), 0);
  rss_core_buckets = rte_calloc("rss core buckets", fp_cores_max,
      sizeof(*rss_core_buckets), 0);
  if (rss_reta == NULL || rss_core_buckets == NULL) {
    fprintf(stderr, "reta_setup: rss_reta alloc failed\n");
    goto error_exit;
  }

  if (rss_reta_size > FLEXNIC_PL_MAX_FLOWGROUPS) {
    fprintf(stderr, "reta_setup: reta size (%u) greater than maximum supported"
        " (%u)\n", rss_reta_size, FLEXNIC_PL_MAX_FLOWGROUPS);
    goto error_exit;
  }

  /* distribute flow groups across active cores round-robin */
  for (i = 0, c = 0; i < rss_reta_size; i++) {
    rss_core_buckets[c]++;
    rss_reta[i / RTE_RETA_GROUP_SIZE].mask = -1ULL;
    rss_reta[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] = c;
    fp_state->flow_group_steering[i] = c;
    c = (c + 1) % fp_cores_cur;
  }

  if (rte_eth_dev_rss_reta_update(net_port_id, rss_reta, rss_reta_size) != 0) {
    fprintf(stderr, "reta_setup: rte_eth_dev_rss_reta_update failed\n");
    goto error_exit;
  }

  return 0;

error_exit:
  rte_free(rss_core_buckets);
  rte_free(rss_reta);
  return -1;
}
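/* mlx5 NICs report a small default redirection table size in dev_info; bump
 * it so that scaling has enough buckets to balance. A warning is printed for
 * any NIC with fewer than 128 entries. */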
static int reta_mlx5_resize(void)
{
  if (!strcmp(eth_devinfo.driver_name, "net_mlx5")) {
    eth_devinfo.reta_size = 512;
  }

  if (eth_devinfo.reta_size < 128) {
    fprintf(stderr, "net: RSS redirection table is small (%u), this results in"
        " bad load balancing when scaling down\n", eth_devinfo.reta_size);
  }

  return 0;
}