-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvmxnet3.c
2541 lines (2117 loc) · 76.8 KB
/
vmxnet3.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* QEMU VMWARE VMXNET3 paravirtual NIC
*
* Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
*
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Tamir Shomer <tamirs@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "net/tap.h"
#include "net/checksum.h"
#include "sysemu/sysemu.h"
#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "migration/register.h"
#include "migration/vmstate.h"
#include "vmxnet3.h"
#include "vmxnet3_defs.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
#include "net_tx_pkt.h"
#include "net_rx_pkt.h"
#include "qom/object.h"
/* Presumably the value reported as PCI revision ID - TODO confirm at use site */
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
/* Size constant for the MSI-X BAR (going by the name; used outside this view) */
#define VMXNET3_MSIX_BAR_SIZE 0x2000
/*
 * Compatibility flags for migration.
 * The *_BIT macros give a bit position inside the state's compat_flags
 * field; the unsuffixed macros are the corresponding single-bit masks.
 */
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
(1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
(1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
/* Presumably the config-space offset of the PCIe endpoint capability - TODO confirm */
#define VMXNET3_EXP_EP_OFFSET (0x48)
/*
 * MSI / MSI-X offsets depend on the OLD_MSI_OFFSETS compat flag:
 * when the flag is set in (s)->compat_flags the first value of each
 * ternary is used, otherwise the second.
 */
#define VMXNET3_MSI_OFFSET(s) \
((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
#define VMXNET3_MSIX_OFFSET(s) \
((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
/* Presumably the offset of the Device Serial Number capability - TODO confirm */
#define VMXNET3_DSN_OFFSET (0x100)
/* BAR indices */
#define VMXNET3_BAR0_IDX (0)
#define VMXNET3_BAR1_IDX (1)
#define VMXNET3_MSIX_BAR_IDX (2)
/* Offsets of the MSI-X table and PBA; the PBA offset is compat-flag dependent */
#define VMXNET3_OFF_MSIX_TABLE (0x000)
#define VMXNET3_OFF_MSIX_PBA(s) \
((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)
/* Link status: 1 - up, 0 - down. */
#define VMXNET3_LINK_STATUS_UP 0x1
/* Least significant bit should be set for revision and version */
#define VMXNET3_UPT_REVISION 0x1
#define VMXNET3_DEVICE_REVISION 0x1
/* Number of interrupt vectors for non-MSIx modes */
#define VMXNET3_MAX_NMSIX_INTRS (1)
/*
 * Macros for rings descriptors access.
 *
 * Each macro reads (ld) or writes (st) one field of a TX/RX queue
 * descriptor located at address @dpa through the vmw_shmem_* helpers.
 *   _d    - device handle forwarded unchanged to the vmw_shmem_* helper
 *   dpa   - base address of the queue descriptor
 *   field - member name inside the descriptor structure
 *   value - value to store (write variants only)
 * The numeric suffix is the access width in bits.
 */
#define VMXNET3_READ_TX_QUEUE_DESCR8(_d, dpa, field) \
    (vmw_shmem_ld8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
/*
 * The closing parenthesis of offsetof() must come right after @field so
 * that @value is passed as the third argument of vmw_shmem_st8(), the same
 * way the 32/64-bit write variants below do it.
 */
#define VMXNET3_WRITE_TX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
#define VMXNET3_READ_TX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
#define VMXNET3_WRITE_TX_QUEUE_DESCR32(_d, dpa, field, value) \
    (vmw_shmem_st32(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
#define VMXNET3_READ_TX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
#define VMXNET3_WRITE_TX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
#define VMXNET3_READ_RX_QUEUE_DESCR64(_d, dpa, field) \
    (vmw_shmem_ld64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
#define VMXNET3_READ_RX_QUEUE_DESCR32(_d, dpa, field) \
    (vmw_shmem_ld32(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
#define VMXNET3_WRITE_RX_QUEUE_DESCR64(_d, dpa, field, value) \
    (vmw_shmem_st64(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
#define VMXNET3_WRITE_RX_QUEUE_DESCR8(_d, dpa, field, value) \
    (vmw_shmem_st8(_d, dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
/*
 * Macros for guest driver shared area access.
 *
 * Each macro accesses one member (@field) of struct Vmxnet3_DriverShared
 * located at address @shpa, through the vmw_shmem_* helper of the width
 * given by the numeric suffix. @_d is forwarded unchanged to the helper.
 */
#define VMXNET3_READ_DRV_SHARED64(_d, shpa, field) \
(vmw_shmem_ld64(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
#define VMXNET3_READ_DRV_SHARED32(_d, shpa, field) \
(vmw_shmem_ld32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
#define VMXNET3_WRITE_DRV_SHARED32(_d, shpa, field, val) \
(vmw_shmem_st32(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), val))
#define VMXNET3_READ_DRV_SHARED16(_d, shpa, field) \
(vmw_shmem_ld16(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
#define VMXNET3_READ_DRV_SHARED8(_d, shpa, field) \
(vmw_shmem_ld8(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field)))
/* Bulk variant: reads @l bytes starting at @field into buffer @b */
#define VMXNET3_READ_DRV_SHARED(_d, shpa, field, b, l) \
(vmw_shmem_read(_d, shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
/* True only when every bit of @flag is set in @field */
#define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
/*
 * Class structure for the vmxnet3 device type.
 *
 * parent_class      - embedded PCI device class (first member)
 * parent_dc_realize - DeviceRealize callback slot; presumably stores the
 *                     parent's realize handler so it can be chained -
 *                     TODO confirm where it is assigned
 */
struct VMXNET3Class {
PCIDeviceClass parent_class;
DeviceRealize parent_dc_realize;
};
typedef struct VMXNET3Class VMXNET3Class;
/* Declares the class cast/check helpers for VMXNET3Class under TYPE_VMXNET3 */
DECLARE_CLASS_CHECKERS(VMXNET3Class, VMXNET3_DEVICE,
TYPE_VMXNET3)
/*
 * Initialize the bookkeeping of a descriptor ring.
 *
 * @d:           device handle passed to vmw_shmem_set() when zeroing
 * @ring:        ring state to fill in
 * @pa:          base address of the ring
 * @size:        number of cells in the ring
 * @cell_size:   size of one cell
 * @zero_region: when true, the ring memory (size * cell_size bytes starting
 *               at @pa) is filled with zeroes
 *
 * The ring starts at cell 0 with generation VMXNET3_INIT_GEN.
 */
static inline void vmxnet3_ring_init(PCIDevice *d,
                                     Vmxnet3Ring *ring,
                                     hwaddr pa,
                                     uint32_t size,
                                     uint32_t cell_size,
                                     bool zero_region)
{
    /* Position: first cell, initial generation */
    ring->next = 0;
    ring->gen = VMXNET3_INIT_GEN;

    /* Geometry */
    ring->cell_size = cell_size;
    ring->size = size;
    ring->pa = pa;

    if (!zero_region) {
        return;
    }

    vmw_shmem_set(d, pa, 0, size * cell_size);
}
/*
 * Print the state of ring @r through the printf-like @macro.
 * @ring_name and @ridx are only used to label the output line.
 */
#define VMXNET3_RING_DUMP(macro, ring_name, ridx, r) \
macro("%s#%d: base %" PRIx64 " size %u cell_size %u gen %d next %u", \
(ring_name), (ridx), \
(r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
/*
 * Advance the ring to its next cell. When the index reaches the ring size
 * it wraps back to 0 and the generation bit is flipped.
 */
static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
{
    ring->next++;

    if (ring->next < ring->size) {
        return;
    }

    /* Wrapped around */
    ring->next = 0;
    ring->gen ^= 1;
}
/*
 * Step the ring back by one cell. Stepping back from cell 0 moves to the
 * last cell (size - 1) and flips the generation bit.
 */
static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
{
    if (ring->next != 0) {
        ring->next--;
        return;
    }

    /* Wrapped around backwards */
    ring->next = ring->size - 1;
    ring->gen ^= 1;
}
/*
 * Return the address of the ring's current cell:
 * base address + current index * cell size.
 *
 * The index is widened to hwaddr before the multiplication so the product
 * is computed at address width rather than at the width of the ring
 * fields (vmxnet3_ring_init() fills size/cell_size from uint32_t values,
 * so the fields are presumably 32-bit - the cast is a no-op otherwise).
 */
static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
{
    return ring->pa + (hwaddr)ring->next * ring->cell_size;
}
/*
 * Copy the ring's current cell into @buff.
 * Exactly ring->cell_size bytes are read via vmw_shmem_read().
 */
static inline void vmxnet3_ring_read_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                               void *buff)
{
    hwaddr cell_pa = vmxnet3_ring_curr_cell_pa(ring);

    vmw_shmem_read(d, cell_pa, buff, ring->cell_size);
}
/*
 * Store @buff into the ring's current cell.
 * Exactly ring->cell_size bytes are written via vmw_shmem_write().
 */
static inline void vmxnet3_ring_write_curr_cell(PCIDevice *d, Vmxnet3Ring *ring,
                                                void *buff)
{
    hwaddr cell_pa = vmxnet3_ring_curr_cell_pa(ring);

    vmw_shmem_write(d, cell_pa, buff, ring->cell_size);
}
/* Return the index of the ring's current cell. */
static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
{
    size_t idx = ring->next;

    return idx;
}
/* Return the ring's current generation value. */
static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
{
    uint8_t gen = ring->gen;

    return gen;
}
/* Debug trace-related functions */

/* Print every field of TX descriptor @descr through VMW_PKPRN. */
static inline void
vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
{
    VMW_PKPRN("TX DESCR: "
              "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
              "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
              descr->addr,
              descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->msscof,
              descr->hlen, descr->om,
              descr->eop, descr->cq, descr->ext2,
              descr->ti, descr->tci);
}
/* Print every field of the virtio-net header @vhdr through VMW_PKPRN. */
static inline void
vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
{
    VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
              "csum_start: %d, csum_offset: %d",
              vhdr->flags, vhdr->gso_type,
              vhdr->hdr_len, vhdr->gso_size,
              vhdr->csum_start, vhdr->csum_offset);
}
/* Print every field of RX descriptor @descr through VMW_PKPRN. */
static inline void
vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
{
    VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
              "dtype: %d, ext1: %d, btype: %d",
              descr->addr,
              descr->len, descr->gen, descr->rsvd,
              descr->dtype, descr->ext1, descr->btype);
}
/* Interrupt management */

/*
 * Deliver interrupt @int_idx using whichever mechanism is active and report
 * whether an interrupt line is left asserted afterwards.
 *
 * MSI-X is tried first (when s->msix_used and MSI-X is enabled), then MSI;
 * both are notifications, so nothing stays asserted and false is returned.
 * Otherwise the legacy line is asserted and stays so until it is explicitly
 * deasserted, hence true is returned.
 */
static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
{
    PCIDevice *d = PCI_DEVICE(s);
    bool line_asserted = false;

    if (s->msix_used && msix_enabled(d)) {
        VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
        msix_notify(d, int_idx);
    } else if (msi_enabled(d)) {
        VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
        msi_notify(d, int_idx);
    } else {
        VMW_IRPRN("Asserting line for interrupt %u", int_idx);
        pci_irq_assert(d);
        line_asserted = true;
    }

    return line_asserted;
}
/*
 * Deassert the legacy interrupt line.
 *
 * Must never be called while MSI or MSI-X is in use: message interrupts
 * never require deassertion. Both conditions are enforced with assert().
 * @lidx is only used for the trace message.
 */
static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
{
    PCIDevice *pci_dev = PCI_DEVICE(s);

    /* Not valid in MSI-X mode */
    assert(!s->msix_used || !msix_enabled(pci_dev));

    /* Not valid in MSI mode */
    assert(!msi_enabled(pci_dev));

    VMW_IRPRN("Deasserting line for interrupt %u", lidx);
    pci_irq_deassert(pci_dev);
}
/*
 * Bring the physical state of interrupt @lidx in line with its logical
 * state (is_pending / is_masked / is_asserted):
 *
 *  - asserted but no longer pending: deassert the line;
 *  - pending, unmasked and not asserted: deliver the interrupt, record
 *    whether a line stays asserted, and clear the pending flag;
 *  - anything else: no change.
 */
static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
{
    bool pending = s->interrupt_states[lidx].is_pending;
    bool asserted = s->interrupt_states[lidx].is_asserted;

    if (!pending && asserted) {
        VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
        _vmxnet3_deassert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_asserted = false;
    } else if (pending && !asserted &&
               !s->interrupt_states[lidx].is_masked) {
        VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
        s->interrupt_states[lidx].is_asserted =
            _vmxnet3_assert_interrupt_line(s, lidx);
        s->interrupt_states[lidx].is_pending = false;
    }
}
/*
 * Raise interrupt @lidx: mark it pending and update the line state.
 *
 * When a message interrupt mode is active (MSI-X in use and enabled, or
 * MSI enabled) and s->auto_int_masking is set, the interrupt is then
 * masked and the line state is updated once more.
 */
static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
{
    PCIDevice *d = PCI_DEVICE(s);
    bool message_mode;

    s->interrupt_states[lidx].is_pending = true;
    vmxnet3_update_interrupt_line_state(s, lidx);

    message_mode = (s->msix_used && msix_enabled(d)) || msi_enabled(d);
    if (message_mode && s->auto_int_masking) {
        /* Auto-mask after delivery */
        s->interrupt_states[lidx].is_masked = true;
        vmxnet3_update_interrupt_line_state(s, lidx);
    }
}
/* Tell whether interrupt @lidx is currently recorded as asserted. */
static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
{
    bool asserted = s->interrupt_states[lidx].is_asserted;

    return asserted;
}
/*
 * Clear interrupt @int_idx: drop its pending flag, mask it when
 * s->auto_int_masking is set, then update the line state.
 */
static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
{
    if (s->auto_int_masking) {
        s->interrupt_states[int_idx].is_masked = true;
    }
    s->interrupt_states[int_idx].is_pending = false;

    vmxnet3_update_interrupt_line_state(s, int_idx);
}
/*
 * Record the new mask state (@is_masked) of interrupt @lidx and update the
 * line state accordingly.
 */
static void
vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
{
    s->interrupt_states[lidx].is_masked = is_masked;

    vmxnet3_update_interrupt_line_state(s, lidx);
}
/*
 * Check that the 'magic' member of the driver shared area at @dshmem
 * equals VMXNET3_REV1_MAGIC.
 */
static bool vmxnet3_verify_driver_magic(PCIDevice *d, hwaddr dshmem)
{
    return VMXNET3_READ_DRV_SHARED32(d, dshmem, magic) == VMXNET3_REV1_MAGIC;
}
/* Extract byte number @byte_num (0 = least significant) from @x */
#define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
/* Place the low 8 bits of @val at byte position @byte_num of a uint32_t */
#define VMXNET3_MAKE_BYTE(byte_num, val) \
(((uint32_t)((val) & 0xFF)) << (byte_num)*8)
/*
 * Set the MAC address from two register values.
 *
 * @l: supplies MAC bytes 0..3 (byte 0 from its least significant byte)
 * @h: supplies MAC bytes 4..5 (byte 4 from its least significant byte)
 *
 * The NIC info string is refreshed afterwards.
 */
static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
{
    int i;

    for (i = 0; i < 4; i++) {
        s->conf.macaddr.a[i] = VMXNET3_GET_BYTE(l, i);
    }
    for (i = 0; i < 2; i++) {
        s->conf.macaddr.a[4 + i] = VMXNET3_GET_BYTE(h, i);
    }

    VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
}
/*
 * Pack MAC bytes 0..3 into one value, byte 0 in the least significant
 * position.
 */
static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
{
    uint32_t packed = 0;
    int i;

    for (i = 0; i < 4; i++) {
        packed |= VMXNET3_MAKE_BYTE(i, addr->a[i]);
    }

    return packed;
}
/*
 * Pack MAC bytes 4..5 into one value, byte 4 in the least significant
 * position.
 */
static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
{
    uint32_t packed = 0;
    int i;

    for (i = 0; i < 2; i++) {
        packed |= VMXNET3_MAKE_BYTE(i, addr->a[4 + i]);
    }

    return packed;
}
/* Advance the TX descriptor ring of queue @qidx by one cell. */
static void
vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&(s->txq_descr[qidx].tx_ring));
}
/* Advance RX descriptor ring @ridx of queue @qidx by one cell. */
static inline void
vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
{
    vmxnet3_ring_inc(&(s->rxq_descr[qidx].rx_ring[ridx]));
}
/* Advance the TX completion ring of queue @qidx by one cell. */
static inline void
vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&(s->txq_descr[qidx].comp_ring));
}
/* Advance the RX completion ring of queue @qidx by one cell. */
static void
vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_inc(&(s->rxq_descr[qidx].comp_ring));
}
/* Step the RX completion ring of queue @qidx back by one cell. */
static void
vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
{
    vmxnet3_ring_dec(&(s->rxq_descr[qidx].comp_ring));
}
/*
 * Report completion of a transmitted packet on queue @qidx.
 *
 * A zeroed TX completion descriptor is filled with @tx_ridx and the
 * completion ring's current generation, converted to little endian and
 * written to the current completion-ring cell. The ring is advanced only
 * after a write barrier, and finally the queue's interrupt is triggered.
 */
static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
{
    PCIDevice *d = PCI_DEVICE(s);
    struct Vmxnet3_TxCompDesc txcq_descr;

    VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);

    memset(&txcq_descr, 0, sizeof(txcq_descr));
    txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
    txcq_descr.txdIdx = tx_ridx;

    /* The descriptor is stored in little-endian layout */
    txcq_descr.val1 = cpu_to_le32(txcq_descr.val1);
    txcq_descr.val2 = cpu_to_le32(txcq_descr.val2);

    vmxnet3_ring_write_curr_cell(d, &s->txq_descr[qidx].comp_ring, &txcq_descr);

    /* Flush changes in TX descriptor before changing the counter value */
    smp_wmb();

    vmxnet3_inc_tx_completion_counter(s, qidx);
    vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
}
/*
 * Build the virtual header of the pending TX packet according to
 * s->offload_mode.
 *
 *  VMXNET3_OM_NONE - no offloads
 *  VMXNET3_OM_CSUM - checksum offload
 *  VMXNET3_OM_TSO  - segmentation + checksum offload using
 *                    s->cso_or_gso_size; IP checksums are updated as well
 *
 * Returns the result of net_tx_pkt_build_vheader(). Any other offload mode
 * hits g_assert_not_reached().
 * NOTE(review): offload_mode is copied from a TX descriptor field in
 * vmxnet3_tx_retrieve_metadata(); presumably a guest can supply an
 * unexpected value - confirm whether the assert is acceptable.
 */
static bool
vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
    switch (s->offload_mode) {
    case VMXNET3_OM_NONE:
        return net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);

    case VMXNET3_OM_CSUM:
        VMW_PKPRN("L4 CSO requested\n");
        return net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);

    case VMXNET3_OM_TSO:
        VMW_PKPRN("GSO offload requested.");
        if (net_tx_pkt_build_vheader(s->tx_pkt, true, true,
                                     s->cso_or_gso_size)) {
            net_tx_pkt_update_ip_checksums(s->tx_pkt);
            return true;
        }
        return false;

    default:
        g_assert_not_reached();
        break;
    }

    return false;
}
/*
 * Copy the per-packet TX parameters out of descriptor @txd into the device
 * state: offload mode (om), checksum offset / segment size (msscof),
 * whether a VLAN tag is requested (ti) and the tag value (tci).
 */
static void
vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
                             const struct Vmxnet3_TxDesc *txd)
{
    /* VLAN tagging */
    s->needs_vlan = txd->ti;
    s->tci = txd->tci;

    /* Offloads */
    s->cso_or_gso_size = txd->msscof;
    s->offload_mode = txd->om;
}
/*
 * Outcome of processing one packet; consumed by the
 * vmxnet3_on_{tx,rx}_done_update_stats() helpers to pick the statistics
 * counter to bump.
 */
typedef enum {
VMXNET3_PKT_STATUS_OK,
VMXNET3_PKT_STATUS_ERROR,
VMXNET3_PKT_STATUS_DISCARD,/* only for tx */
VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */
} Vmxnet3PktStatus;
/*
 * Update the TX statistics of queue @qidx after a packet was processed.
 *
 * DISCARD and ERROR bump their dedicated counters. OK bumps the
 * packet/byte counters matching the packet type (broadcast, multicast,
 * unicast) and, for TSO packets, the TSO counters too. Any other status or
 * packet type hits g_assert_not_reached().
 */
static void
vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
                                Vmxnet3PktStatus status)
{
    size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
    struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;

    if (status == VMXNET3_PKT_STATUS_DISCARD) {
        stats->pktsTxDiscard++;
        return;
    }
    if (status == VMXNET3_PKT_STATUS_ERROR) {
        stats->pktsTxError++;
        return;
    }
    if (status != VMXNET3_PKT_STATUS_OK) {
        g_assert_not_reached();
    }

    switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
    case ETH_PKT_UCAST:
        stats->ucastPktsTxOK++;
        stats->ucastBytesTxOK += tot_len;
        break;
    case ETH_PKT_MCAST:
        stats->mcastPktsTxOK++;
        stats->mcastBytesTxOK += tot_len;
        break;
    case ETH_PKT_BCAST:
        stats->bcastPktsTxOK++;
        stats->bcastBytesTxOK += tot_len;
        break;
    default:
        g_assert_not_reached();
    }

    if (s->offload_mode == VMXNET3_OM_TSO) {
        /*
         * According to VMWARE headers this statistic is a number
         * of packets after segmentation but since we don't have
         * this information in QEMU model, the best we can do is to
         * provide number of non-segmented packets
         */
        stats->TSOPktsTxOK++;
        stats->TSOBytesTxOK += tot_len;
    }
}
/*
 * Update the RX statistics of queue @qidx after a packet was processed.
 *
 * OUT_OF_BUF and ERROR bump their dedicated counters. OK bumps the
 * packet/byte counters matching the packet type and, when the packet is
 * longer than s->mtu, the LRO counters too. Any other status or packet
 * type hits g_assert_not_reached().
 */
static void
vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
                                int qidx,
                                Vmxnet3PktStatus status)
{
    struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
    size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);

    if (status == VMXNET3_PKT_STATUS_OUT_OF_BUF) {
        stats->pktsRxOutOfBuf++;
        return;
    }
    if (status == VMXNET3_PKT_STATUS_ERROR) {
        stats->pktsRxError++;
        return;
    }
    if (status != VMXNET3_PKT_STATUS_OK) {
        g_assert_not_reached();
    }

    switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
    case ETH_PKT_UCAST:
        stats->ucastPktsRxOK++;
        stats->ucastBytesRxOK += tot_len;
        break;
    case ETH_PKT_MCAST:
        stats->mcastPktsRxOK++;
        stats->mcastBytesRxOK += tot_len;
        break;
    case ETH_PKT_BCAST:
        stats->bcastPktsRxOK++;
        stats->bcastBytesRxOK += tot_len;
        break;
    default:
        g_assert_not_reached();
    }

    /* Packets larger than the MTU are accounted as LRO */
    if (tot_len > s->mtu) {
        stats->LROPktsRxOK++;
        stats->LROBytesRxOK += tot_len;
    }
}
/*
 * Read the current cell of @ring into @txd and convert its addr, val1 and
 * val2 words from little endian to CPU byte order.
 */
static inline void
vmxnet3_ring_read_curr_txdesc(PCIDevice *pcidev, Vmxnet3Ring *ring,
                              struct Vmxnet3_TxDesc *txd)
{
    vmxnet3_ring_read_curr_cell(pcidev, ring, txd);

    txd->val2 = le32_to_cpu(txd->val2);
    txd->val1 = le32_to_cpu(txd->val1);
    txd->addr = le64_to_cpu(txd->addr);
}
/*
 * Fetch the next TX descriptor of queue @qidx, if one is available.
 *
 * A descriptor is available when its generation matches the ring's current
 * generation. In that case it is re-read after a read barrier, its ring
 * index is stored in @descr_idx, the ring is advanced and true is
 * returned. Otherwise false is returned and the ring is not advanced
 * (@txd still holds whatever was read).
 */
static inline bool
vmxnet3_pop_next_tx_descr(VMXNET3State *s,
                          int qidx,
                          struct Vmxnet3_TxDesc *txd,
                          uint32_t *descr_idx)
{
    PCIDevice *d = PCI_DEVICE(s);
    Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;

    vmxnet3_ring_read_curr_txdesc(d, ring, txd);
    if (txd->gen != vmxnet3_ring_curr_gen(ring)) {
        return false;
    }

    /* Only read after generation field verification */
    smp_rmb();
    /* Re-read to be sure we got the latest version */
    vmxnet3_ring_read_curr_txdesc(d, ring, txd);

    VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);

    *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
    vmxnet3_inc_tx_consumption_counter(s, qidx);
    return true;
}
/*
 * Send the assembled TX packet of queue @qidx.
 *
 * The packet status becomes ERROR when the offload setup fails, DISCARD
 * when net_tx_pkt_send() fails and OK otherwise. The TX statistics are
 * updated with that status in every case.
 *
 * Returns true only when the status is OK.
 */
static bool
vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
{
    Vmxnet3PktStatus status;

    if (!vmxnet3_setup_tx_offloads(s)) {
        status = VMXNET3_PKT_STATUS_ERROR;
    } else {
        /* debug prints */
        vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
        net_tx_pkt_dump(s->tx_pkt);

        if (net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
            status = VMXNET3_PKT_STATUS_OK;
        } else {
            status = VMXNET3_PKT_STATUS_DISCARD;
        }
    }

    vmxnet3_on_tx_done_update_stats(s, qidx, status);
    return status == VMXNET3_PKT_STATUS_OK;
}
/*
 * Drain TX queue @qidx: consume descriptors until none is available,
 * collecting their buffers as fragments of s->tx_pkt and sending the
 * packet whenever a descriptor with the eop bit is seen.
 *
 * Per-packet state kept in @s across descriptors:
 *   tx_sop              - true while the next descriptor is the first one
 *                         of a packet
 *   skip_current_tx_pkt - set when adding a fragment failed; remaining
 *                         fragments of that packet are ignored and the
 *                         packet is accounted as an error
 */
static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
{
struct Vmxnet3_TxDesc txd;
uint32_t txd_idx;
uint32_t data_len;
hwaddr data_pa;
for (;;) {
/* Stop as soon as there is no descriptor available for the device */
if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
break;
}
vmxnet3_dump_tx_descr(&txd);
if (!s->skip_current_tx_pkt) {
/* A zero length field stands for VMXNET3_MAX_TX_BUF_SIZE bytes */
data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
data_pa = txd.addr;
if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s),
data_pa, data_len)) {
s->skip_current_tx_pkt = true;
}
}
/* Offload/VLAN metadata is taken from the first descriptor of a packet */
if (s->tx_sop) {
vmxnet3_tx_retrieve_metadata(s, &txd);
s->tx_sop = false;
}
/* Last descriptor of the packet: send (or account an error), complete */
if (txd.eop) {
if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) {
if (s->needs_vlan) {
net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
}
vmxnet3_send_packet(s, qidx);
} else {
vmxnet3_on_tx_done_update_stats(s, qidx,
VMXNET3_PKT_STATUS_ERROR);
}
/* The completion is reported with the index of the eop descriptor */
vmxnet3_complete_packet(s, qidx, txd_idx);
/* Re-arm the per-packet state for the next packet */
s->tx_sop = true;
s->skip_current_tx_pkt = false;
net_tx_pkt_reset(s->tx_pkt,
net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
}
}
/* Drop fragments of a packet whose eop descriptor has not been seen */
net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
}
/*
 * Read the current descriptor of RX ring @ridx of queue @qidx into @dbuf
 * (converted from little endian) and store its ring index in @didx.
 * The ring is not advanced.
 */
static inline void
vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
                           struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
{
    Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
    PCIDevice *d = PCI_DEVICE(s);

    *didx = vmxnet3_ring_curr_cell_idx(ring);

    vmxnet3_ring_read_curr_cell(d, ring, dbuf);

    dbuf->addr = le64_to_cpu(dbuf->addr);
    dbuf->val1 = le32_to_cpu(dbuf->val1);
    dbuf->ext1 = le32_to_cpu(dbuf->ext1);
}
/* Return the current generation of RX ring @ridx of queue @qidx. */
static inline uint8_t
vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
{
    uint8_t gen = s->rxq_descr[qidx].rx_ring[ridx].gen;

    return gen;
}
/*
 * Reserve the current cell of the RX completion ring of queue @qidx.
 *
 * The cell is free for the device when the generation stored in it differs
 * from the ring's current generation. In that case the ring generation is
 * returned through @descr_gen, the ring is advanced and the address of the
 * reserved cell is returned. Otherwise 0 is returned and nothing changes.
 */
static inline hwaddr
vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
{
    struct Vmxnet3_RxCompDesc rxcd;
    uint8_t ring_gen;
    hwaddr daddr = vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);

    pci_dma_read(PCI_DEVICE(s), daddr, &rxcd, sizeof(rxcd));
    rxcd.val1 = le32_to_cpu(rxcd.val1);
    rxcd.val2 = le32_to_cpu(rxcd.val2);
    rxcd.val3 = le32_to_cpu(rxcd.val3);

    ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
    if (rxcd.gen == ring_gen) {
        /* Cell not available */
        return 0;
    }

    *descr_gen = ring_gen;
    vmxnet3_inc_rx_completion_counter(s, qidx);
    return daddr;
}
/*
 * Undo one reservation on the RX completion ring of queue @qidx by
 * stepping the ring back one cell.
 */
static inline void
vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
{
    vmxnet3_dec_rx_completion_counter(s, qidx);
}
/* RX queue index used by the RX descriptor lookup helpers below */
#define RXQ_IDX (0)
/* Index of the RX ring searched for both head and body descriptors */
#define RX_HEAD_BODY_RING (0)
/* Index of the RX ring used as fallback for body descriptors */
#define RX_BODY_ONLY_RING (1)
/*
 * Find the next available HEAD descriptor on the head/body RX ring.
 *
 * Descriptors are consumed one by one; every available descriptor that is
 * not of type VMXNET3_RXD_BTYPE_HEAD is skipped (it stays consumed).
 *
 * On success the descriptor is left in @descr_buf, its ring index in
 * @descr_idx, @ridx is set to RX_HEAD_BODY_RING and true is returned.
 * False is returned once a descriptor with a non-matching generation is
 * met, i.e. no more descriptors are available.
 */
static bool
vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
struct Vmxnet3_RxDesc *descr_buf,
uint32_t *descr_idx,
uint32_t *ridx)
{
for (;;) {
uint32_t ring_gen;
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
descr_buf, descr_idx);
/* If no more free descriptors - return */
ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
if (descr_buf->gen != ring_gen) {
return false;
}
/* Only read after generation field verification */
smp_rmb();
/* Re-read to be sure we got the latest version */
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
descr_buf, descr_idx);
/* Mark current descriptor as used/skipped */
vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
/* If this is what we are looking for - return */
if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
*ridx = RX_HEAD_BODY_RING;
return true;
}
}
}
/*
 * Find the next available BODY descriptor.
 *
 * The head/body ring is tried first: its current descriptor is used (and
 * consumed) only when it is available and of type VMXNET3_RXD_BTYPE_BODY.
 * Otherwise the body-only ring is tried; its current descriptor is
 * consumed when available.
 *
 * On success the descriptor is left in @d, its ring index in @didx, the
 * ring it came from in @ridx and true is returned. False is returned when
 * neither ring can provide a descriptor.
 */
static bool
vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
struct Vmxnet3_RxDesc *d,
uint32_t *didx,
uint32_t *ridx)
{
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
/* Try to find corresponding descriptor in head/body ring */
if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
/* Only read after generation field verification */
smp_rmb();
/* Re-read to be sure we got the latest version */
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
/* A HEAD descriptor is left in place for the next head lookup */
if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
*ridx = RX_HEAD_BODY_RING;
return true;
}
}
/*
 * If there are no free descriptors on the head/body ring or the next free
 * descriptor is a head descriptor, switch to the body-only ring
 */
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
/* If no more free descriptors - return */
if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
/* Only read after generation field verification */
smp_rmb();
/* Re-read to be sure we got the latest version */
vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
/*
 * NOTE(review): the descriptor was just read from ring memory; presumably
 * that memory is guest-writable, which would make this assert reachable
 * by a misbehaving guest - confirm whether a soft failure is preferable.
 */
assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
*ridx = RX_BODY_ONLY_RING;
vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
return true;
}
return false;
}
/*
 * Fetch the next RX descriptor for the packet being indicated.
 *
 * A HEAD descriptor is looked up when @is_head is set or when
 * s->rx_packets_compound is false; otherwise a BODY descriptor is looked
 * up. Output parameters and return value are those of the selected lookup
 * helper.
 */
static inline bool
vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
                          struct Vmxnet3_RxDesc *descr_buf,
                          uint32_t *descr_idx,
                          uint32_t *ridx)
{
    bool want_head = is_head || !s->rx_packets_compound;

    return want_head
        ? vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx)
        : vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
}
/*
 * In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
 * the implementation always passes an RxCompDesc with a "Checksum
 * calculated and found correct" to the OS (cnc=0 and tuc=1, see
 * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
 *
 * Therefore, if packet has the NEEDS_CSUM set, we must calculate
 * and place a fully computed checksum into the tcp/udp header.
 * Otherwise, the OS driver will receive a checksum-correct indication
 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
 * having just the pseudo header csum value.
 *
 * While this is not a problem if packet is destined for local delivery,
 * in the case the host OS performs forwarding, it will forward an
 * incorrectly checksummed packet.
 *
 * @pkt:      RX packet whose virtio header is inspected and updated
 * @pkt_data: raw packet bytes; the checksum field inside is overwritten
 * @pkt_len:  length of @pkt_data
 *
 * Nothing is done unless NEEDS_CSUM is set, the packet is TCP or UDP over
 * IPv4/IPv6 and the checksum field lies inside the packet.
 */
static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt,
                                           const void *pkt_data,
                                           size_t pkt_len)
{
    struct virtio_net_hdr *vhdr = net_rx_pkt_get_vhdr(pkt);
    EthL4HdrProto l4hdr_proto;
    bool hasip4, hasip6;
    bool has_ip, is_tcp_or_udp;
    uint8_t *data;
    int len;

    if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
        return;
    }

    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    has_ip = hasip4 || hasip6;
    is_tcp_or_udp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP ||
                    l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
    if (!has_ip || !is_tcp_or_udp) {
        return;
    }

    vmxnet3_dump_virt_hdr(vhdr);

    /* Validate packet len: csum_start + csum_offset + length of csum field */
    if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
        VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
                  "cannot calculate checksum",
                  pkt_len, vhdr->csum_start, vhdr->csum_offset);
        return;
    }

    /* The checksum covers everything from csum_start to the packet end */
    data = (uint8_t *)pkt_data + vhdr->csum_start;
    len = pkt_len - vhdr->csum_start;

    /* Put the checksum obtained into the packet */
    stw_be_p(data + vhdr->csum_offset,
             net_checksum_finish_nozero(net_checksum_add(len, data)));

    /* The checksum is now complete: NEEDS_CSUM becomes DATA_VALID */
    vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
}
/*
 * Fill the VLAN and checksum related fields of RX completion descriptor
 * @rxcd from the state of packet @pkt.
 *
 * VLAN: when the tag was stripped, ts is set and tci carries the tag.
 *
 * Checksum: for TCP/UDP over IPv4/IPv6 packets that are either checksum
 * offloaded (DATA_VALID or NEEDS_CSUM) or GSO packets, the descriptor
 * reports a verified checksum (cnc=0, v4/v6/tcp/udp set, fcs=tuc=ipc=1).
 * In every other case only cnc=1 is set.
 */
static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
                                    struct Vmxnet3_RxCompDesc *rxcd)
{
    int csum_ok, is_gso;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    struct virtio_net_hdr *vhdr;
    uint8_t offload_type;

    if (net_rx_pkt_is_vlan_stripped(pkt)) {
        rxcd->ts = 1;
        rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
    }

    vhdr = net_rx_pkt_get_vhdr(pkt);

    /*
     * Checksum is considered valid when the lower level says so
     * (DATA_VALID) or when the lower level requests checksum offload
     * (NEEDS_CSUM), which indicates the packet was produced/bridged locally
     * (presumably it has not travelled over a network since its checksum
     * was last calculated).
     */
    csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
              VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);

    /* The ECN bit does not affect the GSO type */
    offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;

    if (csum_ok || is_gso) {
        net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);

        if ((l4hdr_proto == ETH_L4_HDR_PROTO_TCP ||
             l4hdr_proto == ETH_L4_HDR_PROTO_UDP) &&
            (hasip4 || hasip6)) {
            rxcd->cnc = 0;
            rxcd->v4 = hasip4 ? 1 : 0;
            rxcd->v6 = hasip6 ? 1 : 0;
            rxcd->tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
            rxcd->udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;
            rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
            return;
        }
    }

    /* Checksum not calculated */
    rxcd->cnc = 1;
}
/*
 * DMA-write a byte range of a scatter/gather list to a contiguous target.
 *
 * @pci_dev:       device performing the DMA write
 * @iov:           first element of the scatter/gather list
 * @start_iov_off: offset, counted from the start of the whole list, of the
 *                 first byte to copy
 * @target_addr:   DMA address receiving the first byte
 * @bytes_to_copy: number of bytes to copy
 *
 * NOTE(review): no element count is passed; the loop only stops once
 * @bytes_to_copy bytes were written, so the caller must guarantee that the
 * list holds at least start_iov_off + bytes_to_copy bytes.
 */
static void
vmxnet3_pci_dma_writev(PCIDevice *pci_dev,
const struct iovec *iov,
size_t start_iov_off,
hwaddr target_addr,
size_t bytes_to_copy)
{
/* Offset, within the whole list, at which the current element starts */
size_t curr_off = 0;
size_t copied = 0;
while (bytes_to_copy) {
/* Does the current element contain the next byte to copy? */
if (start_iov_off < (curr_off + iov->iov_len)) {
/* Copy up to the end of this element, or less if that is all we need */
size_t chunk_len =
MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
pci_dma_write(pci_dev, target_addr + copied,
iov->iov_base + start_iov_off - curr_off,
chunk_len);
copied += chunk_len;
start_iov_off += chunk_len;
/*
 * When the loop continues the chunk ended at the element boundary, so
 * this is also the list offset at which the next element starts.
 */
curr_off = start_iov_off;
bytes_to_copy -= chunk_len;
} else {
/* Element lies entirely before the range - skip it */
curr_off += iov->iov_len;
}
iov++;
}
}
/*
 * Write RX completion descriptor @rxcd to DMA address @pa.
 *
 * The val1..val3 words are converted to little endian in place first, so
 * @rxcd holds the little-endian representation after the call.
 */
static void
vmxnet3_pci_dma_write_rxcd(PCIDevice *pcidev, dma_addr_t pa,
                           struct Vmxnet3_RxCompDesc *rxcd)
{
    rxcd->val3 = cpu_to_le32(rxcd->val3);
    rxcd->val2 = cpu_to_le32(rxcd->val2);
    rxcd->val1 = cpu_to_le32(rxcd->val1);

    pci_dma_write(pcidev, pa, rxcd, sizeof(*rxcd));
}
static bool
vmxnet3_indicate_packet(VMXNET3State *s)
{
struct Vmxnet3_RxDesc rxd;
PCIDevice *d = PCI_DEVICE(s);
bool is_head = true;
uint32_t rxd_idx;
uint32_t rx_ridx = 0;
struct Vmxnet3_RxCompDesc rxcd;
uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
hwaddr new_rxcd_pa = 0;
hwaddr ready_rxcd_pa = 0;
struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
size_t bytes_copied = 0;
size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
uint16_t num_frags = 0;
size_t chunk_size;
net_rx_pkt_dump(s->rx_pkt);
while (bytes_left > 0) {