1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "spinlock.h"
#include "proc.h"
#include "defs.h"
#include "e1000_dev.h"
#include "net.h"
// Number of descriptors in the transmit ring.
#define TX_RING_SIZE 16
// Transmit descriptor ring shared with the device; e1000_init() programs
// its address into E1000_TDBAL.
static struct tx_desc tx_ring[TX_RING_SIZE] __attribute__((aligned(16)));
// tx_mbufs[i] is the mbuf most recently queued on tx_ring[i] (0 if none).
// It is kept so e1000_transmit() can free it when slot i is reused.
static struct mbuf *tx_mbufs[TX_RING_SIZE];
// Number of descriptors in the receive ring.
#define RX_RING_SIZE 16
// Receive descriptor ring shared with the device; e1000_init() programs
// its address into E1000_RDBAL.
static struct rx_desc rx_ring[RX_RING_SIZE] __attribute__((aligned(16)));
// rx_mbufs[i] is the mbuf whose head address is stored in rx_ring[i].addr.
static struct mbuf *rx_mbufs[RX_RING_SIZE];
// remember where the e1000's registers live.
// Indexed with the E1000_* register constants; set once by e1000_init().
static volatile uint32 *regs;
// Held by e1000_transmit() and e1000_recv() while they access the
// descriptor rings, the mbuf tables and the ring-position registers.
struct spinlock e1000_lock;
// Initialize the e1000: reset the device, set up the transmit and receive
// descriptor rings, program the receive address filters, enable the
// transmitter and receiver, and unmask the receive interrupt.
//
// called by pci_init().
// xregs is the memory address at which the
// e1000's registers are mapped.
//
// Panics if a ring's byte size is not a multiple of 128 or if an
// rx mbuf cannot be allocated.
void
e1000_init(uint32 *xregs)
{
int i;
initlock(&e1000_lock, "e1000");
regs = xregs;
// Reset the device
regs[E1000_IMS] = 0; // disable interrupts
regs[E1000_CTL] |= E1000_CTL_RST;
regs[E1000_IMS] = 0; // redisable interrupts
// full memory barrier before the ring setup that follows.
__sync_synchronize();
// [E1000 14.5] Transmit initialization
memset(tx_ring, 0, sizeof(tx_ring));
// Start every TX descriptor with the DD status bit set, i.e. looking as
// if its previous transmission already completed, and with no mbuf attached.
for (i = 0; i < TX_RING_SIZE; i++) {
tx_ring[i].status = E1000_TXD_STAT_DD;
tx_mbufs[i] = 0;
}
// regs[] entries are 32 bits wide, so only the low 32 bits of the ring
// address are stored here.
regs[E1000_TDBAL] = (uint64) tx_ring;
// the ring's size in bytes must be a multiple of 128.
if(sizeof(tx_ring) % 128 != 0)
panic("e1000");
regs[E1000_TDLEN] = sizeof(tx_ring);
// head and tail both start at descriptor 0.
regs[E1000_TDH] = regs[E1000_TDT] = 0;
// [E1000 14.4] Receive initialization
memset(rx_ring, 0, sizeof(rx_ring));
// Give every RX descriptor a freshly allocated mbuf to receive into.
for (i = 0; i < RX_RING_SIZE; i++) {
rx_mbufs[i] = mbufalloc(0);
if (!rx_mbufs[i])
panic("e1000");
rx_ring[i].addr = (uint64) rx_mbufs[i]->head;
}
// as with TDBAL, only the low 32 bits of the address are stored.
regs[E1000_RDBAL] = (uint64) rx_ring;
if(sizeof(rx_ring) % 128 != 0)
panic("e1000");
regs[E1000_RDH] = 0;
// tail starts at the last descriptor; e1000_recv() examines
// (RDT + 1) % RX_RING_SIZE next, i.e. descriptor 0.
regs[E1000_RDT] = RX_RING_SIZE - 1;
regs[E1000_RDLEN] = sizeof(rx_ring);
// filter by qemu's MAC address, 52:54:00:12:34:56
// (low word holds the first four bytes, high word the last two;
// bit 31 of the high word is presumably the "address valid" flag --
// confirm against the E1000 manual).
regs[E1000_RA] = 0x12005452;
regs[E1000_RA+1] = 0x5634 | (1<<31);
// multicast table: clear all 4096/32 = 128 words.
// NOTE: this `int i` shadows the function-scope `i` declared above.
for (int i = 0; i < 4096/32; i++)
regs[E1000_MTA + i] = 0;
// transmitter control bits.
regs[E1000_TCTL] = E1000_TCTL_EN | // enable
E1000_TCTL_PSP | // pad short packets
(0x10 << E1000_TCTL_CT_SHIFT) | // collision stuff
(0x40 << E1000_TCTL_COLD_SHIFT);
regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap
// receiver control bits.
regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver
E1000_RCTL_BAM | // enable broadcast
E1000_RCTL_SZ_2048 | // 2048-byte rx buffers
E1000_RCTL_SECRC; // strip CRC
// ask e1000 for receive interrupts.
regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer)
regs[E1000_RADV] = 0; // interrupt after every packet (no timer)
regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back
}
int
e1000_transmit(struct mbuf *m)
{
// the mbuf contains an ethernet frame; program it into
// the TX descriptor ring so that the e1000 sends it. Stash
// a pointer so that it can be freed after sending.
acquire(&e1000_lock);
int cur_idx = regs[E1000_TDT];
// check if the STAT_DD bit is set in current descriptor
// if not set, means a previous tx in this descripter is still in flight, return an error.
if(!(tx_ring[cur_idx].status | E1000_TXD_STAT_DD)){
release(&e1000_lock);
return -1;
}
// free previous mbuf and update current descriptor
if(tx_mbufs[cur_idx])
mbuffree(tx_mbufs[cur_idx]);
tx_ring[cur_idx].addr = (uint64)m->head;
tx_ring[cur_idx].length = (uint64)m->len;
tx_ring[cur_idx].cmd = E1000_TXD_CMD_RS | E1000_TXD_CMD_EOP;
// also clear status bits
tx_ring[cur_idx].status = 0;
// stash current mbuf to tx_mbufs (would be freed later)
tx_mbufs[cur_idx] = m;
// update the ring position to point to the next descriptor;
regs[E1000_TDT] = (cur_idx + 1) % TX_RING_SIZE;
release(&e1000_lock);
return 0;
}
static void
e1000_recv(void)
{
// Check for packets that have arrived from the e1000
// Create and deliver an mbuf for each packet (using net_rx()).
while(1){
acquire(&e1000_lock);
int cur_idx = (regs[E1000_RDT]+1) % RX_RING_SIZE;
// check if last rx is completed. If not, skip passing to net_rx()
if(!(rx_ring[cur_idx].status | E1000_RXD_STAT_DD))
break;
// update the mbuf's length to the len reported by rx_desc
// mbufput(rx_mbufs[cur_idx], rx_ring[cur_idx].length);
rx_mbufs[cur_idx]->len = rx_ring[cur_idx].length;
// stash mbuf, for later net_rx()
struct mbuf *rx_buf = rx_mbufs[cur_idx];
// net_rx() would free the passed mbuf invisibly, so we need to re-alloc it
rx_mbufs[cur_idx] = mbufalloc(0);
if(!rx_mbufs[cur_idx])
panic("e1000_recv: mbufalloc");
// update buffer addr and clear status bits
rx_ring[cur_idx].addr = (uint64)rx_mbufs[cur_idx]->head;
rx_ring[cur_idx].status = 0;
// update the E1000_RDT register to point to next position
regs[E1000_RDT] = cur_idx;
release(&e1000_lock);
// pass to the network stack, must not hold the lock coz it can lead to deadlocks under different cpus
net_rx(rx_buf);
}
}
// e1000 interrupt handler: acknowledge the interrupt, then process
// whatever packets have been received.
void
e1000_intr(void)
{
  // Writing all ones to ICR tells the e1000 this interrupt has been
  // seen; without it the device won't raise any further interrupts.
  const unsigned int ack_all = 0xffffffff;

  regs[E1000_ICR] = ack_all;

  // hand received packets to the network stack.
  e1000_recv();
}
|