summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSanjit Bhat <[email protected]>2023-10-26 06:44:48 -0400
committerSanjit Bhat <[email protected]>2023-10-26 06:44:48 -0400
commitcfae93475dfb4cb5cfe264f4c029136e1447c262 (patch)
tree699903e093e3a23caf7ce3899e7c80e48511f900
parent1ed40716eb54e371df9d1814b9129666b3fe4f09 (diff)
downloadxv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.tar.gz
xv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.tar.bz2
xv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.zip
net add missing files
-rw-r--r--conf/lab.mk1
-rwxr-xr-xgrade-lab-net43
-rw-r--r--gradelib.py628
-rw-r--r--kernel/e1000.c129
-rw-r--r--kernel/e1000_dev.h125
-rw-r--r--kernel/net.c374
-rw-r--r--kernel/net.h173
-rw-r--r--kernel/pci.c61
-rw-r--r--kernel/sysnet.c185
-rw-r--r--ping.py12
-rw-r--r--server.py13
-rw-r--r--user/nettests.c297
-rw-r--r--user/pingpong.c52
13 files changed, 2093 insertions, 0 deletions
diff --git a/conf/lab.mk b/conf/lab.mk
new file mode 100644
index 0000000..26dcd75
--- /dev/null
+++ b/conf/lab.mk
@@ -0,0 +1 @@
+LAB=net
diff --git a/grade-lab-net b/grade-lab-net
new file mode 100755
index 0000000..dd193e6
--- /dev/null
+++ b/grade-lab-net
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+import re
+import subprocess
+from gradelib import *
+
+r = Runner(save("xv6.out"))
+
+@test(0, "running nettests")
+def test_nettest():
+ server = subprocess.Popen(["make", "server"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ r.run_qemu(shell_script([
+ 'nettests'
+ ]), timeout=30)
+ server.terminate()
+ server.communicate()
+
+@test(40, "nettest: ping", parent=test_nettest)
+def test_nettest_():
+ r.match('^testing ping: OK$')
+
+@test(20, "nettest: single process", parent=test_nettest)
+def test_nettest_():
+ r.match('^testing single-process pings: OK$')
+
+@test(20, "nettest: multi-process", parent=test_nettest)
+def test_nettest_fork_test():
+ r.match('^testing multi-process pings: OK$')
+
+@test(19, "nettest: DNS", parent=test_nettest)
+def test_nettest_dns_test():
+ r.match('^DNS OK$')
+
+#@test(10, "answers-net.txt")
+#def test_answers():
+# # just a simple sanity check, will be graded manually
+# check_answers("answers-net.txt")
+
+@test(1, "time")
+def test_time():
+ check_time()
+
+run_tests()
diff --git a/gradelib.py b/gradelib.py
new file mode 100644
index 0000000..f0d4934
--- /dev/null
+++ b/gradelib.py
@@ -0,0 +1,628 @@
+from __future__ import print_function
+
+import sys, os, re, time, socket, select, subprocess, errno, shutil, random, string, json
+from subprocess import check_call, Popen
+from optparse import OptionParser
+
+__all__ = []
+
+##################################################################
+# Test structure
+#
+
+__all__ += ["test", "end_part", "run_tests", "get_current_test"]
+
+TESTS = []
+TOTAL = POSSIBLE = 0
+PART_TOTAL = PART_POSSIBLE = 0
+CURRENT_TEST = None
+GRADES = {}
+
+def test(points, title=None, parent=None):
+ """Decorator for declaring test functions. If title is None, the
+ title of the test will be derived from the function name by
+ stripping the leading "test_" and replacing underscores with
+ spaces."""
+
+ def register_test(fn, title=title):
+ if not title:
+ assert fn.__name__.startswith("test_")
+ title = fn.__name__[5:].replace("_", " ")
+ if parent:
+ title = " " + title
+
+ def run_test():
+ global TOTAL, POSSIBLE, CURRENT_TEST, GRADES
+
+ # Handle test dependencies
+ if run_test.complete:
+ return run_test.ok
+ run_test.complete = True
+ parent_failed = False
+ if parent:
+ parent_failed = not parent()
+
+ # Run the test
+ fail = None
+ start = time.time()
+ CURRENT_TEST = run_test
+ sys.stdout.write("== Test %s == " % title)
+ if parent:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+ try:
+ if parent_failed:
+ raise AssertionError('Parent failed: %s' % parent.__name__)
+ fn()
+ except AssertionError as e:
+ fail = str(e)
+
+ # Display and handle test result
+ POSSIBLE += points
+ if points:
+ print("%s: %s" % (title, \
+ (color("red", "FAIL") if fail else color("green", "OK"))), end=' ')
+ if time.time() - start > 0.1:
+ print("(%.1fs)" % (time.time() - start), end=' ')
+ print()
+ if fail:
+ print(" %s" % fail.replace("\n", "\n "))
+ else:
+ TOTAL += points
+ if points:
+ GRADES[title] = 0 if fail else points
+
+ for callback in run_test.on_finish:
+ callback(fail)
+ CURRENT_TEST = None
+
+ run_test.ok = not fail
+ return run_test.ok
+
+ # Record test metadata on the test wrapper function
+ run_test.__name__ = fn.__name__
+ run_test.title = title
+ run_test.complete = False
+ run_test.ok = False
+ run_test.on_finish = []
+ TESTS.append(run_test)
+ return run_test
+ return register_test
+
+def end_part(name):
+ def show_part():
+ global PART_TOTAL, PART_POSSIBLE
+ print("Part %s score: %d/%d" % \
+ (name, TOTAL - PART_TOTAL, POSSIBLE - PART_POSSIBLE))
+ print()
+ PART_TOTAL, PART_POSSIBLE = TOTAL, POSSIBLE
+ show_part.title = ""
+ TESTS.append(show_part)
+
+def write_results():
+ global options
+ if not options.results:
+ return
+ try:
+ with open(options.results, "w") as f:
+ f.write(json.dumps(GRADES))
+ except OSError as e:
+ print("Provided a bad results path. Error:", e)
+
+def run_tests():
+ """Set up for testing and run the registered test functions."""
+
+ # Handle command line
+ global options
+ parser = OptionParser(usage="usage: %prog [-v] [filters...]")
+ parser.add_option("-v", "--verbose", action="store_true",
+ help="print commands")
+ parser.add_option("--color", choices=["never", "always", "auto"],
+ default="auto", help="never, always, or auto")
+ parser.add_option("--results", help="results file path")
+ (options, args) = parser.parse_args()
+
+ # Start with a full build to catch build errors
+ make()
+
+ # Clean the file system if there is one
+ reset_fs()
+
+ # Run tests
+ limit = list(map(str.lower, args))
+ try:
+ for test in TESTS:
+ if not limit or any(l in test.title.lower() for l in limit):
+ test()
+ if not limit:
+ write_results()
+ print("Score: %d/%d" % (TOTAL, POSSIBLE))
+ except KeyboardInterrupt:
+ pass
+ if TOTAL < POSSIBLE:
+ sys.exit(1)
+
+def get_current_test():
+ if not CURRENT_TEST:
+ raise RuntimeError("No test is running")
+ return CURRENT_TEST
+
+##################################################################
+# Assertions
+#
+
+__all__ += ["assert_equal", "assert_lines_match"]
+
+def assert_equal(got, expect, msg=""):
+ if got == expect:
+ return
+ if msg:
+ msg += "\n"
+ raise AssertionError("%sgot:\n %s\nexpected:\n %s" %
+ (msg, str(got).replace("\n", "\n "),
+ str(expect).replace("\n", "\n ")))
+
+def assert_lines_match(text, *regexps, **kw):
+ """Assert that all of regexps match some line in text. If a 'no'
+ keyword argument is given, it must be a list of regexps that must
+ *not* match any line in text."""
+
+ def assert_lines_match_kw(no=[]):
+ return no
+ no = assert_lines_match_kw(**kw)
+
+ # Check text against regexps
+ lines = text.splitlines()
+ good = set()
+ bad = set()
+ for i, line in enumerate(lines):
+ if any(re.match(r, line) for r in regexps):
+ good.add(i)
+ regexps = [r for r in regexps if not re.match(r, line)]
+ if any(re.match(r, line) for r in no):
+ bad.add(i)
+
+ if not regexps and not bad:
+ return
+
+ # We failed; construct an informative failure message
+ show = set()
+ for lineno in good.union(bad):
+ for offset in range(-2, 3):
+ show.add(lineno + offset)
+ if regexps:
+ show.update(n for n in range(len(lines) - 5, len(lines)))
+
+ msg = []
+ last = -1
+ for lineno in sorted(show):
+ if 0 <= lineno < len(lines):
+ if lineno != last + 1:
+ msg.append("...")
+ last = lineno
+ msg.append("%s %s" % (color("red", "BAD ") if lineno in bad else
+ color("green", "GOOD") if lineno in good
+ else " ",
+ lines[lineno]))
+ if last != len(lines) - 1:
+ msg.append("...")
+ if bad:
+ msg.append("unexpected lines in output")
+ for r in regexps:
+ msg.append(color("red", "MISSING") + " '%s'" % r)
+ raise AssertionError("\n".join(msg))
+
+##################################################################
+# Utilities
+#
+
+__all__ += ["make", "maybe_unlink", "reset_fs", "color", "random_str", "check_time", "check_answers"]
+
+MAKE_TIMESTAMP = 0
+
+def pre_make():
+ """Delay prior to running make to ensure file mtimes change."""
+ while int(time.time()) == MAKE_TIMESTAMP:
+ time.sleep(0.1)
+
+def post_make():
+ """Record the time after make completes so that the next run of
+ make can be delayed if needed."""
+ global MAKE_TIMESTAMP
+ MAKE_TIMESTAMP = int(time.time())
+
+def make(*target):
+ pre_make()
+ if Popen(("make",) + target).wait():
+ sys.exit(1)
+ post_make()
+
+def show_command(cmd):
+ from pipes import quote
+ print("\n$", " ".join(map(quote, cmd)))
+
+def maybe_unlink(*paths):
+ for path in paths:
+ try:
+ os.unlink(path)
+ except EnvironmentError as e:
+ if e.errno != errno.ENOENT:
+ raise
+
+COLORS = {"default": "\033[0m", "red": "\033[31m", "green": "\033[32m"}
+
+def color(name, text):
+ if options.color == "always" or (options.color == "auto" and os.isatty(1)):
+ return COLORS[name] + text + COLORS["default"]
+ return text
+
+def reset_fs():
+ if os.path.exists("obj/fs/clean-fs.img"):
+ shutil.copyfile("obj/fs/clean-fs.img", "obj/fs/fs.img")
+
+def random_str(n=8):
+ letters = string.ascii_letters + string.digits
+ return ''.join(random.choice(letters) for _ in range(n))
+
+def check_time():
+ try:
+ print("")
+ with open('time.txt') as f:
+ d = f.read().strip()
+ if not re.match(r'^\d+$', d):
+ raise AssertionError('time.txt does not contain a single integer (number of hours spent on the lab)')
+ except IOError:
+ raise AssertionError('Cannot read time.txt')
+
+def check_answers(file, n=10):
+ try:
+ print("")
+ with open(file) as f:
+ d = f.read().strip()
+ if len(d) < n:
+ raise AssertionError('%s does not seem to contain enough text' % file)
+ except IOError:
+ raise AssertionError('Cannot read %s' % file)
+
+
+##################################################################
+# Controllers
+#
+
+__all__ += ["QEMU", "GDBClient"]
+
+class QEMU(object):
+ _GDBPORT = None
+
+ def __init__(self, *make_args):
+ # Check that QEMU is not currently running
+ try:
+ GDBClient(self.get_gdb_port(), timeout=0).close()
+ except socket.error:
+ pass
+ else:
+ print("""\
+GDB stub found on port %d.
+QEMU appears to already be running. Please exit it if possible or use
+'killall qemu' or 'killall qemu.real'.""" % self.get_gdb_port(), file=sys.stderr)
+ sys.exit(1)
+
+ if options.verbose:
+ show_command(("make",) + make_args)
+ cmd = ("make", "-s", "--no-print-directory") + make_args
+ self.proc = Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ stdin=subprocess.PIPE)
+ # Accumulated output as a string
+ self.output = ""
+ # Accumulated output as a bytearray
+ self.outbytes = bytearray()
+ self.on_output = []
+
+ @staticmethod
+ def get_gdb_port():
+ if QEMU._GDBPORT is None:
+ p = Popen(["make", "-s", "--no-print-directory", "print-gdbport"],
+ stdout=subprocess.PIPE)
+ (out, _) = p.communicate()
+ if p.returncode:
+ raise RuntimeError(
+ "Failed to get gdbport: make exited with %d" %
+ p.returncode)
+ QEMU._GDBPORT = int(out)
+ return QEMU._GDBPORT
+
+ def fileno(self):
+ if self.proc:
+ return self.proc.stdout.fileno()
+
+ def handle_read(self):
+ buf = os.read(self.proc.stdout.fileno(), 4096)
+ self.outbytes.extend(buf)
+ self.output = self.outbytes.decode("utf-8", "replace")
+ for callback in self.on_output:
+ callback(buf)
+ if buf == b"":
+ self.wait()
+ return
+
+ def write(self, buf):
+ if isinstance(buf, str):
+ buf = buf.encode('utf-8')
+ self.proc.stdin.write(buf)
+ self.proc.stdin.flush()
+
+ def wait(self):
+ if self.proc:
+ self.proc.wait()
+ self.proc = None
+
+ def kill(self):
+ if self.proc:
+ self.proc.terminate()
+
+class GDBClient(object):
+ def __init__(self, port, timeout=15):
+ start = time.time()
+ while True:
+ self.sock = socket.socket()
+ try:
+ self.sock.settimeout(1)
+ self.sock.connect(("localhost", port))
+ break
+ except socket.error:
+ if time.time() >= start + timeout:
+ raise
+ self.__buf = ""
+
+ def fileno(self):
+ if self.sock:
+ return self.sock.fileno()
+
+ def handle_read(self):
+ try:
+ data = self.sock.recv(4096).decode("ascii", "replace")
+ except socket.error:
+ data = ""
+ if data == "":
+ self.sock.close()
+ self.sock = None
+ return
+ self.__buf += data
+
+ while True:
+ m = re.search(r"\$([^#]*)#[0-9a-zA-Z]{2}", self.__buf)
+ if not m:
+ break
+ pkt = m.group(1)
+ self.__buf = self.__buf[m.end():]
+
+ if pkt.startswith("T05"):
+ # Breakpoint
+ raise TerminateTest
+
+ def __send(self, cmd):
+ packet = "$%s#%02x" % (cmd, sum(map(ord, cmd)) % 256)
+ self.sock.sendall(packet.encode("ascii"))
+
+ def __send_break(self):
+ self.sock.sendall(b"\x03")
+
+ def close(self):
+ if self.sock:
+ self.sock.close()
+ self.sock = None
+
+ def cont(self):
+ self.__send("c")
+
+ def breakpoint(self, addr):
+ self.__send("Z1,%x,1" % addr)
+
+
+##################################################################
+# QEMU test runner
+#
+
+__all__ += ["TerminateTest", "Runner"]
+
+class TerminateTest(Exception):
+ pass
+
+class Runner():
+ def __init__(self, *default_monitors):
+ self.__default_monitors = default_monitors
+
+ def run_qemu(self, *monitors, **kw):
+ """Run a QEMU-based test. monitors should be functions that will
+ be called with this Runner instance once QEMU and GDB are
+ started. Typically, they should register callbacks that throw
+ TerminateTest when stop events occur. The target_base
+ argument gives the make target to run. The make_args argument
+ should be a list of additional arguments to pass to make. The
+ timeout argument bounds how long to run before returning."""
+
+ def run_qemu_kw(target_base="qemu", make_args=[], timeout=30):
+ return target_base, make_args, timeout
+ target_base, make_args, timeout = run_qemu_kw(**kw)
+
+ # Start QEMU
+ pre_make()
+ self.qemu = QEMU(target_base + "-gdb", *make_args)
+ self.gdb = None
+
+ try:
+ # Wait for QEMU to start or make to fail. This will set
+ # self.gdb if QEMU starts.
+ self.qemu.on_output = [self.__monitor_start]
+ self.__react([self.qemu], timeout=90)
+ self.qemu.on_output = []
+ if self.gdb is None:
+ print("Failed to connect to QEMU; output:")
+ print(self.qemu.output)
+ sys.exit(1)
+ post_make()
+
+ # QEMU and GDB are up
+ self.reactors = [self.qemu, self.gdb]
+
+ # Start monitoring
+ for m in self.__default_monitors + monitors:
+ m(self)
+
+ # Run and react
+ self.gdb.cont()
+ self.__react(self.reactors, timeout)
+ finally:
+ # Shutdown QEMU
+ try:
+ if self.gdb is None:
+ sys.exit(1)
+ self.qemu.kill()
+ self.__react(self.reactors, 5)
+ self.gdb.close()
+ self.qemu.wait()
+ except:
+ print("""\
+Failed to shutdown QEMU. You might need to 'killall qemu' or
+'killall qemu.real'.
+""")
+ raise
+
+ def __monitor_start(self, output):
+ if b"\n" in output:
+ try:
+ self.gdb = GDBClient(self.qemu.get_gdb_port(), timeout=2)
+ raise TerminateTest
+ except socket.error:
+ pass
+ if not len(output):
+ raise TerminateTest
+
+ def __react(self, reactors, timeout):
+ deadline = time.time() + timeout
+ try:
+ while True:
+ timeleft = deadline - time.time()
+ if timeleft < 0:
+ sys.stdout.write("Timeout! ")
+ sys.stdout.flush()
+ return
+
+ rset = [r for r in reactors if r.fileno() is not None]
+ if not rset:
+ return
+
+ rset, _, _ = select.select(rset, [], [], timeleft)
+ for reactor in rset:
+ reactor.handle_read()
+ except TerminateTest:
+ pass
+
+ def user_test(self, binary, *monitors, **kw):
+ """Run a user test using the specified binary. Monitors and
+ keyword arguments are as for run_qemu. This runs on a disk
+ snapshot unless the keyword argument 'snapshot' is False."""
+
+ maybe_unlink("obj/kern/init.o", "obj/kern/kernel")
+ if kw.pop("snapshot", True):
+ kw.setdefault("make_args", []).append("QEMUEXTRA+=-snapshot")
+ self.run_qemu(target_base="run-%s" % binary, *monitors, **kw)
+
+ def match(self, *args, **kwargs):
+ """Shortcut to call assert_lines_match on the most recent QEMU
+ output."""
+
+ assert_lines_match(self.qemu.output, *args, **kwargs)
+
+##################################################################
+# Monitors
+#
+
+__all__ += ["save", "stop_breakpoint", "call_on_line", "stop_on_line", "shell_script"]
+
+def save(path):
+ """Return a monitor that writes QEMU's output to path. If the
+ test fails, copy the output to path.test-name."""
+
+ def setup_save(runner):
+ f.seek(0)
+ f.truncate()
+ runner.qemu.on_output.append(f.write)
+ get_current_test().on_finish.append(save_on_finish)
+
+ def save_on_finish(fail):
+ f.flush()
+ save_path = path + "." + get_current_test().__name__[5:]
+ if fail:
+ shutil.copyfile(path, save_path)
+ print(" QEMU output saved to %s" % save_path)
+ elif os.path.exists(save_path):
+ os.unlink(save_path)
+ print(" (Old %s failure log removed)" % save_path)
+
+ f = open(path, "wb")
+ return setup_save
+
+def stop_breakpoint(addr):
+ """Returns a monitor that stops when addr is reached. addr may be
+ a number or the name of a symbol."""
+
+ def setup_breakpoint(runner):
+ if isinstance(addr, str):
+ addrs = [int(sym[:16], 16) for sym in open("kernel/kernel.sym")
+ if sym[17:].strip() == addr]
+ assert len(addrs), "Symbol %s not found" % addr
+ runner.gdb.breakpoint(addrs[0])
+ else:
+ runner.gdb.breakpoint(addr)
+ return setup_breakpoint
+
+def call_on_line(regexp, callback):
+ """Returns a monitor that calls 'callback' when QEMU prints a line
+ matching 'regexp'."""
+
+ def setup_call_on_line(runner):
+ buf = bytearray()
+ def handle_output(output):
+ buf.extend(output)
+ while b"\n" in buf:
+ line, buf[:] = buf.split(b"\n", 1)
+ line = line.decode("utf-8", "replace")
+ if re.match(regexp, line):
+ callback(line)
+ runner.qemu.on_output.append(handle_output)
+ return setup_call_on_line
+
+def stop_on_line(regexp):
+ """Returns a monitor that stops when QEMU prints a line matching
+ 'regexp'."""
+
+ def stop(line):
+ raise TerminateTest
+ return call_on_line(regexp, stop)
+
+def shell_script(script, terminate_match=None):
+ """Returns a monitor that plays the script, and stops when the script is
+ done executing."""
+
+ def setup_call_on_line(runner):
+ class context:
+ n = 0
+ buf = bytearray()
+ def handle_output(output):
+ context.buf.extend(output)
+ if terminate_match is not None:
+ if re.match(terminate_match, context.buf.decode('utf-8', 'replace')):
+ raise TerminateTest
+ if b'$ ' in context.buf:
+ context.buf = bytearray()
+ if context.n < len(script):
+ runner.qemu.write(script[context.n])
+ runner.qemu.write('\n')
+ context.n += 1
+ else:
+ if terminate_match is None:
+ raise TerminateTest
+ runner.qemu.on_output.append(handle_output)
+ return setup_call_on_line
diff --git a/kernel/e1000.c b/kernel/e1000.c
new file mode 100644
index 0000000..70a2adf
--- /dev/null
+++ b/kernel/e1000.c
@@ -0,0 +1,129 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+#include "e1000_dev.h"
+#include "net.h"
+
+#define TX_RING_SIZE 16
+static struct tx_desc tx_ring[TX_RING_SIZE] __attribute__((aligned(16)));
+static struct mbuf *tx_mbufs[TX_RING_SIZE];
+
+#define RX_RING_SIZE 16
+static struct rx_desc rx_ring[RX_RING_SIZE] __attribute__((aligned(16)));
+static struct mbuf *rx_mbufs[RX_RING_SIZE];
+
+// remember where the e1000's registers live.
+static volatile uint32 *regs;
+
+struct spinlock e1000_lock;
+
+// called by pci_init().
+// xregs is the memory address at which the
+// e1000's registers are mapped.
+void
+e1000_init(uint32 *xregs)
+{
+ int i;
+
+ initlock(&e1000_lock, "e1000");
+
+ regs = xregs;
+
+ // Reset the device
+ regs[E1000_IMS] = 0; // disable interrupts
+ regs[E1000_CTL] |= E1000_CTL_RST;
+ regs[E1000_IMS] = 0; // redisable interrupts
+ __sync_synchronize();
+
+ // [E1000 14.5] Transmit initialization
+ memset(tx_ring, 0, sizeof(tx_ring));
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ tx_ring[i].status = E1000_TXD_STAT_DD;
+ tx_mbufs[i] = 0;
+ }
+ regs[E1000_TDBAL] = (uint64) tx_ring;
+ if(sizeof(tx_ring) % 128 != 0)
+ panic("e1000");
+ regs[E1000_TDLEN] = sizeof(tx_ring);
+ regs[E1000_TDH] = regs[E1000_TDT] = 0;
+
+ // [E1000 14.4] Receive initialization
+ memset(rx_ring, 0, sizeof(rx_ring));
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ rx_mbufs[i] = mbufalloc(0);
+ if (!rx_mbufs[i])
+ panic("e1000");
+ rx_ring[i].addr = (uint64) rx_mbufs[i]->head;
+ }
+ regs[E1000_RDBAL] = (uint64) rx_ring;
+ if(sizeof(rx_ring) % 128 != 0)
+ panic("e1000");
+ regs[E1000_RDH] = 0;
+ regs[E1000_RDT] = RX_RING_SIZE - 1;
+ regs[E1000_RDLEN] = sizeof(rx_ring);
+
+ // filter by qemu's MAC address, 52:54:00:12:34:56
+ regs[E1000_RA] = 0x12005452;
+ regs[E1000_RA+1] = 0x5634 | (1<<31);
+ // multicast table
+ for (int i = 0; i < 4096/32; i++)
+ regs[E1000_MTA + i] = 0;
+
+ // transmitter control bits.
+ regs[E1000_TCTL] = E1000_TCTL_EN | // enable
+ E1000_TCTL_PSP | // pad short packets
+ (0x10 << E1000_TCTL_CT_SHIFT) | // collision stuff
+ (0x40 << E1000_TCTL_COLD_SHIFT);
+ regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap
+
+ // receiver control bits.
+ regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver
+ E1000_RCTL_BAM | // enable broadcast
+ E1000_RCTL_SZ_2048 | // 2048-byte rx buffers
+ E1000_RCTL_SECRC; // strip CRC
+
+ // ask e1000 for receive interrupts.
+ regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer)
+ regs[E1000_RADV] = 0; // interrupt after every packet (no timer)
+ regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back
+}
+
+int
+e1000_transmit(struct mbuf *m)
+{
+ //
+ // Your code here.
+ //
+ // the mbuf contains an ethernet frame; program it into
+ // the TX descriptor ring so that the e1000 sends it. Stash
+ // a pointer so that it can be freed after sending.
+ //
+
+ return 0;
+}
+
+static void
+e1000_recv(void)
+{
+ //
+ // Your code here.
+ //
+ // Check for packets that have arrived from the e1000
+ // Create and deliver an mbuf for each packet (using net_rx()).
+ //
+}
+
+void
+e1000_intr(void)
+{
+ // tell the e1000 we've seen this interrupt;
+ // without this the e1000 won't raise any
+ // further interrupts.
+ regs[E1000_ICR] = 0xffffffff;
+
+ e1000_recv();
+}
diff --git a/kernel/e1000_dev.h b/kernel/e1000_dev.h
new file mode 100644
index 0000000..9b462df
--- /dev/null
+++ b/kernel/e1000_dev.h
@@ -0,0 +1,125 @@
+//
+// E1000 hardware definitions: registers and DMA ring format.
+// from the Intel 82540EP/EM &c manual.
+//
+
+/* Registers */
+#define E1000_CTL (0x00000/4) /* Device Control Register - RW */
+#define E1000_ICR (0x000C0/4) /* Interrupt Cause Read - R */
+#define E1000_IMS (0x000D0/4) /* Interrupt Mask Set - RW */
+#define E1000_RCTL (0x00100/4) /* RX Control - RW */
+#define E1000_TCTL (0x00400/4) /* TX Control - RW */
+#define E1000_TIPG (0x00410/4) /* TX Inter-packet gap - RW */
+#define E1000_RDBAL (0x02800/4) /* RX Descriptor Base Address Low - RW */
+#define E1000_RDTR (0x02820/4) /* RX Delay Timer */
+#define E1000_RADV (0x0282C/4) /* RX Interrupt Absolute Delay Timer */
+#define E1000_RDH (0x02810/4) /* RX Descriptor Head - RW */
+#define E1000_RDT (0x02818/4) /* RX Descriptor Tail - RW */
+#define E1000_RDLEN (0x02808/4) /* RX Descriptor Length - RW */
+#define E1000_RSRPD (0x02C00/4) /* RX Small Packet Detect Interrupt */
+#define E1000_TDBAL (0x03800/4) /* TX Descriptor Base Address Low - RW */
+#define E1000_TDLEN (0x03808/4) /* TX Descriptor Length - RW */
+#define E1000_TDH (0x03810/4) /* TX Descriptor Head - RW */
+#define E1000_TDT (0x03818/4) /* TX Descriptor Tail - RW */
+#define E1000_MTA (0x05200/4) /* Multicast Table Array - RW Array */
+#define E1000_RA (0x05400/4) /* Receive Address - RW Array */
+
+/* Device Control */
+#define E1000_CTL_SLU 0x00000040 /* set link up */
+#define E1000_CTL_FRCSPD 0x00000800 /* force speed */
+#define E1000_CTL_FRCDPLX 0x00001000 /* force duplex */
+#define E1000_CTL_RST 0x00400000 /* full reset */
+
+/* Transmit Control */
+#define E1000_TCTL_RST 0x00000001 /* software reset */
+#define E1000_TCTL_EN 0x00000002 /* enable tx */
+#define E1000_TCTL_BCE 0x00000004 /* busy check enable */
+#define E1000_TCTL_PSP 0x00000008 /* pad short packets */
+#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */
+#define E1000_TCTL_CT_SHIFT 4
+#define E1000_TCTL_COLD 0x003ff000 /* collision distance */
+#define E1000_TCTL_COLD_SHIFT 12
+#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */
+#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */
+#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */
+#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */
+
+/* Receive Control */
+#define E1000_RCTL_RST 0x00000001 /* Software reset */
+#define E1000_RCTL_EN 0x00000002 /* enable */
+#define E1000_RCTL_SBP 0x00000004 /* store bad packet */
+#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */
+#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enable */
+#define E1000_RCTL_LPE 0x00000020 /* long packet enable */
+#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */
+#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */
+#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
+#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */
+#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */
+#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */
+#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
+#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */
+#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */
+#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */
+#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
+#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */
+#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */
+#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */
+#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */
+#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */
+#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */
+#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */
+#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */
+#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
+#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
+#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */
+#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */
+
+#define DATA_MAX 1518
+
+/* Transmit Descriptor command definitions [E1000 3.3.3.1] */
+#define E1000_TXD_CMD_EOP 0x01 /* End of Packet */
+#define E1000_TXD_CMD_RS 0x08 /* Report Status */
+
+/* Transmit Descriptor status definitions [E1000 3.3.3.2] */
+#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+
+// [E1000 3.3.3]
+struct tx_desc
+{
+ uint64 addr;
+ uint16 length;
+ uint8 cso;
+ uint8 cmd;
+ uint8 status;
+ uint8 css;
+ uint16 special;
+};
+
+/* Receive Descriptor bit definitions [E1000 3.2.3.1] */
+#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
+
+// [E1000 3.2.3]
+struct rx_desc
+{
+ uint64 addr; /* Address of the descriptor's data buffer */
+ uint16 length; /* Length of data DMAed into data buffer */
+ uint16 csum; /* Packet checksum */
+ uint8 status; /* Descriptor status */
+ uint8 errors; /* Descriptor Errors */
+ uint16 special;
+};
+
diff --git a/kernel/net.c b/kernel/net.c
new file mode 100644
index 0000000..137ea2b
--- /dev/null
+++ b/kernel/net.c
@@ -0,0 +1,374 @@
+//
+// networking protocol support (IP, UDP, ARP, etc.).
+//
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "net.h"
+#include "defs.h"
+
+static uint32 local_ip = MAKE_IP_ADDR(10, 0, 2, 15); // qemu's idea of the guest IP
+static uint8 local_mac[ETHADDR_LEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
+static uint8 broadcast_mac[ETHADDR_LEN] = { 0xFF, 0XFF, 0XFF, 0XFF, 0XFF, 0XFF };
+
+// Consumes len bytes from the front of the buffer.
+// Returns a pointer to the consumed bytes, or 0 (leaving the mbuf
+// untouched) when fewer than len bytes are available.
+char *
+mbufpull(struct mbuf *m, unsigned int len)
+{
+  char *start;
+
+  if (len > m->len)
+    return 0;
+
+  start = m->head;
+  m->head = start + len;
+  m->len = m->len - len;
+  return start;
+}
+
+// Prepends len bytes to the beginning of the buffer (growing into the
+// headroom) and returns a pointer to the new start of the data.
+// Panics if fewer than len bytes of headroom remain.
+char *
+mbufpush(struct mbuf *m, unsigned int len)
+{
+  // Check the available headroom *before* moving head, so the pointer
+  // never steps below buf[] (or wraps around) for a large len.
+  if (m->head < m->buf || len > (unsigned long)(m->head - m->buf))
+    panic("mbufpush");
+  m->head -= len;
+  m->len += len;
+  return m->head;
+}
+
+// Appends len bytes to the end of the buffer and returns a pointer to the
+// first appended byte.
+// Panics if the data would run past the end of the backing store.
+char *
+mbufput(struct mbuf *m, unsigned int len)
+{
+  char *tmp = m->head + m->len;     // current end of the data
+  char *end = m->buf + MBUF_SIZE;   // one past the backing store
+
+  // The data does not start at buf[0] (there is headroom in front of it),
+  // so the limit is the remaining tailroom, not MBUF_SIZE itself. Checking
+  // before updating len also avoids unsigned wraparound of m->len.
+  if (tmp > end || len > (unsigned long)(end - tmp))
+    panic("mbufput");
+  m->len += len;
+  return tmp;
+}
+
+// Drops len bytes from the tail of the buffer.
+// Returns a pointer to the dropped bytes (the new end of the data), or 0
+// without changing the mbuf if it holds fewer than len bytes.
+char *
+mbuftrim(struct mbuf *m, unsigned int len)
+{
+  unsigned int remaining;
+
+  if (m->len < len)
+    return 0;
+
+  remaining = m->len - len;
+  m->len = remaining;
+  return m->head + remaining;
+}
+
+// Allocates an empty packet buffer whose data area starts headroom bytes
+// into the zeroed backing store.
+// Returns 0 if headroom exceeds MBUF_SIZE or if kalloc() fails.
+struct mbuf *
+mbufalloc(unsigned int headroom)
+{
+  struct mbuf *mb;
+
+  if (headroom > MBUF_SIZE)
+    return 0;
+  if ((mb = kalloc()) == 0)
+    return 0;
+
+  memset(mb->buf, 0, sizeof(mb->buf));
+  mb->len = 0;
+  mb->head = (char *)mb->buf + headroom;
+  mb->next = 0;
+  return mb;
+}
+
+// Frees a packet buffer by passing it to kfree().
+void
+mbuffree(struct mbuf *m)
+{
+ kfree(m);
+}
+
+// Appends m to the tail of queue q; m becomes the last element.
+void
+mbufq_pushtail(struct mbufq *q, struct mbuf *m)
+{
+  m->next = 0;
+  if (q->head) {
+    // non-empty queue: link behind the current tail
+    q->tail->next = m;
+    q->tail = m;
+  } else {
+    // empty queue: m is both first and last
+    q->head = m;
+    q->tail = m;
+  }
+}
+
+// Removes and returns the first mbuf of the queue, or 0 if the queue is
+// empty.
+struct mbuf *
+mbufq_pophead(struct mbufq *q)
+{
+  struct mbuf *first = q->head;
+
+  if (first != 0)
+    q->head = first->next;
+  return first;
+}
+
+// Reports whether the queue holds no mbufs: 1 if empty, 0 otherwise.
+int
+mbufq_empty(struct mbufq *q)
+{
+  return !q->head;
+}
+
+// Initializes a queue of mbufs to the empty state.
+void
+mbufq_init(struct mbufq *q)
+{
+  q->head = 0;
+  // tail is only meaningful while head is non-zero, but clear it too so a
+  // freshly initialized queue never carries an indeterminate pointer.
+  q->tail = 0;
+}
+
+// This code is lifted from FreeBSD's ping.c, and is copyright by the Regents
+// of the University of California.
+//
+// Computes the 16-bit ones'-complement checksum of the len bytes starting
+// at addr and returns the complement of the folded sum. net_rx_ip() treats
+// a result of zero over a received header as "checksum valid".
+// addr is read through an unsigned short pointer.
+// NOTE(review): that presumably requires addr to be suitably aligned for
+// 16-bit loads -- confirm for callers.
+static unsigned short
+in_cksum(const unsigned char *addr, int len)
+{
+ int nleft = len;
+ const unsigned short *w = (const unsigned short *)addr;
+ unsigned int sum = 0;
+ unsigned short answer = 0;
+
+ /*
+ * Our algorithm is simple, using a 32 bit accumulator (sum), we add
+ * sequential 16 bit words to it, and at the end, fold back all the
+ * carry bits from the top 16 bits into the lower 16 bits.
+ */
+ while (nleft > 1) {
+ sum += *w++;
+ nleft -= 2;
+ }
+
+ /* mop up an odd byte, if necessary */
+ if (nleft == 1) {
+ /* store the last byte into the first byte of answer; the other byte stays 0 */
+ *(unsigned char *)(&answer) = *(const unsigned char *)w;
+ sum += answer;
+ }
+
+ /* add back carry outs from top 16 bits to low 16 bits */
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum += (sum >> 16);
+ /* guaranteed now that the lower 16 bits of sum are correct */
+
+ answer = ~sum; /* truncate to 16 bits */
+ return answer;
+}
+
+// Prepends an Ethernet header of the given type to m and hands the frame
+// to the e1000 driver. If e1000_transmit() returns nonzero the mbuf is
+// freed here.
+static void
+net_tx_eth(struct mbuf *m, uint16 ethtype)
+{
+  struct eth *hdr;
+
+  hdr = mbufpushhdr(m, *hdr);
+  hdr->type = htons(ethtype);
+  // A real networking stack would fill in the destination address learned
+  // through ARP. We don't support enough of the ARP protocol for that, so
+  // every frame is sent to the broadcast address instead.
+  memmove(hdr->dhost, broadcast_mac, ETHADDR_LEN);
+  memmove(hdr->shost, local_mac, ETHADDR_LEN);
+
+  if (e1000_transmit(m) != 0)
+    mbuffree(m);
+}
+
+// Prepends an IPv4 header (protocol proto, destination dip in host byte
+// order, source local_ip) to m and passes it on to the Ethernet layer.
+static void
+net_tx_ip(struct mbuf *m, uint8 proto, uint32 dip)
+{
+  struct ip *hdr;
+
+  // make room for the header and start from an all-zero one
+  hdr = mbufpushhdr(m, *hdr);
+  memset(hdr, 0, sizeof(*hdr));
+
+  hdr->ip_vhl = (4 << 4) | (20 >> 2);
+  hdr->ip_ttl = 100;
+  hdr->ip_p = proto;
+  hdr->ip_len = htons(m->len);  // header plus everything already in m
+  hdr->ip_dst = htonl(dip);
+  hdr->ip_src = htonl(local_ip);
+  // checksum goes last: it covers the finished header with ip_sum still 0
+  hdr->ip_sum = in_cksum((unsigned char *)hdr, sizeof(*hdr));
+
+  // hand over to the ethernet layer
+  net_tx_eth(m, ETHTYPE_IP);
+}
+
+// Prepends a UDP header to the payload in m and sends it to dip via the IP
+// layer. dip, sport and dport are given in host byte order.
+void
+net_tx_udp(struct mbuf *m, uint32 dip,
+           uint16 sport, uint16 dport)
+{
+  struct udp *hdr;
+
+  hdr = mbufpushhdr(m, *hdr);
+  hdr->sum = 0;               // zero means no checksum is provided
+  hdr->ulen = htons(m->len);  // UDP header + payload
+  hdr->dport = htons(dport);
+  hdr->sport = htons(sport);
+
+  // continue with the IP layer
+  net_tx_ip(m, IPPROTO_UDP, dip);
+}
+
+// Builds an ARP packet with operation op, addressed to hardware address
+// dmac / IP address dip, and sends it. Returns 0 once the packet has been
+// handed to the Ethernet layer, or -1 if no mbuf could be allocated.
+static int
+net_tx_arp(uint16 op, uint8 dmac[ETHADDR_LEN], uint32 dip)
+{
+  struct arp *pkt;
+  struct mbuf *m = mbufalloc(MBUF_DEFAULT_HEADROOM);
+
+  if (m == 0)
+    return -1;
+
+  pkt = mbufputhdr(m, *pkt);
+
+  // protocol-independent fields
+  pkt->op = htons(op);
+  pkt->pln = sizeof(uint32);
+  pkt->hln = ETHADDR_LEN;
+  pkt->pro = htons(ETHTYPE_IP);
+  pkt->hrd = htons(ARP_HRD_ETHER);
+
+  // sender (us) and target addresses
+  pkt->sip = htonl(local_ip);
+  memmove(pkt->sha, local_mac, ETHADDR_LEN);
+  pkt->tip = htonl(dip);
+  memmove(pkt->tha, dmac, ETHADDR_LEN);
+
+  // packet is complete, pass it down
+  net_tx_eth(m, ETHTYPE_ARP);
+  return 0;
+}
+
+// Handles a received ARP packet. Only well-formed Ethernet/IPv4 requests
+// that ask for our own IP address are answered (with an ARP reply to the
+// sender); everything else is ignored. The mbuf is always freed.
+static void
+net_rx_arp(struct mbuf *m)
+{
+  struct arp *req;
+  uint8 sender_mac[ETHADDR_LEN];
+
+  req = mbufpullhdr(m, *req);
+
+  // The checks run in order and stop at the first failure: header
+  // present, header describes Ethernet + IPv4, operation is a request
+  // (the only one supported so far), and our IP is the one solicited.
+  if (req &&
+      ntohs(req->hrd) == ARP_HRD_ETHER &&
+      ntohs(req->pro) == ETHTYPE_IP &&
+      req->hln == ETHADDR_LEN &&
+      req->pln == sizeof(uint32) &&
+      ntohs(req->op) == ARP_OP_REQUEST &&
+      ntohl(req->tip) == local_ip) {
+    // reply to the sender's ethernet and IP address (qemu's slirp)
+    memmove(sender_mac, req->sha, ETHADDR_LEN);
+    net_tx_arp(ARP_OP_REPLY, sender_mac, ntohl(req->sip));
+  }
+
+  mbuffree(m);
+}
+
+// receives a UDP packet
+//
+// m holds the packet starting at the UDP header, len is the UDP length
+// derived from the IP header, and iphdr is the already-parsed IP header.
+// A valid datagram is trimmed to its payload and handed to sockrecvudp();
+// anything malformed is freed.
+static void
+net_rx_udp(struct mbuf *m, uint16 len, struct ip *iphdr)
+{
+  struct udp *udphdr;
+  uint32 sip;
+  uint16 sport, dport;
+
+  udphdr = mbufpullhdr(m, *udphdr);
+  if (!udphdr)
+    goto fail;
+
+  // TODO: validate UDP checksum
+
+  // validate lengths reported in headers; reject a length smaller than
+  // the UDP header explicitly rather than relying on the subtraction
+  // below wrapping around.
+  if (len < sizeof(*udphdr) || ntohs(udphdr->ulen) != len)
+    goto fail;
+  len -= sizeof(*udphdr);
+  if (len > m->len)
+    goto fail;
+  // minimum packet size could be larger than the payload
+  mbuftrim(m, m->len - len);
+
+  // parse the necessary fields
+  sip = ntohl(iphdr->ip_src);
+  sport = ntohs(udphdr->sport);
+  dport = ntohs(udphdr->dport);
+  // the packet's destination port is the socket's local port and its
+  // source port is the socket's remote port
+  sockrecvudp(m, sip, dport, sport);
+  return;
+
+fail:
+  mbuffree(m);
+}
+
+// receives an IP packet
+//
+// Validates the IPv4 header at the front of m and passes UDP packets that
+// are addressed to local_ip on to net_rx_udp(). Every other packet is
+// freed.
+static void
+net_rx_ip(struct mbuf *m)
+{
+  struct ip *iphdr;
+  uint16 totlen;
+
+  iphdr = mbufpullhdr(m, *iphdr);
+  if (!iphdr)
+    goto fail;
+
+  // check IP version and header len
+  if (iphdr->ip_vhl != ((4 << 4) | (20 >> 2)))
+    goto fail;
+  // validate IP checksum
+  if (in_cksum((unsigned char *)iphdr, sizeof(*iphdr)))
+    goto fail;
+  // can't support fragmented IP packets. A packet is a fragment when the
+  // "more fragments" flag (0x2000) is set or the 13-bit fragment offset is
+  // nonzero; "don't fragment" (0x4000) alone does not make it one.
+  if ((ntohs(iphdr->ip_off) & 0x3fff) != 0)
+    goto fail;
+  // is the packet addressed to us?
+  if (ntohl(iphdr->ip_dst) != local_ip)
+    goto fail;
+  // can only support UDP
+  if (iphdr->ip_p != IPPROTO_UDP)
+    goto fail;
+
+  // the total length must at least cover the header we just parsed,
+  // otherwise the payload length below would wrap around
+  totlen = ntohs(iphdr->ip_len);
+  if (totlen < sizeof(*iphdr))
+    goto fail;
+  net_rx_udp(m, totlen - sizeof(*iphdr), iphdr);
+  return;
+
+fail:
+  mbuffree(m);
+}
+
+// Entry point of the networking stack: the e1000 driver's interrupt
+// handler calls this for every received frame. Strips the Ethernet header
+// and dispatches on its type; frames that are too short or of an
+// unsupported type are freed.
+void net_rx(struct mbuf *m)
+{
+  struct eth *hdr;
+
+  hdr = mbufpullhdr(m, *hdr);
+  if (hdr == 0) {
+    mbuffree(m);
+    return;
+  }
+
+  switch (ntohs(hdr->type)) {
+  case ETHTYPE_IP:
+    net_rx_ip(m);
+    break;
+  case ETHTYPE_ARP:
+    net_rx_arp(m);
+    break;
+  default:
+    mbuffree(m);
+    break;
+  }
+}
diff --git a/kernel/net.h b/kernel/net.h
new file mode 100644
index 0000000..9e6fefe
--- /dev/null
+++ b/kernel/net.h
@@ -0,0 +1,173 @@
+//
+// packet buffer management
+//
+
+#define MBUF_SIZE 2048
+#define MBUF_DEFAULT_HEADROOM 128
+
+struct mbuf {
+ struct mbuf *next; // the next mbuf in the chain
+ char *head; // the current start position of the buffer
+ unsigned int len; // the length of the buffer
+ char buf[MBUF_SIZE]; // the backing store
+};
+
+char *mbufpull(struct mbuf *m, unsigned int len);
+char *mbufpush(struct mbuf *m, unsigned int len);
+char *mbufput(struct mbuf *m, unsigned int len);
+char *mbuftrim(struct mbuf *m, unsigned int len);
+
+// The above functions manipulate the size and position of the buffer:
+// <- push <- trim
+// -> pull -> put
+// [-headroom-][------buffer------][-tailroom-]
+// |----------------MBUF_SIZE-----------------|
+//
+// These macros automatically typecast and determine the size of header structs.
+// In most situations you should use these instead of the raw ops above.
+#define mbufpullhdr(mbuf, hdr) (typeof(hdr)*)mbufpull(mbuf, sizeof(hdr))
+#define mbufpushhdr(mbuf, hdr) (typeof(hdr)*)mbufpush(mbuf, sizeof(hdr))
+#define mbufputhdr(mbuf, hdr) (typeof(hdr)*)mbufput(mbuf, sizeof(hdr))
+#define mbuftrimhdr(mbuf, hdr) (typeof(hdr)*)mbuftrim(mbuf, sizeof(hdr))
+
+struct mbuf *mbufalloc(unsigned int headroom);
+void mbuffree(struct mbuf *m);
+
+struct mbufq {
+ struct mbuf *head; // the first element in the queue
+ struct mbuf *tail; // the last element in the queue
+};
+
+void mbufq_pushtail(struct mbufq *q, struct mbuf *m);
+struct mbuf *mbufq_pophead(struct mbufq *q);
+int mbufq_empty(struct mbufq *q);
+void mbufq_init(struct mbufq *q);
+
+
+//
+// endianness support
+//
+
+// Returns val with its two bytes exchanged.
+static inline uint16 bswaps(uint16 val)
+{
+  uint16 lo = val & 0x00ffU;
+  uint16 hi = (val >> 8) & 0x00ffU;
+
+  return (uint16)((lo << 8) | hi);
+}
+
+// Returns val with the order of its four bytes reversed.
+static inline uint32 bswapl(uint32 val)
+{
+  uint32 b0 = val & 0xffU;          // least significant byte
+  uint32 b1 = (val >> 8) & 0xffU;
+  uint32 b2 = (val >> 16) & 0xffU;
+  uint32 b3 = (val >> 24) & 0xffU;  // most significant byte
+
+  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+}
+
+// Use these macros to convert network bytes to the native byte order.
+// Note that Risc-V uses little endian while network order is big endian.
+#define ntohs bswaps
+#define ntohl bswapl
+#define htons bswaps
+#define htonl bswapl
+
+
+//
+// useful networking headers
+//
+
+#define ETHADDR_LEN 6
+
+// an Ethernet packet header (start of the packet).
+struct eth {
+ uint8 dhost[ETHADDR_LEN];
+ uint8 shost[ETHADDR_LEN];
+ uint16 type;
+} __attribute__((packed));
+
+#define ETHTYPE_IP 0x0800 // Internet protocol
+#define ETHTYPE_ARP 0x0806 // Address resolution protocol
+
+// an IP packet header (comes after an Ethernet header).
+struct ip {
+ uint8 ip_vhl; // version << 4 | header length >> 2
+ uint8 ip_tos; // type of service
+ uint16 ip_len; // total length
+ uint16 ip_id; // identification
+ uint16 ip_off; // fragment offset field
+ uint8 ip_ttl; // time to live
+ uint8 ip_p; // protocol
+ uint16 ip_sum; // checksum
+ uint32 ip_src, ip_dst;
+};
+
+#define IPPROTO_ICMP 1 // Control message protocol
+#define IPPROTO_TCP 6 // Transmission control protocol
+#define IPPROTO_UDP 17 // User datagram protocol
+
+// Builds a host-byte-order IPv4 address from its four dotted-quad parts,
+// e.g. MAKE_IP_ADDR(10, 0, 2, 15). Each argument is parenthesized so that
+// expressions such as MAKE_IP_ADDR(10, 0, 2, base + i) expand correctly.
+#define MAKE_IP_ADDR(a, b, c, d) \
+  (((uint32)(a) << 24) | ((uint32)(b) << 16) | \
+   ((uint32)(c) << 8) | (uint32)(d))
+
+// a UDP packet header (comes after an IP header).
+struct udp {
+ uint16 sport; // source port
+ uint16 dport; // destination port
+ uint16 ulen; // length, including udp header, not including IP header
+ uint16 sum; // checksum
+};
+
+// an ARP packet (comes after an Ethernet header).
+struct arp {
+ uint16 hrd; // format of hardware address
+ uint16 pro; // format of protocol address
+ uint8 hln; // length of hardware address
+ uint8 pln; // length of protocol address
+ uint16 op; // operation
+
+ char sha[ETHADDR_LEN]; // sender hardware address
+ uint32 sip; // sender IP address
+ char tha[ETHADDR_LEN]; // target hardware address
+ uint32 tip; // target IP address
+} __attribute__((packed));
+
+#define ARP_HRD_ETHER 1 // Ethernet
+
+enum {
+ ARP_OP_REQUEST = 1, // requests hw addr given protocol addr
+ ARP_OP_REPLY = 2, // replies a hw addr given protocol addr
+};
+
+// an DNS packet (comes after an UDP header).
+struct dns {
+ uint16 id; // request ID
+
+ uint8 rd: 1; // recursion desired
+ uint8 tc: 1; // truncated
+ uint8 aa: 1; // authoritive
+ uint8 opcode: 4;
+ uint8 qr: 1; // query/response
+ uint8 rcode: 4; // response code
+ uint8 cd: 1; // checking disabled
+ uint8 ad: 1; // authenticated data
+ uint8 z: 1;
+ uint8 ra: 1; // recursion available
+
+ uint16 qdcount; // number of question entries
+ uint16 ancount; // number of resource records in answer section
+ uint16 nscount; // number of NS resource records in authority section
+ uint16 arcount; // number of resource records in additional records
+} __attribute__((packed));
+
+struct dns_question {
+ uint16 qtype;
+ uint16 qclass;
+} __attribute__((packed));
+
+#define ARECORD (0x0001)
+#define QCLASS (0x0001)
+
+struct dns_data {
+ uint16 type;
+ uint16 class;
+ uint32 ttl;
+ uint16 len;
+} __attribute__((packed));
diff --git a/kernel/pci.c b/kernel/pci.c
new file mode 100644
index 0000000..5cd2102
--- /dev/null
+++ b/kernel/pci.c
@@ -0,0 +1,61 @@
+//
+// simple PCI-Express initialization, only
+// works for qemu and its e1000 card.
+//
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+
+// Scans PCI bus 0 for the e1000 network card (vendor/device id word
+// 0x100e8086). For each match it enables the device, programs its first
+// BAR with the address chosen for the card's registers, and calls
+// e1000_init() with that address.
+void
+pci_init()
+{
+ // we'll place the e1000 registers at this address.
+ // vm.c maps this range.
+ uint64 e1000_regs = 0x40000000L;
+
+ // qemu -machine virt puts PCIe config space here.
+ // vm.c maps this range.
+ uint32 *ecam = (uint32 *) 0x30000000L;
+
+ // look at each possible PCI device on bus 0.
+ for(int dev = 0; dev < 32; dev++){
+ int bus = 0;
+ int func = 0;
+ int offset = 0;
+ // NOTE(review): ecam is a uint32 pointer, so `ecam + off` advances by
+ // off*4 bytes; confirm that this index encoding matches the ECAM
+ // layout qemu's virt machine actually uses.
+ uint32 off = (bus << 16) | (dev << 11) | (func << 8) | (offset);
+ volatile uint32 *base = ecam + off;
+ // first config word: device id in the high half, vendor id in the low half
+ uint32 id = base[0];
+
+ // 100e:8086 is an e1000
+ if(id == 0x100e8086){
+ // command and status register.
+ // bit 0 : I/O access enable
+ // bit 1 : memory access enable
+ // bit 2 : enable mastering
+ base[1] = 7;
+ __sync_synchronize();
+
+ // probe each of the six BARs (config words 4..9)
+ for(int i = 0; i < 6; i++){
+ uint32 old = base[4+i];
+
+ // writing all 1's to the BAR causes it to be
+ // replaced with its size.
+ base[4+i] = 0xffffffff;
+ __sync_synchronize();
+
+ // the size is not read back here; the previous value is simply restored.
+ base[4+i] = old;
+ }
+
+ // tell the e1000 to reveal its registers at
+ // physical address 0x40000000.
+ base[4+0] = e1000_regs;
+
+ e1000_init((uint32*)e1000_regs);
+ }
+ }
+}
diff --git a/kernel/sysnet.c b/kernel/sysnet.c
new file mode 100644
index 0000000..1c48cb3
--- /dev/null
+++ b/kernel/sysnet.c
@@ -0,0 +1,185 @@
+//
+// network system calls.
+//
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+#include "fs.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "net.h"
+
+struct sock {
+ struct sock *next; // the next socket in the list
+ uint32 raddr; // the remote IPv4 address
+ uint16 lport; // the local UDP port number
+ uint16 rport; // the remote UDP port number
+ struct spinlock lock; // protects the rxq
+ struct mbufq rxq; // a queue of packets waiting to be received
+};
+
+static struct spinlock lock;
+static struct sock *sockets;
+
+// Initializes the spinlock that sockalloc(), sockclose() and sockrecvudp()
+// take while walking or modifying the list of sockets.
+void
+sockinit(void)
+{
+ initlock(&lock, "socktbl");
+}
+
+// Creates a UDP socket for the (raddr, lport, rport) triple and a file
+// that refers to it.
+//
+// On success stores the new file in *f and returns 0. Returns -1 (with *f
+// set to 0) if a file or a socket cannot be allocated, or if a socket for
+// the same triple already exists.
+int
+sockalloc(struct file **f, uint32 raddr, uint16 lport, uint16 rport)
+{
+  struct sock *si, *pos;
+
+  si = 0;
+  *f = 0;
+  if ((*f = filealloc()) == 0)
+    goto bad;
+  if ((si = (struct sock*)kalloc()) == 0)
+    goto bad;
+
+  // initialize the socket
+  si->raddr = raddr;
+  si->lport = lport;
+  si->rport = rport;
+  initlock(&si->lock, "sock");
+  mbufq_init(&si->rxq);
+
+  // add to list of sockets, unless the triple is already taken
+  acquire(&lock);
+  for (pos = sockets; pos; pos = pos->next) {
+    if (pos->raddr == raddr &&
+        pos->lport == lport &&
+        pos->rport == rport) {
+      release(&lock);
+      goto bad;
+    }
+  }
+  si->next = sockets;
+  sockets = si;
+  release(&lock);
+
+  // Attach the socket to the file only now that nothing can fail any more.
+  // The error path below frees si itself and then closes the file, so the
+  // file must not yet refer to si when that happens (presumably
+  // fileclose() on an FD_SOCK file would go on to close the socket a
+  // second time -- verify against file.c).
+  (*f)->type = FD_SOCK;
+  (*f)->readable = 1;
+  (*f)->writable = 1;
+  (*f)->sock = si;
+  return 0;
+
+bad:
+  if (si)
+    kfree((char*)si);
+  if (*f) {
+    fileclose(*f);
+    *f = 0;   // don't leave the caller with a pointer to a closed file
+  }
+  return -1;
+}
+
+// Tears down a socket: unlinks it from the list of sockets, frees every
+// packet still waiting in its receive queue, and frees the socket itself.
+void
+sockclose(struct sock *si)
+{
+  struct sock **link;
+  struct mbuf *pending;
+
+  // unlink from the socket list; link always points at the pointer that
+  // refers to the current element
+  acquire(&lock);
+  for (link = &sockets; *link != 0; link = &(*link)->next) {
+    if (*link == si) {
+      *link = si->next;
+      break;
+    }
+  }
+  release(&lock);
+
+  // drop packets nobody read
+  while ((pending = mbufq_pophead(&si->rxq)) != 0)
+    mbuffree(pending);
+
+  kfree((char*)si);
+}
+
+// Receives one packet from the socket into user memory.
+//
+// Sleeps until a packet is queued on si (or the process is killed), then
+// copies at most n bytes of it to user address addr. Bytes of the packet
+// beyond n are discarded together with the mbuf.
+// Returns the number of bytes copied, or -1 if n is negative, the process
+// was killed, or the copy to user space failed.
+int
+sockread(struct sock *si, uint64 addr, int n)
+{
+  struct proc *pr = myproc();
+  struct mbuf *m;
+  int len;
+
+  // A negative n would survive the "len > n" clamp below and turn into a
+  // huge length once passed to copyout(), reading far past the mbuf.
+  if (n < 0)
+    return -1;
+
+  acquire(&si->lock);
+  while (mbufq_empty(&si->rxq) && !pr->killed) {
+    sleep(&si->rxq, &si->lock);
+  }
+  if (pr->killed) {
+    release(&si->lock);
+    return -1;
+  }
+  m = mbufq_pophead(&si->rxq);
+  release(&si->lock);
+
+  len = m->len;
+  if (len > n)
+    len = n;
+  if (copyout(pr->pagetable, addr, m->head, len) == -1) {
+    mbuffree(m);
+    return -1;
+  }
+  mbuffree(m);
+  return len;
+}
+
+// Sends the n bytes at user address addr as one UDP packet to the
+// socket's remote address and port.
+// Returns n on success, or -1 if n is negative or does not fit into a
+// single mbuf, no mbuf could be allocated, or the copy from user space
+// failed.
+int
+sockwrite(struct sock *si, uint64 addr, int n)
+{
+  struct proc *pr = myproc();
+  struct mbuf *m;
+
+  // n comes straight from user space. The payload has to fit behind the
+  // headroom of one mbuf; without this check an oversized or negative n
+  // would make mbufput() panic the kernel or run past the buffer.
+  if (n < 0 || n > MBUF_SIZE - MBUF_DEFAULT_HEADROOM)
+    return -1;
+
+  m = mbufalloc(MBUF_DEFAULT_HEADROOM);
+  if (!m)
+    return -1;
+
+  if (copyin(pr->pagetable, mbufput(m, n), addr, n) == -1) {
+    mbuffree(m);
+    return -1;
+  }
+  net_tx_udp(m, si->raddr, si->lport, si->rport);
+  return n;
+}
+
+// Called by the protocol handler layer to deliver a received UDP packet.
+//
+// Looks up the socket registered for (raddr, lport, rport). If there is
+// one, the mbuf is appended to its receive queue and any reader sleeping
+// on that queue is woken up; otherwise the mbuf is freed.
+void
+sockrecvudp(struct mbuf *m, uint32 raddr, uint16 lport, uint16 rport)
+{
+  struct sock *si;
+
+  acquire(&lock);
+  for (si = sockets; si != 0; si = si->next) {
+    if (si->raddr == raddr && si->lport == lport && si->rport == rport)
+      break;
+  }
+
+  if (si == 0) {
+    // nobody is listening for this packet
+    release(&lock);
+    mbuffree(m);
+    return;
+  }
+
+  // the list lock stays held while the packet is queued on the socket
+  acquire(&si->lock);
+  mbufq_pushtail(&si->rxq, m);
+  wakeup(&si->rxq);
+  release(&si->lock);
+  release(&lock);
+}
diff --git a/ping.py b/ping.py
new file mode 100644
index 0000000..eab10f8
--- /dev/null
+++ b/ping.py
@@ -0,0 +1,12 @@
+import socket
+import sys
+import time
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+addr = ('localhost', int(sys.argv[1]))
+buf = "this is a ping!".encode('utf-8')
+
+while True:
+ print("pinging...", file=sys.stderr)
+ sock.sendto(buf, ("127.0.0.1", int(sys.argv[1])))
+ time.sleep(1)
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..2421c31
--- /dev/null
+++ b/server.py
@@ -0,0 +1,13 @@
+import socket
+import sys
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+addr = ('localhost', int(sys.argv[1]))
+print('listening on %s port %s' % addr, file=sys.stderr)
+sock.bind(addr)
+
+while True:
+ buf, raddr = sock.recvfrom(4096)
+ print(buf.decode("utf-8"), file=sys.stderr)
+ if buf:
+ sent = sock.sendto(b'this is the host!', raddr)
diff --git a/user/nettests.c b/user/nettests.c
new file mode 100644
index 0000000..2f7d6cd
--- /dev/null
+++ b/user/nettests.c
@@ -0,0 +1,297 @@
+#include "kernel/types.h"
+#include "kernel/net.h"
+#include "kernel/stat.h"
+#include "user/user.h"
+
+//
+// Sends `attempts` copies of a UDP message from local port sport to port
+// dport on the host running qemu, then waits for a single reply and
+// checks its payload. Any failure prints a message and exits with
+// status 1.
+//
+static void
+ping(uint16 sport, uint16 dport, int attempts)
+{
+  char reply[128];
+  char *msg = "a message from xv6!";
+  uint32 dst;
+  int fd, i, nread;
+
+  // 10.0.2.2 is remapped by qemu to the external host, i.e. the machine
+  // qemu is running on. Using a different dst sends the UDP packet to
+  // any other Internet address.
+  dst = (10 << 24) | (0 << 16) | (2 << 8) | (2 << 0);
+
+  fd = connect(dst, sport, dport);
+  if(fd < 0){
+    fprintf(2, "ping: connect() failed\n");
+    exit(1);
+  }
+
+  for(i = 0; i < attempts; i++) {
+    if(write(fd, msg, strlen(msg)) < 0){
+      fprintf(2, "ping: send() failed\n");
+      exit(1);
+    }
+  }
+
+  // leave room for the terminating NUL added below
+  nread = read(fd, reply, sizeof(reply)-1);
+  if(nread < 0){
+    fprintf(2, "ping: recv() failed\n");
+    exit(1);
+  }
+
+  close(fd);
+  reply[nread] = '\0';
+  if(strcmp(reply, "this is the host!") != 0){
+    fprintf(2, "ping didn't receive correct payload\n");
+    exit(1);
+  }
+}
+
+// Encode a DNS name
+//
+// Converts the dotted name in host (e.g. "pdos.csail.mit.edu.") into DNS
+// wire format at qn: each label is written as a length byte followed by
+// the label's characters, and the name ends with a zero byte. The final
+// label is encoded whether or not host ends with a '.'.
+// qn must have room for strlen(host) + 2 bytes.
+static void
+encode_qname(char *qn, char *host)
+{
+  char *end = host + strlen(host);  // computed once, not on every iteration
+  char *l = host;                   // start of the label being scanned
+
+  for(char *c = host; c <= end; c++) {
+    // a label ends at a '.', or at the end of the string when characters
+    // are still pending (name given without a trailing dot)
+    if(*c == '.' || (c == end && c > l)) {
+      *qn++ = (char) (c-l);
+      for(char *d = l; d < c; d++) {
+        *qn++ = *d;
+      }
+      l = c+1; // skip .
+    }
+  }
+  *qn = '\0';
+}
+
+// Decode a DNS name
+//
+// Rewrites, in place, the wire-format name at qn (length-prefixed labels
+// ending in a zero byte) into dotted text: every label is shifted one
+// byte towards the front and followed by a '.', so the result occupies
+// the same number of bytes and keeps the original terminating zero.
+// max is the number of bytes available at qn. If the name is not fully
+// contained in them, prints "invalid DNS reply" and exits with status 1.
+static void
+decode_qname(char *qn, int max)
+{
+  char *qnMax = qn + max;
+  while(1){
+    if(qn >= qnMax){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+    // read the length as an unsigned byte so the result does not depend
+    // on whether plain char is signed
+    int l = (unsigned char) *qn;
+    if(l == 0)
+      break;
+    // the l label bytes follow the length byte; all of them must lie
+    // inside the buffer before they are moved
+    if(l >= qnMax - qn){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+    for(int i = 0; i < l; i++) {
+      *qn = *(qn+1);
+      qn++;
+    }
+    *qn++ = '.';
+  }
+}
+
+// Make a DNS request
+//
+// Writes a DNS query for the A record of pdos.csail.mit.edu into obuf:
+// the header (id 6828, recursion desired, one question), the encoded
+// name, and the question's type and class. Returns the number of bytes
+// that make up the request.
+static int
+dns_req(uint8 *obuf)
+{
+  struct dns *hdr = (struct dns *) obuf;
+  char *name = (char *) (hdr + 1);   // the question follows the header
+  struct dns_question *q;
+  int namelen;
+
+  hdr->id = htons(6828);
+  hdr->rd = 1;
+  hdr->qdcount = htons(1);
+
+  // qname part of question
+  encode_qname(name, "pdos.csail.mit.edu.");
+  namelen = strlen(name) + 1;        // include the terminating zero byte
+
+  // constants part of question
+  q = (struct dns_question *) (name + namelen);
+  q->qtype = htons(0x1);
+  q->qclass = htons(0x1);
+
+  return sizeof(struct dns) + namelen + sizeof(struct dns_question);
+}
+
+// Process DNS response
+//
+// Checks the cc-byte reply in ibuf: it must be a response with id 6828
+// and rcode 0, its question, answer and additional (EDNS) sections must
+// account for exactly cc bytes, and it must contain an A record with the
+// address 128.52.129.126. Names in the reply are decoded in place.
+// Prints a message and exits with status 1 on any violation.
+static void
+dns_rep(uint8 *ibuf, int cc)
+{
+  struct dns *hdr = (struct dns *) ibuf;
+  int len;
+  char *qname = 0;
+  int record = 0;
+
+  // compare as ints so a negative cc cannot pass as a large unsigned value
+  if(cc < (int) sizeof(struct dns)){
+    printf("DNS reply too short\n");
+    exit(1);
+  }
+
+  if(!hdr->qr) {
+    printf("Not a DNS reply for %d\n", ntohs(hdr->id));
+    exit(1);
+  }
+
+  if(hdr->id != htons(6828)){
+    printf("DNS wrong id: %d\n", ntohs(hdr->id));
+    exit(1);
+  }
+
+  if(hdr->rcode != 0) {
+    printf("DNS rcode error: %x\n", hdr->rcode);
+    exit(1);
+  }
+
+  //printf("qdcount: %x\n", ntohs(hdr->qdcount));
+  //printf("ancount: %x\n", ntohs(hdr->ancount));
+  //printf("nscount: %x\n", ntohs(hdr->nscount));
+  //printf("arcount: %x\n", ntohs(hdr->arcount));
+
+  len = sizeof(struct dns);
+
+  // question section: name followed by type and class
+  for(int i =0; i < ntohs(hdr->qdcount); i++) {
+    char *qn = (char *) (ibuf+len);
+    qname = qn;
+    decode_qname(qn, cc - len);
+    len += strlen(qn)+1;
+    len += sizeof(struct dns_question);
+  }
+
+  // answer section
+  for(int i = 0; i < ntohs(hdr->ancount); i++) {
+    if(len >= cc){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+
+    char *qn = (char *) (ibuf+len);
+
+    // a first byte above 63 cannot be a label length; it marks a two-byte
+    // compression pointer. Compare as unsigned so the test does not
+    // depend on the signedness of plain char.
+    if((uint8) qn[0] > 63) {  // compression?
+      len += 2;
+    } else {
+      decode_qname(qn, cc - len);
+      len += strlen(qn)+1;
+    }
+
+    // the fixed part of the record must be inside the reply
+    if(len + (int) sizeof(struct dns_data) > cc){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+    struct dns_data *d = (struct dns_data *) (ibuf+len);
+    len += sizeof(struct dns_data);
+    //printf("type %d ttl %d len %d\n", ntohs(d->type), ntohl(d->ttl), ntohs(d->len));
+
+    // ... and so must its data
+    int rdlen = ntohs(d->len);
+    if(len + rdlen > cc){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+    if(ntohs(d->type) == ARECORD && rdlen == 4) {
+      record = 1;
+      printf("DNS arecord for %s is ", qname ? qname : "" );
+      uint8 *ip = (ibuf+len);
+      printf("%d.%d.%d.%d\n", ip[0], ip[1], ip[2], ip[3]);
+      if(ip[0] != 128 || ip[1] != 52 || ip[2] != 129 || ip[3] != 126) {
+        printf("wrong ip address");
+        exit(1);
+      }
+    }
+    // skip the record data whatever its type, so records other than A
+    // (e.g. a CNAME in front of the address) do not derail the parse
+    len += rdlen;
+  }
+
+  // needed for DNS servers with EDNS support
+  for(int i = 0; i < ntohs(hdr->arcount); i++) {
+    // root name byte plus the fixed part of the record must be present
+    if(len + 1 + (int) sizeof(struct dns_data) > cc){
+      printf("invalid DNS reply\n");
+      exit(1);
+    }
+    char *qn = (char *) (ibuf+len);
+    if(*qn != 0) {
+      printf("invalid name for EDNS\n");
+      exit(1);
+    }
+    len += 1;
+
+    struct dns_data *d = (struct dns_data *) (ibuf+len);
+    len += sizeof(struct dns_data);
+    if(ntohs(d->type) != 41) {
+      printf("invalid type for EDNS\n");
+      exit(1);
+    }
+    len += ntohs(d->len);
+  }
+
+  if(len != cc) {
+    printf("Processed %d data bytes but received %d\n", len, cc);
+    exit(1);
+  }
+  if(!record) {
+    printf("Didn't receive an arecord\n");
+    exit(1);
+  }
+}
+
+// Sends one DNS query (built by dns_req()) from local port 10000 to port
+// 53 of 8.8.8.8 and validates the reply with dns_rep(). Any failure
+// prints a message and exits with status 1.
+static void
+dns()
+{
+  #define N 1000
+  uint8 obuf[N];
+  uint8 ibuf[N];
+  uint32 dst;
+  int fd;
+  int len;
+
+  memset(obuf, 0, N);
+  memset(ibuf, 0, N);
+
+  // 8.8.8.8: google's name server
+  dst = (8 << 24) | (8 << 16) | (8 << 8) | (8 << 0);
+
+  if((fd = connect(dst, 10000, 53)) < 0){
+    // report the failure under this test's own name, like the send and
+    // recv messages below
+    fprintf(2, "dns: connect() failed\n");
+    exit(1);
+  }
+
+  len = dns_req(obuf);
+
+  if(write(fd, obuf, len) < 0){
+    fprintf(2, "dns: send() failed\n");
+    exit(1);
+  }
+  int cc = read(fd, ibuf, sizeof(ibuf));
+  if(cc < 0){
+    fprintf(2, "dns: recv() failed\n");
+    exit(1);
+  }
+  dns_rep(ibuf, cc);
+
+  close(fd);
+}
+
+// Runs the network tests in order: one ping, 100 sequential pings, ten
+// concurrent pings from forked children (each with its own source port),
+// and a DNS lookup. Exits with status 0 if all of them pass and 1 as soon
+// as one fails.
+int
+main(int argc, char *argv[])
+{
+  int i, ret;
+  uint16 dport = NET_TESTS_PORT;
+
+  printf("nettests running on port %d\n", dport);
+
+  printf("testing ping: ");
+  ping(2000, dport, 1);
+  printf("OK\n");
+
+  printf("testing single-process pings: ");
+  for (i = 0; i < 100; i++)
+    ping(2000, dport, 1);
+  printf("OK\n");
+
+  printf("testing multi-process pings: ");
+  for (i = 0; i < 10; i++){
+    int pid = fork();
+    if (pid < 0){
+      // without this check the wait() loop below would run with fewer
+      // children than it expects
+      fprintf(2, "nettests: fork() failed\n");
+      exit(1);
+    }
+    if (pid == 0){
+      ping(2000 + i + 1, dport, 1);
+      exit(0);
+    }
+  }
+  for (i = 0; i < 10; i++){
+    wait(&ret);
+    if (ret != 0)
+      exit(1);
+  }
+  printf("OK\n");
+
+  printf("testing DNS\n");
+  dns();
+  printf("DNS OK\n");
+
+  printf("all tests passed.\n");
+  exit(0);
+}
diff --git a/user/pingpong.c b/user/pingpong.c
new file mode 100644
index 0000000..6ed12e7
--- /dev/null
+++ b/user/pingpong.c
@@ -0,0 +1,52 @@
+#include "kernel/types.h"
+#include "kernel/stat.h"
+#include "user/user.h"
+
+#define N 5
+char buf[N];
+
+// Child side of the exchange: reads a message from the parent_to_child
+// pipe, prints it, and answers with "pong" on the child_to_parent pipe.
+// Exits with status 1 if the read or the write fails.
+void
+pong(int *parent_to_child, int *child_to_parent) {
+  // read at most N-1 bytes so buf always has room for the terminating NUL
+  int n = read(parent_to_child[0], buf, N - 1);
+  if (n < 0) {
+    printf("read failed\n");
+    exit(1);
+  }
+  buf[n] = '\0';
+  printf("%d: received %s\n", getpid(), buf);
+  if (write(child_to_parent[1], "pong", 4) != 4) {
+    printf("write failed\n");
+    exit(1);
+  }
+}
+
+// Parent side of the exchange: sends "ping" on the parent_to_child pipe,
+// then reads the answer from the child_to_parent pipe and prints it.
+// Exits with status 1 if the write or the read fails.
+void
+ping(int *parent_to_child, int *child_to_parent) {
+  int n;
+
+  if (write(parent_to_child[1], "ping", 4) != 4) {
+    printf("write failed\n");
+    exit(1);
+  }
+  // read at most N-1 bytes so buf always has room for the terminating NUL
+  n = read(child_to_parent[0], buf, N - 1);
+  if (n < 0) {
+    printf("read failed\n");
+    exit(1);
+  }
+  buf[n] = '\0';
+  printf("%d: received %s\n", getpid(), buf);
+}
+
+// Creates one pipe per direction and forks: the child runs pong(), the
+// parent runs ping(). Exits with status 1 if the pipes or the child
+// cannot be created, and with status 0 otherwise.
+int
+main(int argc, char *argv[])
+{
+  int parent_to_child[2];
+  int child_to_parent[2];
+
+  int pid;
+
+  if (pipe(parent_to_child) < 0 || pipe(child_to_parent) < 0) {
+    printf("pipe failed\n");
+    exit(1);  // nothing useful can be done without both pipes
+  }
+  if ((pid = fork()) < 0) {
+    printf("fork failed\n");
+    exit(1);  // don't fall through and ping a child that doesn't exist
+  }
+  if (pid == 0) {
+    pong(parent_to_child, child_to_parent);
+  } else {
+    ping(parent_to_child, child_to_parent);
+  }
+
+  exit(0);
+}