diff options
author | Sanjit Bhat <[email protected]> | 2023-10-26 06:44:48 -0400 |
---|---|---|
committer | Sanjit Bhat <[email protected]> | 2023-10-26 06:44:48 -0400 |
commit | cfae93475dfb4cb5cfe264f4c029136e1447c262 (patch) | |
tree | 699903e093e3a23caf7ce3899e7c80e48511f900 | |
parent | 1ed40716eb54e371df9d1814b9129666b3fe4f09 (diff) | |
download | xv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.tar.gz xv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.tar.bz2 xv6-labs-cfae93475dfb4cb5cfe264f4c029136e1447c262.zip |
net add missing files
-rw-r--r-- | conf/lab.mk | 1 | ||||
-rwxr-xr-x | grade-lab-net | 43 | ||||
-rw-r--r-- | gradelib.py | 628 | ||||
-rw-r--r-- | kernel/e1000.c | 129 | ||||
-rw-r--r-- | kernel/e1000_dev.h | 125 | ||||
-rw-r--r-- | kernel/net.c | 374 | ||||
-rw-r--r-- | kernel/net.h | 173 | ||||
-rw-r--r-- | kernel/pci.c | 61 | ||||
-rw-r--r-- | kernel/sysnet.c | 185 | ||||
-rw-r--r-- | ping.py | 12 | ||||
-rw-r--r-- | server.py | 13 | ||||
-rw-r--r-- | user/nettests.c | 297 | ||||
-rw-r--r-- | user/pingpong.c | 52 |
13 files changed, 2093 insertions, 0 deletions
diff --git a/conf/lab.mk b/conf/lab.mk new file mode 100644 index 0000000..26dcd75 --- /dev/null +++ b/conf/lab.mk @@ -0,0 +1 @@ +LAB=net diff --git a/grade-lab-net b/grade-lab-net new file mode 100755 index 0000000..dd193e6 --- /dev/null +++ b/grade-lab-net @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import re +import subprocess +from gradelib import * + +r = Runner(save("xv6.out")) + +@test(0, "running nettests") +def test_nettest(): + server = subprocess.Popen(["make", "server"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + r.run_qemu(shell_script([ + 'nettests' + ]), timeout=30) + server.terminate() + server.communicate() + +@test(40, "nettest: ping", parent=test_nettest) +def test_nettest_(): + r.match('^testing ping: OK$') + +@test(20, "nettest: single process", parent=test_nettest) +def test_nettest_(): + r.match('^testing single-process pings: OK$') + +@test(20, "nettest: multi-process", parent=test_nettest) +def test_nettest_fork_test(): + r.match('^testing multi-process pings: OK$') + +@test(19, "nettest: DNS", parent=test_nettest) +def test_nettest_dns_test(): + r.match('^DNS OK$') + +#@test(10, "answers-net.txt") +#def test_answers(): +# # just a simple sanity check, will be graded manually +# check_answers("answers-net.txt") + +@test(1, "time") +def test_time(): + check_time() + +run_tests() diff --git a/gradelib.py b/gradelib.py new file mode 100644 index 0000000..f0d4934 --- /dev/null +++ b/gradelib.py @@ -0,0 +1,628 @@ +from __future__ import print_function + +import sys, os, re, time, socket, select, subprocess, errno, shutil, random, string, json +from subprocess import check_call, Popen +from optparse import OptionParser + +__all__ = [] + +################################################################## +# Test structure +# + +__all__ += ["test", "end_part", "run_tests", "get_current_test"] + +TESTS = [] +TOTAL = POSSIBLE = 0 +PART_TOTAL = PART_POSSIBLE = 0 +CURRENT_TEST = None +GRADES = {} + +def test(points, title=None, parent=None): + """Decorator for declaring test functions. If title is None, the + title of the test will be derived from the function name by + stripping the leading "test_" and replacing underscores with + spaces.""" + + def register_test(fn, title=title): + if not title: + assert fn.__name__.startswith("test_") + title = fn.__name__[5:].replace("_", " ") + if parent: + title = " " + title + + def run_test(): + global TOTAL, POSSIBLE, CURRENT_TEST, GRADES + + # Handle test dependencies + if run_test.complete: + return run_test.ok + run_test.complete = True + parent_failed = False + if parent: + parent_failed = not parent() + + # Run the test + fail = None + start = time.time() + CURRENT_TEST = run_test + sys.stdout.write("== Test %s == " % title) + if parent: + sys.stdout.write("\n") + sys.stdout.flush() + try: + if parent_failed: + raise AssertionError('Parent failed: %s' % parent.__name__) + fn() + except AssertionError as e: + fail = str(e) + + # Display and handle test result + POSSIBLE += points + if points: + print("%s: %s" % (title, \ + (color("red", "FAIL") if fail else color("green", "OK"))), end=' ') + if time.time() - start > 0.1: + print("(%.1fs)" % (time.time() - start), end=' ') + print() + if fail: + print(" %s" % fail.replace("\n", "\n ")) + else: + TOTAL += points + if points: + GRADES[title] = 0 if fail else points + + for callback in run_test.on_finish: + callback(fail) + CURRENT_TEST = None + + run_test.ok = not fail + return run_test.ok + + # Record test metadata on the test wrapper function + run_test.__name__ = fn.__name__ + run_test.title = title + run_test.complete = False + run_test.ok = False + run_test.on_finish = [] + TESTS.append(run_test) + return run_test + return register_test + +def end_part(name): + def show_part(): + global PART_TOTAL, PART_POSSIBLE + print("Part %s score: %d/%d" % \ + (name, TOTAL - PART_TOTAL, POSSIBLE - PART_POSSIBLE)) + print() + PART_TOTAL, PART_POSSIBLE = TOTAL, POSSIBLE + show_part.title = "" + TESTS.append(show_part) + +def write_results(): + global options + if not options.results: + return + try: + with open(options.results, "w") as f: + f.write(json.dumps(GRADES)) + except OSError as e: + print("Provided a bad results path. Error:", e) + +def run_tests(): + """Set up for testing and run the registered test functions.""" + + # Handle command line + global options + parser = OptionParser(usage="usage: %prog [-v] [filters...]") + parser.add_option("-v", "--verbose", action="store_true", + help="print commands") + parser.add_option("--color", choices=["never", "always", "auto"], + default="auto", help="never, always, or auto") + parser.add_option("--results", help="results file path") + (options, args) = parser.parse_args() + + # Start with a full build to catch build errors + make() + + # Clean the file system if there is one + reset_fs() + + # Run tests + limit = list(map(str.lower, args)) + try: + for test in TESTS: + if not limit or any(l in test.title.lower() for l in limit): + test() + if not limit: + write_results() + print("Score: %d/%d" % (TOTAL, POSSIBLE)) + except KeyboardInterrupt: + pass + if TOTAL < POSSIBLE: + sys.exit(1) + +def get_current_test(): + if not CURRENT_TEST: + raise RuntimeError("No test is running") + return CURRENT_TEST + +################################################################## +# Assertions +# + +__all__ += ["assert_equal", "assert_lines_match"] + +def assert_equal(got, expect, msg=""): + if got == expect: + return + if msg: + msg += "\n" + raise AssertionError("%sgot:\n %s\nexpected:\n %s" % + (msg, str(got).replace("\n", "\n "), + str(expect).replace("\n", "\n "))) + +def assert_lines_match(text, *regexps, **kw): + """Assert that all of regexps match some line in text. If a 'no' + keyword argument is given, it must be a list of regexps that must + *not* match any line in text.""" + + def assert_lines_match_kw(no=[]): + return no + no = assert_lines_match_kw(**kw) + + # Check text against regexps + lines = text.splitlines() + good = set() + bad = set() + for i, line in enumerate(lines): + if any(re.match(r, line) for r in regexps): + good.add(i) + regexps = [r for r in regexps if not re.match(r, line)] + if any(re.match(r, line) for r in no): + bad.add(i) + + if not regexps and not bad: + return + + # We failed; construct an informative failure message + show = set() + for lineno in good.union(bad): + for offset in range(-2, 3): + show.add(lineno + offset) + if regexps: + show.update(n for n in range(len(lines) - 5, len(lines))) + + msg = [] + last = -1 + for lineno in sorted(show): + if 0 <= lineno < len(lines): + if lineno != last + 1: + msg.append("...") + last = lineno + msg.append("%s %s" % (color("red", "BAD ") if lineno in bad else + color("green", "GOOD") if lineno in good + else " ", + lines[lineno])) + if last != len(lines) - 1: + msg.append("...") + if bad: + msg.append("unexpected lines in output") + for r in regexps: + msg.append(color("red", "MISSING") + " '%s'" % r) + raise AssertionError("\n".join(msg)) + +################################################################## +# Utilities +# + +__all__ += ["make", "maybe_unlink", "reset_fs", "color", "random_str", "check_time", "check_answers"] + +MAKE_TIMESTAMP = 0 + +def pre_make(): + """Delay prior to running make to ensure file mtimes change.""" + while int(time.time()) == MAKE_TIMESTAMP: + time.sleep(0.1) + +def post_make(): + """Record the time after make completes so that the next run of + make can be delayed if needed.""" + global MAKE_TIMESTAMP + MAKE_TIMESTAMP = int(time.time()) + +def make(*target): + pre_make() + if Popen(("make",) + target).wait(): + sys.exit(1) + post_make() + +def show_command(cmd): + from pipes import quote + print("\n$", " ".join(map(quote, cmd))) + +def maybe_unlink(*paths): + for path in paths: + try: + os.unlink(path) + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + +COLORS = {"default": "\033[0m", "red": "\033[31m", "green": "\033[32m"} + +def color(name, text): + if options.color == "always" or (options.color == "auto" and os.isatty(1)): + return COLORS[name] + text + COLORS["default"] + return text + +def reset_fs(): + if os.path.exists("obj/fs/clean-fs.img"): + shutil.copyfile("obj/fs/clean-fs.img", "obj/fs/fs.img") + +def random_str(n=8): + letters = string.ascii_letters + string.digits + return ''.join(random.choice(letters) for _ in range(n)) + +def check_time(): + try: + print("") + with open('time.txt') as f: + d = f.read().strip() + if not re.match(r'^\d+$', d): + raise AssertionError('time.txt does not contain a single integer (number of hours spent on the lab)') + except IOError: + raise AssertionError('Cannot read time.txt') + +def check_answers(file, n=10): + try: + print("") + with open(file) as f: + d = f.read().strip() + if len(d) < n: + raise AssertionError('%s does not seem to contain enough text' % file) + except IOError: + raise AssertionError('Cannot read %s' % file) + + +################################################################## +# Controllers +# + +__all__ += ["QEMU", "GDBClient"] + +class QEMU(object): + _GDBPORT = None + + def __init__(self, *make_args): + # Check that QEMU is not currently running + try: + GDBClient(self.get_gdb_port(), timeout=0).close() + except socket.error: + pass + else: + print("""\ +GDB stub found on port %d. +QEMU appears to already be running. Please exit it if possible or use +'killall qemu' or 'killall qemu.real'.""" % self.get_gdb_port(), file=sys.stderr) + sys.exit(1) + + if options.verbose: + show_command(("make",) + make_args) + cmd = ("make", "-s", "--no-print-directory") + make_args + self.proc = Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE) + # Accumulated output as a string + self.output = "" + # Accumulated output as a bytearray + self.outbytes = bytearray() + self.on_output = [] + + @staticmethod + def get_gdb_port(): + if QEMU._GDBPORT is None: + p = Popen(["make", "-s", "--no-print-directory", "print-gdbport"], + stdout=subprocess.PIPE) + (out, _) = p.communicate() + if p.returncode: + raise RuntimeError( + "Failed to get gdbport: make exited with %d" % + p.returncode) + QEMU._GDBPORT = int(out) + return QEMU._GDBPORT + + def fileno(self): + if self.proc: + return self.proc.stdout.fileno() + + def handle_read(self): + buf = os.read(self.proc.stdout.fileno(), 4096) + self.outbytes.extend(buf) + self.output = self.outbytes.decode("utf-8", "replace") + for callback in self.on_output: + callback(buf) + if buf == b"": + self.wait() + return + + def write(self, buf): + if isinstance(buf, str): + buf = buf.encode('utf-8') + self.proc.stdin.write(buf) + self.proc.stdin.flush() + + def wait(self): + if self.proc: + self.proc.wait() + self.proc = None + + def kill(self): + if self.proc: + self.proc.terminate() + +class GDBClient(object): + def __init__(self, port, timeout=15): + start = time.time() + while True: + self.sock = socket.socket() + try: + self.sock.settimeout(1) + self.sock.connect(("localhost", port)) + break + except socket.error: + if time.time() >= start + timeout: + raise + self.__buf = "" + + def fileno(self): + if self.sock: + return self.sock.fileno() + + def handle_read(self): + try: + data = self.sock.recv(4096).decode("ascii", "replace") + except socket.error: + data = "" + if data == "": + self.sock.close() + self.sock = None + return + self.__buf += data + + while True: + m = re.search(r"\$([^#]*)#[0-9a-zA-Z]{2}", self.__buf) + if not m: + break + pkt = m.group(1) + self.__buf = self.__buf[m.end():] + + if pkt.startswith("T05"): + # Breakpoint + raise TerminateTest + + def __send(self, cmd): + packet = "$%s#%02x" % (cmd, sum(map(ord, cmd)) % 256) + self.sock.sendall(packet.encode("ascii")) + + def __send_break(self): + self.sock.sendall(b"\x03") + + def close(self): + if self.sock: + self.sock.close() + self.sock = None + + def cont(self): + self.__send("c") + + def breakpoint(self, addr): + self.__send("Z1,%x,1" % addr) + + +################################################################## +# QEMU test runner +# + +__all__ += ["TerminateTest", "Runner"] + +class TerminateTest(Exception): + pass + +class Runner(): + def __init__(self, *default_monitors): + self.__default_monitors = default_monitors + + def run_qemu(self, *monitors, **kw): + """Run a QEMU-based test. monitors should functions that will + be called with this Runner instance once QEMU and GDB are + started. Typically, they should register callbacks that throw + TerminateTest when stop events occur. The target_base + argument gives the make target to run. The make_args argument + should be a list of additional arguments to pass to make. The + timeout argument bounds how long to run before returning.""" + + def run_qemu_kw(target_base="qemu", make_args=[], timeout=30): + return target_base, make_args, timeout + target_base, make_args, timeout = run_qemu_kw(**kw) + + # Start QEMU + pre_make() + self.qemu = QEMU(target_base + "-gdb", *make_args) + self.gdb = None + + try: + # Wait for QEMU to start or make to fail. This will set + # self.gdb if QEMU starts. + self.qemu.on_output = [self.__monitor_start] + self.__react([self.qemu], timeout=90) + self.qemu.on_output = [] + if self.gdb is None: + print("Failed to connect to QEMU; output:") + print(self.qemu.output) + sys.exit(1) + post_make() + + # QEMU and GDB are up + self.reactors = [self.qemu, self.gdb] + + # Start monitoring + for m in self.__default_monitors + monitors: + m(self) + + # Run and react + self.gdb.cont() + self.__react(self.reactors, timeout) + finally: + # Shutdown QEMU + try: + if self.gdb is None: + sys.exit(1) + self.qemu.kill() + self.__react(self.reactors, 5) + self.gdb.close() + self.qemu.wait() + except: + print("""\ +Failed to shutdown QEMU. You might need to 'killall qemu' or +'killall qemu.real'. +""") + raise + + def __monitor_start(self, output): + if b"\n" in output: + try: + self.gdb = GDBClient(self.qemu.get_gdb_port(), timeout=2) + raise TerminateTest + except socket.error: + pass + if not len(output): + raise TerminateTest + + def __react(self, reactors, timeout): + deadline = time.time() + timeout + try: + while True: + timeleft = deadline - time.time() + if timeleft < 0: + sys.stdout.write("Timeout! ") + sys.stdout.flush() + return + + rset = [r for r in reactors if r.fileno() is not None] + if not rset: + return + + rset, _, _ = select.select(rset, [], [], timeleft) + for reactor in rset: + reactor.handle_read() + except TerminateTest: + pass + + def user_test(self, binary, *monitors, **kw): + """Run a user test using the specified binary. Monitors and + keyword arguments are as for run_qemu. This runs on a disk + snapshot unless the keyword argument 'snapshot' is False.""" + + maybe_unlink("obj/kern/init.o", "obj/kern/kernel") + if kw.pop("snapshot", True): + kw.setdefault("make_args", []).append("QEMUEXTRA+=-snapshot") + self.run_qemu(target_base="run-%s" % binary, *monitors, **kw) + + def match(self, *args, **kwargs): + """Shortcut to call assert_lines_match on the most recent QEMU + output.""" + + assert_lines_match(self.qemu.output, *args, **kwargs) + +################################################################## +# Monitors +# + +__all__ += ["save", "stop_breakpoint", "call_on_line", "stop_on_line", "shell_script"] + +def save(path): + """Return a monitor that writes QEMU's output to path. If the + test fails, copy the output to path.test-name.""" + + def setup_save(runner): + f.seek(0) + f.truncate() + runner.qemu.on_output.append(f.write) + get_current_test().on_finish.append(save_on_finish) + + def save_on_finish(fail): + f.flush() + save_path = path + "." + get_current_test().__name__[5:] + if fail: + shutil.copyfile(path, save_path) + print(" QEMU output saved to %s" % save_path) + elif os.path.exists(save_path): + os.unlink(save_path) + print(" (Old %s failure log removed)" % save_path) + + f = open(path, "wb") + return setup_save + +def stop_breakpoint(addr): + """Returns a monitor that stops when addr is reached. addr may be + a number or the name of a symbol.""" + + def setup_breakpoint(runner): + if isinstance(addr, str): + addrs = [int(sym[:16], 16) for sym in open("kernel/kernel.sym") + if sym[17:].strip() == addr] + assert len(addrs), "Symbol %s not found" % addr + runner.gdb.breakpoint(addrs[0]) + else: + runner.gdb.breakpoint(addr) + return setup_breakpoint + +def call_on_line(regexp, callback): + """Returns a monitor that calls 'callback' when QEMU prints a line + matching 'regexp'.""" + + def setup_call_on_line(runner): + buf = bytearray() + def handle_output(output): + buf.extend(output) + while b"\n" in buf: + line, buf[:] = buf.split(b"\n", 1) + line = line.decode("utf-8", "replace") + if re.match(regexp, line): + callback(line) + runner.qemu.on_output.append(handle_output) + return setup_call_on_line + +def stop_on_line(regexp): + """Returns a monitor that stops when QEMU prints a line matching + 'regexp'.""" + + def stop(line): + raise TerminateTest + return call_on_line(regexp, stop) + +def shell_script(script, terminate_match=None): + """Returns a monitor that plays the script, and stops when the script is + done executing.""" + + def setup_call_on_line(runner): + class context: + n = 0 + buf = bytearray() + def handle_output(output): + context.buf.extend(output) + if terminate_match is not None: + if re.match(terminate_match, context.buf.decode('utf-8', 'replace')): + raise TerminateTest + if b'$ ' in context.buf: + context.buf = bytearray() + if context.n < len(script): + runner.qemu.write(script[context.n]) + runner.qemu.write('\n') + context.n += 1 + else: + if terminate_match is None: + raise TerminateTest + runner.qemu.on_output.append(handle_output) + return setup_call_on_line diff --git a/kernel/e1000.c b/kernel/e1000.c new file mode 100644 index 0000000..70a2adf --- /dev/null +++ b/kernel/e1000.c @@ -0,0 +1,129 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "e1000_dev.h" +#include "net.h" + +#define TX_RING_SIZE 16 +static struct tx_desc tx_ring[TX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *tx_mbufs[TX_RING_SIZE]; + +#define RX_RING_SIZE 16 +static struct rx_desc rx_ring[RX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *rx_mbufs[RX_RING_SIZE]; + +// remember where the e1000's registers live. +static volatile uint32 *regs; + +struct spinlock e1000_lock; + +// called by pci_init(). +// xregs is the memory address at which the +// e1000's registers are mapped. +void +e1000_init(uint32 *xregs) +{ + int i; + + initlock(&e1000_lock, "e1000"); + + regs = xregs; + + // Reset the device + regs[E1000_IMS] = 0; // disable interrupts + regs[E1000_CTL] |= E1000_CTL_RST; + regs[E1000_IMS] = 0; // redisable interrupts + __sync_synchronize(); + + // [E1000 14.5] Transmit initialization + memset(tx_ring, 0, sizeof(tx_ring)); + for (i = 0; i < TX_RING_SIZE; i++) { + tx_ring[i].status = E1000_TXD_STAT_DD; + tx_mbufs[i] = 0; + } + regs[E1000_TDBAL] = (uint64) tx_ring; + if(sizeof(tx_ring) % 128 != 0) + panic("e1000"); + regs[E1000_TDLEN] = sizeof(tx_ring); + regs[E1000_TDH] = regs[E1000_TDT] = 0; + + // [E1000 14.4] Receive initialization + memset(rx_ring, 0, sizeof(rx_ring)); + for (i = 0; i < RX_RING_SIZE; i++) { + rx_mbufs[i] = mbufalloc(0); + if (!rx_mbufs[i]) + panic("e1000"); + rx_ring[i].addr = (uint64) rx_mbufs[i]->head; + } + regs[E1000_RDBAL] = (uint64) rx_ring; + if(sizeof(rx_ring) % 128 != 0) + panic("e1000"); + regs[E1000_RDH] = 0; + regs[E1000_RDT] = RX_RING_SIZE - 1; + regs[E1000_RDLEN] = sizeof(rx_ring); + + // filter by qemu's MAC address, 52:54:00:12:34:56 + regs[E1000_RA] = 0x12005452; + regs[E1000_RA+1] = 0x5634 | (1<<31); + // multicast table + for (int i = 0; i < 4096/32; i++) + regs[E1000_MTA + i] = 0; + + // transmitter control bits. + regs[E1000_TCTL] = E1000_TCTL_EN | // enable + E1000_TCTL_PSP | // pad short packets + (0x10 << E1000_TCTL_CT_SHIFT) | // collision stuff + (0x40 << E1000_TCTL_COLD_SHIFT); + regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap + + // receiver control bits. + regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver + E1000_RCTL_BAM | // enable broadcast + E1000_RCTL_SZ_2048 | // 2048-byte rx buffers + E1000_RCTL_SECRC; // strip CRC + + // ask e1000 for receive interrupts. + regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer) + regs[E1000_RADV] = 0; // interrupt after every packet (no timer) + regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back +} + +int +e1000_transmit(struct mbuf *m) +{ + // + // Your code here. + // + // the mbuf contains an ethernet frame; program it into + // the TX descriptor ring so that the e1000 sends it. Stash + // a pointer so that it can be freed after sending. + // + + return 0; +} + +static void +e1000_recv(void) +{ + // + // Your code here. + // + // Check for packets that have arrived from the e1000 + // Create and deliver an mbuf for each packet (using net_rx()). + // +} + +void +e1000_intr(void) +{ + // tell the e1000 we've seen this interrupt; + // without this the e1000 won't raise any + // further interrupts. + regs[E1000_ICR] = 0xffffffff; + + e1000_recv(); +} diff --git a/kernel/e1000_dev.h b/kernel/e1000_dev.h new file mode 100644 index 0000000..9b462df --- /dev/null +++ b/kernel/e1000_dev.h @@ -0,0 +1,125 @@ +// +// E1000 hardware definitions: registers and DMA ring format. +// from the Intel 82540EP/EM &c manual. +// + +/* Registers */ +#define E1000_CTL (0x00000/4) /* Device Control Register - RW */ +#define E1000_ICR (0x000C0/4) /* Interrupt Cause Read - R */ +#define E1000_IMS (0x000D0/4) /* Interrupt Mask Set - RW */ +#define E1000_RCTL (0x00100/4) /* RX Control - RW */ +#define E1000_TCTL (0x00400/4) /* TX Control - RW */ +#define E1000_TIPG (0x00410/4) /* TX Inter-packet gap -RW */ +#define E1000_RDBAL (0x02800/4) /* RX Descriptor Base Address Low - RW */ +#define E1000_RDTR (0x02820/4) /* RX Delay Timer */ +#define E1000_RADV (0x0282C/4) /* RX Interrupt Absolute Delay Timer */ +#define E1000_RDH (0x02810/4) /* RX Descriptor Head - RW */ +#define E1000_RDT (0x02818/4) /* RX Descriptor Tail - RW */ +#define E1000_RDLEN (0x02808/4) /* RX Descriptor Length - RW */ +#define E1000_RSRPD (0x02C00/4) /* RX Small Packet Detect Interrupt */ +#define E1000_TDBAL (0x03800/4) /* TX Descriptor Base Address Low - RW */ +#define E1000_TDLEN (0x03808/4) /* TX Descriptor Length - RW */ +#define E1000_TDH (0x03810/4) /* TX Descriptor Head - RW */ +#define E1000_TDT (0x03818/4) /* TX Descripotr Tail - RW */ +#define E1000_MTA (0x05200/4) /* Multicast Table Array - RW Array */ +#define E1000_RA (0x05400/4) /* Receive Address - RW Array */ + +/* Device Control */ +#define E1000_CTL_SLU 0x00000040 /* set link up */ +#define E1000_CTL_FRCSPD 0x00000800 /* force speed */ +#define E1000_CTL_FRCDPLX 0x00001000 /* force duplex */ +#define E1000_CTL_RST 0x00400000 /* full reset */ + +/* Transmit Control */ +#define E1000_TCTL_RST 0x00000001 /* software reset */ +#define E1000_TCTL_EN 0x00000002 /* enable tx */ +#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_CT_SHIFT 4 +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_COLD_SHIFT 12 +#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ +#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ +#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ +#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ +#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ +#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ +#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ +#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ +#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ +#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ + +#define DATA_MAX 1518 + +/* Transmit Descriptor command definitions [E1000 3.3.3.1] */ +#define E1000_TXD_CMD_EOP 0x01 /* End of Packet */ +#define E1000_TXD_CMD_RS 0x08 /* Report Status */ + +/* Transmit Descriptor status definitions [E1000 3.3.3.2] */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ + +// [E1000 3.3.3] +struct tx_desc +{ + uint64 addr; + uint16 length; + uint8 cso; + uint8 cmd; + uint8 status; + uint8 css; + uint16 special; +}; + +/* Receive Descriptor bit definitions [E1000 3.2.3.1] */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ + +// [E1000 3.2.3] +struct rx_desc +{ + uint64 addr; /* Address of the descriptor's data buffer */ + uint16 length; /* Length of data DMAed into data buffer */ + uint16 csum; /* Packet checksum */ + uint8 status; /* Descriptor status */ + uint8 errors; /* Descriptor Errors */ + uint16 special; +}; + diff --git a/kernel/net.c b/kernel/net.c new file mode 100644 index 0000000..137ea2b --- /dev/null +++ b/kernel/net.c @@ -0,0 +1,374 @@ +// +// networking protocol support (IP, UDP, ARP, etc.). +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "net.h" +#include "defs.h" + +static uint32 local_ip = MAKE_IP_ADDR(10, 0, 2, 15); // qemu's idea of the guest IP +static uint8 local_mac[ETHADDR_LEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 }; +static uint8 broadcast_mac[ETHADDR_LEN] = { 0xFF, 0XFF, 0XFF, 0XFF, 0XFF, 0XFF }; + +// Strips data from the start of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. +char * +mbufpull(struct mbuf *m, unsigned int len) +{ + char *tmp = m->head; + if (m->len < len) + return 0; + m->len -= len; + m->head += len; + return tmp; +} + +// Prepends data to the beginning of the buffer and returns a pointer to it. +char * +mbufpush(struct mbuf *m, unsigned int len) +{ + m->head -= len; + if (m->head < m->buf) + panic("mbufpush"); + m->len += len; + return m->head; +} + +// Appends data to the end of the buffer and returns a pointer to it. +char * +mbufput(struct mbuf *m, unsigned int len) +{ + char *tmp = m->head + m->len; + m->len += len; + if (m->len > MBUF_SIZE) + panic("mbufput"); + return tmp; +} + +// Strips data from the end of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. +char * +mbuftrim(struct mbuf *m, unsigned int len) +{ + if (len > m->len) + return 0; + m->len -= len; + return m->head + m->len; +} + +// Allocates a packet buffer. +struct mbuf * +mbufalloc(unsigned int headroom) +{ + struct mbuf *m; + + if (headroom > MBUF_SIZE) + return 0; + m = kalloc(); + if (m == 0) + return 0; + m->next = 0; + m->head = (char *)m->buf + headroom; + m->len = 0; + memset(m->buf, 0, sizeof(m->buf)); + return m; +} + +// Frees a packet buffer. +void +mbuffree(struct mbuf *m) +{ + kfree(m); +} + +// Pushes an mbuf to the end of the queue. +void +mbufq_pushtail(struct mbufq *q, struct mbuf *m) +{ + m->next = 0; + if (!q->head){ + q->head = q->tail = m; + return; + } + q->tail->next = m; + q->tail = m; +} + +// Pops an mbuf from the start of the queue. +struct mbuf * +mbufq_pophead(struct mbufq *q) +{ + struct mbuf *head = q->head; + if (!head) + return 0; + q->head = head->next; + return head; +} + +// Returns one (nonzero) if the queue is empty. +int +mbufq_empty(struct mbufq *q) +{ + return q->head == 0; +} + +// Intializes a queue of mbufs. +void +mbufq_init(struct mbufq *q) +{ + q->head = 0; +} + +// This code is lifted from FreeBSD's ping.c, and is copyright by the Regents +// of the University of California. +static unsigned short +in_cksum(const unsigned char *addr, int len) +{ + int nleft = len; + const unsigned short *w = (const unsigned short *)addr; + unsigned int sum = 0; + unsigned short answer = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), we add + * sequential 16 bit words to it, and at the end, fold back all the + * carry bits from the top 16 bits into the lower 16 bits. + */ + while (nleft > 1) { + sum += *w++; + nleft -= 2; + } + + /* mop up an odd byte, if necessary */ + if (nleft == 1) { + *(unsigned char *)(&answer) = *(const unsigned char *)w; + sum += answer; + } + + /* add back carry outs from top 16 bits to low 16 bits */ + sum = (sum & 0xffff) + (sum >> 16); + sum += (sum >> 16); + /* guaranteed now that the lower 16 bits of sum are correct */ + + answer = ~sum; /* truncate to 16 bits */ + return answer; +} + +// sends an ethernet packet +static void +net_tx_eth(struct mbuf *m, uint16 ethtype) +{ + struct eth *ethhdr; + + ethhdr = mbufpushhdr(m, *ethhdr); + memmove(ethhdr->shost, local_mac, ETHADDR_LEN); + // In a real networking stack, dhost would be set to the address discovered + // through ARP. Because we don't support enough of the ARP protocol, set it + // to broadcast instead. + memmove(ethhdr->dhost, broadcast_mac, ETHADDR_LEN); + ethhdr->type = htons(ethtype); + if (e1000_transmit(m)) { + mbuffree(m); + } +} + +// sends an IP packet +static void +net_tx_ip(struct mbuf *m, uint8 proto, uint32 dip) +{ + struct ip *iphdr; + + // push the IP header + iphdr = mbufpushhdr(m, *iphdr); + memset(iphdr, 0, sizeof(*iphdr)); + iphdr->ip_vhl = (4 << 4) | (20 >> 2); + iphdr->ip_p = proto; + iphdr->ip_src = htonl(local_ip); + iphdr->ip_dst = htonl(dip); + iphdr->ip_len = htons(m->len); + iphdr->ip_ttl = 100; + iphdr->ip_sum = in_cksum((unsigned char *)iphdr, sizeof(*iphdr)); + + // now on to the ethernet layer + net_tx_eth(m, ETHTYPE_IP); +} + +// sends a UDP packet +void +net_tx_udp(struct mbuf *m, uint32 dip, + uint16 sport, uint16 dport) +{ + struct udp *udphdr; + + // put the UDP header + udphdr = mbufpushhdr(m, *udphdr); + udphdr->sport = htons(sport); + udphdr->dport = htons(dport); + udphdr->ulen = htons(m->len); + udphdr->sum = 0; // zero means no checksum is provided + + // now on to the IP layer + net_tx_ip(m, IPPROTO_UDP, dip); +} + +// sends an ARP packet +static int +net_tx_arp(uint16 op, uint8 dmac[ETHADDR_LEN], uint32 dip) +{ + struct mbuf *m; + struct arp *arphdr; + + m = mbufalloc(MBUF_DEFAULT_HEADROOM); + if (!m) + return -1; + + // generic part of ARP header + arphdr = mbufputhdr(m, *arphdr); + arphdr->hrd = htons(ARP_HRD_ETHER); + arphdr->pro = htons(ETHTYPE_IP); + arphdr->hln = ETHADDR_LEN; + arphdr->pln = sizeof(uint32); + arphdr->op = htons(op); + + // ethernet + IP part of ARP header + memmove(arphdr->sha, local_mac, ETHADDR_LEN); + arphdr->sip = htonl(local_ip); + memmove(arphdr->tha, dmac, ETHADDR_LEN); + arphdr->tip = htonl(dip); + + // header is ready, send the packet + net_tx_eth(m, ETHTYPE_ARP); + return 0; +} + +// receives an ARP packet +static void +net_rx_arp(struct mbuf *m) +{ + struct arp *arphdr; + uint8 smac[ETHADDR_LEN]; + uint32 sip, tip; + + arphdr = mbufpullhdr(m, *arphdr); + if (!arphdr) + goto done; + + // validate the ARP header + if (ntohs(arphdr->hrd) != ARP_HRD_ETHER || + ntohs(arphdr->pro) != ETHTYPE_IP || + arphdr->hln != ETHADDR_LEN || + arphdr->pln != sizeof(uint32)) { + goto done; + } + + // only requests are supported so far + // check if our IP was solicited + tip = ntohl(arphdr->tip); // target IP address + if (ntohs(arphdr->op) != ARP_OP_REQUEST || tip != local_ip) + goto done; + + // handle the ARP request + memmove(smac, arphdr->sha, ETHADDR_LEN); // sender's ethernet address + sip = ntohl(arphdr->sip); // sender's IP address (qemu's slirp) + net_tx_arp(ARP_OP_REPLY, smac, sip); + +done: + mbuffree(m); +} + +// receives a UDP packet +static void +net_rx_udp(struct mbuf *m, uint16 len, struct ip *iphdr) +{ + struct udp *udphdr; + uint32 sip; + uint16 sport, dport; + + + udphdr = mbufpullhdr(m, *udphdr); + if (!udphdr) + goto fail; + + // TODO: validate UDP checksum + + // validate lengths reported in headers + if (ntohs(udphdr->ulen) != len) + goto fail; + len -= sizeof(*udphdr); + if (len > m->len) + goto fail; + // minimum packet size could be larger than the payload + mbuftrim(m, m->len - len); + + // parse the necessary fields + sip = ntohl(iphdr->ip_src); + sport = ntohs(udphdr->sport); + dport = ntohs(udphdr->dport); + sockrecvudp(m, sip, dport, sport); + return; + +fail: + mbuffree(m); +} + +// receives an IP packet +static void +net_rx_ip(struct mbuf *m) +{ + struct ip *iphdr; + uint16 len; + + iphdr = mbufpullhdr(m, *iphdr); + if (!iphdr) + goto fail; + + // check IP version and header len + if (iphdr->ip_vhl != ((4 << 4) | (20 >> 2))) + goto fail; + // validate IP checksum + if (in_cksum((unsigned char *)iphdr, sizeof(*iphdr))) + goto fail; + // can't support fragmented IP packets + if (htons(iphdr->ip_off) != 0) + goto fail; + // is the packet addressed to us? + if (htonl(iphdr->ip_dst) != local_ip) + goto fail; + // can only support UDP + if (iphdr->ip_p != IPPROTO_UDP) + goto fail; + + len = ntohs(iphdr->ip_len) - sizeof(*iphdr); + net_rx_udp(m, len, iphdr); + return; + +fail: + mbuffree(m); +} + +// called by e1000 driver's interrupt handler to deliver a packet to the +// networking stack +void net_rx(struct mbuf *m) +{ + struct eth *ethhdr; + uint16 type; + + ethhdr = mbufpullhdr(m, *ethhdr); + if (!ethhdr) { + mbuffree(m); + return; + } + + type = ntohs(ethhdr->type); + if (type == ETHTYPE_IP) + net_rx_ip(m); + else if (type == ETHTYPE_ARP) + net_rx_arp(m); + else + mbuffree(m); +} diff --git a/kernel/net.h b/kernel/net.h new file mode 100644 index 0000000..9e6fefe --- /dev/null +++ b/kernel/net.h @@ -0,0 +1,173 @@ +// +// packet buffer management +// + +#define MBUF_SIZE 2048 +#define MBUF_DEFAULT_HEADROOM 128 + +struct mbuf { + struct mbuf *next; // the next mbuf in the chain + char *head; // the current start position of the buffer + unsigned int len; // the length of the buffer + char buf[MBUF_SIZE]; // the backing store +}; + +char *mbufpull(struct mbuf *m, unsigned int len); +char *mbufpush(struct mbuf *m, unsigned int len); +char *mbufput(struct mbuf *m, unsigned int len); +char *mbuftrim(struct mbuf *m, unsigned int len); + +// The above functions manipulate the size and position of the buffer: +// <- push <- trim +// -> pull -> put +// [-headroom-][------buffer------][-tailroom-] +// |----------------MBUF_SIZE-----------------| +// +// These marcos automatically typecast and determine the size of header structs. +// In most situations you should use these instead of the raw ops above. +#define mbufpullhdr(mbuf, hdr) (typeof(hdr)*)mbufpull(mbuf, sizeof(hdr)) +#define mbufpushhdr(mbuf, hdr) (typeof(hdr)*)mbufpush(mbuf, sizeof(hdr)) +#define mbufputhdr(mbuf, hdr) (typeof(hdr)*)mbufput(mbuf, sizeof(hdr)) +#define mbuftrimhdr(mbuf, hdr) (typeof(hdr)*)mbuftrim(mbuf, sizeof(hdr)) + +struct mbuf *mbufalloc(unsigned int headroom); +void mbuffree(struct mbuf *m); + +struct mbufq { + struct mbuf *head; // the first element in the queue + struct mbuf *tail; // the last element in the queue +}; + +void mbufq_pushtail(struct mbufq *q, struct mbuf *m); +struct mbuf *mbufq_pophead(struct mbufq *q); +int mbufq_empty(struct mbufq *q); +void mbufq_init(struct mbufq *q); + + +// +// endianness support +// + +static inline uint16 bswaps(uint16 val) +{ + return (((val & 0x00ffU) << 8) | + ((val & 0xff00U) >> 8)); +} + +static inline uint32 bswapl(uint32 val) +{ + return (((val & 0x000000ffUL) << 24) | + ((val & 0x0000ff00UL) << 8) | + ((val & 0x00ff0000UL) >> 8) | + ((val & 0xff000000UL) >> 24)); +} + +// Use these macros to convert network bytes to the native byte order. +// Note that Risc-V uses little endian while network order is big endian. +#define ntohs bswaps +#define ntohl bswapl +#define htons bswaps +#define htonl bswapl + + +// +// useful networking headers +// + +#define ETHADDR_LEN 6 + +// an Ethernet packet header (start of the packet). +struct eth { + uint8 dhost[ETHADDR_LEN]; + uint8 shost[ETHADDR_LEN]; + uint16 type; +} __attribute__((packed)); + +#define ETHTYPE_IP 0x0800 // Internet protocol +#define ETHTYPE_ARP 0x0806 // Address resolution protocol + +// an IP packet header (comes after an Ethernet header). +struct ip { + uint8 ip_vhl; // version << 4 | header length >> 2 + uint8 ip_tos; // type of service + uint16 ip_len; // total length + uint16 ip_id; // identification + uint16 ip_off; // fragment offset field + uint8 ip_ttl; // time to live + uint8 ip_p; // protocol + uint16 ip_sum; // checksum + uint32 ip_src, ip_dst; +}; + +#define IPPROTO_ICMP 1 // Control message protocol +#define IPPROTO_TCP 6 // Transmission control protocol +#define IPPROTO_UDP 17 // User datagram protocol + +#define MAKE_IP_ADDR(a, b, c, d) \ + (((uint32)a << 24) | ((uint32)b << 16) | \ + ((uint32)c << 8) | (uint32)d) + +// a UDP packet header (comes after an IP header). +struct udp { + uint16 sport; // source port + uint16 dport; // destination port + uint16 ulen; // length, including udp header, not including IP header + uint16 sum; // checksum +}; + +// an ARP packet (comes after an Ethernet header). +struct arp { + uint16 hrd; // format of hardware address + uint16 pro; // format of protocol address + uint8 hln; // length of hardware address + uint8 pln; // length of protocol address + uint16 op; // operation + + char sha[ETHADDR_LEN]; // sender hardware address + uint32 sip; // sender IP address + char tha[ETHADDR_LEN]; // target hardware address + uint32 tip; // target IP address +} __attribute__((packed)); + +#define ARP_HRD_ETHER 1 // Ethernet + +enum { + ARP_OP_REQUEST = 1, // requests hw addr given protocol addr + ARP_OP_REPLY = 2, // replies a hw addr given protocol addr +}; + +// an DNS packet (comes after an UDP header). +struct dns { + uint16 id; // request ID + + uint8 rd: 1; // recursion desired + uint8 tc: 1; // truncated + uint8 aa: 1; // authoritive + uint8 opcode: 4; + uint8 qr: 1; // query/response + uint8 rcode: 4; // response code + uint8 cd: 1; // checking disabled + uint8 ad: 1; // authenticated data + uint8 z: 1; + uint8 ra: 1; // recursion available + + uint16 qdcount; // number of question entries + uint16 ancount; // number of resource records in answer section + uint16 nscount; // number of NS resource records in authority section + uint16 arcount; // number of resource records in additional records +} __attribute__((packed)); + +struct dns_question { + uint16 qtype; + uint16 qclass; +} __attribute__((packed)); + +#define ARECORD (0x0001) +#define QCLASS (0x0001) + +struct dns_data { + uint16 type; + uint16 class; + uint32 ttl; + uint16 len; +} __attribute__((packed)); diff --git a/kernel/pci.c b/kernel/pci.c new file mode 100644 index 0000000..5cd2102 --- /dev/null +++ b/kernel/pci.c @@ -0,0 +1,61 @@ +// +// simple PCI-Express initialization, only +// works for qemu and its e1000 card. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +void +pci_init() +{ + // we'll place the e1000 registers at this address. + // vm.c maps this range. + uint64 e1000_regs = 0x40000000L; + + // qemu -machine virt puts PCIe config space here. + // vm.c maps this range. + uint32 *ecam = (uint32 *) 0x30000000L; + + // look at each possible PCI device on bus 0. + for(int dev = 0; dev < 32; dev++){ + int bus = 0; + int func = 0; + int offset = 0; + uint32 off = (bus << 16) | (dev << 11) | (func << 8) | (offset); + volatile uint32 *base = ecam + off; + uint32 id = base[0]; + + // 100e:8086 is an e1000 + if(id == 0x100e8086){ + // command and status register. + // bit 0 : I/O access enable + // bit 1 : memory access enable + // bit 2 : enable mastering + base[1] = 7; + __sync_synchronize(); + + for(int i = 0; i < 6; i++){ + uint32 old = base[4+i]; + + // writing all 1's to the BAR causes it to be + // replaced with its size. + base[4+i] = 0xffffffff; + __sync_synchronize(); + + base[4+i] = old; + } + + // tell the e1000 to reveal its registers at + // physical address 0x40000000. + base[4+0] = e1000_regs; + + e1000_init((uint32*)e1000_regs); + } + } +} diff --git a/kernel/sysnet.c b/kernel/sysnet.c new file mode 100644 index 0000000..1c48cb3 --- /dev/null +++ b/kernel/sysnet.c @@ -0,0 +1,185 @@ +// +// network system calls. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "fs.h" +#include "sleeplock.h" +#include "file.h" +#include "net.h" + +struct sock { + struct sock *next; // the next socket in the list + uint32 raddr; // the remote IPv4 address + uint16 lport; // the local UDP port number + uint16 rport; // the remote UDP port number + struct spinlock lock; // protects the rxq + struct mbufq rxq; // a queue of packets waiting to be received +}; + +static struct spinlock lock; +static struct sock *sockets; + +void +sockinit(void) +{ + initlock(&lock, "socktbl"); +} + +int +sockalloc(struct file **f, uint32 raddr, uint16 lport, uint16 rport) +{ + struct sock *si, *pos; + + si = 0; + *f = 0; + if ((*f = filealloc()) == 0) + goto bad; + if ((si = (struct sock*)kalloc()) == 0) + goto bad; + + // initialize objects + si->raddr = raddr; + si->lport = lport; + si->rport = rport; + initlock(&si->lock, "sock"); + mbufq_init(&si->rxq); + (*f)->type = FD_SOCK; + (*f)->readable = 1; + (*f)->writable = 1; + (*f)->sock = si; + + // add to list of sockets + acquire(&lock); + pos = sockets; + while (pos) { + if (pos->raddr == raddr && + pos->lport == lport && + pos->rport == rport) { + release(&lock); + goto bad; + } + pos = pos->next; + } + si->next = sockets; + sockets = si; + release(&lock); + return 0; + +bad: + if (si) + kfree((char*)si); + if (*f) + fileclose(*f); + return -1; +} + +void +sockclose(struct sock *si) +{ + struct sock **pos; + struct mbuf *m; + + // remove from list of sockets + acquire(&lock); + pos = &sockets; + while (*pos) { + if (*pos == si){ + *pos = si->next; + break; + } + pos = &(*pos)->next; + } + release(&lock); + + // free any pending mbufs + while (!mbufq_empty(&si->rxq)) { + m = mbufq_pophead(&si->rxq); + mbuffree(m); + } + + kfree((char*)si); +} + +int +sockread(struct sock *si, uint64 addr, int n) +{ + struct proc *pr = myproc(); + struct mbuf *m; + int len; + + acquire(&si->lock); + while (mbufq_empty(&si->rxq) && !pr->killed) { + sleep(&si->rxq, &si->lock); + } + if (pr->killed) { + release(&si->lock); + return -1; + } + m = mbufq_pophead(&si->rxq); + release(&si->lock); + + len = m->len; + if (len > n) + len = n; + if (copyout(pr->pagetable, addr, m->head, len) == -1) { + mbuffree(m); + return -1; + } + mbuffree(m); + return len; +} + +int +sockwrite(struct sock *si, uint64 addr, int n) +{ + struct proc *pr = myproc(); + struct mbuf *m; + + m = mbufalloc(MBUF_DEFAULT_HEADROOM); + if (!m) + return -1; + + if (copyin(pr->pagetable, mbufput(m, n), addr, n) == -1) { + mbuffree(m); + return -1; + } + net_tx_udp(m, si->raddr, si->lport, si->rport); + return n; +} + +// called by protocol handler layer to deliver UDP packets +void +sockrecvudp(struct mbuf *m, uint32 raddr, uint16 lport, uint16 rport) +{ + // + // Find the socket that handles this mbuf and deliver it, waking + // any sleeping reader. Free the mbuf if there are no sockets + // registered to handle it. + // + struct sock *si; + + acquire(&lock); + si = sockets; + while (si) { + if (si->raddr == raddr && si->lport == lport && si->rport == rport) + goto found; + si = si->next; + } + release(&lock); + mbuffree(m); + return; + +found: + acquire(&si->lock); + mbufq_pushtail(&si->rxq, m); + wakeup(&si->rxq); + release(&si->lock); + release(&lock); +} @@ -0,0 +1,12 @@ +import socket +import sys +import time + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +addr = ('localhost', int(sys.argv[1])) +buf = "this is a ping!".encode('utf-8') + +while True: + print("pinging...", file=sys.stderr) + sock.sendto(buf, ("127.0.0.1", int(sys.argv[1]))) + time.sleep(1) diff --git a/server.py b/server.py new file mode 100644 index 0000000..2421c31 --- /dev/null +++ b/server.py @@ -0,0 +1,13 @@ +import socket +import sys + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +addr = ('localhost', int(sys.argv[1])) +print('listening on %s port %s' % addr, file=sys.stderr) +sock.bind(addr) + +while True: + buf, raddr = sock.recvfrom(4096) + print(buf.decode("utf-8"), file=sys.stderr) + if buf: + sent = sock.sendto(b'this is the host!', raddr) diff --git a/user/nettests.c b/user/nettests.c new file mode 100644 index 0000000..2f7d6cd --- /dev/null +++ b/user/nettests.c @@ -0,0 +1,297 @@ +#include "kernel/types.h" +#include "kernel/net.h" +#include "kernel/stat.h" +#include "user/user.h" + +// +// send a UDP packet to the localhost (outside of qemu), +// and receive a response. +// +static void +ping(uint16 sport, uint16 dport, int attempts) +{ + int fd; + char *obuf = "a message from xv6!"; + uint32 dst; + + // 10.0.2.2, which qemu remaps to the external host, + // i.e. the machine you're running qemu on. + dst = (10 << 24) | (0 << 16) | (2 << 8) | (2 << 0); + + // you can send a UDP packet to any Internet address + // by using a different dst. + + if((fd = connect(dst, sport, dport)) < 0){ + fprintf(2, "ping: connect() failed\n"); + exit(1); + } + + for(int i = 0; i < attempts; i++) { + if(write(fd, obuf, strlen(obuf)) < 0){ + fprintf(2, "ping: send() failed\n"); + exit(1); + } + } + + char ibuf[128]; + int cc = read(fd, ibuf, sizeof(ibuf)-1); + if(cc < 0){ + fprintf(2, "ping: recv() failed\n"); + exit(1); + } + + close(fd); + ibuf[cc] = '\0'; + if(strcmp(ibuf, "this is the host!") != 0){ + fprintf(2, "ping didn't receive correct payload\n"); + exit(1); + } +} + +// Encode a DNS name +static void +encode_qname(char *qn, char *host) +{ + char *l = host; + + for(char *c = host; c < host+strlen(host)+1; c++) { + if(*c == '.') { + *qn++ = (char) (c-l); + for(char *d = l; d < c; d++) { + *qn++ = *d; + } + l = c+1; // skip . + } + } + *qn = '\0'; +} + +// Decode a DNS name +static void +decode_qname(char *qn, int max) +{ + char *qnMax = qn + max; + while(1){ + if(qn >= qnMax){ + printf("invalid DNS reply\n"); + exit(1); + } + int l = *qn; + if(l == 0) + break; + for(int i = 0; i < l; i++) { + *qn = *(qn+1); + qn++; + } + *qn++ = '.'; + } +} + +// Make a DNS request +static int +dns_req(uint8 *obuf) +{ + int len = 0; + + struct dns *hdr = (struct dns *) obuf; + hdr->id = htons(6828); + hdr->rd = 1; + hdr->qdcount = htons(1); + + len += sizeof(struct dns); + + // qname part of question + char *qname = (char *) (obuf + sizeof(struct dns)); + char *s = "pdos.csail.mit.edu."; + encode_qname(qname, s); + len += strlen(qname) + 1; + + // constants part of question + struct dns_question *h = (struct dns_question *) (qname+strlen(qname)+1); + h->qtype = htons(0x1); + h->qclass = htons(0x1); + + len += sizeof(struct dns_question); + return len; +} + +// Process DNS response +static void +dns_rep(uint8 *ibuf, int cc) +{ + struct dns *hdr = (struct dns *) ibuf; + int len; + char *qname = 0; + int record = 0; + + if(cc < sizeof(struct dns)){ + printf("DNS reply too short\n"); + exit(1); + } + + if(!hdr->qr) { + printf("Not a DNS reply for %d\n", ntohs(hdr->id)); + exit(1); + } + + if(hdr->id != htons(6828)){ + printf("DNS wrong id: %d\n", ntohs(hdr->id)); + exit(1); + } + + if(hdr->rcode != 0) { + printf("DNS rcode error: %x\n", hdr->rcode); + exit(1); + } + + //printf("qdcount: %x\n", ntohs(hdr->qdcount)); + //printf("ancount: %x\n", ntohs(hdr->ancount)); + //printf("nscount: %x\n", ntohs(hdr->nscount)); + //printf("arcount: %x\n", ntohs(hdr->arcount)); + + len = sizeof(struct dns); + + for(int i =0; i < ntohs(hdr->qdcount); i++) { + char *qn = (char *) (ibuf+len); + qname = qn; + decode_qname(qn, cc - len); + len += strlen(qn)+1; + len += sizeof(struct dns_question); + } + + for(int i = 0; i < ntohs(hdr->ancount); i++) { + if(len >= cc){ + printf("invalid DNS reply\n"); + exit(1); + } + + char *qn = (char *) (ibuf+len); + + if((int) qn[0] > 63) { // compression? + qn = (char *)(ibuf+qn[1]); + len += 2; + } else { + decode_qname(qn, cc - len); + len += strlen(qn)+1; + } + + struct dns_data *d = (struct dns_data *) (ibuf+len); + len += sizeof(struct dns_data); + //printf("type %d ttl %d len %d\n", ntohs(d->type), ntohl(d->ttl), ntohs(d->len)); + if(ntohs(d->type) == ARECORD && ntohs(d->len) == 4) { + record = 1; + printf("DNS arecord for %s is ", qname ? qname : "" ); + uint8 *ip = (ibuf+len); + printf("%d.%d.%d.%d\n", ip[0], ip[1], ip[2], ip[3]); + if(ip[0] != 128 || ip[1] != 52 || ip[2] != 129 || ip[3] != 126) { + printf("wrong ip address"); + exit(1); + } + len += 4; + } + } + + // needed for DNS servers with EDNS support + for(int i = 0; i < ntohs(hdr->arcount); i++) { + char *qn = (char *) (ibuf+len); + if(*qn != 0) { + printf("invalid name for EDNS\n"); + exit(1); + } + len += 1; + + struct dns_data *d = (struct dns_data *) (ibuf+len); + len += sizeof(struct dns_data); + if(ntohs(d->type) != 41) { + printf("invalid type for EDNS\n"); + exit(1); + } + len += ntohs(d->len); + } + + if(len != cc) { + printf("Processed %d data bytes but received %d\n", len, cc); + exit(1); + } + if(!record) { + printf("Didn't receive an arecord\n"); + exit(1); + } +} + +static void +dns() +{ + #define N 1000 + uint8 obuf[N]; + uint8 ibuf[N]; + uint32 dst; + int fd; + int len; + + memset(obuf, 0, N); + memset(ibuf, 0, N); + + // 8.8.8.8: google's name server + dst = (8 << 24) | (8 << 16) | (8 << 8) | (8 << 0); + + if((fd = connect(dst, 10000, 53)) < 0){ + fprintf(2, "ping: connect() failed\n"); + exit(1); + } + + len = dns_req(obuf); + + if(write(fd, obuf, len) < 0){ + fprintf(2, "dns: send() failed\n"); + exit(1); + } + int cc = read(fd, ibuf, sizeof(ibuf)); + if(cc < 0){ + fprintf(2, "dns: recv() failed\n"); + exit(1); + } + dns_rep(ibuf, cc); + + close(fd); +} + +int +main(int argc, char *argv[]) +{ + int i, ret; + uint16 dport = NET_TESTS_PORT; + + printf("nettests running on port %d\n", dport); + + printf("testing ping: "); + ping(2000, dport, 1); + printf("OK\n"); + + printf("testing single-process pings: "); + for (i = 0; i < 100; i++) + ping(2000, dport, 1); + printf("OK\n"); + + printf("testing multi-process pings: "); + for (i = 0; i < 10; i++){ + int pid = fork(); + if (pid == 0){ + ping(2000 + i + 1, dport, 1); + exit(0); + } + } + for (i = 0; i < 10; i++){ + wait(&ret); + if (ret != 0) + exit(1); + } + printf("OK\n"); + + printf("testing DNS\n"); + dns(); + printf("DNS OK\n"); + + printf("all tests passed.\n"); + exit(0); +} diff --git a/user/pingpong.c b/user/pingpong.c new file mode 100644 index 0000000..6ed12e7 --- /dev/null +++ b/user/pingpong.c @@ -0,0 +1,52 @@ +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" + +#define N 5 +char buf[N]; + +void +pong(int *parent_to_child, int *child_to_parent) { + if (read(parent_to_child[0], buf, N) < 0) { + printf("read failed\n"); + } + printf("%d: received %s\n", getpid(), buf); + if (write(child_to_parent[1], "pong", 4) != 4) { + printf("write failed\n"); + } +} + +void +ping(int *parent_to_child, int *child_to_parent) { + + if (write(parent_to_child[1], "ping", 4) != 4) { + printf("write failed\n"); + } + if (read(child_to_parent[0], buf, N) < 0) { + printf("read failed\n"); + } + printf("%d: received %s\n", getpid(), buf); +} + +int +main(int argc, char *argv[]) +{ + int parent_to_child[2]; + int child_to_parent[2]; + + int pid; + + if (pipe(parent_to_child) < 0 || pipe(child_to_parent) < 0) { + printf("pipe failed\n"); + } + if ((pid = fork()) < 0) { + printf("fork failed\n"); + } + if (pid == 0) { + pong(parent_to_child, child_to_parent); + } else { + ping(parent_to_child, child_to_parent); + } + + exit(0); +} |