summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Robert Morris <[email protected]> 2011-08-12 09:25:39 -0400
committer: Robert Morris <[email protected]> 2011-08-12 09:25:39 -0400
commit: 2e59046362f532748711b9acaceee1cda969cc50 (patch)
tree: 82ab693a2fa6021d445915c169354b6833262d24
parent: bd71a45046eb13797284216c43353b9b6c92f18c (diff)
download: xv6-labs-2e59046362f532748711b9acaceee1cda969cc50.tar.gz
xv6-labs-2e59046362f532748711b9acaceee1cda969cc50.tar.bz2
xv6-labs-2e59046362f532748711b9acaceee1cda969cc50.zip
log write() data
usertest for big write()s
push begin_trans/commit_trans down into syscalls
-rw-r--r-- file.c 29
-rw-r--r-- fs.c 4
-rw-r--r-- log.c 88
-rw-r--r-- param.h 2
-rw-r--r-- syscall.c 2
-rw-r--r-- sysfile.c 30
-rw-r--r-- usertests.c 33
7 files changed, 144 insertions, 44 deletions
diff --git a/file.c b/file.c
index e10b824..7101a50 100644
--- a/file.c
+++ b/file.c
@@ -67,8 +67,11 @@ fileclose(struct file *f)
if(ff.type == FD_PIPE)
pipeclose(ff.pipe, ff.writable);
- else if(ff.type == FD_INODE)
+ else if(ff.type == FD_INODE){
+ begin_trans();
iput(ff.ip);
+ commit_trans();
+ }
}
// Get metadata about file f.
@@ -116,10 +119,30 @@ filewrite(struct file *f, char *addr, int n)
return pipewrite(f->pipe, addr, n);
if(f->type == FD_INODE){
ilock(f->ip);
- if((r = writei(f->ip, addr, f->off, n)) > 0)
+ // write a few blocks at a time to avoid exceeding
+ // the maximum log transaction size, including
+ // i-node, indirect block, allocation blocks,
+ // and 2 blocks of slop for non-aligned writes.
+ // this really belongs lower down, since writei()
+ // might be writing a device like the console.
+ int max = ((LOGSIZE-1-1-2) / 2) * 512;
+ int i = 0;
+ while(i < n){
+ int n1 = n - i;
+ if(n1 > max)
+ n1 = max;
+ begin_trans();
+ r = writei(f->ip, addr + i, f->off, n1);
+ commit_trans();
+ if(r < 0)
+ break;
+ if(r != n1)
+ panic("short filewrite");
f->off += r;
+ i += r;
+ }
iunlock(f->ip);
- return r;
+ return i == n ? n : -1;
}
panic("filewrite");
}
diff --git a/fs.c b/fs.c
index a414b65..a76788b 100644
--- a/fs.c
+++ b/fs.c
@@ -437,13 +437,13 @@ writei(struct inode *ip, char *src, uint off, uint n)
if(off > ip->size || off + n < off)
return -1;
if(off + n > MAXFILE*BSIZE)
- n = MAXFILE*BSIZE - off;
+ return -1;
for(tot=0; tot<n; tot+=m, off+=m, src+=m){
bp = bread(ip->dev, bmap(ip, off/BSIZE));
m = min(n - tot, BSIZE - off%BSIZE);
memmove(bp->data + off%BSIZE, src, m);
- bwrite(bp);
+ log_write(bp);
brelse(bp);
}
diff --git a/log.c b/log.c
index 72a0367..db36ba9 100644
--- a/log.c
+++ b/log.c
@@ -8,18 +8,36 @@
#include "fs.h"
#include "buf.h"
-// Dirt simple "logging" supporting only one transaction. All file system calls
-// that potentially write a block should be wrapped in begin_trans and commit_trans,
-// so that there is never more than one transaction. This serializes all file system
-// operations that potentially write, but simplifies recovery (only the last
-// one transaction to recover) and concurrency (don't have to worry about reading a modified
-// block from a transaction that hasn't committed yet).
-
-// The header of the log. If head == 0, there are no log entries. All entries till head
-// are committed. sector[] records the home sector for each block in the log
-// (i.e., physical logging).
+// Simple logging. Each system call that might write the file system
+// should be surrounded with begin_trans() and commit_trans() calls.
+//
+// The log holds at most one transaction at a time. Commit forces
+// the log (with commit record) to disk, then installs the affected
+// blocks to disk, then erases the log. begin_trans() ensures that
+// only one system call can be in a transaction; others must wait.
+//
+// Allowing only one transaction at a time means that the file
+// system code doesn't have to worry about the possibility of
+// one transaction reading a block that another one has modified,
+// for example an i-node block.
+//
+// Read-only system calls don't need to use transactions, though
+// this means that they may observe uncommitted data. I-node
+// and buffer locks prevent read-only calls from seeing inconsistent data.
+//
+// The log is a physical re-do log containing disk blocks.
+// The on-disk log format:
+// header block, containing sector #s for block A, B, C, ...
+// block A
+// block B
+// block C
+// ...
+// Log appends are synchronous.
+
+// Contents of the header block, used for both the on-disk header block
+// and to keep track in memory of logged sector #s before commit.
struct logheader {
- int head;
+ int n;
int sector[LOGSIZE];
};
@@ -55,10 +73,10 @@ install_trans(void)
{
int tail;
- if (log.lh.head > 0)
- cprintf("install_trans %d\n", log.lh.head);
- for (tail = 0; tail < log.lh.head; tail++) {
- cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]);
+ //if (log.lh.n > 0)
+ // cprintf("install_trans %d\n", log.lh.n);
+ for (tail = 0; tail < log.lh.n; tail++) {
+ // cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]);
struct buf *lbuf = bread(log.dev, log.start+tail+1); // read i'th block from log
struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst block
memmove(dbuf->data, lbuf->data, BSIZE);
@@ -75,27 +93,27 @@ read_head(void)
struct buf *buf = bread(log.dev, log.start);
struct logheader *lh = (struct logheader *) (buf->data);
int i;
- log.lh.head = lh->head;
- for (i = 0; i < log.lh.head; i++) {
+ log.lh.n = lh->n;
+ for (i = 0; i < log.lh.n; i++) {
log.lh.sector[i] = lh->sector[i];
}
brelse(buf);
- if (log.lh.head > 0)
- cprintf("read_head: %d\n", log.lh.head);
+ //if (log.lh.n > 0)
+ // cprintf("read_head: %d\n", log.lh.n);
}
// Write the in-memory log header to disk, committing log entries till head
static void
write_head(void)
{
- if (log.lh.head > 0)
- cprintf("write_head: %d\n", log.lh.head);
+ // if (log.lh.n > 0)
+ // cprintf("write_head: %d\n", log.lh.n);
struct buf *buf = bread(log.dev, log.start);
struct logheader *hb = (struct logheader *) (buf->data);
int i;
- hb->head = log.lh.head;
- for (i = 0; i < log.lh.head; i++) {
+ hb->n = log.lh.n;
+ for (i = 0; i < log.lh.n; i++) {
hb->sector[i] = log.lh.sector[i];
}
bwrite(buf);
@@ -107,7 +125,7 @@ recover_from_log(void)
{
read_head();
install_trans(); // Install all transactions till head
- log.lh.head = 0;
+ log.lh.n = 0;
write_head(); // Reclaim log
}
@@ -127,7 +145,7 @@ commit_trans(void)
{
write_head(); // This causes all blocks till log.head to be commited
install_trans(); // Install all the transactions till head
- log.lh.head = 0;
+ log.lh.n = 0;
write_head(); // Reclaim log
acquire(&log.lock);
@@ -136,21 +154,27 @@ commit_trans(void)
release(&log.lock);
}
-// Write buffer into the log at log.head and record the block number log.lh.entry, but
-// don't write the log header (which would commit the write).
+// Caller has modified b->data and is done with the buffer.
+// Append the block to the log and record the block number,
+// but don't write the log header (which would commit the write).
+// log_write() replaces bwrite(); a typical use is:
+// bp = bread(...)
+// modify bp->data[]
+// log_write(bp)
+// brelse(bp)
void
log_write(struct buf *b)
{
int i;
- if (log.lh.head >= LOGSIZE)
+ if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
panic("too big a transaction");
if (!log.intrans)
panic("write outside of trans");
- cprintf("log_write: %d %d\n", b->sector, log.lh.head);
+ // cprintf("log_write: %d %d\n", b->sector, log.lh.n);
- for (i = 0; i < log.lh.head; i++) {
+ for (i = 0; i < log.lh.n; i++) {
if (log.lh.sector[i] == b->sector) // log absorbtion?
break;
}
@@ -159,6 +183,6 @@ log_write(struct buf *b)
memmove(lbuf->data, b->data, BSIZE);
bwrite(lbuf);
brelse(lbuf);
- if (i == log.lh.head)
- log.lh.head++;
+ if (i == log.lh.n)
+ log.lh.n++;
}
diff --git a/param.h b/param.h
index 03c05f9..b6f6f46 100644
--- a/param.h
+++ b/param.h
@@ -8,5 +8,5 @@
#define NDEV 10 // maximum major device number
#define ROOTDEV 1 // device number of file system root disk
#define MAXARG 32 // max exec arguments
-#define LOGSIZE 10 // size of log
+#define LOGSIZE 10 // max data sectors in on-disk log
diff --git a/syscall.c b/syscall.c
index b848716..71c369c 100644
--- a/syscall.c
+++ b/syscall.c
@@ -141,9 +141,7 @@ syscall(void)
if(num >= 0 && num < SYS_open && syscalls[num]) {
proc->tf->eax = syscalls[num]();
} else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) {
- begin_trans();
proc->tf->eax = syscalls[num]();
- commit_trans();
} else {
cprintf("%d %s: unknown sys call %d\n",
proc->pid, proc->name, num);
diff --git a/sysfile.c b/sysfile.c
index 4235660..ca54013 100644
--- a/sysfile.c
+++ b/sysfile.c
@@ -121,6 +121,9 @@ sys_link(void)
iunlockput(ip);
return -1;
}
+
+ begin_trans();
+
ip->nlink++;
iupdate(ip);
iunlock(ip);
@@ -134,6 +137,9 @@ sys_link(void)
}
iunlockput(dp);
iput(ip);
+
+ commit_trans();
+
return 0;
bad:
@@ -141,6 +147,7 @@ bad:
ip->nlink--;
iupdate(ip);
iunlockput(ip);
+ commit_trans();
return -1;
}
@@ -195,6 +202,8 @@ sys_unlink(void)
return -1;
}
+ begin_trans();
+
memset(&de, 0, sizeof(de));
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("unlink: writei");
@@ -207,6 +216,9 @@ sys_unlink(void)
ip->nlink--;
iupdate(ip);
iunlockput(ip);
+
+ commit_trans();
+
return 0;
}
@@ -251,6 +263,7 @@ create(char *path, short type, short major, short minor)
panic("create: dirlink");
iunlockput(dp);
+
return ip;
}
@@ -265,7 +278,10 @@ sys_open(void)
if(argstr(0, &path) < 0 || argint(1, &omode) < 0)
return -1;
if(omode & O_CREATE){
- if((ip = create(path, T_FILE, 0, 0)) == 0)
+ begin_trans();
+ ip = create(path, T_FILE, 0, 0);
+ commit_trans();
+ if(ip == 0)
return -1;
} else {
if((ip = namei(path)) == 0)
@@ -299,9 +315,13 @@ sys_mkdir(void)
char *path;
struct inode *ip;
- if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0)
+ begin_trans();
+ if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){
+ commit_trans();
return -1;
+ }
iunlockput(ip);
+ commit_trans();
return 0;
}
@@ -313,12 +333,16 @@ sys_mknod(void)
int len;
int major, minor;
+ begin_trans();
if((len=argstr(0, &path)) < 0 ||
argint(1, &major) < 0 ||
argint(2, &minor) < 0 ||
- (ip = create(path, T_DEV, major, minor)) == 0)
+ (ip = create(path, T_DEV, major, minor)) == 0){
+ commit_trans();
return -1;
+ }
iunlockput(ip);
+ commit_trans();
return 0;
}
diff --git a/usertests.c b/usertests.c
index 3bffadb..ba648a7 100644
--- a/usertests.c
+++ b/usertests.c
@@ -7,7 +7,7 @@
#include "traps.h"
#include "memlayout.h"
-char buf[2048];
+char buf[8192];
char name[3];
char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 };
int stdout = 1;
@@ -968,6 +968,36 @@ subdir(void)
printf(1, "subdir ok\n");
}
+// test writes that are larger than the log.
+void
+bigwrite(void)
+{
+ int fd, sz;
+
+ printf(1, "bigwrite test\n");
+
+ unlink("bigwrite");
+ for(sz = 499; sz < 12*512; sz += 471){
+ fd = open("bigwrite", O_CREATE | O_RDWR);
+ if(fd < 0){
+ printf(1, "cannot create bigwrite\n");
+ exit();
+ }
+ int i;
+ for(i = 0; i < 2; i++){
+ int cc = write(fd, buf, sz);
+ if(cc != sz){
+ printf(1, "write(%d) ret %d\n", sz, cc);
+ exit();
+ }
+ }
+ close(fd);
+ unlink("bigwrite");
+ }
+
+ printf(1, "bigwrite ok\n");
+}
+
void
bigfile(void)
{
@@ -1467,6 +1497,7 @@ main(int argc, char *argv[])
}
close(open("usertests.ran", O_CREATE));
+ bigwrite();
bigargtest();
bsstest();
sbrktest();