add dependencies

Newnius 2018-04-10 20:53:13 +08:00
parent 25fd77a63b
commit 33e72857bf
89 changed files with 438014 additions and 0 deletions

165
src/diskv/client.go Normal file

@@ -0,0 +1,165 @@
package diskv
import "shardmaster"
import "net/rpc"
import "time"
import "sync"
import "fmt"
import "crypto/rand"
import "math/big"
type Clerk struct {
mu sync.Mutex // one RPC at a time
sm *shardmaster.Clerk
config shardmaster.Config
// You'll have to modify Clerk.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(shardmasters []string) *Clerk {
ck := new(Clerk)
ck.sm = shardmaster.MakeClerk(shardmasters)
// You'll have to modify MakeClerk.
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
func key2shard(key string) int {
shard := 0
if len(key) > 0 {
shard = int(key[0])
}
shard %= shardmaster.NShards
return shard
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
func (ck *Clerk) Get(key string) string {
ck.mu.Lock()
defer ck.mu.Unlock()
// You'll have to modify Get().
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
servers, ok := ck.config.Groups[gid]
if ok {
// try each server in the shard's replication group.
for _, srv := range servers {
args := &GetArgs{}
args.Key = key
var reply GetReply
ok := call(srv, "DisKV.Get", args, &reply)
if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
return reply.Value
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for a new configuration.
ck.config = ck.sm.Query(-1)
}
}
// send a Put or Append request.
func (ck *Clerk) PutAppend(key string, value string, op string) {
ck.mu.Lock()
defer ck.mu.Unlock()
// You'll have to modify PutAppend().
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
servers, ok := ck.config.Groups[gid]
if ok {
// try each server in the shard's replication group.
for _, srv := range servers {
args := &PutAppendArgs{}
args.Key = key
args.Value = value
args.Op = op
var reply PutAppendReply
ok := call(srv, "DisKV.PutAppend", args, &reply)
if ok && reply.Err == OK {
return
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for a new configuration.
ck.config = ck.sm.Query(-1)
}
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

43
src/diskv/common.go Normal file

@@ -0,0 +1,43 @@
package diskv
//
// Sharded key/value server.
// Lots of replica groups, each running op-at-a-time paxos.
// Shardmaster decides which group serves each shard.
// Shardmaster may change shard assignment from time to time.
//
// You will have to modify these definitions.
//
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongGroup = "ErrWrongGroup"
)
type Err string
type PutAppendArgs struct {
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}

274
src/diskv/server.go Normal file

@@ -0,0 +1,274 @@
package diskv
import "net"
import "fmt"
import "net/rpc"
import "log"
import "time"
import "paxos"
import "sync"
import "sync/atomic"
import "os"
import "syscall"
import "encoding/gob"
import "encoding/base32"
import "math/rand"
import "shardmaster"
import "io/ioutil"
import "strconv"
const Debug = 0
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug > 0 {
log.Printf(format, a...)
}
return
}
type Op struct {
// Your definitions here.
}
type DisKV struct {
mu sync.Mutex
l net.Listener
me int
dead int32 // for testing
unreliable int32 // for testing
sm *shardmaster.Clerk
px *paxos.Paxos
dir string // each replica has its own data directory
gid int64 // my replica group ID
// Your definitions here.
}
//
// these are handy functions that might be useful
// for reading and writing key/value files, and
// for reading and writing entire shards.
// puts the key files for each shard in a separate
// directory.
//
func (kv *DisKV) shardDir(shard int) string {
d := kv.dir + "/shard-" + strconv.Itoa(shard) + "/"
// create directory if needed.
_, err := os.Stat(d)
if err != nil {
if err := os.Mkdir(d, 0777); err != nil {
log.Fatalf("Mkdir(%v): %v", d, err)
}
}
return d
}
// cannot use keys in file names directly, since
// they might contain troublesome characters like /.
// base32-encode the key to get a file name.
// base32 rather than base64 b/c Mac has case-insensitive
// file names.
func (kv *DisKV) encodeKey(key string) string {
return base32.StdEncoding.EncodeToString([]byte(key))
}
func (kv *DisKV) decodeKey(filename string) (string, error) {
key, err := base32.StdEncoding.DecodeString(filename)
return string(key), err
}
// read the content of a key's file.
func (kv *DisKV) fileGet(shard int, key string) (string, error) {
fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key)
content, err := ioutil.ReadFile(fullname)
return string(content), err
}
// replace the content of a key's file.
// uses rename() to make the replacement atomic with
// respect to crashes.
func (kv *DisKV) filePut(shard int, key string, content string) error {
fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key)
tempname := kv.shardDir(shard) + "/temp-" + kv.encodeKey(key)
if err := ioutil.WriteFile(tempname, []byte(content), 0666); err != nil {
return err
}
if err := os.Rename(tempname, fullname); err != nil {
return err
}
return nil
}
// return content of every key file in a given shard.
func (kv *DisKV) fileReadShard(shard int) map[string]string {
m := map[string]string{}
d := kv.shardDir(shard)
files, err := ioutil.ReadDir(d)
if err != nil {
log.Fatalf("fileReadShard could not read %v: %v", d, err)
}
for _, fi := range files {
n1 := fi.Name()
if n1[0:4] == "key-" {
key, err := kv.decodeKey(n1[4:])
if err != nil {
log.Fatalf("fileReadShard bad file name %v: %v", n1, err)
}
content, err := kv.fileGet(shard, key)
if err != nil {
log.Fatalf("fileReadShard fileGet failed for %v: %v", key, err)
}
m[key] = content
}
}
return m
}
// replace an entire shard directory.
func (kv *DisKV) fileReplaceShard(shard int, m map[string]string) {
d := kv.shardDir(shard)
os.RemoveAll(d) // remove all existing files from shard.
for k, v := range m {
kv.filePut(shard, k, v)
}
}
func (kv *DisKV) Get(args *GetArgs, reply *GetReply) error {
// Your code here.
return nil
}
// RPC handler for client Put and Append requests
func (kv *DisKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
// Your code here.
return nil
}
//
// Ask the shardmaster if there's a new configuration;
// if so, re-configure.
//
func (kv *DisKV) tick() {
// Your code here.
}
// tell the server to shut itself down.
// please don't change these two functions.
func (kv *DisKV) kill() {
atomic.StoreInt32(&kv.dead, 1)
kv.l.Close()
kv.px.Kill()
}
// call this to find out if the server is dead.
func (kv *DisKV) isdead() bool {
return atomic.LoadInt32(&kv.dead) != 0
}
// please do not change these two functions.
func (kv *DisKV) Setunreliable(what bool) {
if what {
atomic.StoreInt32(&kv.unreliable, 1)
} else {
atomic.StoreInt32(&kv.unreliable, 0)
}
}
func (kv *DisKV) isunreliable() bool {
return atomic.LoadInt32(&kv.unreliable) != 0
}
//
// Start a shardkv server.
// gid is the ID of the server's replica group.
// shardmasters[] contains the ports of the
// servers that implement the shardmaster.
// servers[] contains the ports of the servers
// in this replica group.
// Me is the index of this server in servers[].
// dir is the directory name under which this
// replica should store all its files.
// each replica is passed a different directory.
// restart is false the very first time this server
// is started, and true to indicate a re-start
// after a crash or after a crash with disk loss.
//
func StartServer(gid int64, shardmasters []string,
servers []string, me int, dir string, restart bool) *DisKV {
kv := new(DisKV)
kv.me = me
kv.gid = gid
kv.sm = shardmaster.MakeClerk(shardmasters)
kv.dir = dir
// Your initialization code here.
// Don't call Join().
// log.SetOutput(ioutil.Discard)
gob.Register(Op{})
rpcs := rpc.NewServer()
rpcs.Register(kv)
kv.px = paxos.Make(servers, me, rpcs)
// log.SetOutput(os.Stdout)
os.Remove(servers[me])
l, e := net.Listen("unix", servers[me])
if e != nil {
log.Fatal("listen error: ", e)
}
kv.l = l
// please do not change any of the following code,
// or do anything to subvert it.
go func() {
for kv.isdead() == false {
conn, err := kv.l.Accept()
if err == nil && kv.isdead() == false {
if kv.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if kv.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
go rpcs.ServeConn(conn)
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && kv.isdead() == false {
fmt.Printf("DisKV(%v) accept: %v\n", me, err.Error())
kv.kill()
}
}
}()
go func() {
for kv.isdead() == false {
kv.tick()
time.Sleep(250 * time.Millisecond)
}
}()
return kv
}
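The file helpers above (filePut, fileGet, fileReadShard, fileReplaceShard) are the building blocks for making DisKV survive crashes: every applied operation should reach disk before the reply goes out, and a restarting replica can rebuild its state from the shard directories. Below is a minimal sketch of that idea using a hypothetical applyOp helper that is not part of the skeleton; how the op/key/value fields reach it depends on your Op definition, and Paxos ordering and error handling are omitted.

// applyOp: hypothetical helper showing how an agreed-on operation could be
// applied durably with the file helpers defined above. Sketch only, not the lab solution.
func (kv *DisKV) applyOp(opt string, key string, value string) (string, Err) {
	shard := key2shard(key)
	switch opt {
	case "Put":
		// write-through: persist before acknowledging the client
		if err := kv.filePut(shard, key, value); err != nil {
			log.Fatalf("filePut(%v): %v", key, err)
		}
		return "", OK
	case "Append":
		old, _ := kv.fileGet(shard, key) // a missing file reads as ""
		if err := kv.filePut(shard, key, old+value); err != nil {
			log.Fatalf("filePut(%v): %v", key, err)
		}
		return "", OK
	default: // "Get"
		v, err := kv.fileGet(shard, key)
		if err != nil {
			return "", ErrNoKey
		}
		return v, OK
	}
}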

1280
src/diskv/test_test.go Normal file

File diff suppressed because it is too large.

84
src/kvpaxos/client.go Normal file

@@ -0,0 +1,84 @@
package kvpaxos
import "net/rpc"
import "crypto/rand"
import "math/big"
import "fmt"
type Clerk struct {
servers []string
// You will have to modify this struct.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []string) *Clerk {
ck := new(Clerk)
ck.servers = servers
// You'll have to add code here.
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
func (ck *Clerk) Get(key string) string {
// You will have to modify this function.
return ""
}
//
// shared by Put and Append.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
// You will have to modify this function.
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}
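Get() and PutAppend() are left as stubs here. One plausible shape mirrors the diskv clerk earlier in this commit: cycle over ck.servers with call() until some replica gives a definitive answer. The sketch below assumes exactly that and nothing more; a complete solution also needs request tagging so servers can filter duplicate retries, and it would need "time" added to the imports above.

// getSketch: illustrative only, not the required implementation.
// Retry every replica until one of them returns a definitive answer.
func (ck *Clerk) getSketch(key string) string {
	for {
		for _, srv := range ck.servers {
			args := &GetArgs{Key: key}
			var reply GetReply
			ok := call(srv, "KVPaxos.Get", args, &reply)
			if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
				return reply.Value
			}
		}
		time.Sleep(100 * time.Millisecond) // back off before another round
	}
}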

33
src/kvpaxos/common.go Normal file

@@ -0,0 +1,33 @@
package kvpaxos
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
)
type Err string
// Put or Append
type PutAppendArgs struct {
// You'll have to add definitions here.
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}

144
src/kvpaxos/server.go Normal file

@@ -0,0 +1,144 @@
package kvpaxos
import "net"
import "fmt"
import "net/rpc"
import "log"
import "paxos"
import "sync"
import "sync/atomic"
import "os"
import "syscall"
import "encoding/gob"
import "math/rand"
const Debug = 0
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug > 0 {
log.Printf(format, a...)
}
return
}
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type KVPaxos struct {
mu sync.Mutex
l net.Listener
me int
dead int32 // for testing
unreliable int32 // for testing
px *paxos.Paxos
// Your definitions here.
}
func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error {
// Your code here.
return nil
}
func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
// Your code here.
return nil
}
// tell the server to shut itself down.
// please do not change these two functions.
func (kv *KVPaxos) kill() {
DPrintf("Kill(%d): die\n", kv.me)
atomic.StoreInt32(&kv.dead, 1)
kv.l.Close()
kv.px.Kill()
}
// call this to find out if the server is dead.
func (kv *KVPaxos) isdead() bool {
return atomic.LoadInt32(&kv.dead) != 0
}
// please do not change these two functions.
func (kv *KVPaxos) setunreliable(what bool) {
if what {
atomic.StoreInt32(&kv.unreliable, 1)
} else {
atomic.StoreInt32(&kv.unreliable, 0)
}
}
func (kv *KVPaxos) isunreliable() bool {
return atomic.LoadInt32(&kv.unreliable) != 0
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Paxos to
// form the fault-tolerant key/value service.
// me is the index of the current server in servers[].
//
func StartServer(servers []string, me int) *KVPaxos {
// call gob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
gob.Register(Op{})
kv := new(KVPaxos)
kv.me = me
// Your initialization code here.
rpcs := rpc.NewServer()
rpcs.Register(kv)
kv.px = paxos.Make(servers, me, rpcs)
os.Remove(servers[me])
l, e := net.Listen("unix", servers[me])
if e != nil {
log.Fatal("listen error: ", e)
}
kv.l = l
// please do not change any of the following code,
// or do anything to subvert it.
go func() {
for kv.isdead() == false {
conn, err := kv.l.Accept()
if err == nil && kv.isdead() == false {
if kv.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if kv.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
go rpcs.ServeConn(conn)
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && kv.isdead() == false {
fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error())
kv.kill()
}
}
}()
return kv
}

711
src/kvpaxos/test_test.go Normal file

@@ -0,0 +1,711 @@
package kvpaxos
import "testing"
import "runtime"
import "strconv"
import "os"
import "time"
import "fmt"
import "math/rand"
import "strings"
import "sync/atomic"
func check(t *testing.T, ck *Clerk, key string, value string) {
v := ck.Get(key)
if v != value {
t.Fatalf("Get(%v) -> %v, expected %v", key, v, value)
}
}
func port(tag string, host int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "kv-"
s += strconv.Itoa(os.Getpid()) + "-"
s += tag + "-"
s += strconv.Itoa(host)
return s
}
func cleanup(kva []*KVPaxos) {
for i := 0; i < len(kva); i++ {
if kva[i] != nil {
kva[i].kill()
}
}
}
// predict effect of Append(k, val) if old value is prev.
func NextValue(prev string, val string) string {
return prev + val
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var kva []*KVPaxos = make([]*KVPaxos, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(kva)
for i := 0; i < nservers; i++ {
kvh[i] = port("basic", i)
}
for i := 0; i < nservers; i++ {
kva[i] = StartServer(kvh, i)
}
ck := MakeClerk(kvh)
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{kvh[i]})
}
fmt.Printf("Test: Basic put/append/get ...\n")
ck.Append("app", "x")
ck.Append("app", "y")
check(t, ck, "app", "xy")
ck.Put("a", "aa")
check(t, ck, "a", "aa")
cka[1].Put("a", "aaa")
check(t, cka[2], "a", "aaa")
check(t, cka[1], "a", "aaa")
check(t, ck, "a", "aaa")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent clients ...\n")
for iters := 0; iters < 20; iters++ {
const npara = 15
var ca [npara]chan bool
for nth := 0; nth < npara; nth++ {
ca[nth] = make(chan bool)
go func(me int) {
defer func() { ca[me] <- true }()
ci := (rand.Int() % nservers)
myck := MakeClerk([]string{kvh[ci]})
if (rand.Int() % 1000) < 500 {
myck.Put("b", strconv.Itoa(rand.Int()))
} else {
myck.Get("b")
}
}(nth)
}
for nth := 0; nth < npara; nth++ {
<-ca[nth]
}
var va [nservers]string
for i := 0; i < nservers; i++ {
va[i] = cka[i].Get("b")
if va[i] != va[0] {
t.Fatalf("mismatch")
}
}
}
fmt.Printf(" ... Passed\n")
time.Sleep(1 * time.Second)
}
func TestDone(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var kva []*KVPaxos = make([]*KVPaxos, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(kva)
for i := 0; i < nservers; i++ {
kvh[i] = port("done", i)
}
for i := 0; i < nservers; i++ {
kva[i] = StartServer(kvh, i)
}
ck := MakeClerk(kvh)
var cka [nservers]*Clerk
for pi := 0; pi < nservers; pi++ {
cka[pi] = MakeClerk([]string{kvh[pi]})
}
fmt.Printf("Test: server frees Paxos log memory...\n")
ck.Put("a", "aa")
check(t, ck, "a", "aa")
runtime.GC()
var m0 runtime.MemStats
runtime.ReadMemStats(&m0)
// rtm's m0.Alloc is 2 MB
sz := 1000000
items := 10
for iters := 0; iters < 2; iters++ {
for i := 0; i < items; i++ {
key := strconv.Itoa(i)
value := make([]byte, sz)
for j := 0; j < len(value); j++ {
value[j] = byte((rand.Int() % 100) + 1)
}
ck.Put(key, string(value))
check(t, cka[i%nservers], key, string(value))
}
}
// Put and Get to each of the replicas, in case
// the Done information is piggybacked on
// the Paxos proposer messages.
for iters := 0; iters < 2; iters++ {
for pi := 0; pi < nservers; pi++ {
cka[pi].Put("a", "aa")
check(t, cka[pi], "a", "aa")
}
}
time.Sleep(1 * time.Second)
runtime.GC()
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
// rtm's m1.Alloc is 45 MB
// fmt.Printf(" Memory: before %v, after %v\n", m0.Alloc, m1.Alloc)
allowed := m0.Alloc + uint64(nservers*items*sz*2)
if m1.Alloc > allowed {
t.Fatalf("Memory use did not shrink enough (Used: %v, allowed: %v).\n", m1.Alloc, allowed)
}
fmt.Printf(" ... Passed\n")
}
func pp(tag string, src int, dst int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
s += "kv-" + tag + "-"
s += strconv.Itoa(os.Getpid()) + "-"
s += strconv.Itoa(src) + "-"
s += strconv.Itoa(dst)
return s
}
func cleanpp(tag string, n int) {
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
ij := pp(tag, i, j)
os.Remove(ij)
}
}
}
func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) {
cleanpp(tag, npaxos)
pa := [][]int{p1, p2, p3}
for pi := 0; pi < len(pa); pi++ {
p := pa[pi]
for i := 0; i < len(p); i++ {
for j := 0; j < len(p); j++ {
ij := pp(tag, p[i], p[j])
pj := port(tag, p[j])
err := os.Link(pj, ij)
if err != nil {
t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err)
}
}
}
}
}
func TestPartition(t *testing.T) {
runtime.GOMAXPROCS(4)
tag := "partition"
const nservers = 5
var kva []*KVPaxos = make([]*KVPaxos, nservers)
defer cleanup(kva)
defer cleanpp(tag, nservers)
for i := 0; i < nservers; i++ {
var kvh []string = make([]string, nservers)
for j := 0; j < nservers; j++ {
if j == i {
kvh[j] = port(tag, i)
} else {
kvh[j] = pp(tag, i, j)
}
}
kva[i] = StartServer(kvh, i)
}
defer part(t, tag, nservers, []int{}, []int{}, []int{})
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{port(tag, i)})
}
fmt.Printf("Test: No partition ...\n")
part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{})
cka[0].Put("1", "12")
cka[2].Put("1", "13")
check(t, cka[3], "1", "13")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Progress in majority ...\n")
part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{})
cka[2].Put("1", "14")
check(t, cka[4], "1", "14")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: No progress in minority ...\n")
done0 := make(chan bool)
done1 := make(chan bool)
go func() {
cka[0].Put("1", "15")
done0 <- true
}()
go func() {
cka[1].Get("1")
done1 <- true
}()
select {
case <-done0:
t.Fatalf("Put in minority completed")
case <-done1:
t.Fatalf("Get in minority completed")
case <-time.After(time.Second):
}
check(t, cka[4], "1", "14")
cka[3].Put("1", "16")
check(t, cka[4], "1", "16")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Completion after heal ...\n")
part(t, tag, nservers, []int{0, 2, 3, 4}, []int{1}, []int{})
select {
case <-done0:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Put did not complete")
}
select {
case <-done1:
t.Fatalf("Get in minority completed")
default:
}
check(t, cka[4], "1", "15")
check(t, cka[0], "1", "15")
part(t, tag, nservers, []int{0, 1, 2}, []int{3, 4}, []int{})
select {
case <-done1:
case <-time.After(100 * 100 * time.Millisecond):
t.Fatalf("Get did not complete")
}
check(t, cka[1], "1", "15")
fmt.Printf(" ... Passed\n")
}
func randclerk(kvh []string) *Clerk {
sa := make([]string, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return MakeClerk(sa)
}
// check that all known appends are present in a value,
// and are in order for each concurrent client.
func checkAppends(t *testing.T, v string, counts []int) {
nclients := len(counts)
for i := 0; i < nclients; i++ {
lastoff := -1
for j := 0; j < counts[i]; j++ {
wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("missing element in Append result")
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element in Append result")
}
if off <= lastoff {
t.Fatalf("wrong order for element in Append result")
}
lastoff = off
}
}
}
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var kva []*KVPaxos = make([]*KVPaxos, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(kva)
for i := 0; i < nservers; i++ {
kvh[i] = port("un", i)
}
for i := 0; i < nservers; i++ {
kva[i] = StartServer(kvh, i)
kva[i].setunreliable(true)
}
ck := MakeClerk(kvh)
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{kvh[i]})
}
fmt.Printf("Test: Basic put/get, unreliable ...\n")
ck.Put("a", "aa")
check(t, ck, "a", "aa")
cka[1].Put("a", "aaa")
check(t, cka[2], "a", "aaa")
check(t, cka[1], "a", "aaa")
check(t, ck, "a", "aaa")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Sequence of puts, unreliable ...\n")
for iters := 0; iters < 6; iters++ {
const ncli = 5
var ca [ncli]chan bool
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go func(me int) {
ok := false
defer func() { ca[me] <- ok }()
myck := randclerk(kvh)
key := strconv.Itoa(me)
vv := myck.Get(key)
myck.Append(key, "0")
vv = NextValue(vv, "0")
myck.Append(key, "1")
vv = NextValue(vv, "1")
myck.Append(key, "2")
vv = NextValue(vv, "2")
time.Sleep(100 * time.Millisecond)
if myck.Get(key) != vv {
t.Fatalf("wrong value")
}
if myck.Get(key) != vv {
t.Fatalf("wrong value")
}
ok = true
}(cli)
}
for cli := 0; cli < ncli; cli++ {
x := <-ca[cli]
if x == false {
t.Fatalf("failure")
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent clients, unreliable ...\n")
for iters := 0; iters < 20; iters++ {
const ncli = 15
var ca [ncli]chan bool
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go func(me int) {
defer func() { ca[me] <- true }()
myck := randclerk(kvh)
if (rand.Int() % 1000) < 500 {
myck.Put("b", strconv.Itoa(rand.Int()))
} else {
myck.Get("b")
}
}(cli)
}
for cli := 0; cli < ncli; cli++ {
<-ca[cli]
}
var va [nservers]string
for i := 0; i < nservers; i++ {
va[i] = cka[i].Get("b")
if va[i] != va[0] {
t.Fatalf("mismatch; 0 got %v, %v got %v", va[0], i, va[i])
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n")
ck.Put("k", "")
ff := func(me int, ch chan int) {
ret := -1
defer func() { ch <- ret }()
myck := randclerk(kvh)
n := 0
for n < 5 {
myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y")
n++
}
ret = n
}
ncli := 5
cha := []chan int{}
for i := 0; i < ncli; i++ {
cha = append(cha, make(chan int))
go ff(i, cha[i])
}
counts := []int{}
for i := 0; i < ncli; i++ {
n := <-cha[i]
if n < 0 {
t.Fatal("client failed")
}
counts = append(counts, n)
}
vx := ck.Get("k")
checkAppends(t, vx, counts)
{
for i := 0; i < nservers; i++ {
vi := cka[i].Get("k")
if vi != vx {
t.Fatalf("mismatch; 0 got %v, %v got %v", vx, i, vi)
}
}
}
fmt.Printf(" ... Passed\n")
time.Sleep(1 * time.Second)
}
func TestHole(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Tolerates holes in paxos sequence ...\n")
tag := "hole"
const nservers = 5
var kva []*KVPaxos = make([]*KVPaxos, nservers)
defer cleanup(kva)
defer cleanpp(tag, nservers)
for i := 0; i < nservers; i++ {
var kvh []string = make([]string, nservers)
for j := 0; j < nservers; j++ {
if j == i {
kvh[j] = port(tag, i)
} else {
kvh[j] = pp(tag, i, j)
}
}
kva[i] = StartServer(kvh, i)
}
defer part(t, tag, nservers, []int{}, []int{}, []int{})
for iters := 0; iters < 5; iters++ {
part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{})
ck2 := MakeClerk([]string{port(tag, 2)})
ck2.Put("q", "q")
done := int32(0)
const nclients = 10
var ca [nclients]chan bool
for xcli := 0; xcli < nclients; xcli++ {
ca[xcli] = make(chan bool)
go func(cli int) {
ok := false
defer func() { ca[cli] <- ok }()
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{port(tag, i)})
}
key := strconv.Itoa(cli)
last := ""
cka[0].Put(key, last)
for atomic.LoadInt32(&done) == 0 {
ci := (rand.Int() % 2)
if (rand.Int() % 1000) < 500 {
nv := strconv.Itoa(rand.Int())
cka[ci].Put(key, nv)
last = nv
} else {
v := cka[ci].Get(key)
if v != last {
t.Fatalf("%v: wrong value, key %v, wanted %v, got %v",
cli, key, last, v)
}
}
}
ok = true
}(xcli)
}
time.Sleep(3 * time.Second)
part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{})
// can majority partition make progress even though
// minority servers were interrupted in the middle of
// paxos agreements?
check(t, ck2, "q", "q")
ck2.Put("q", "qq")
check(t, ck2, "q", "qq")
// restore network, wait for all threads to exit.
part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{})
atomic.StoreInt32(&done, 1)
ok := true
for i := 0; i < nclients; i++ {
z := <-ca[i]
ok = ok && z
}
if ok == false {
t.Fatal("something is wrong")
}
check(t, ck2, "q", "qq")
}
fmt.Printf(" ... Passed\n")
}
func TestManyPartition(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Many clients, changing partitions ...\n")
tag := "many"
const nservers = 5
var kva []*KVPaxos = make([]*KVPaxos, nservers)
defer cleanup(kva)
defer cleanpp(tag, nservers)
for i := 0; i < nservers; i++ {
var kvh []string = make([]string, nservers)
for j := 0; j < nservers; j++ {
if j == i {
kvh[j] = port(tag, i)
} else {
kvh[j] = pp(tag, i, j)
}
}
kva[i] = StartServer(kvh, i)
kva[i].setunreliable(true)
}
defer part(t, tag, nservers, []int{}, []int{}, []int{})
part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{})
done := int32(0)
// re-partition periodically
ch1 := make(chan bool)
go func() {
defer func() { ch1 <- true }()
for atomic.LoadInt32(&done) == 0 {
var a [nservers]int
for i := 0; i < nservers; i++ {
a[i] = (rand.Int() % 3)
}
pa := make([][]int, 3)
for i := 0; i < 3; i++ {
pa[i] = make([]int, 0)
for j := 0; j < nservers; j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
part(t, tag, nservers, pa[0], pa[1], pa[2])
time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond)
}
}()
const nclients = 10
var ca [nclients]chan bool
for xcli := 0; xcli < nclients; xcli++ {
ca[xcli] = make(chan bool)
go func(cli int) {
ok := false
defer func() { ca[cli] <- ok }()
sa := make([]string, nservers)
for i := 0; i < nservers; i++ {
sa[i] = port(tag, i)
}
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
myck := MakeClerk(sa)
key := strconv.Itoa(cli)
last := ""
myck.Put(key, last)
for atomic.LoadInt32(&done) == 0 {
if (rand.Int() % 1000) < 500 {
nv := strconv.Itoa(rand.Int())
myck.Append(key, nv)
last = NextValue(last, nv)
} else {
v := myck.Get(key)
if v != last {
t.Fatalf("%v: get wrong value, key %v, wanted %v, got %v",
cli, key, last, v)
}
}
}
ok = true
}(xcli)
}
time.Sleep(20 * time.Second)
atomic.StoreInt32(&done, 1)
<-ch1
part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{})
ok := true
for i := 0; i < nclients; i++ {
z := <-ca[i]
ok = ok && z
}
if ok {
fmt.Printf(" ... Passed\n")
}
}

110
src/kvraft/client.go Normal file

@@ -0,0 +1,110 @@
package raftkv
import "labrpc"
import "crypto/rand"
import (
"math/big"
"time"
"fmt"
"strconv"
)
type Clerk struct {
servers []*labrpc.ClientEnd
// You will have to modify this struct.
id int
cnt int
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// You'll have to add code here.
fmt.Println("MakeClerk")
ck.id = 0
ck.cnt = 0
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("RaftKV.Get", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) Get(key string) string {
value := ""
success := false
for !success {
for i := 0; i < len(ck.servers); i++ {
//fmt.Println("Call", i)
args := GetArgs{Key: key, UUID: strconv.Itoa(ck.id) + "_" + strconv.Itoa(ck.cnt)}
reply := &GetReply{}
ok := ck.servers[i].Call("RaftKV.Get", &args, reply)
if ok && !reply.WrongLeader {
success = true
value = reply.Value
if ck.id == 0 {
ck.id = reply.ID
}
break
}
}
time.Sleep(time.Millisecond * 10)
}
ck.cnt += 1
return value
}
//
// shared by Put and Append.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("RaftKV.PutAppend", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) PutAppend(key string, value string, opt string) {
if ck.id == 0 {
// no id assigned yet: issue a dummy Get so Get() records a nonzero id (reply.ID) for this clerk
ck.Get("nobody")
}
fmt.Println(opt, key, value, "--------------------------")
success := false
for !success {
for i := 0; i < len(ck.servers); i++ {
args := PutAppendArgs{Key: key, Opt: opt, Value: value, UUID: strconv.Itoa(ck.id) + "_" + strconv.Itoa(ck.cnt)}
reply := &PutAppendReply{}
ok := ck.servers[i].Call("RaftKV.PutAppend", &args, reply)
if ok && !reply.WrongLeader {
success = true
fmt.Println(opt, key, value, "success")
}
}
time.Sleep(time.Millisecond * 200)
}
ck.cnt += 1
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

36
src/kvraft/common.go Normal file

@@ -0,0 +1,36 @@
package raftkv
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
)
type Err string
// Put or Append
type PutAppendArgs struct {
// You'll have to add definitions here.
Key string
Value string
Opt string
UUID string
}
type PutAppendReply struct {
WrongLeader bool
Err Err
ID int
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
UUID string
}
type GetReply struct {
WrongLeader bool
Err Err
Value string
ID int
}

346
src/kvraft/config.go Normal file

@@ -0,0 +1,346 @@
package raftkv
import "labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "raft"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
tag string
net *labrpc.Network
n int
kvservers []*RaftKV
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.kvservers); i++ {
if cfg.kvservers[i] != nil {
cfg.kvservers[i].Kill()
}
}
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.kvservers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.kvservers[i] = nil
}
}
// To restart a server, first call ShutdownServer
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartKVServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate)
kvsvc := labrpc.MakeService(cfg.kvservers[i])
rfsvc := labrpc.MakeService(cfg.kvservers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
_, is_leader := cfg.kvservers[i].rf.GetState()
if is_leader {
return true, i
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
func make_config(t *testing.T, tag string, n int, unreliable bool, maxraftstate int) *config {
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.tag = tag
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.kvservers = make([]*RaftKV, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.maxraftstate = maxraftstate
// create a full set of KV servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}

193
src/kvraft/server.go Normal file

@@ -0,0 +1,193 @@
package raftkv
import (
"encoding/gob"
"labrpc"
"log"
"raft"
"sync"
"fmt"
"time"
)
const Debug = 1
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug > 0 {
log.Printf(format, a...)
}
return
}
type Op struct {
Opt string
Key string
Value string
UUID string
}
type RaftKV struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
maxraftstate int // snapshot if log grows this big
// Your definitions here.
currentIndex int
}
func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) {
kv.mu.Lock()
defer kv.mu.Unlock()
op := Op{Opt:"Get", Key:args.Key, UUID:args.UUID}
index, _, isLeader := kv.rf.Start(op)
reply.WrongLeader = true
if isLeader {
fmt.Println(kv.me, "Get", args.Key, "")
/* wait until success */
cnt := 0
for {
//fmt.Println(kv.me, "waiting", index)
cnt += 1
if cnt > 30 {
break
}
if kv.currentIndex == index {
fmt.Println("Get success at index", index)
_, logs, commitIndex := kv.rf.GetState2()
db := make(map[string]string)
UUIDs := make(map[string]int)
fmt.Println("logs...")
for i := 0; i < commitIndex; i++ {
op := logs[i]["command"].(Op)
fmt.Println(i, "=>", op)
/* check duplicates */
if op.Opt != "Get" && UUIDs[op.UUID] > 0 {
fmt.Println("skip", op)
continue
}
UUIDs[op.UUID] += 1
switch op.Opt {
case "Get":
break
case "Put":
db[op.Key] = op.Value
break
case "Append":
db[op.Key] = db[op.Key] + op.Value
break
}
}
fmt.Println("logs end...")
fmt.Println(kv.me, "Get", args.Key, "value:", db[args.Key])
reply.WrongLeader = false
reply.Value = db[args.Key]
reply.ID = index
break
}
time.Sleep(time.Millisecond * 10)
}
}
}
func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
kv.mu.Lock()
defer kv.mu.Unlock()
op := Op{Opt: args.Opt, Key: args.Key, Value: args.Value, UUID: args.UUID}
index, _, isLeader := kv.rf.Start(op)
cnt := 0
reply.WrongLeader = true
if isLeader {
fmt.Println(kv.me, args.Opt, args.Key, args.Value)
/* wait until success */
for {
//fmt.Println(kv.me, "waiting", index)
cnt += 1
if cnt > 500 {
break
}
//fmt.Println("currentIndex:", kv.currentIndex)
if kv.currentIndex >= index {
_, logs, _ := kv.rf.GetState2()
tmp := logs[index - 1]["command"].(Op)
if tmp.Opt == op.Opt && tmp.Key == op.Key && tmp.Value == op.Value {
fmt.Println(kv.me, args.Opt, args.Key, "success at index", index)
reply.WrongLeader = false
reply.ID = index
}
break
}
time.Sleep(time.Millisecond * 10)
}
}
}
//
// the tester calls Kill() when a RaftKV instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (kv *RaftKV) Kill() {
kv.rf.Kill()
// Your code here, if desired.
fmt.Println(kv.me, "Killed")
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant key/value service.
// me is the index of the current server in servers[].
// the k/v server should store snapshots with persister.SaveSnapshot(),
// and Raft should save its state (including log) with persister.SaveRaftState().
// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes,
// in order to allow Raft to garbage-collect its log. if maxraftstate is -1,
// you don't need to snapshot.
// StartKVServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *RaftKV {
// call gob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
gob.Register(Op{})
kv := new(RaftKV)
kv.me = me
kv.maxraftstate = maxraftstate
// Your initialization code here.
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
fmt.Println(kv.me, "StartKVServer")
go func() {
//fmt.Println("start sub")
for {
msg := <-kv.applyCh
if msg.Index > kv.currentIndex {
kv.currentIndex = msg.Index
}
//fmt.Println(kv.me, msg)
}
//fmt.Println("finish")
}()
return kv
}
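As written, every Get replays the whole committed log via rf.GetState2() while holding kv.mu, and the applyCh goroutine above only records the newest index. A common alternative is to keep the key/value map inside the apply loop itself, so handlers only wait until their index has been applied. The sketch below illustrates that shape; it assumes raft.ApplyMsg exposes the committed command as msg.Command (as in the stock lab framework) and leaves out the synchronization the real handlers would need in order to read db.

// applyLoopSketch: an alternative to replaying the log on each Get.
// Assumption: ApplyMsg carries Index and Command fields. Sketch only.
func (kv *RaftKV) applyLoopSketch() {
	db := make(map[string]string) // the materialized key/value state
	seen := make(map[string]bool) // UUIDs already applied (duplicate filtering)
	for msg := range kv.applyCh {
		op, ok := msg.Command.(Op)
		if !ok {
			continue // ignore anything that isn't a client Op
		}
		if op.Opt != "Get" && !seen[op.UUID] {
			seen[op.UUID] = true
			switch op.Opt {
			case "Put":
				db[op.Key] = op.Value
			case "Append":
				db[op.Key] += op.Value
			}
		}
		// handlers poll this, as in the goroutine above; real code would
		// also need a lock (or a per-index channel) before reading db.
		if msg.Index > kv.currentIndex {
			kv.currentIndex = msg.Index
		}
	}
}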

397
src/kvraft/test_test.go Normal file

@@ -0,0 +1,397 @@
package raftkv
import "testing"
import "strconv"
import "time"
import "fmt"
import "math/rand"
import "log"
import "strings"
import "sync/atomic"
// The tester generously allows solutions to complete elections in one second
// (much more than the paper's range of timeouts).
const electionTimeout = 1 * time.Second
func check(t *testing.T, ck *Clerk, key string, value string) {
v := ck.Get(key)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
// a client runs the function f and then signals it is done
func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) {
ok := false
defer func() { ca <- ok }()
ck := cfg.makeClient(cfg.All())
fn(me, ck, t)
ok = true
cfg.deleteClient(ck)
}
// spawn ncli clients and wait until they are all done
func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) {
ca := make([]chan bool, ncli)
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go run_client(t, cfg, cli, ca[cli], fn)
}
// log.Printf("spawn_clients_and_wait: waiting for clients")
for cli := 0; cli < ncli; cli++ {
ok := <-ca[cli]
// log.Printf("spawn_clients_and_wait: client %d is done\n", cli)
if ok == false {
t.Fatalf("failure")
}
}
}
// predict effect of Append(k, val) if old value is prev.
func NextValue(prev string, val string) string {
return prev + val
}
// check that for a specific client all known appends are present in a value,
// and in order
func checkClntAppends(t *testing.T, clnt int, v string, count int) {
lastoff := -1
for j := 0; j < count; j++ {
wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
fmt.Printf("off1 %v off %v\n", off1, off)
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
// check that all known appends are present in a value,
// and are in order for each concurrent client.
func checkConcurrentAppends(t *testing.T, v string, counts []int) {
nclients := len(counts)
for i := 0; i < nclients; i++ {
lastoff := -1
for j := 0; j < counts[i]; j++ {
wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", i, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
}
// repartition the servers periodically
func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) {
defer func() { ch <- true }()
for atomic.LoadInt32(done) == 0 {
a := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
a[i] = (rand.Int() % 2)
}
pa := make([][]int, 2)
for i := 0; i < 2; i++ {
pa[i] = make([]int, 0)
for j := 0; j < cfg.n; j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
cfg.partition(pa[0], pa[1])
time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
}
}
// Basic test is as follows: one or more clients submitting Append/Get
// operations to a set of servers for some period of time. After the period is
// over, the test checks that all appended values are present and in order for a
// particular key. If unreliable is set, RPCs may fail. If crash is set, the
// servers crash after the period is over and restart. If partitions is set,
// the test repartitions the network concurrently with the clients and servers. If
// maxraftstate is a positive number, the size of the state for Raft (i.e., log
// size) shouldn't exceed 2*maxraftstate.
func GenericTest(t *testing.T, tag string, nclients int, unreliable bool, crash bool, partitions bool, maxraftstate int) {
const nservers = 5
cfg := make_config(t, tag, nservers, unreliable, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
done_partitioner := int32(0)
done_clients := int32(0)
ch_partitioner := make(chan bool)
clnts := make([]chan int, nclients)
for i := 0; i < nclients; i++ {
clnts[i] = make(chan int)
}
for i := 0; i < 3; i++ {
// log.Printf("Iteration %v\n", i)
atomic.StoreInt32(&done_clients, 0)
atomic.StoreInt32(&done_partitioner, 0)
go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) {
j := 0
defer func() {
clnts[cli] <- j
}()
last := ""
key := strconv.Itoa(cli)
myck.Put(key, last)
for atomic.LoadInt32(&done_clients) == 0 {
if (rand.Int() % 1000) < 500 {
nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y"
// log.Printf("%d: client new append %v\n", cli, nv)
myck.Append(key, nv)
last = NextValue(last, nv)
j++
} else {
// log.Printf("%d: client new get %v\n", cli, key)
v := myck.Get(key)
if v != last {
log.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v)
}
}
}
})
if partitions {
// Allow the clients to perform some operations without interruption
time.Sleep(1 * time.Second)
go partitioner(t, cfg, ch_partitioner, &done_partitioner)
}
time.Sleep(5 * time.Second)
atomic.StoreInt32(&done_clients, 1) // tell clients to quit
atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit
if partitions {
// log.Printf("wait for partitioner\n")
<-ch_partitioner
// reconnect network and submit a request. A client may
// have submitted a request in a minority. That request
// won't return until that server discovers a new term
// has started.
cfg.ConnectAll()
// wait for a while so that we have a new term
time.Sleep(electionTimeout)
}
if crash {
// log.Printf("shutdown servers\n")
for i := 0; i < nservers; i++ {
cfg.ShutdownServer(i)
}
// Wait for a while for servers to shut down, since
// shutdown isn't a real crash and isn't instantaneous
time.Sleep(electionTimeout)
// log.Printf("restart servers\n")
// crash and re-start all
for i := 0; i < nservers; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
}
// log.Printf("wait for clients\n")
for i := 0; i < nclients; i++ {
// log.Printf("read from clients %d\n", i)
j := <-clnts[i]
if j < 10 {
log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j)
}
key := strconv.Itoa(i)
// log.Printf("Check %v for client %d\n", j, i)
v := ck.Get(key)
checkClntAppends(t, i, v, j)
}
if maxraftstate > 0 {
// Check maximum after the servers have processed all client
// requests and had time to checkpoint
if cfg.LogSize() > 2*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate)
}
}
}
fmt.Printf(" ... Passed\n")
}
func TestBasic(t *testing.T) {
fmt.Printf("Test: One client ...\n")
GenericTest(t, "basic", 1, false, false, false, -1)
}
func TestConcurrent(t *testing.T) {
fmt.Printf("Test: concurrent clients ...\n")
GenericTest(t, "concur", 5, false, false, false, -1)
}
func TestUnreliable(t *testing.T) {
fmt.Printf("Test: unreliable ...\n")
GenericTest(t, "unreliable", 5, true, false, false, -1)
}
func TestUnreliableOneKey(t *testing.T) {
const nservers = 3
cfg := make_config(t, "onekey", nservers, true, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n")
ck.Put("k", "")
const nclient = 5
const upto = 10
spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) {
n := 0
for n < upto {
myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y")
n++
}
})
var counts []int
for i := 0; i < nclient; i++ {
counts = append(counts, upto)
}
vx := ck.Get("k")
checkConcurrentAppends(t, vx, counts)
fmt.Printf(" ... Passed\n")
}
// Submit a request in the minority partition and check that the request
// doesn't go through until the partition heals. The leader in the original
// network ends up in the minority partition.
func TestOnePartition(t *testing.T) {
const nservers = 5
cfg := make_config(t, "partition", nservers, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
ck.Put("1", "13")
fmt.Printf("Test: Progress in majority ...\n")
p1, p2 := cfg.make_partition()
cfg.partition(p1, p2)
ckp1 := cfg.makeClient(p1) // connect ckp1 to p1
ckp2a := cfg.makeClient(p2) // connect ckp2a to p2
ckp2b := cfg.makeClient(p2) // connect ckp2b to p2
ckp1.Put("1", "14")
check(t, ckp1, "1", "14")
fmt.Printf(" ... Passed\n")
done0 := make(chan bool)
done1 := make(chan bool)
fmt.Printf("Test: No progress in minority ...\n")
go func() {
ckp2a.Put("1", "15")
done0 <- true
}()
go func() {
ckp2b.Get("1") // different clerk in p2
done1 <- true
}()
select {
case <-done0:
t.Fatalf("Put in minority completed")
case <-done1:
t.Fatalf("Get in minority completed")
case <-time.After(time.Second):
}
check(t, ckp1, "1", "14")
ckp1.Put("1", "16")
check(t, ckp1, "1", "16")
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Completion after heal ...\n")
cfg.ConnectAll()
cfg.ConnectClient(ckp2a, cfg.All())
cfg.ConnectClient(ckp2b, cfg.All())
time.Sleep(electionTimeout)
select {
case <-done0:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Put did not complete")
}
select {
case <-done1:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Get did not complete")
default:
}
check(t, ck, "1", "15")
fmt.Printf(" ... Passed\n")
}
func TestManyPartitionsOneClient(t *testing.T) {
fmt.Printf("Test: many partitions ...\n")
GenericTest(t, "manypartitions", 1, false, false, true, -1)
}
func TestManyPartitionsManyClients(t *testing.T) {
fmt.Printf("Test: many partitions, many clients ...\n")
GenericTest(t, "manypartitionsclnts", 5, false, false, true, -1)
}
func TestPersistOneClient(t *testing.T) {
fmt.Printf("Test: persistence with one client ...\n")
GenericTest(t, "persistone", 1, false, true, false, -1)
}
func TestPersistConcurrent(t *testing.T) {
fmt.Printf("Test: persistence with concurrent clients ...\n")
GenericTest(t, "persistconcur", 5, false, true, false, -1)
}
func TestPersistConcurrentUnreliable(t *testing.T) {
fmt.Printf("Test: persistence with concurrent clients, unreliable ...\n")
GenericTest(t, "persistconcurunreliable", 5, true, true, false, -1)
}
func TestPersistPartition(t *testing.T) {
fmt.Printf("Test: persistence with concurrent clients and repartitioning servers...\n")
GenericTest(t, "persistpart", 5, false, true, true, -1)
}
func TestPersistPartitionUnreliable(t *testing.T) {
fmt.Printf("Test: persistence with concurrent clients and repartitioning servers, unreliable...\n")
GenericTest(t, "persistpartunreliable", 5, true, true, true, -1)
}

458
src/labrpc/labrpc.go Normal file

@@ -0,0 +1,458 @@
package labrpc
//
// channel-based RPC, for 824 labs.
// allows tests to disconnect RPC connections.
//
// we will use the original labrpc.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test against the original before submitting.
//
// adapted from Go net/rpc/server.go.
//
// sends gob-encoded values to ensure that RPCs
// don't include references to program objects.
//
// net := MakeNetwork() -- holds network, clients, servers.
// end := net.MakeEnd(endname) -- create a client end-point, to talk to one server.
// net.AddServer(servername, server) -- adds a named server to network.
// net.DeleteServer(servername) -- eliminate the named server.
// net.Connect(endname, servername) -- connect a client to a server.
// net.Enable(endname, enabled) -- enable/disable a client.
// net.Reliable(bool) -- false means drop/delay messages
//
// end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply.
// the "Raft" is the name of the server struct to be called.
// the "AppendEntries" is the name of the method to be called.
// Call() returns true to indicate that the server executed the request
// and the reply is valid.
// Call() returns false if the network lost the request or reply
// or the server is down.
// It is OK to have multiple Call()s in progress at the same time on the
// same ClientEnd.
// Concurrent calls to Call() may be delivered to the server out of order,
// since the network may re-order messages.
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return. That is, there
// is no need to implement your own timeouts around Call().
// the server RPC handler function must declare its args and reply arguments
// as pointers, so that their types exactly match the types of the arguments
// to Call().
//
// srv := MakeServer()
// srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v
// pass srv to net.AddServer()
//
// svc := MakeService(receiverObject) -- obj's methods will handle RPCs
// much like Go's rpc.Register()
// pass svc to srv.AddService()
//
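// a concrete (hedged) sketch of the calls above -- the KVServer, Raft,
// GetArgs and GetReply types are made up for illustration; only the
// labrpc calls themselves are real:
//
//   net := MakeNetwork()
//   srv := MakeServer()
//   srv.AddService(MakeService(&KVServer{})) // service name becomes "KVServer"
//   srv.AddService(MakeService(&Raft{}))     // a second service on the same server
//   net.AddServer("server0", srv)
//   end := net.MakeEnd("client0")
//   net.Connect("client0", "server0")
//   net.Enable("client0", true)
//   var reply GetReply
//   ok := end.Call("KVServer.Get", &GetArgs{}, &reply)
//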
import "encoding/gob"
import "bytes"
import "reflect"
import "sync"
import "log"
import "strings"
import "math/rand"
import "time"
type reqMsg struct {
endname interface{} // name of sending ClientEnd
svcMeth string // e.g. "Raft.AppendEntries"
argsType reflect.Type
args []byte
replyCh chan replyMsg
}
type replyMsg struct {
ok bool
reply []byte
}
type ClientEnd struct {
endname interface{} // this end-point's name
ch chan reqMsg // copy of Network.endCh
}
// send an RPC, wait for the reply.
// the return value indicates success; false means the
// server couldn't be contacted.
func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool {
req := reqMsg{}
req.endname = e.endname
req.svcMeth = svcMeth
req.argsType = reflect.TypeOf(args)
req.replyCh = make(chan replyMsg)
qb := new(bytes.Buffer)
qe := gob.NewEncoder(qb)
qe.Encode(args)
req.args = qb.Bytes()
e.ch <- req
rep := <-req.replyCh
if rep.ok {
rb := bytes.NewBuffer(rep.reply)
rd := gob.NewDecoder(rb)
if err := rd.Decode(reply); err != nil {
log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err)
}
return true
} else {
return false
}
}
type Network struct {
mu sync.Mutex
reliable bool
longDelays bool // pause a long time on send to a disabled connection
longReordering bool // sometimes delay replies a long time
ends map[interface{}]*ClientEnd // ends, by name
enabled map[interface{}]bool // by end name
servers map[interface{}]*Server // servers, by name
connections map[interface{}]interface{} // endname -> servername
endCh chan reqMsg
}
func MakeNetwork() *Network {
rn := &Network{}
rn.reliable = true
rn.ends = map[interface{}]*ClientEnd{}
rn.enabled = map[interface{}]bool{}
rn.servers = map[interface{}]*Server{}
rn.connections = map[interface{}](interface{}){}
rn.endCh = make(chan reqMsg)
// single goroutine to handle all ClientEnd.Call()s
go func() {
for xreq := range rn.endCh {
go rn.ProcessReq(xreq)
}
}()
return rn
}
func (rn *Network) Reliable(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.reliable = yes
}
func (rn *Network) LongReordering(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longReordering = yes
}
func (rn *Network) LongDelays(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longDelays = yes
}
func (rn *Network) ReadEndnameInfo(endname interface{}) (enabled bool,
servername interface{}, server *Server, reliable bool, longreordering bool,
) {
rn.mu.Lock()
defer rn.mu.Unlock()
enabled = rn.enabled[endname]
servername = rn.connections[endname]
if servername != nil {
server = rn.servers[servername]
}
reliable = rn.reliable
longreordering = rn.longReordering
return
}
func (rn *Network) IsServerDead(endname interface{}, servername interface{}, server *Server) bool {
rn.mu.Lock()
defer rn.mu.Unlock()
if rn.enabled[endname] == false || rn.servers[servername] != server {
return true
}
return false
}
func (rn *Network) ProcessReq(req reqMsg) {
enabled, servername, server, reliable, longreordering := rn.ReadEndnameInfo(req.endname)
if enabled && servername != nil && server != nil {
if reliable == false {
// short delay
ms := (rand.Int() % 27)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
if reliable == false && (rand.Int()%1000) < 100 {
// drop the request, return as if timeout
req.replyCh <- replyMsg{false, nil}
return
}
// execute the request (call the RPC handler).
// in a separate thread so that we can periodically check
// if the server has been killed and the RPC should get a
// failure reply.
ech := make(chan replyMsg)
go func() {
r := server.dispatch(req)
ech <- r
}()
// wait for handler to return,
// but stop waiting if DeleteServer() has been called,
// and return an error.
var reply replyMsg
replyOK := false
serverDead := false
for replyOK == false && serverDead == false {
select {
case reply = <-ech:
replyOK = true
case <-time.After(100 * time.Millisecond):
serverDead = rn.IsServerDead(req.endname, servername, server)
}
}
// do not reply if DeleteServer() has been called, i.e.
// the server has been killed. this is needed to avoid
// situation in which a client gets a positive reply
// to an Append, but the server persisted the update
// into the old Persister. config.go is careful to call
// DeleteServer() before superseding the Persister.
serverDead = rn.IsServerDead(req.endname, servername, server)
if replyOK == false || serverDead == true {
// server was killed while we were waiting; return error.
req.replyCh <- replyMsg{false, nil}
} else if reliable == false && (rand.Int()%1000) < 100 {
// drop the reply, return as if timeout
req.replyCh <- replyMsg{false, nil}
} else if longreordering == true && rand.Intn(900) < 600 {
// delay the response for a while
ms := 200 + rand.Intn(1+rand.Intn(2000))
time.Sleep(time.Duration(ms) * time.Millisecond)
req.replyCh <- reply
} else {
req.replyCh <- reply
}
} else {
// simulate no reply and eventual timeout.
ms := 0
if rn.longDelays {
// let Raft tests check that leader doesn't send
// RPCs synchronously.
ms = (rand.Int() % 7000)
} else {
// many kv tests require the client to try each
// server in fairly rapid succession.
ms = (rand.Int() % 100)
}
time.Sleep(time.Duration(ms) * time.Millisecond)
req.replyCh <- replyMsg{false, nil}
}
}
// create a client end-point.
// start the thread that listens and delivers.
func (rn *Network) MakeEnd(endname interface{}) *ClientEnd {
rn.mu.Lock()
defer rn.mu.Unlock()
if _, ok := rn.ends[endname]; ok {
log.Fatalf("MakeEnd: %v already exists\n", endname)
}
e := &ClientEnd{}
e.endname = endname
e.ch = rn.endCh
rn.ends[endname] = e
rn.enabled[endname] = false
rn.connections[endname] = nil
return e
}
func (rn *Network) AddServer(servername interface{}, rs *Server) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = rs
}
func (rn *Network) DeleteServer(servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = nil
}
// connect a ClientEnd to a server.
// a ClientEnd can only be connected once in its lifetime.
func (rn *Network) Connect(endname interface{}, servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.connections[endname] = servername
}
// enable/disable a ClientEnd.
func (rn *Network) Enable(endname interface{}, enabled bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.enabled[endname] = enabled
}
// get a server's count of incoming RPCs.
func (rn *Network) GetCount(servername interface{}) int {
rn.mu.Lock()
defer rn.mu.Unlock()
svr := rn.servers[servername]
return svr.GetCount()
}
//
// a server is a collection of services, all sharing
// the same rpc dispatcher. so that e.g. both a Raft
// and a k/v server can listen to the same rpc endpoint.
//
type Server struct {
mu sync.Mutex
services map[string]*Service
count int // incoming RPCs
}
func MakeServer() *Server {
rs := &Server{}
rs.services = map[string]*Service{}
return rs
}
func (rs *Server) AddService(svc *Service) {
rs.mu.Lock()
defer rs.mu.Unlock()
rs.services[svc.name] = svc
}
func (rs *Server) dispatch(req reqMsg) replyMsg {
rs.mu.Lock()
rs.count += 1
// split Raft.AppendEntries into service and method
dot := strings.LastIndex(req.svcMeth, ".")
serviceName := req.svcMeth[:dot]
methodName := req.svcMeth[dot+1:]
service, ok := rs.services[serviceName]
rs.mu.Unlock()
if ok {
return service.dispatch(methodName, req)
} else {
choices := []string{}
for k, _ := range rs.services {
choices = append(choices, k)
}
log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n",
serviceName, serviceName, methodName, choices)
return replyMsg{false, nil}
}
}
func (rs *Server) GetCount() int {
rs.mu.Lock()
defer rs.mu.Unlock()
return rs.count
}
// an object with methods that can be called via RPC.
// a single server may have more than one Service.
type Service struct {
name string
rcvr reflect.Value
typ reflect.Type
methods map[string]reflect.Method
}
func MakeService(rcvr interface{}) *Service {
svc := &Service{}
svc.typ = reflect.TypeOf(rcvr)
svc.rcvr = reflect.ValueOf(rcvr)
svc.name = reflect.Indirect(svc.rcvr).Type().Name()
svc.methods = map[string]reflect.Method{}
for m := 0; m < svc.typ.NumMethod(); m++ {
method := svc.typ.Method(m)
mtype := method.Type
mname := method.Name
//fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n",
// mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut())
if method.PkgPath != "" || // capitalized?
mtype.NumIn() != 3 ||
//mtype.In(1).Kind() != reflect.Ptr ||
mtype.In(2).Kind() != reflect.Ptr ||
mtype.NumOut() != 0 {
// the method is not suitable for a handler
//fmt.Printf("bad method: %v\n", mname)
} else {
// the method looks like a handler
svc.methods[mname] = method
}
}
return svc
}
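// MakeService registers a method as an RPC handler only if it has the
// shape the loop above checks for: exported, exactly two arguments
// after the receiver, a pointer reply, and no return values, e.g.
// (hypothetical types):
//
//   func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { ... }
//
// the args parameter may be a pointer or a value, but reply must be a
// pointer.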
func (svc *Service) dispatch(methname string, req reqMsg) replyMsg {
if method, ok := svc.methods[methname]; ok {
// prepare space into which to read the argument.
// the Value's type will be a pointer to req.argsType.
args := reflect.New(req.argsType)
// decode the argument.
ab := bytes.NewBuffer(req.args)
ad := gob.NewDecoder(ab)
ad.Decode(args.Interface())
// allocate space for the reply.
replyType := method.Type.In(2)
replyType = replyType.Elem()
replyv := reflect.New(replyType)
// call the method.
function := method.Func
function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv})
// encode the reply.
rb := new(bytes.Buffer)
re := gob.NewEncoder(rb)
re.EncodeValue(replyv)
return replyMsg{true, rb.Bytes()}
} else {
choices := []string{}
for k, _ := range svc.methods {
choices = append(choices, k)
}
log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n",
methname, req.svcMeth, choices)
return replyMsg{false, nil}
}
}

518
src/labrpc/test_test.go Normal file
View File

@ -0,0 +1,518 @@
package labrpc
import "testing"
import "strconv"
import "sync"
import "runtime"
import "time"
import "fmt"
type JunkArgs struct {
X int
}
type JunkReply struct {
X string
}
type JunkServer struct {
mu sync.Mutex
log1 []string
log2 []int
}
func (js *JunkServer) Handler1(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
js.log1 = append(js.log1, args)
*reply, _ = strconv.Atoi(args)
}
func (js *JunkServer) Handler2(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
js.log2 = append(js.log2, args)
*reply = "handler2-" + strconv.Itoa(args)
}
func (js *JunkServer) Handler3(args int, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
time.Sleep(20 * time.Second)
*reply = -args
}
// args is a pointer
func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) {
reply.X = "pointer"
}
// args is not a pointer
func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) {
reply.X = "no pointer"
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
func TestTypes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler4", &args, &reply)
if reply.X != "pointer" {
t.Fatalf("wrong reply from Handler4")
}
}
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler5", args, &reply)
if reply.X != "no pointer" {
t.Fatalf("wrong reply from Handler5")
}
}
}
//
// does net.Enable(endname, false) really disconnect a client?
//
func TestDisconnect(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "" {
t.Fatalf("unexpected reply from Handler2")
}
}
rn.Enable("end1-99", true)
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
//
// test net.GetCount()
//
func TestCounts(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
reply := ""
e.Call("JunkServer.Handler2", i, &reply)
wanted := "handler2-" + strconv.Itoa(i)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
}
n := rn.GetCount(99)
if n != 17 {
t.Fatalf("wrong GetCount() %v, expected 17\n", n)
}
}
//
// test RPCs from concurrent ClientEnds
//
func TestConcurrentMany(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 20
nrpcs := 10
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
for j := 0; j < nrpcs; j++ {
arg := i*100 + j
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total != nclients*nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs)
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// test unreliable
//
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
rn.Reliable(false)
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 300
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
arg := i * 100
reply := ""
ok := e.Call("JunkServer.Handler2", arg, &reply)
if ok {
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total == nclients || total == 0 {
t.Fatalf("all RPCs succeeded despite unreliable")
}
}
//
// test concurrent RPCs from a single ClientEnd
//
func TestConcurrentOne(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
rn.Enable("c", true)
ch := make(chan int)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
arg := 100 + i
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
n += 1
}(ii)
}
total := 0
for ii := 0; ii < nrpcs; ii++ {
x := <-ch
total += x
}
if total != nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs)
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != nrpcs {
t.Fatalf("wrong number of RPCs delivered")
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// regression: an RPC that's delayed during Enabled=false
// should not delay subsequent RPCs (e.g. after Enabled=true).
//
func TestRegression1(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
// start some RPCs while the ClientEnd is disabled.
// they'll be delayed.
rn.Enable("c", false)
ch := make(chan bool)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
ok := false
defer func() { ch <- ok }()
arg := 100 + i
reply := ""
// this call ought to return false.
e.Call("JunkServer.Handler2", arg, &reply)
ok = true
}(ii)
}
time.Sleep(100 * time.Millisecond)
// now enable the ClientEnd and check that an RPC completes quickly.
t0 := time.Now()
rn.Enable("c", true)
{
arg := 99
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
}
dur := time.Since(t0).Seconds()
if dur > 0.03 {
t.Fatalf("RPC took too long (%v) after Enable", dur)
}
for ii := 0; ii < nrpcs; ii++ {
<-ch
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != 1 {
t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2))
}
n := rn.GetCount(1000)
if n != 1 {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1)
}
}
//
// if an RPC is stuck in a server, and the server
// is killed with DeleteServer(), does the RPC
// get un-stuck?
//
func TestKilled(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
doneCh := make(chan bool)
go func() {
reply := 0
ok := e.Call("JunkServer.Handler3", 99, &reply)
doneCh <- ok
}()
time.Sleep(1000 * time.Millisecond)
select {
case <-doneCh:
t.Fatalf("Handler3 should not have returned yet")
case <-time.After(100 * time.Millisecond):
}
rn.DeleteServer("server99")
select {
case x := <-doneCh:
if x != false {
t.Fatalf("Handler3 returned successfully despite DeleteServer()")
}
case <-time.After(100 * time.Millisecond):
t.Fatalf("Handler3 should return after DeleteServer()")
}
}
func TestBenchmark(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
t0 := time.Now()
n := 100000
for iters := 0; iters < n; iters++ {
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
fmt.Printf("%v for %v\n", time.Since(t0), n)
// march 2016, rtm laptop, 22 microseconds per RPC
}

93
src/lockservice/client.go Normal file
View File

@ -0,0 +1,93 @@
package lockservice
import "net/rpc"
import "fmt"
//
// the lockservice Clerk lives in the client
// and maintains a little state.
//
type Clerk struct {
servers [2]string // primary port, backup port
// Your definitions here.
}
func MakeClerk(primary string, backup string) *Clerk {
ck := new(Clerk)
ck.servers[0] = primary
ck.servers[1] = backup
// Your initialization code here.
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be the address
// of a reply structure.
//
// call() returns true if the server responded, and false
// if call() was not able to contact the server. in particular,
// reply's contents are valid if and only if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// ask the lock service for a lock.
// returns true if the lock service
// granted the lock, false otherwise.
//
// you will have to modify this function.
//
func (ck *Clerk) Lock(lockname string) bool {
// prepare the arguments.
args := &LockArgs{}
args.Lockname = lockname
var reply LockReply
// send an RPC request, wait for the reply.
ok := call(ck.servers[0], "LockServer.Lock", args, &reply)
if ok == false {
return false
}
return reply.OK
}
//
// ask the lock service to unlock a lock.
// returns true if the lock was previously held,
// false otherwise.
//
func (ck *Clerk) Unlock(lockname string) bool {
// Your code here.
return false
}
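//
// a hedged sketch of what Unlock() could look like, mirroring Lock()
// above (illustration only -- the assignment expects more than this,
// e.g. falling back to the backup server):
//
//   args := &UnlockArgs{}
//   args.Lockname = lockname
//   var reply UnlockReply
//   ok := call(ck.servers[0], "LockServer.Unlock", args, &reply)
//   if ok == false {
//       return false
//   }
//   return reply.OK
//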

33
src/lockservice/common.go Normal file
View File

@ -0,0 +1,33 @@
package lockservice
//
// RPC definitions for a simple lock service.
//
// You will need to modify this file.
//
//
// Lock(lockname) returns OK=true if the lock is not held.
// If it is held, it returns OK=false immediately.
//
type LockArgs struct {
// Go's net/rpc requires that these field
// names start with upper case letters!
Lockname string // lock name
}
type LockReply struct {
OK bool
}
//
// Unlock(lockname) returns OK=true if the lock was held.
// It returns OK=false if the lock was not held.
//
type UnlockArgs struct {
Lockname string
}
type UnlockReply struct {
OK bool
}

159
src/lockservice/server.go Normal file
View File

@ -0,0 +1,159 @@
package lockservice
import "net"
import "net/rpc"
import "log"
import "sync"
import "fmt"
import "os"
import "io"
import "time"
type LockServer struct {
mu sync.Mutex
l net.Listener
dead bool // for test_test.go
dying bool // for test_test.go
am_primary bool // am I the primary?
backup string // backup's port
// for each lock name, is it locked?
locks map[string]bool
}
//
// server Lock RPC handler.
//
// you will have to modify this function
//
func (ls *LockServer) Lock(args *LockArgs, reply *LockReply) error {
ls.mu.Lock()
defer ls.mu.Unlock()
locked, _ := ls.locks[args.Lockname]
if locked {
reply.OK = false
} else {
reply.OK = true
ls.locks[args.Lockname] = true
}
return nil
}
//
// server Unlock RPC handler.
//
func (ls *LockServer) Unlock(args *UnlockArgs, reply *UnlockReply) error {
// Your code here.
return nil
}
//
// tell the server to shut itself down.
// for testing.
// please don't change this.
//
func (ls *LockServer) kill() {
ls.dead = true
ls.l.Close()
}
//
// hack to allow test_test.go to have primary process
// an RPC but not send a reply. can't use the shutdown()
// trick b/c that causes client to immediately get an
// error and send to backup before primary does.
// please don't change anything to do with DeafConn.
//
type DeafConn struct {
c io.ReadWriteCloser
}
func (dc DeafConn) Write(p []byte) (n int, err error) {
return len(p), nil
}
func (dc DeafConn) Close() error {
return dc.c.Close()
}
func (dc DeafConn) Read(p []byte) (n int, err error) {
return dc.c.Read(p)
}
func StartServer(primary string, backup string, am_primary bool) *LockServer {
ls := new(LockServer)
ls.backup = backup
ls.am_primary = am_primary
ls.locks = map[string]bool{}
// Your initialization code here.
me := ""
if am_primary {
me = primary
} else {
me = backup
}
// tell net/rpc about our RPC server and handlers.
rpcs := rpc.NewServer()
rpcs.Register(ls)
// prepare to receive connections from clients.
// change "unix" to "tcp" to use over a network.
os.Remove(me) // only needed for "unix"
l, e := net.Listen("unix", me)
if e != nil {
log.Fatal("listen error: ", e)
}
ls.l = l
// please don't change any of the following code,
// or do anything to subvert it.
// create a thread to accept RPC connections from clients.
go func() {
for ls.dead == false {
conn, err := ls.l.Accept()
if err == nil && ls.dead == false {
if ls.dying {
// process the request but force discard of reply.
// without this the connection is never closed,
// b/c ServeConn() is waiting for more requests.
// test_test.go depends on this two-second delay.
go func() {
time.Sleep(2 * time.Second)
conn.Close()
}()
ls.l.Close()
// this object has the type ServeConn expects,
// but discards writes (i.e. discards the RPC reply).
deaf_conn := DeafConn{c: conn}
rpcs.ServeConn(deaf_conn)
ls.dead = true
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && ls.dead == false {
fmt.Printf("LockServer(%v) accept: %v\n", me, err.Error())
ls.kill()
}
}
}()
return ls
}

View File

@ -0,0 +1,478 @@
package lockservice
import "testing"
import "runtime"
import "math/rand"
import "os"
import "strconv"
import "time"
import "fmt"
func tl(t *testing.T, ck *Clerk, lockname string, expected bool) {
x := ck.Lock(lockname)
if x != expected {
t.Fatalf("Lock(%v) returned %v; expected %v", lockname, x, expected)
}
}
func tu(t *testing.T, ck *Clerk, lockname string, expected bool) {
x := ck.Unlock(lockname)
if x != expected {
t.Fatalf("Unlock(%v) returned %v; expected %v", lockname, x, expected)
}
}
//
// cook up a unique-ish UNIX-domain socket name
// in /var/tmp. can't use current directory since
// AFS doesn't support UNIX-domain sockets.
//
func port(suffix string) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += strconv.Itoa(os.Getpid()) + "-"
s += suffix
return s
}
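// e.g. for uid 1000 and pid 12345, port("p") returns
// "/var/tmp/824-1000/12345-p" (the actual values vary per run).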
func TestBasic(t *testing.T) {
fmt.Printf("Test: Basic lock/unlock ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck := MakeClerk(phost, bhost)
tl(t, ck, "a", true)
tu(t, ck, "a", true)
tl(t, ck, "a", true)
tl(t, ck, "b", true)
tu(t, ck, "a", true)
tu(t, ck, "b", true)
tl(t, ck, "a", true)
tl(t, ck, "a", false)
tu(t, ck, "a", true)
tu(t, ck, "a", false)
p.kill()
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail1(t *testing.T) {
fmt.Printf("Test: Primary failure ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck := MakeClerk(phost, bhost)
tl(t, ck, "a", true)
tl(t, ck, "b", true)
tu(t, ck, "b", true)
tl(t, ck, "c", true)
tl(t, ck, "c", false)
tl(t, ck, "d", true)
tu(t, ck, "d", true)
tl(t, ck, "d", true)
p.kill()
tl(t, ck, "a", false)
tu(t, ck, "a", true)
tu(t, ck, "b", false)
tl(t, ck, "b", true)
tu(t, ck, "c", true)
tu(t, ck, "d", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail2(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #1 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tl(t, ck1, "b", true)
p.dying = true
tl(t, ck2, "c", true)
tl(t, ck1, "c", false)
tu(t, ck2, "c", true)
tl(t, ck1, "c", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail3(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #2 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tl(t, ck1, "b", true)
p.dying = true
tl(t, ck1, "b", false)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail4(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #3 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tl(t, ck1, "b", true)
p.dying = true
tl(t, ck2, "b", false)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail5(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #4 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tl(t, ck1, "b", true)
tu(t, ck1, "b", true)
p.dying = true
tu(t, ck1, "b", false)
tl(t, ck2, "b", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail6(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #5 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tu(t, ck1, "a", true)
tu(t, ck2, "a", false)
tl(t, ck1, "b", true)
p.dying = true
tu(t, ck2, "b", true)
tl(t, ck1, "b", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail7(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #6 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tu(t, ck1, "a", true)
tu(t, ck2, "a", false)
tl(t, ck1, "b", true)
p.dying = true
ch := make(chan bool)
go func() {
ok := false
defer func() { ch <- ok }()
tu(t, ck2, "b", true) // 2 second delay until retry
ok = true
}()
time.Sleep(1 * time.Second)
tl(t, ck1, "b", true)
ok := <-ch
if ok == false {
t.Fatalf("re-sent Unlock did not return true")
}
tu(t, ck1, "b", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestPrimaryFail8(t *testing.T) {
fmt.Printf("Test: Primary failure just before reply #7 ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck1 := MakeClerk(phost, bhost)
ck2 := MakeClerk(phost, bhost)
tl(t, ck1, "a", true)
tu(t, ck1, "a", true)
p.dying = true
ch := make(chan bool)
go func() {
ok := false
defer func() { ch <- ok }()
tu(t, ck2, "a", false) // 2 second delay until retry
ok = true
}()
time.Sleep(1 * time.Second)
tl(t, ck1, "a", true)
ok := <-ch
if ok == false {
t.Fatalf("re-sent Unlock did not return false")
}
tu(t, ck1, "a", true)
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestBackupFail(t *testing.T) {
fmt.Printf("Test: Backup failure ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
ck := MakeClerk(phost, bhost)
tl(t, ck, "a", true)
tl(t, ck, "b", true)
tu(t, ck, "b", true)
tl(t, ck, "c", true)
tl(t, ck, "c", false)
tl(t, ck, "d", true)
tu(t, ck, "d", true)
tl(t, ck, "d", true)
b.kill()
tl(t, ck, "a", false)
tu(t, ck, "a", true)
tu(t, ck, "b", false)
tl(t, ck, "b", true)
tu(t, ck, "c", true)
tu(t, ck, "d", true)
p.kill()
fmt.Printf(" ... Passed\n")
}
func TestMany(t *testing.T) {
fmt.Printf("Test: Multiple clients with primary failure ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
const nclients = 2
const nlocks = 10
done := false
var state [nclients][nlocks]bool
var acks [nclients]bool
for xi := 0; xi < nclients; xi++ {
go func(i int) {
ck := MakeClerk(phost, bhost)
rr := rand.New(rand.NewSource(int64(os.Getpid() + i)))
for done == false {
locknum := (rr.Int() % nlocks)
lockname := strconv.Itoa(locknum + (i * 1000))
what := rr.Int() % 2
if what == 0 {
ck.Lock(lockname)
state[i][locknum] = true
} else {
ck.Unlock(lockname)
state[i][locknum] = false
}
}
acks[i] = true
}(xi)
}
time.Sleep(2 * time.Second)
p.kill()
time.Sleep(2 * time.Second)
done = true
time.Sleep(time.Second)
ck := MakeClerk(phost, bhost)
for xi := 0; xi < nclients; xi++ {
if acks[xi] == false {
t.Fatal("one client didn't complete")
}
for locknum := 0; locknum < nlocks; locknum++ {
lockname := strconv.Itoa(locknum + (xi * 1000))
locked := !ck.Lock(lockname)
if locked != state[xi][locknum] {
t.Fatal("bad final state")
}
}
}
b.kill()
fmt.Printf(" ... Passed\n")
}
func TestConcurrentCounts(t *testing.T) {
fmt.Printf("Test: Multiple clients, single lock, primary failure ...\n")
runtime.GOMAXPROCS(4)
phost := port("p")
bhost := port("b")
p := StartServer(phost, bhost, true) // primary
b := StartServer(phost, bhost, false) // backup
const nclients = 2
const nlocks = 1
done := false
var acks [nclients]bool
var locks [nclients][nlocks]int
var unlocks [nclients][nlocks]int
for xi := 0; xi < nclients; xi++ {
go func(i int) {
ck := MakeClerk(phost, bhost)
rr := rand.New(rand.NewSource(int64(os.Getpid() + i)))
for done == false {
locknum := rr.Int() % nlocks
lockname := strconv.Itoa(locknum)
what := rr.Int() % 2
if what == 0 {
if ck.Lock(lockname) {
locks[i][locknum]++
}
} else {
if ck.Unlock(lockname) {
unlocks[i][locknum]++
}
}
}
acks[i] = true
}(xi)
}
time.Sleep(2 * time.Second)
p.kill()
time.Sleep(2 * time.Second)
done = true
time.Sleep(time.Second)
for xi := 0; xi < nclients; xi++ {
if acks[xi] == false {
t.Fatal("one client didn't complete")
}
}
ck := MakeClerk(phost, bhost)
for locknum := 0; locknum < nlocks; locknum++ {
nl := 0
nu := 0
for xi := 0; xi < nclients; xi++ {
nl += locks[xi][locknum]
nu += unlocks[xi][locknum]
}
locked := ck.Unlock(strconv.Itoa(locknum))
// fmt.Printf("lock=%d nl=%d nu=%d locked=%v\n",
// locknum, nl, nu, locked)
if nl < nu || nl > nu+1 {
t.Fatal("lock race 1")
}
if nl == nu && locked != false {
t.Fatal("lock race 2")
}
if nl != nu && locked != true {
t.Fatal("lock race 3")
}
}
b.kill()
fmt.Printf(" ... Passed\n")
}

BIN
src/main/diskvd Normal file

Binary file not shown.

74
src/main/diskvd.go Normal file
View File

@ -0,0 +1,74 @@
package main
//
// start a diskvd server. it's a member of some replica
// group, which has other members, and it needs to know
// how to talk to the members of the shardmaster service.
// used by ../diskv/test_test.go
//
// arguments:
// -g groupid
// -m masterport1 -m masterport2 ...
// -s replicaport1 -s replicaport2 ...
// -i my-index-in-server-port-list
// -u unreliable
// -d directory
// -r restart
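//
// example invocation (the socket paths and group id are made up for
// illustration):
//   ./diskvd -g 100 -m /var/tmp/824-me/m0 -s /var/tmp/824-me/s0 \
//            -s /var/tmp/824-me/s1 -i 0 -u false \
//            -d /var/tmp/824-me/g100-0 -r false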
import "time"
import "diskv"
import "os"
import "fmt"
import "strconv"
import "runtime"
func usage() {
fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n")
os.Exit(1)
}
func main() {
var gid int64 = -1 // my replica group ID
masters := []string{} // ports of shardmasters
replicas := []string{} // ports of servers in my replica group
me := -1 // my index in replicas[]
unreliable := false
dir := "" // store persistent data here
restart := false
for i := 1; i+1 < len(os.Args); i += 2 {
a0 := os.Args[i]
a1 := os.Args[i+1]
if a0 == "-g" {
gid, _ = strconv.ParseInt(a1, 10, 64)
} else if a0 == "-m" {
masters = append(masters, a1)
} else if a0 == "-s" {
replicas = append(replicas, a1)
} else if a0 == "-i" {
me, _ = strconv.Atoi(a1)
} else if a0 == "-u" {
unreliable, _ = strconv.ParseBool(a1)
} else if a0 == "-d" {
dir = a1
} else if a0 == "-r" {
restart, _ = strconv.ParseBool(a1)
} else {
usage()
}
}
if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" {
usage()
}
runtime.GOMAXPROCS(4)
srv := diskv.StartServer(gid, masters, replicas, me, dir, restart)
srv.Setunreliable(unreliable)
// for safety, force quit after 10 minutes.
time.Sleep(10 * 60 * time.Second)
mep, _ := os.FindProcess(os.Getpid())
mep.Kill()
}

40
src/main/ii.go Normal file
View File

@ -0,0 +1,40 @@
package main
import "os"
import "fmt"
import "mapreduce"
// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mapreduce.KeyValue.
func mapF(document string, value string) (res []mapreduce.KeyValue) {
// TODO: you should complete this to do the inverted index challenge
return // placeholder so the skeleton compiles; the TODO above is the part to implement
}
// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
func reduceF(key string, values []string) string {
// TODO: you should complete this to do the inverted index challenge
return "" // placeholder so the skeleton compiles; the TODO above is the part to implement
}
// Can be run in 3 ways:
// 1) Sequential (e.g., go run ii.go master sequential x1.txt .. xN.txt)
// 2) Master (e.g., go run ii.go master localhost:7777 x1.txt .. xN.txt)
// 3) Worker (e.g., go run ii.go worker localhost:7777 localhost:7778 &)
func main() {
if len(os.Args) < 4 {
fmt.Printf("%s: see usage comments in file\n", os.Args[0])
} else if os.Args[1] == "master" {
var mr *mapreduce.Master
if os.Args[2] == "sequential" {
mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF)
} else {
mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2])
}
mr.Wait()
} else {
mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100)
}
}

31
src/main/lockc.go Normal file
View File

@ -0,0 +1,31 @@
package main
//
// see comments in lockd.go
//
import "lockservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 5 {
ck := lockservice.MakeClerk(os.Args[2], os.Args[3])
var ok bool
if os.Args[1] == "-l" {
ok = ck.Lock(os.Args[4])
} else if os.Args[1] == "-u" {
ok = ck.Unlock(os.Args[4])
} else {
usage()
}
fmt.Printf("reply: %v\n", ok)
} else {
usage()
}
}

31
src/main/lockd.go Normal file
View File

@ -0,0 +1,31 @@
package main
// export GOPATH=~/6.824
// go build lockd.go
// go build lockc.go
// ./lockd -p a b &
// ./lockd -b a b &
// ./lockc -l a b lx
// ./lockc -u a b lx
//
// on Athena, use /tmp/myname-a and /tmp/myname-b
// instead of a and b.
import "time"
import "lockservice"
import "os"
import "fmt"
func main() {
if len(os.Args) == 4 && os.Args[1] == "-p" {
lockservice.StartServer(os.Args[2], os.Args[3], true)
} else if len(os.Args) == 4 && os.Args[1] == "-b" {
lockservice.StartServer(os.Args[2], os.Args[3], false)
} else {
fmt.Printf("Usage: lockd -p|-b primaryport backupport\n")
os.Exit(1)
}
for {
time.Sleep(100 * time.Second)
}
}

10
src/main/mr-challenge.txt Normal file
View File

@ -0,0 +1,10 @@
women: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
won: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
wonderful: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
words: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
worked: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
worse: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
wounded: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
yes: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
younger: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt
yours: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt

10
src/main/mr-testout.txt Normal file
View File

@ -0,0 +1,10 @@
he: 34077
was: 37044
that: 37495
I: 44502
in: 46092
a: 60558
to: 74357
of: 79727
and: 93990
the: 154024

44
src/main/pbc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// pbservice client application
//
// export GOPATH=~/6.824
// go build viewd.go
// go build pbd.go
// go build pbc.go
// ./viewd /tmp/rtm-v &
// ./pbd /tmp/rtm-v /tmp/rtm-1 &
// ./pbd /tmp/rtm-v /tmp/rtm-2 &
// ./pbc /tmp/rtm-v key1 value1
// ./pbc /tmp/rtm-v key1
//
// change "rtm" to your user name.
// start the pbd programs in separate windows and kill
// and restart them to exercise fault tolerance.
//
import "pbservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: pbc viewport key\n")
fmt.Printf(" pbc viewport key value\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 3 {
// get
ck := pbservice.MakeClerk(os.Args[1], "")
v := ck.Get(os.Args[2])
fmt.Printf("%v\n", v)
} else if len(os.Args) == 4 {
// put
ck := pbservice.MakeClerk(os.Args[1], "")
ck.Put(os.Args[2], os.Args[3])
} else {
usage()
}
}

23
src/main/pbd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "pbservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 3 {
fmt.Printf("Usage: pbd viewport myport\n")
os.Exit(1)
}
pbservice.StartServer(os.Args[1], os.Args[2])
for {
time.Sleep(100 * time.Second)
}
}

3495
src/main/pg-being_ernest.txt Normal file

File diff suppressed because it is too large Load Diff

8904
src/main/pg-dorian_gray.txt Normal file

File diff suppressed because it is too large Load Diff

15973
src/main/pg-dracula.txt Normal file

File diff suppressed because it is too large Load Diff

16631
src/main/pg-emma.txt Normal file

File diff suppressed because it is too large Load Diff

7653
src/main/pg-frankenstein.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

9569
src/main/pg-grimm.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

68116
src/main/pg-les_miserables.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

22108
src/main/pg-moby_dick.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

9206
src/main/pg-tom_sawyer.txt Normal file

File diff suppressed because it is too large Load Diff

33055
src/main/pg-ulysses.txt Normal file

File diff suppressed because it is too large Load Diff

65007
src/main/pg-war_and_peace.txt Normal file

File diff suppressed because it is too large Load Diff

11
src/main/test-ii.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
go run ii.go master sequential pg-*.txt
sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out
if [ -s diff.out ]
then
echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr
cat diff.out
else
echo "Passed test" > /dev/stderr
fi

21
src/main/test-mr.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/bash
here=$(dirname "$0")
[[ "$here" = /* ]] || here="$PWD/$here"
export GOPATH="$here/../../"
echo ""
echo "==> Part I"
go test -run Sequential mapreduce/...
echo ""
echo "==> Part II"
(cd "$here" && ./test-wc.sh > /dev/null)
echo ""
echo "==> Part III"
go test -run TestBasic mapreduce/...
echo ""
echo "==> Part IV"
go test -run Failure mapreduce/...
echo ""
echo "==> Part V (challenge)"
(cd "$here" && ./test-ii.sh > /dev/null)
rm "$here"/mrtmp.* "$here"/diff.out

11
src/main/test-wc.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
go run wc.go master sequential pg-*.txt
sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out
if [ -s diff.out ]
then
echo "Failed test. Output should be as in mr-testout.txt. Your output differs as follows (from diff.out):" > /dev/stderr
cat diff.out
else
echo "Passed test" > /dev/stderr
fi

23
src/main/viewd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "viewservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 2 {
fmt.Printf("Usage: viewd port\n")
os.Exit(1)
}
viewservice.StartServer(os.Args[1])
for {
time.Sleep(100 * time.Second)
}
}

42
src/main/wc.go Normal file
View File

@ -0,0 +1,42 @@
package main
import (
"fmt"
"mapreduce"
"os"
)
// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mapreduce.KeyValue.
func mapF(document string, value string) (res []mapreduce.KeyValue) {
// TODO: you have to write this function
return // placeholder so the skeleton compiles; the TODO above is the part to implement
}
// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
func reduceF(key string, values []string) string {
// TODO: you also have to write this function
return "" // placeholder so the skeleton compiles; the TODO above is the part to implement
}
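// a hedged word-count sketch of the two functions above (illustration
// only; it assumes "strings", "strconv" and "unicode" are added to the
// imports, and the TODOs remain the part you are meant to write):
//
//   func mapF(document string, value string) (res []mapreduce.KeyValue) {
//       words := strings.FieldsFunc(value, func(r rune) bool {
//           return !unicode.IsLetter(r)
//       })
//       for _, w := range words {
//           res = append(res, mapreduce.KeyValue{Key: w, Value: "1"})
//       }
//       return
//   }
//
//   func reduceF(key string, values []string) string {
//       return strconv.Itoa(len(values))
//   }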
// Can be run in 3 ways:
// 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt)
// 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt)
// 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &)
func main() {
if len(os.Args) < 4 {
fmt.Printf("%s: see usage comments in file\n", os.Args[0])
} else if os.Args[1] == "master" {
var mr *mapreduce.Master
if os.Args[2] == "sequential" {
mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF)
} else {
mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2])
}
mr.Wait()
} else {
mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100)
}
}

100000
src/mapreduce/824-mrinput-0.txt Normal file

File diff suppressed because it is too large Load Diff

43
src/mapreduce/common.go Normal file
View File

@ -0,0 +1,43 @@
package mapreduce
import (
"fmt"
"strconv"
)
// Debugging enabled?
const debugEnabled = false
// debug() will only print if the debugEnabled const has been set to true
func debug(format string, a ...interface{}) (n int, err error) {
if debugEnabled {
n, err = fmt.Printf(format, a...)
}
return
}
// jobPhase indicates whether a task is scheduled as a map or reduce task.
type jobPhase string
const (
mapPhase jobPhase = "Map"
reducePhase = "Reduce"
)
// KeyValue is a type used to hold the key/value pairs passed to the map and
// reduce functions.
type KeyValue struct {
Key string
Value string
}
// reduceName constructs the name of the intermediate file which map task
// <mapTask> produces for reduce task <reduceTask>.
func reduceName(jobName string, mapTask int, reduceTask int) string {
return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask)
}
// mergeName constructs the name of the output file of reduce task <reduceTask>
func mergeName(jobName string, reduceTask int) string {
return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask)
}
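// e.g. reduceName("wcseq", 2, 1) is "mrtmp.wcseq-2-1" and
// mergeName("wcseq", 1) is "mrtmp.wcseq-res-1".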

View File

@ -0,0 +1,49 @@
package mapreduce
import (
"hash/fnv"
)
// doMap does the job of a map worker: it reads one of the input files
// (inFile), calls the user-defined map function (mapF) for that file's
// contents, and partitions the output into nReduce intermediate files.
func doMap(
jobName string, // the name of the MapReduce job
mapTaskNumber int, // which map task this is
inFile string,
nReduce int, // the number of reduce tasks that will be run ("R" in the paper)
mapF func(file string, contents string) []KeyValue,
) {
// TODO:
// You will need to write this function.
// You can find the filename for this map task's input to reduce task number
// r using reduceName(jobName, mapTaskNumber, r). The ihash function (given
// below doMap) should be used to decide which file a given key belongs into.
//
// The intermediate output of a map task is stored in the file
// system as multiple files whose name indicates which map task produced
// them, as well as which reduce task they are for. Coming up with a
// scheme for how to store the key/value pairs on disk can be tricky,
// especially when taking into account that both keys and values could
// contain newlines, quotes, and any other character you can think of.
//
// One format often used for serializing data to a byte stream that the
// other end can correctly reconstruct is JSON. You are not required to
// use JSON, but as the output of the reduce tasks *must* be JSON,
// familiarizing yourself with it here may prove useful. You can write
// out a data structure as a JSON string to a file using the commented
// code below. The corresponding decoding functions can be found in
// common_reduce.go.
//
// enc := json.NewEncoder(file)
// for _, kv := ... {
// err := enc.Encode(&kv)
//
// Remember to close the file after you have written all the values!
}
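// a hedged sketch of the steps described above (illustration only, not
// the required approach; error handling is elided and "encoding/json",
// "io/ioutil" and "os" would need to be imported):
//
//   contents, _ := ioutil.ReadFile(inFile)
//   kvs := mapF(inFile, string(contents))
//   for r := 0; r < nReduce; r++ {
//       f, _ := os.Create(reduceName(jobName, mapTaskNumber, r))
//       enc := json.NewEncoder(f)
//       for _, kv := range kvs {
//           if int(ihash(kv.Key))%nReduce == r {
//               enc.Encode(&kv)
//           }
//       }
//       f.Close()
//   }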
func ihash(s string) uint32 {
h := fnv.New32a()
h.Write([]byte(s))
return h.Sum32()
}

View File

@ -0,0 +1,34 @@
package mapreduce
// doReduce does the job of a reduce worker: it reads the intermediate
// key/value pairs (produced by the map phase) for this task, sorts the
// intermediate key/value pairs by key, calls the user-defined reduce function
// (reduceF) for each key, and writes the output to disk.
func doReduce(
jobName string, // the name of the whole MapReduce job
reduceTaskNumber int, // which reduce task this is
nMap int, // the number of map tasks that were run ("M" in the paper)
reduceF func(key string, values []string) string,
) {
// TODO:
// You will need to write this function.
// You can find the intermediate file for this reduce task from map task number
// m using reduceName(jobName, m, reduceTaskNumber).
// Remember that you've encoded the values in the intermediate files, so you
// will need to decode them. If you chose to use JSON, you can read out
// multiple decoded values by creating a decoder, and then repeatedly calling
// .Decode() on it until Decode() returns an error.
//
// You should write the reduced output in as JSON encoded KeyValue
// objects to a file named mergeName(jobName, reduceTaskNumber). We require
// you to use JSON here because that is what the merger that combines the
// output from all the reduce tasks expects. There is nothing "special" about
// JSON -- it is just the marshalling format we chose to use. It will look
// something like this:
//
// enc := json.NewEncoder(mergeFile)
// for key in ... {
// enc.Encode(KeyValue{key, reduceF(...)})
// }
// file.Close()
}
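// a hedged sketch of the steps described above (illustration only;
// error handling is elided and "encoding/json", "os" and "sort" would
// need to be imported):
//
//   kvs := map[string][]string{}
//   for m := 0; m < nMap; m++ {
//       f, _ := os.Open(reduceName(jobName, m, reduceTaskNumber))
//       dec := json.NewDecoder(f)
//       var kv KeyValue
//       for dec.Decode(&kv) == nil {
//           kvs[kv.Key] = append(kvs[kv.Key], kv.Value)
//       }
//       f.Close()
//   }
//   keys := []string{}
//   for k := range kvs {
//       keys = append(keys, k)
//   }
//   sort.Strings(keys)
//   out, _ := os.Create(mergeName(jobName, reduceTaskNumber))
//   enc := json.NewEncoder(out)
//   for _, key := range keys {
//       enc.Encode(KeyValue{key, reduceF(key, kvs[key])})
//   }
//   out.Close()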

View File

@ -0,0 +1,66 @@
package mapreduce
import (
"fmt"
"net/rpc"
)
// What follows are RPC types and methods.
// Field names must start with capital letters, otherwise RPC will break.
// DoTaskArgs holds the arguments that are passed to a worker when a job is
// scheduled on it.
type DoTaskArgs struct {
JobName string
File string // the file to process
Phase jobPhase // are we in mapPhase or reducePhase?
TaskNumber int // this task's index in the current phase
// NumOtherPhase is the total number of tasks in other phase; mappers
// need this to compute the number of output bins, and reducers need
// this to know how many input files to collect.
NumOtherPhase int
}
// ShutdownReply is the response to a WorkerShutdown.
// It holds the number of tasks this worker has processed since it was started.
type ShutdownReply struct {
Ntasks int
}
// RegisterArgs is the argument passed when a worker registers with the master.
type RegisterArgs struct {
Worker string
}
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be the address
// of a reply structure.
//
// call() returns true if the server responded, and false
// if call() was not able to contact the server. in particular,
// reply's contents are valid if and only if call() returned true.
//
// you should assume that call() will time out and return an
// error after a while if it doesn't get a reply from the server.
//
// please use call() to send all RPCs, in master.go, mapreduce.go,
// and worker.go. please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}

144
src/mapreduce/master.go Normal file
View File

@ -0,0 +1,144 @@
package mapreduce
import (
"fmt"
"net"
"sync"
)
// Master holds all the state that the master needs to keep track of. Of
// particular importance is registerChannel, the channel that notifies the
// master of workers that have gone idle and are in need of new work.
type Master struct {
sync.Mutex
address string
registerChannel chan string
doneChannel chan bool
workers []string // protected by the mutex
// Per-task information
jobName string // Name of currently executing job
files []string // Input files
nReduce int // Number of reduce partitions
shutdown chan struct{}
l net.Listener
stats []int
}
// Register is an RPC method that is called by workers after they have started
// up to report that they are ready to receive tasks.
func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error {
mr.Lock()
defer mr.Unlock()
debug("Register: worker %s\n", args.Worker)
mr.workers = append(mr.workers, args.Worker)
go func() {
mr.registerChannel <- args.Worker
}()
return nil
}
// newMaster initializes a new Map/Reduce Master
func newMaster(master string) (mr *Master) {
mr = new(Master)
mr.address = master
mr.shutdown = make(chan struct{})
mr.registerChannel = make(chan string)
mr.doneChannel = make(chan bool)
return
}
// Sequential runs map and reduce tasks sequentially, waiting for each task to
// complete before scheduling the next.
func Sequential(jobName string, files []string, nreduce int,
mapF func(string, string) []KeyValue,
reduceF func(string, []string) string,
) (mr *Master) {
mr = newMaster("master")
go mr.run(jobName, files, nreduce, func(phase jobPhase) {
switch phase {
case mapPhase:
for i, f := range mr.files {
doMap(mr.jobName, i, f, mr.nReduce, mapF)
}
case reducePhase:
for i := 0; i < mr.nReduce; i++ {
doReduce(mr.jobName, i, len(mr.files), reduceF)
}
}
}, func() {
mr.stats = []int{len(files) + nreduce}
})
return
}
// Distributed schedules map and reduce tasks on workers that register with the
// master over RPC.
func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) {
mr = newMaster(master)
mr.startRPCServer()
go mr.run(jobName, files, nreduce, mr.schedule, func() {
mr.stats = mr.killWorkers()
mr.stopRPCServer()
})
return
}
// run executes a mapreduce job on the given number of mappers and reducers.
//
// First, it divides up the input file among the given number of mappers, and
// schedules each task on workers as they become available. Each map task bins
// its output in a number of bins equal to the given number of reduce tasks.
// Once all the mappers have finished, workers are assigned reduce tasks.
//
// When all tasks have been completed, the reducer outputs are merged,
// statistics are collected, and the master is shut down.
//
// Note that this implementation assumes a shared file system.
func (mr *Master) run(jobName string, files []string, nreduce int,
schedule func(phase jobPhase),
finish func(),
) {
mr.jobName = jobName
mr.files = files
mr.nReduce = nreduce
debug("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName)
schedule(mapPhase)
schedule(reducePhase)
finish()
mr.merge()
debug("%s: Map/Reduce task completed\n", mr.address)
mr.doneChannel <- true
}
// Wait blocks until the currently scheduled work has completed.
// This happens when all tasks have been scheduled and completed, the final output
// has been computed, and all workers have been shut down.
func (mr *Master) Wait() {
<-mr.doneChannel
}
// killWorkers cleans up all workers by sending each one a Shutdown RPC.
// It also collects and returns the number of tasks each worker has performed.
func (mr *Master) killWorkers() []int {
mr.Lock()
defer mr.Unlock()
ntasks := make([]int, 0, len(mr.workers))
for _, w := range mr.workers {
debug("Master: shutdown worker %s\n", w)
var reply ShutdownReply
ok := call(w, "Worker.Shutdown", new(struct{}), &reply)
if ok == false {
fmt.Printf("Master: RPC %s shutdown error\n", w)
} else {
ntasks = append(ntasks, reply.Ntasks)
}
}
return ntasks
}
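// exampleDistributedJob is an illustrative sketch, not part of the original
// file: it shows how an application drives the master defined above. The job
// name, input file names, and master socket path are hypothetical; workers are
// started separately via RunWorker (worker.go) and register themselves.
func exampleDistributedJob() {
	files := []string{"824-example-0.txt", "824-example-1.txt"} // hypothetical inputs
	mr := Distributed("example", files, 3, "/var/tmp/824-example-master")
	mr.Wait() // returns once both phases ran, the output was merged, and workers shut down
}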

View File

@ -0,0 +1,66 @@
package mapreduce
import (
"fmt"
"log"
"net"
"net/rpc"
"os"
)
// Shutdown is an RPC method that shuts down the Master's RPC server.
func (mr *Master) Shutdown(_, _ *struct{}) error {
debug("Shutdown: registration server\n")
close(mr.shutdown)
mr.l.Close() // causes the Accept to fail
return nil
}
// startRPCServer starts the Master's RPC server. It continues accepting RPC
// calls (Register in particular) for as long as the master is alive.
func (mr *Master) startRPCServer() {
rpcs := rpc.NewServer()
rpcs.Register(mr)
os.Remove(mr.address) // only needed for "unix"
l, e := net.Listen("unix", mr.address)
if e != nil {
log.Fatal("RegstrationServer", mr.address, " error: ", e)
}
mr.l = l
// now that we are listening on the master address, we can fork off
// accepting connections into another thread.
go func() {
loop:
for {
select {
case <-mr.shutdown:
break loop
default:
}
conn, err := mr.l.Accept()
if err == nil {
go func() {
rpcs.ServeConn(conn)
conn.Close()
}()
} else {
debug("RegistrationServer: accept error", err)
break
}
}
debug("RegistrationServer: done\n")
}()
}
// stopRPCServer stops the master RPC server.
// This must be done through an RPC to avoid race conditions between the RPC
// server thread and the current thread.
func (mr *Master) stopRPCServer() {
var reply ShutdownReply
ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply)
if ok == false {
fmt.Printf("Cleanup: RPC %s error\n", mr.address)
}
debug("cleanupRegistration: done\n")
}

View File

@ -0,0 +1,72 @@
package mapreduce
import (
"bufio"
"encoding/json"
"fmt"
"log"
"os"
"sort"
)
// merge combines the results of the many reduce jobs into a single output file
// XXX use merge sort
func (mr *Master) merge() {
debug("Merge phase")
kvs := make(map[string]string)
for i := 0; i < mr.nReduce; i++ {
p := mergeName(mr.jobName, i)
debug("Merge: read %s\n", p)
file, err := os.Open(p)
if err != nil {
log.Fatal("Merge: ", err)
}
dec := json.NewDecoder(file)
for {
var kv KeyValue
err = dec.Decode(&kv)
if err != nil {
break
}
kvs[kv.Key] = kv.Value
}
file.Close()
}
var keys []string
for k := range kvs {
keys = append(keys, k)
}
sort.Strings(keys)
file, err := os.Create("mrtmp." + mr.jobName)
if err != nil {
log.Fatal("Merge: create ", err)
}
w := bufio.NewWriter(file)
for _, k := range keys {
fmt.Fprintf(w, "%s: %s\n", k, kvs[k])
}
w.Flush()
file.Close()
}
// removeFile is a simple wrapper around os.Remove that logs errors.
func removeFile(n string) {
err := os.Remove(n)
if err != nil {
log.Fatal("CleanupFiles ", err)
}
}
// CleanupFiles removes all intermediate files produced by running mapreduce.
func (mr *Master) CleanupFiles() {
for i := range mr.files {
for j := 0; j < mr.nReduce; j++ {
removeFile(reduceName(mr.jobName, i, j))
}
}
for i := 0; i < mr.nReduce; i++ {
removeFile(mergeName(mr.jobName, i))
}
removeFile("mrtmp." + mr.jobName)
}

45
src/mapreduce/readme.go Normal file
View File

@ -0,0 +1,45 @@
// Package mapreduce provides a simple mapreduce library with a sequential
// implementation. Applications should normally call Distributed() [located in
// master.go] to start a job, but may instead call Sequential() [also in
// master.go] to get a sequential execution for debugging purposes.
//
// The flow of the mapreduce implementation is as follows:
//
// 1. The application provides a number of input files, a map function, a
// reduce function, and the number of reduce tasks (nReduce).
// 2. A master is created with this knowledge. It spins up an RPC server (see
// master_rpc.go), and waits for workers to register (using the RPC call
// Register() [defined in master.go]). As tasks become available (in steps
// 4 and 5), schedule() [schedule.go] decides how to assign those tasks to
// workers, and how to handle worker failures.
// 3. The master considers each input file one map task, and makes a call to
// doMap() [common_map.go] at least once for each task. It does so either
//    directly (when using Sequential()) or by issuing the DoTask RPC on a
// worker [worker.go]. Each call to doMap() reads the appropriate file,
// calls the map function on that file's contents, and produces nReduce
// files for each map file. Thus, there will be #files x nReduce files
// after all map tasks are done:
//
//      f0-0, ..., f0-<nReduce-1>, ...,
// f<#files-1>-0, ... f<#files-1>-<nReduce-1>.
//
// 4. The master next makes a call to doReduce() [common_reduce.go] at least
// once for each reduce task. As for doMap(), it does so either directly or
//    through a worker. doReduce() collects one intermediate file from each
//    map task (f-*-<reduce>), and runs the reduce function on those files. This
// produces nReduce result files.
// 5. The master calls mr.merge() [master_splitmerge.go], which merges all
// the nReduce files produced by the previous step into a single output.
// 6. The master sends a Shutdown RPC to each of its workers, and then shuts
// down its own RPC server.
//
// TODO:
// You will have to write/modify doMap, doReduce, and schedule yourself. These
// are located in common_map.go, common_reduce.go, and schedule.go
// respectively. You will also have to write the map and reduce functions in
// ../main/wc.go.
//
// You should not need to modify any other files, but reading them might be
// useful in order to understand how the other methods fit into the overall
// architecture of the system.
package mapreduce
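// The sketch below is illustrative only and is not part of the lab skeleton:
// it shows the kind of key -> reduce-task partitioning that step 3 relies on
// when doMap() bins its output into nReduce intermediate files. The lab's own
// hashing and file-naming helpers (in common_map.go) may differ.

import "hash/fnv"

// examplePartition picks which of the nReduce intermediate files a key belongs to.
func examplePartition(key string, nReduce int) int {
	h := fnv.New32a()
	h.Write([]byte(key))
	return int(h.Sum32()&0x7fffffff) % nReduce
}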

26
src/mapreduce/schedule.go Normal file
View File

@ -0,0 +1,26 @@
package mapreduce
// schedule starts and waits for all tasks in the given phase (Map or Reduce).
func (mr *Master) schedule(phase jobPhase) {
var ntasks int
var nios int // number of inputs (for reduce) or outputs (for map)
switch phase {
case mapPhase:
ntasks = len(mr.files)
nios = mr.nReduce
case reducePhase:
ntasks = mr.nReduce
nios = len(mr.files)
}
debug("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios)
// All ntasks tasks have to be scheduled on workers, and only once all of
// them have been completed successfully should the function return.
// Remember that workers may fail, and that any given worker may finish
// multiple tasks.
//
// TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
//
debug("Schedule: %v phase done\n", phase)
}
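// scheduleSketch is an illustrative, non-authoritative sketch of one possible
// approach to the TODO in schedule() above; it is not the assignment's
// required solution and is not called anywhere. Idea: pull idle workers off
// registerChannel, hand each one a Worker.DoTask RPC, and re-queue a task if
// its RPC fails so another worker can retry it.
func (mr *Master) scheduleSketch(phase jobPhase, ntasks, nios int) {
	type result struct {
		task   int
		worker string
		ok     bool
	}
	pending := make([]int, 0, ntasks) // task numbers not yet running or completed
	for i := 0; i < ntasks; i++ {
		pending = append(pending, i)
	}
	results := make(chan result)
	idle := []string{} // workers known to be ready for a task
	finished := 0
	for finished < ntasks {
		// hand out work while we have both a pending task and an idle worker
		for len(pending) > 0 && len(idle) > 0 {
			task, worker := pending[0], idle[0]
			pending, idle = pending[1:], idle[1:]
			go func(task int, worker string) {
				args := &DoTaskArgs{
					JobName:       mr.jobName,
					Phase:         phase,
					TaskNumber:    task,
					NumOtherPhase: nios,
				}
				if phase == mapPhase {
					args.File = mr.files[task]
				}
				ok := call(worker, "Worker.DoTask", args, new(struct{}))
				results <- result{task, worker, ok}
			}(task, worker)
		}
		select {
		case w := <-mr.registerChannel: // a newly registered worker
			idle = append(idle, w)
		case r := <-results:
			if r.ok {
				finished++
				idle = append(idle, r.worker) // this worker can take another task
			} else {
				pending = append(pending, r.task) // retry on a different worker
			}
		}
	}
	// hand the workers we know about to the next phase via registerChannel;
	// after the final phase these sends simply block until the job tears down.
	for _, w := range idle {
		go func(w string) { mr.registerChannel <- w }(w)
	}
}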

208
src/mapreduce/test_test.go Normal file
View File

@ -0,0 +1,208 @@
package mapreduce
import (
"fmt"
"testing"
"time"
"bufio"
"log"
"os"
"sort"
"strconv"
"strings"
)
const (
nNumber = 100000
nMap = 100
nReduce = 50
)
// Create input file with N numbers
// Check if we have N numbers in output file
// Split the input into words
func MapFunc(file string, value string) (res []KeyValue) {
debug("Map %v\n", value)
words := strings.Fields(value)
for _, w := range words {
kv := KeyValue{w, ""}
res = append(res, kv)
}
return
}
// Just return key
func ReduceFunc(key string, values []string) string {
for _, e := range values {
debug("Reduce %s %v\n", key, e)
}
return ""
}
// Checks input file against output file: each input number should show up
// in the output file in string sorted order
func check(t *testing.T, files []string) {
output, err := os.Open("mrtmp.test")
if err != nil {
log.Fatal("check: ", err)
}
defer output.Close()
var lines []string
for _, f := range files {
input, err := os.Open(f)
if err != nil {
log.Fatal("check: ", err)
}
defer input.Close()
inputScanner := bufio.NewScanner(input)
for inputScanner.Scan() {
lines = append(lines, inputScanner.Text())
}
}
sort.Strings(lines)
outputScanner := bufio.NewScanner(output)
i := 0
for outputScanner.Scan() {
var v1 int
var v2 int
text := outputScanner.Text()
n, err := fmt.Sscanf(lines[i], "%d", &v1)
if n == 1 && err == nil {
n, err = fmt.Sscanf(text, "%d", &v2)
}
if err != nil || v1 != v2 {
t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err)
}
i++
}
if i != nNumber {
t.Fatalf("Expected %d lines in output\n", nNumber)
}
}
// Workers report back how many RPCs they have processed in the Shutdown reply.
// Check that they processed at least 1 RPC.
func checkWorker(t *testing.T, l []int) {
for _, tasks := range l {
if tasks == 0 {
t.Fatalf("Some worker didn't do any work\n")
}
}
}
// Make input files
func makeInputs(num int) []string {
var names []string
var i = 0
for f := 0; f < num; f++ {
names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f))
file, err := os.Create(names[f])
if err != nil {
log.Fatal("mkInput: ", err)
}
w := bufio.NewWriter(file)
for i < (f+1)*(nNumber/num) {
fmt.Fprintf(w, "%d\n", i)
i++
}
w.Flush()
file.Close()
}
return names
}
// Cook up a unique-ish UNIX-domain socket name
// in /var/tmp. can't use current directory since
// AFS doesn't support UNIX-domain sockets.
func port(suffix string) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "mr"
s += strconv.Itoa(os.Getpid()) + "-"
s += suffix
return s
}
func setup() *Master {
files := makeInputs(nMap)
master := port("master")
mr := Distributed("test", files, nReduce, master)
return mr
}
func cleanup(mr *Master) {
mr.CleanupFiles()
for _, f := range mr.files {
removeFile(f)
}
}
func TestSequentialSingle(t *testing.T) {
mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc)
mr.Wait()
check(t, mr.files)
checkWorker(t, mr.stats)
cleanup(mr)
}
func TestSequentialMany(t *testing.T) {
mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc)
mr.Wait()
check(t, mr.files)
checkWorker(t, mr.stats)
cleanup(mr)
}
func TestBasic(t *testing.T) {
mr := setup()
for i := 0; i < 2; i++ {
go RunWorker(mr.address, port("worker"+strconv.Itoa(i)),
MapFunc, ReduceFunc, -1)
}
mr.Wait()
check(t, mr.files)
checkWorker(t, mr.stats)
cleanup(mr)
}
func TestOneFailure(t *testing.T) {
mr := setup()
// Start 2 workers that fail after 10 tasks
go RunWorker(mr.address, port("worker"+strconv.Itoa(0)),
MapFunc, ReduceFunc, 10)
go RunWorker(mr.address, port("worker"+strconv.Itoa(1)),
MapFunc, ReduceFunc, -1)
mr.Wait()
check(t, mr.files)
checkWorker(t, mr.stats)
cleanup(mr)
}
func TestManyFailures(t *testing.T) {
mr := setup()
i := 0
done := false
for !done {
select {
case done = <-mr.doneChannel:
check(t, mr.files)
cleanup(mr)
break
default:
// Start 2 workers each sec. The workers fail after 10 tasks
w := port("worker" + strconv.Itoa(i))
go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10)
i++
w = port("worker" + strconv.Itoa(i))
go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10)
i++
time.Sleep(1 * time.Second)
}
}
}

109
src/mapreduce/worker.go Normal file
View File

@ -0,0 +1,109 @@
package mapreduce
import (
"fmt"
"log"
"net"
"net/rpc"
"os"
"sync"
)
// Worker holds the state for a server waiting for DoTask or Shutdown RPCs
type Worker struct {
sync.Mutex
name string
Map func(string, string) []KeyValue
Reduce func(string, []string) string
nRPC int // protected by mutex
nTasks int // protected by mutex
l net.Listener
}
// DoTask is called by the master when a new task is being scheduled on this
// worker.
func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error {
debug("%s: given %v task #%d on file %s (nios: %d)\n",
wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase)
switch arg.Phase {
case mapPhase:
doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map)
case reducePhase:
doReduce(arg.JobName, arg.TaskNumber, arg.NumOtherPhase, wk.Reduce)
}
debug("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber)
return nil
}
// Shutdown is called by the master when all work has been completed.
// We should respond with the number of tasks we have processed.
func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error {
debug("Shutdown %s\n", wk.name)
wk.Lock()
defer wk.Unlock()
res.Ntasks = wk.nTasks
wk.nRPC = 1
wk.nTasks-- // Don't count the shutdown RPC
return nil
}
// Tell the master we exist and are ready to work
func (wk *Worker) register(master string) {
args := new(RegisterArgs)
args.Worker = wk.name
ok := call(master, "Master.Register", args, new(struct{}))
if ok == false {
fmt.Printf("Register: RPC %s register error\n", master)
}
}
// RunWorker sets up a connection with the master, registers its address, and
// waits for tasks to be scheduled.
func RunWorker(MasterAddress string, me string,
MapFunc func(string, string) []KeyValue,
ReduceFunc func(string, []string) string,
nRPC int,
) {
debug("RunWorker %s\n", me)
wk := new(Worker)
wk.name = me
wk.Map = MapFunc
wk.Reduce = ReduceFunc
wk.nRPC = nRPC
rpcs := rpc.NewServer()
rpcs.Register(wk)
os.Remove(me) // only needed for "unix"
l, e := net.Listen("unix", me)
if e != nil {
log.Fatal("RunWorker: worker ", me, " error: ", e)
}
wk.l = l
wk.register(MasterAddress)
// DON'T MODIFY CODE BELOW
for {
wk.Lock()
if wk.nRPC == 0 {
wk.Unlock()
break
}
wk.Unlock()
conn, err := wk.l.Accept()
if err == nil {
wk.Lock()
wk.nRPC--
wk.Unlock()
go rpcs.ServeConn(conn)
wk.Lock()
wk.nTasks++
wk.Unlock()
} else {
break
}
}
wk.l.Close()
debug("RunWorker %s exit\n", me)
}
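// exampleStartWorkers is an illustrative sketch, not part of the original
// file: it launches n workers against a running master, mirroring what
// test_test.go does. The worker socket names are hypothetical; nRPC of -1
// means a worker keeps serving RPCs until it is told to shut down.
func exampleStartWorkers(masterAddr string, n int,
	mapF func(string, string) []KeyValue,
	reduceF func(string, []string) string) {
	for i := 0; i < n; i++ {
		addr := fmt.Sprintf("%s-worker-%d", masterAddr, i) // hypothetical socket name
		go RunWorker(masterAddr, addr, mapF, reduceF, -1)
	}
}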

165
src/paxos-shardkv/client.go Normal file
View File

@ -0,0 +1,165 @@
package shardkv
import "shardmaster"
import "net/rpc"
import "time"
import "sync"
import "fmt"
import "crypto/rand"
import "math/big"
type Clerk struct {
mu sync.Mutex // one RPC at a time
sm *shardmaster.Clerk
config shardmaster.Config
// You'll have to modify Clerk.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(shardmasters []string) *Clerk {
ck := new(Clerk)
ck.sm = shardmaster.MakeClerk(shardmasters)
// You'll have to modify MakeClerk.
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
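// For example, with NShards == 10 a key such as "apple" is placed by its
// first byte alone: int('a') == 97 and 97 % 10 == 7, so "apple" maps to shard 7.
//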
func key2shard(key string) int {
shard := 0
if len(key) > 0 {
shard = int(key[0])
}
shard %= shardmaster.NShards
return shard
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
func (ck *Clerk) Get(key string) string {
ck.mu.Lock()
defer ck.mu.Unlock()
// You'll have to modify Get().
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
servers, ok := ck.config.Groups[gid]
if ok {
// try each server in the shard's replication group.
for _, srv := range servers {
args := &GetArgs{}
args.Key = key
var reply GetReply
ok := call(srv, "ShardKV.Get", args, &reply)
if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
return reply.Value
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for a new configuration.
ck.config = ck.sm.Query(-1)
}
}
// send a Put or Append request.
func (ck *Clerk) PutAppend(key string, value string, op string) {
ck.mu.Lock()
defer ck.mu.Unlock()
// You'll have to modify PutAppend().
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
servers, ok := ck.config.Groups[gid]
if ok {
// try each server in the shard's replication group.
for _, srv := range servers {
args := &PutAppendArgs{}
args.Key = key
args.Value = value
args.Op = op
var reply PutAppendReply
ok := call(srv, "ShardKV.PutAppend", args, &reply)
if ok && reply.Err == OK {
return
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for a new configuration.
ck.config = ck.sm.Query(-1)
}
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

View File

@ -0,0 +1,43 @@
package shardkv
//
// Sharded key/value server.
// Lots of replica groups, each running op-at-a-time paxos.
// Shardmaster decides which group serves each shard.
// Shardmaster may change shard assignment from time to time.
//
// You will have to modify these definitions.
//
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongGroup = "ErrWrongGroup"
)
type Err string
type PutAppendArgs struct {
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}

166
src/paxos-shardkv/server.go Normal file
View File

@ -0,0 +1,166 @@
package shardkv
import "net"
import "fmt"
import "net/rpc"
import "log"
import "time"
import "paxos"
import "sync"
import "sync/atomic"
import "os"
import "syscall"
import "encoding/gob"
import "math/rand"
import "shardmaster"
const Debug = 0
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug > 0 {
log.Printf(format, a...)
}
return
}
type Op struct {
// Your definitions here.
}
type ShardKV struct {
mu sync.Mutex
l net.Listener
me int
dead int32 // for testing
unreliable int32 // for testing
sm *shardmaster.Clerk
px *paxos.Paxos
gid int64 // my replica group ID
// Your definitions here.
}
func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) error {
// Your code here.
return nil
}
// RPC handler for client Put and Append requests
func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
// Your code here.
return nil
}
//
// Ask the shardmaster if there's a new configuration;
// if so, re-configure.
//
func (kv *ShardKV) tick() {
}
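// The sketch below is illustrative only and uses hypothetical names
// (kv.config, kv.reconfigure) that this skeleton does not define; it outlines
// the usual shape of tick(): poll the shardmaster and step through any
// configurations this group has not processed yet, one at a time.
//
//	func (kv *ShardKV) tick() {
//		kv.mu.Lock()
//		defer kv.mu.Unlock()
//		latest := kv.sm.Query(-1)
//		for n := kv.config.Num + 1; n <= latest.Num; n++ {
//			next := kv.sm.Query(n)
//			kv.reconfigure(next) // hypothetical: migrate shards, then adopt next
//		}
//	}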
// tell the server to shut itself down.
// please don't change these two functions.
func (kv *ShardKV) kill() {
atomic.StoreInt32(&kv.dead, 1)
kv.l.Close()
kv.px.Kill()
}
// call this to find out if the server is dead.
func (kv *ShardKV) isdead() bool {
return atomic.LoadInt32(&kv.dead) != 0
}
// please do not change these two functions.
func (kv *ShardKV) Setunreliable(what bool) {
if what {
atomic.StoreInt32(&kv.unreliable, 1)
} else {
atomic.StoreInt32(&kv.unreliable, 0)
}
}
func (kv *ShardKV) isunreliable() bool {
return atomic.LoadInt32(&kv.unreliable) != 0
}
//
// Start a shardkv server.
// gid is the ID of the server's replica group.
// shardmasters[] contains the ports of the
// servers that implement the shardmaster.
// servers[] contains the ports of the servers
// in this replica group.
// me is the index of this server in servers[].
//
func StartServer(gid int64, shardmasters []string,
servers []string, me int) *ShardKV {
gob.Register(Op{})
kv := new(ShardKV)
kv.me = me
kv.gid = gid
kv.sm = shardmaster.MakeClerk(shardmasters)
// Your initialization code here.
// Don't call Join().
rpcs := rpc.NewServer()
rpcs.Register(kv)
kv.px = paxos.Make(servers, me, rpcs)
os.Remove(servers[me])
l, e := net.Listen("unix", servers[me])
if e != nil {
log.Fatal("listen error: ", e)
}
kv.l = l
// please do not change any of the following code,
// or do anything to subvert it.
go func() {
for kv.isdead() == false {
conn, err := kv.l.Accept()
if err == nil && kv.isdead() == false {
if kv.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if kv.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
go rpcs.ServeConn(conn)
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && kv.isdead() == false {
fmt.Printf("ShardKV(%v) accept: %v\n", me, err.Error())
kv.kill()
}
}
}()
go func() {
for kv.isdead() == false {
kv.tick()
time.Sleep(250 * time.Millisecond)
}
}()
return kv
}

View File

@ -0,0 +1,360 @@
package shardkv
import "testing"
import "shardmaster"
import "runtime"
import "strconv"
import "os"
import "time"
import "fmt"
import "sync"
import "sync/atomic"
import "math/rand"
// information about the servers of one replica group.
type tGroup struct {
gid int64
servers []*ShardKV
ports []string
}
// information about all the servers of a k/v cluster.
type tCluster struct {
t *testing.T
masters []*shardmaster.ShardMaster
mck *shardmaster.Clerk
masterports []string
groups []*tGroup
}
func port(tag string, host int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "skv-"
s += strconv.Itoa(os.Getpid()) + "-"
s += tag + "-"
s += strconv.Itoa(host)
return s
}
//
// start a k/v replica server thread.
//
func (tc *tCluster) start1(gi int, si int, unreliable bool) {
s := StartServer(tc.groups[gi].gid, tc.masterports, tc.groups[gi].ports, si)
tc.groups[gi].servers[si] = s
s.Setunreliable(unreliable)
}
func (tc *tCluster) cleanup() {
for gi := 0; gi < len(tc.groups); gi++ {
g := tc.groups[gi]
for si := 0; si < len(g.servers); si++ {
if g.servers[si] != nil {
g.servers[si].kill()
}
}
}
for i := 0; i < len(tc.masters); i++ {
if tc.masters[i] != nil {
tc.masters[i].Kill()
}
}
}
func (tc *tCluster) shardclerk() *shardmaster.Clerk {
return shardmaster.MakeClerk(tc.masterports)
}
func (tc *tCluster) clerk() *Clerk {
return MakeClerk(tc.masterports)
}
func (tc *tCluster) join(gi int) {
tc.mck.Join(tc.groups[gi].gid, tc.groups[gi].ports)
}
func (tc *tCluster) leave(gi int) {
tc.mck.Leave(tc.groups[gi].gid)
}
func setup(t *testing.T, tag string, unreliable bool) *tCluster {
runtime.GOMAXPROCS(4)
const nmasters = 3
const ngroups = 3 // replica groups
const nreplicas = 3 // servers per group
tc := &tCluster{}
tc.t = t
tc.masters = make([]*shardmaster.ShardMaster, nmasters)
tc.masterports = make([]string, nmasters)
for i := 0; i < nmasters; i++ {
tc.masterports[i] = port(tag+"m", i)
}
for i := 0; i < nmasters; i++ {
tc.masters[i] = shardmaster.StartServer(tc.masterports, i)
}
tc.mck = tc.shardclerk()
tc.groups = make([]*tGroup, ngroups)
for i := 0; i < ngroups; i++ {
tc.groups[i] = &tGroup{}
tc.groups[i].gid = int64(i + 100)
tc.groups[i].servers = make([]*ShardKV, nreplicas)
tc.groups[i].ports = make([]string, nreplicas)
for j := 0; j < nreplicas; j++ {
tc.groups[i].ports[j] = port(tag+"s", (i*nreplicas)+j)
}
for j := 0; j < nreplicas; j++ {
tc.start1(i, j, unreliable)
}
}
// return smh, gids, ha, sa, clean
return tc
}
func TestBasic(t *testing.T) {
tc := setup(t, "basic", false)
defer tc.cleanup()
fmt.Printf("Test: Basic Join/Leave ...\n")
tc.join(0)
ck := tc.clerk()
ck.Put("a", "x")
ck.Append("a", "b")
if ck.Get("a") != "xb" {
t.Fatalf("Get got wrong value")
}
keys := make([]string, 10)
vals := make([]string, len(keys))
for i := 0; i < len(keys); i++ {
keys[i] = strconv.Itoa(rand.Int())
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
// are keys still there after joins?
for g := 1; g < len(tc.groups); g++ {
tc.join(g)
time.Sleep(1 * time.Second)
for i := 0; i < len(keys); i++ {
v := ck.Get(keys[i])
if v != vals[i] {
t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v",
g, keys[i], vals[i], v)
}
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
}
// are keys still there after leaves?
for g := 0; g < len(tc.groups)-1; g++ {
tc.leave(g)
time.Sleep(1 * time.Second)
for i := 0; i < len(keys); i++ {
v := ck.Get(keys[i])
if v != vals[i] {
t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v",
g, keys[i], vals[i], v)
}
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
}
fmt.Printf(" ... Passed\n")
}
func TestMove(t *testing.T) {
tc := setup(t, "move", false)
defer tc.cleanup()
fmt.Printf("Test: Shards really move ...\n")
tc.join(0)
ck := tc.clerk()
// insert one key per shard
for i := 0; i < shardmaster.NShards; i++ {
ck.Put(string('0'+i), string('0'+i))
}
// add group 1.
tc.join(1)
time.Sleep(5 * time.Second)
// check that keys are still there.
for i := 0; i < shardmaster.NShards; i++ {
if ck.Get(string('0'+i)) != string('0'+i) {
t.Fatalf("missing key/value")
}
}
// remove sockets from group 0.
for _, port := range tc.groups[0].ports {
os.Remove(port)
}
count := int32(0)
var mu sync.Mutex
for i := 0; i < shardmaster.NShards; i++ {
go func(me int) {
myck := tc.clerk()
v := myck.Get(string('0' + me))
if v == string('0'+me) {
mu.Lock()
atomic.AddInt32(&count, 1)
mu.Unlock()
} else {
t.Fatalf("Get(%v) yielded %v\n", me, v)
}
}(i)
}
time.Sleep(10 * time.Second)
ccc := atomic.LoadInt32(&count)
if ccc > shardmaster.NShards/3 && ccc < 2*(shardmaster.NShards/3) {
fmt.Printf(" ... Passed\n")
} else {
t.Fatalf("%v keys worked after killing 1/2 of groups; wanted %v",
ccc, shardmaster.NShards/2)
}
}
func TestLimp(t *testing.T) {
tc := setup(t, "limp", false)
defer tc.cleanup()
fmt.Printf("Test: Reconfiguration with some dead replicas ...\n")
tc.join(0)
ck := tc.clerk()
ck.Put("a", "b")
if ck.Get("a") != "b" {
t.Fatalf("got wrong value")
}
// kill one server from each replica group.
for gi := 0; gi < len(tc.groups); gi++ {
sa := tc.groups[gi].servers
ns := len(sa)
sa[rand.Int()%ns].kill()
}
keys := make([]string, 10)
vals := make([]string, len(keys))
for i := 0; i < len(keys); i++ {
keys[i] = strconv.Itoa(rand.Int())
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
// are keys still there after joins?
for g := 1; g < len(tc.groups); g++ {
tc.join(g)
time.Sleep(1 * time.Second)
for i := 0; i < len(keys); i++ {
v := ck.Get(keys[i])
if v != vals[i] {
t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v",
g, keys[i], vals[i], v)
}
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
}
// are keys still there after leaves?
for gi := 0; gi < len(tc.groups)-1; gi++ {
tc.leave(gi)
time.Sleep(2 * time.Second)
g := tc.groups[gi]
for i := 0; i < len(g.servers); i++ {
g.servers[i].kill()
}
for i := 0; i < len(keys); i++ {
v := ck.Get(keys[i])
if v != vals[i] {
t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v",
g, keys[i], vals[i], v)
}
vals[i] = strconv.Itoa(rand.Int())
ck.Put(keys[i], vals[i])
}
}
fmt.Printf(" ... Passed\n")
}
func doConcurrent(t *testing.T, unreliable bool) {
tc := setup(t, "concurrent-"+strconv.FormatBool(unreliable), unreliable)
defer tc.cleanup()
for i := 0; i < len(tc.groups); i++ {
tc.join(i)
}
const npara = 11
var ca [npara]chan bool
for i := 0; i < npara; i++ {
ca[i] = make(chan bool)
go func(me int) {
ok := true
defer func() { ca[me] <- ok }()
ck := tc.clerk()
mymck := tc.shardclerk()
key := strconv.Itoa(me)
last := ""
for iters := 0; iters < 3; iters++ {
nv := strconv.Itoa(rand.Int())
ck.Append(key, nv)
last = last + nv
v := ck.Get(key)
if v != last {
ok = false
t.Fatalf("Get(%v) expected %v got %v\n", key, last, v)
}
gi := rand.Int() % len(tc.groups)
gid := tc.groups[gi].gid
mymck.Move(rand.Int()%shardmaster.NShards, gid)
time.Sleep(time.Duration(rand.Int()%30) * time.Millisecond)
}
}(i)
}
for i := 0; i < npara; i++ {
x := <-ca[i]
if x == false {
t.Fatalf("something is wrong")
}
}
}
func TestConcurrent(t *testing.T) {
fmt.Printf("Test: Concurrent Put/Get/Move ...\n")
doConcurrent(t, false)
fmt.Printf(" ... Passed\n")
}
func TestConcurrentUnreliable(t *testing.T) {
fmt.Printf("Test: Concurrent Put/Get/Move (unreliable) ...\n")
doConcurrent(t, true)
fmt.Printf(" ... Passed\n")
}

View File

@ -0,0 +1,120 @@
package shardmaster
//
// Shardmaster clerk.
// Please don't change this file.
//
import "net/rpc"
import "time"
import "fmt"
type Clerk struct {
servers []string // shardmaster replicas
}
func MakeClerk(servers []string) *Clerk {
ck := new(Clerk)
ck.servers = servers
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
func (ck *Clerk) Query(num int) Config {
for {
// try each known server.
for _, srv := range ck.servers {
args := &QueryArgs{}
args.Num = num
var reply QueryReply
ok := call(srv, "ShardMaster.Query", args, &reply)
if ok {
return reply.Config
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Join(gid int64, servers []string) {
for {
// try each known server.
for _, srv := range ck.servers {
args := &JoinArgs{}
args.GID = gid
args.Servers = servers
var reply JoinReply
ok := call(srv, "ShardMaster.Join", args, &reply)
if ok {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Leave(gid int64) {
for {
// try each known server.
for _, srv := range ck.servers {
args := &LeaveArgs{}
args.GID = gid
var reply LeaveReply
ok := call(srv, "ShardMaster.Leave", args, &reply)
if ok {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Move(shard int, gid int64) {
for {
// try each known server.
for _, srv := range ck.servers {
args := &MoveArgs{}
args.Shard = shard
args.GID = gid
var reply MoveReply
ok := call(srv, "ShardMaster.Move", args, &reply)
if ok {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
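// exampleClerkUsage is an illustrative sketch, not part of the original file:
// it shows the typical Join -> Query flow a tester or application goes
// through. The master ports, group ID, and server names are hypothetical.
func exampleClerkUsage(masterPorts []string) Config {
	ck := MakeClerk(masterPorts)
	ck.Join(100, []string{"server-a", "server-b", "server-c"})
	return ck.Query(-1) // the latest configuration now includes group 100
}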

View File

@ -0,0 +1,60 @@
package shardmaster
//
// Master shard server: assigns shards to replication groups.
//
// RPC interface:
// Join(gid, servers) -- replica group gid is joining, give it some shards.
// Leave(gid) -- replica group gid is retiring, hand off all its shards.
// Move(shard, gid) -- hand off one shard from current owner to gid.
// Query(num) -> fetch Config # num, or latest config if num==-1.
//
// A Config (configuration) describes a set of replica groups, and the
// replica group responsible for each shard. Configs are numbered. Config
// #0 is the initial configuration, with no groups and all shards
// assigned to group 0 (the invalid group).
//
// A GID is a replica group ID. GIDs must be unique and > 0.
// Once a GID joins and then leaves, it should never join again.
//
// Please don't change this file.
//
const NShards = 10
type Config struct {
Num int // config number
Shards [NShards]int64 // shard -> gid
Groups map[int64][]string // gid -> servers[]
}
type JoinArgs struct {
GID int64 // unique replica group ID
Servers []string // group server ports
}
type JoinReply struct {
}
type LeaveArgs struct {
GID int64
}
type LeaveReply struct {
}
type MoveArgs struct {
Shard int
GID int64
}
type MoveReply struct {
}
type QueryArgs struct {
Num int // desired config number
}
type QueryReply struct {
Config Config
}

View File

@ -0,0 +1,141 @@
package shardmaster
import "net"
import "fmt"
import "net/rpc"
import "log"
import "paxos"
import "sync"
import "sync/atomic"
import "os"
import "syscall"
import "encoding/gob"
import "math/rand"
type ShardMaster struct {
mu sync.Mutex
l net.Listener
me int
dead int32 // for testing
unreliable int32 // for testing
px *paxos.Paxos
configs []Config // indexed by config num
}
type Op struct {
// Your data here.
}
func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) error {
// Your code here.
return nil
}
func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) error {
// Your code here.
return nil
}
func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) error {
// Your code here.
return nil
}
func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) error {
// Your code here.
return nil
}
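// exampleRebalance is an illustrative sketch, not part of the skeleton and
// not called anywhere: it shows one way to spread the NShards shards roughly
// evenly over the groups in a Config (max and min per-group counts differing
// by at most one, which is what the tests check). A real Join/Leave handler
// also needs to minimise how many shards actually move, and must pick the
// groups in a deterministic order (e.g. by sorting the gids), since Go map
// iteration order is random.
func exampleRebalance(c *Config) {
	if len(c.Groups) == 0 {
		return
	}
	gids := make([]int64, 0, len(c.Groups))
	for gid := range c.Groups {
		gids = append(gids, gid)
	}
	// round-robin: shard i goes to group gids[i % len(gids)]
	for i := 0; i < NShards; i++ {
		c.Shards[i] = gids[i%len(gids)]
	}
}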
// please don't change these two functions.
func (sm *ShardMaster) Kill() {
atomic.StoreInt32(&sm.dead, 1)
sm.l.Close()
sm.px.Kill()
}
// call this to find out if the server is dead.
func (sm *ShardMaster) isdead() bool {
return atomic.LoadInt32(&sm.dead) != 0
}
// please do not change these two functions.
func (sm *ShardMaster) setunreliable(what bool) {
if what {
atomic.StoreInt32(&sm.unreliable, 1)
} else {
atomic.StoreInt32(&sm.unreliable, 0)
}
}
func (sm *ShardMaster) isunreliable() bool {
return atomic.LoadInt32(&sm.unreliable) != 0
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Paxos to
// form the fault-tolerant shardmaster service.
// me is the index of the current server in servers[].
//
func StartServer(servers []string, me int) *ShardMaster {
sm := new(ShardMaster)
sm.me = me
sm.configs = make([]Config, 1)
sm.configs[0].Groups = map[int64][]string{}
rpcs := rpc.NewServer()
gob.Register(Op{})
rpcs.Register(sm)
sm.px = paxos.Make(servers, me, rpcs)
os.Remove(servers[me])
l, e := net.Listen("unix", servers[me])
if e != nil {
log.Fatal("listen error: ", e)
}
sm.l = l
// please do not change any of the following code,
// or do anything to subvert it.
go func() {
for sm.isdead() == false {
conn, err := sm.l.Accept()
if err == nil && sm.isdead() == false {
if sm.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if sm.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
go rpcs.ServeConn(conn)
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && sm.isdead() == false {
fmt.Printf("ShardMaster(%v) accept: %v\n", me, err.Error())
sm.Kill()
}
}
}()
return sm
}

View File

@ -0,0 +1,372 @@
package shardmaster
import "testing"
import "runtime"
import "strconv"
import "os"
// import "time"
import "fmt"
import "math/rand"
func port(tag string, host int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "sm-"
s += strconv.Itoa(os.Getpid()) + "-"
s += tag + "-"
s += strconv.Itoa(host)
return s
}
func cleanup(sma []*ShardMaster) {
for i := 0; i < len(sma); i++ {
if sma[i] != nil {
sma[i].Kill()
}
}
}
//
// maybe should take a cka[] and find the server with
// the highest Num.
//
func check(t *testing.T, groups []int64, ck *Clerk) {
c := ck.Query(-1)
if len(c.Groups) != len(groups) {
t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
}
// are the groups as expected?
for _, g := range groups {
_, ok := c.Groups[g]
if ok != true {
t.Fatalf("missing group %v", g)
}
}
// any un-allocated shards?
if len(groups) > 0 {
for s, g := range c.Shards {
_, ok := c.Groups[g]
if ok == false {
t.Fatalf("shard %v -> invalid group %v", s, g)
}
}
}
// more or less balanced sharding?
counts := map[int64]int{}
for _, g := range c.Shards {
counts[g] += 1
}
min := 257
max := 0
for g, _ := range c.Groups {
if counts[g] > max {
max = counts[g]
}
if counts[g] < min {
min = counts[g]
}
}
if max > min+1 {
t.Fatalf("max %v too much larger than min %v", max, min)
}
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var sma []*ShardMaster = make([]*ShardMaster, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(sma)
for i := 0; i < nservers; i++ {
kvh[i] = port("basic", i)
}
for i := 0; i < nservers; i++ {
sma[i] = StartServer(kvh, i)
}
ck := MakeClerk(kvh)
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{kvh[i]})
}
fmt.Printf("Test: Basic leave/join ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int64{}, ck)
var gid1 int64 = 1
ck.Join(gid1, []string{"x", "y", "z"})
check(t, []int64{gid1}, ck)
cfa[1] = ck.Query(-1)
var gid2 int64 = 2
ck.Join(gid2, []string{"a", "b", "c"})
check(t, []int64{gid1, gid2}, ck)
cfa[2] = ck.Query(-1)
ck.Join(gid2, []string{"a", "b", "c"})
check(t, []int64{gid1, gid2}, ck)
cfa[3] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave(gid1)
check(t, []int64{gid2}, ck)
cfa[4] = ck.Query(-1)
ck.Leave(gid1)
check(t, []int64{gid2}, ck)
cfa[5] = ck.Query(-1)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Historical queries ...\n")
for i := 0; i < len(cfa); i++ {
c := ck.Query(cfa[i].Num)
if c.Num != cfa[i].Num {
t.Fatalf("historical Num wrong")
}
if c.Shards != cfa[i].Shards {
t.Fatalf("historical Shards wrong")
}
if len(c.Groups) != len(cfa[i].Groups) {
t.Fatalf("number of historical Groups is wrong")
}
for gid, sa := range c.Groups {
sa1, ok := cfa[i].Groups[gid]
if ok == false || len(sa1) != len(sa) {
t.Fatalf("historical len(Groups) wrong")
}
if ok && len(sa1) == len(sa) {
for j := 0; j < len(sa); j++ {
if sa[j] != sa1[j] {
t.Fatalf("historical Groups wrong")
}
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Move ...\n")
{
var gid3 int64 = 503
ck.Join(gid3, []string{"3a", "3b", "3c"})
var gid4 int64 = 504
ck.Join(gid4, []string{"4a", "4b", "4c"})
for i := 0; i < NShards; i++ {
cf := ck.Query(-1)
if i < NShards/2 {
ck.Move(i, gid3)
if cf.Shards[i] != gid3 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
} else {
ck.Move(i, gid4)
if cf.Shards[i] != gid4 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
}
}
cf2 := ck.Query(-1)
for i := 0; i < NShards; i++ {
if i < NShards/2 {
if cf2.Shards[i] != gid3 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid3, cf2.Shards[i])
}
} else {
if cf2.Shards[i] != gid4 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid4, cf2.Shards[i])
}
}
}
ck.Leave(gid3)
ck.Leave(gid4)
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent leave/join ...\n")
const npara = 10
gids := make([]int64, npara)
var ca [npara]chan bool
for xi := 0; xi < npara; xi++ {
gids[xi] = int64(xi + 1)
ca[xi] = make(chan bool)
go func(i int) {
defer func() { ca[i] <- true }()
var gid int64 = gids[i]
cka[(i+0)%nservers].Join(gid+1000, []string{"a", "b", "c"})
cka[(i+0)%nservers].Join(gid, []string{"a", "b", "c"})
cka[(i+1)%nservers].Leave(gid + 1000)
}(xi)
}
for i := 0; i < npara; i++ {
<-ca[i]
}
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Min advances after joins ...\n")
for i, sm := range sma {
if sm.px.Min() <= 0 {
t.Fatalf("Min() for %s did not advance", kvh[i])
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after joins ...\n")
c1 := ck.Query(-1)
for i := 0; i < 5; i++ {
ck.Join(int64(npara+1+i), []string{"a", "b", "c"})
}
c2 := ck.Query(-1)
for i := int64(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after leaves ...\n")
for i := 0; i < 5; i++ {
ck.Leave(int64(npara + 1 + i))
}
c3 := ck.Query(-1)
for i := int64(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var sma []*ShardMaster = make([]*ShardMaster, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(sma)
for i := 0; i < nservers; i++ {
kvh[i] = port("unrel", i)
}
for i := 0; i < nservers; i++ {
sma[i] = StartServer(kvh, i)
// don't turn on unreliable because the assignment
// doesn't require the shardmaster to detect duplicate
// client requests.
// sma[i].setunreliable(true)
}
ck := MakeClerk(kvh)
var cka [nservers]*Clerk
for i := 0; i < nservers; i++ {
cka[i] = MakeClerk([]string{kvh[i]})
}
fmt.Printf("Test: Concurrent leave/join, failure ...\n")
const npara = 20
gids := make([]int64, npara)
var ca [npara]chan bool
for xi := 0; xi < npara; xi++ {
gids[xi] = int64(xi + 1)
ca[xi] = make(chan bool)
go func(i int) {
defer func() { ca[i] <- true }()
var gid int64 = gids[i]
cka[1+(rand.Int()%2)].Join(gid+1000, []string{"a", "b", "c"})
cka[1+(rand.Int()%2)].Join(gid, []string{"a", "b", "c"})
cka[1+(rand.Int()%2)].Leave(gid + 1000)
// server 0 won't be able to hear any RPCs.
os.Remove(kvh[0])
}(xi)
}
for i := 0; i < npara; i++ {
<-ca[i]
}
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
}
func TestFreshQuery(t *testing.T) {
runtime.GOMAXPROCS(4)
const nservers = 3
var sma []*ShardMaster = make([]*ShardMaster, nservers)
var kvh []string = make([]string, nservers)
defer cleanup(sma)
for i := 0; i < nservers; i++ {
kvh[i] = port("fresh", i)
}
for i := 0; i < nservers; i++ {
sma[i] = StartServer(kvh, i)
}
ck1 := MakeClerk([]string{kvh[1]})
fmt.Printf("Test: Query() returns latest configuration ...\n")
portx := kvh[0] + strconv.Itoa(rand.Int())
if os.Rename(kvh[0], portx) != nil {
t.Fatalf("os.Rename() failed")
}
ck0 := MakeClerk([]string{portx})
ck1.Join(1001, []string{"a", "b", "c"})
c := ck0.Query(-1)
_, ok := c.Groups[1001]
if ok == false {
t.Fatalf("Query(-1) produced a stale configuration")
}
fmt.Printf(" ... Passed\n")
os.Remove(portx)
}

273
src/paxos/paxos.go Normal file
View File

@ -0,0 +1,273 @@
package paxos
//
// Paxos library, to be included in an application.
// Multiple applications will run, each including
// a Paxos peer.
//
// Manages a sequence of agreed-on values.
// The set of peers is fixed.
// Copes with network failures (partition, msg loss, &c).
// Does not store anything persistently, so cannot handle crash+restart.
//
// The application interface:
//
// px = paxos.Make(peers []string, me int)
// px.Start(seq int, v interface{}) -- start agreement on new instance
// px.Status(seq int) (Fate, v interface{}) -- get info about an instance
// px.Done(seq int) -- ok to forget all instances <= seq
// px.Max() int -- highest instance seq known, or -1
// px.Min() int -- instances before this seq have been forgotten
//
import "net"
import "net/rpc"
import "log"
import "os"
import "syscall"
import "sync"
import "sync/atomic"
import "fmt"
import "math/rand"
// px.Status() return values, indicating
// whether an agreement has been decided,
// or Paxos has not yet reached agreement,
// or it was agreed but forgotten (i.e. < Min()).
type Fate int
const (
Decided Fate = iota + 1
Pending // not yet decided.
Forgotten // decided but forgotten.
)
type Paxos struct {
mu sync.Mutex
l net.Listener
dead int32 // for testing
unreliable int32 // for testing
rpcCount int32 // for testing
peers []string
me int // index into peers[]
// Your data here.
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will time out and return an
// error after a while if it does not get a reply from the server.
//
// please use call() to send all RPCs, in client.go and server.go.
// please do not change this function.
//
func call(srv string, name string, args interface{}, reply interface{}) bool {
c, err := rpc.Dial("unix", srv)
if err != nil {
err1 := err.(*net.OpError)
if err1.Err != syscall.ENOENT && err1.Err != syscall.ECONNREFUSED {
fmt.Printf("paxos Dial() failed: %v\n", err1)
}
return false
}
defer c.Close()
err = c.Call(name, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// the application wants paxos to start agreement on
// instance seq, with proposed value v.
// Start() returns right away; the application will
// call Status() to find out if/when agreement
// is reached.
//
func (px *Paxos) Start(seq int, v interface{}) {
// Your code here.
}
//
// the application on this machine is done with
// all instances <= seq.
//
// see the comments for Min() for more explanation.
//
func (px *Paxos) Done(seq int) {
// Your code here.
}
//
// the application wants to know the
// highest instance sequence known to
// this peer.
//
func (px *Paxos) Max() int {
// Your code here.
return 0
}
//
// Min() should return one more than the minimum among z_i,
// where z_i is the highest number ever passed
// to Done() on peer i. A peer's z_i is -1 if it has
// never called Done().
//
// Paxos is required to have forgotten all information
// about any instances it knows that are < Min().
// The point is to free up memory in long-running
// Paxos-based servers.
//
// Paxos peers need to exchange their highest Done()
// arguments in order to implement Min(). These
// exchanges can be piggybacked on ordinary Paxos
// agreement protocol messages, so it is OK if one
// peer's Min does not reflect another peer's Done()
// until after the next instance is agreed to.
//
// The fact that Min() is defined as a minimum over
// *all* Paxos peers means that Min() cannot increase until
// all peers have been heard from. So if a peer is dead
// or unreachable, other peers' Min()s will not increase
// even if all reachable peers call Done. The reason for
// this is that when the unreachable peer comes back to
// life, it will need to catch up on instances that it
// missed -- the other peers therefore cannot forget these
// instances.
//
func (px *Paxos) Min() int {
// Your code here.
return 0
}
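// exampleMin is an illustrative helper, not part of the skeleton and not
// called anywhere: given the highest Done() argument heard from each peer
// (z_i, or -1 if that peer never called Done()), the Min() described above is
// simply one more than the smallest of them. A real implementation keeps these
// values in the Paxos struct and piggybacks them on protocol messages.
func exampleMin(doneSeqs []int) int {
	m := doneSeqs[0] // peers[] always includes this peer, so the slice is non-empty
	for _, z := range doneSeqs[1:] {
		if z < m {
			m = z
		}
	}
	return m + 1
}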
//
// the application wants to know whether this
// peer thinks an instance has been decided,
// and if so what the agreed value is. Status()
// should just inspect the local peer state;
// it should not contact other Paxos peers.
//
func (px *Paxos) Status(seq int) (Fate, interface{}) {
// Your code here.
return Pending, nil
}
//
// tell the peer to shut itself down.
// for testing.
// please do not change these two functions.
//
func (px *Paxos) Kill() {
atomic.StoreInt32(&px.dead, 1)
if px.l != nil {
px.l.Close()
}
}
//
// has this peer been asked to shut down?
//
func (px *Paxos) isdead() bool {
return atomic.LoadInt32(&px.dead) != 0
}
// please do not change these two functions.
func (px *Paxos) setunreliable(what bool) {
if what {
atomic.StoreInt32(&px.unreliable, 1)
} else {
atomic.StoreInt32(&px.unreliable, 0)
}
}
func (px *Paxos) isunreliable() bool {
return atomic.LoadInt32(&px.unreliable) != 0
}
//
// the application wants to create a paxos peer.
// the ports of all the paxos peers (including this one)
// are in peers[]. this server's port is peers[me].
//
func Make(peers []string, me int, rpcs *rpc.Server) *Paxos {
px := &Paxos{}
px.peers = peers
px.me = me
// Your initialization code here.
if rpcs != nil {
// caller will create socket &c
rpcs.Register(px)
} else {
rpcs = rpc.NewServer()
rpcs.Register(px)
// prepare to receive connections from clients.
// change "unix" to "tcp" to use over a network.
os.Remove(peers[me]) // only needed for "unix"
l, e := net.Listen("unix", peers[me])
if e != nil {
log.Fatal("listen error: ", e)
}
px.l = l
// please do not change any of the following code,
// or do anything to subvert it.
// create a thread to accept RPC connections
go func() {
for px.isdead() == false {
conn, err := px.l.Accept()
if err == nil && px.isdead() == false {
if px.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if px.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
atomic.AddInt32(&px.rpcCount, 1)
go rpcs.ServeConn(conn)
} else {
atomic.AddInt32(&px.rpcCount, 1)
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && px.isdead() == false {
fmt.Printf("Paxos(%v) accept: %v\n", me, err.Error())
}
}
}()
}
return px
}
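// Example use of the application interface (illustrative only; this mirrors
// how test_test.go drives a peer and is not executable as written here):
//
//	px := Make(peers, me, nil)
//	px.Start(seq, value) // propose value for instance seq; returns immediately
//	for {
//		fate, v := px.Status(seq)
//		if fate == Decided {
//			_ = v // the agreed-on value
//			break
//		}
//		time.Sleep(10 * time.Millisecond) // would require importing "time"
//	}
//	px.Done(seq) // this peer no longer needs instances <= seq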

957
src/paxos/test_test.go Normal file
View File

@ -0,0 +1,957 @@
package paxos
import "testing"
import "runtime"
import "strconv"
import "os"
import "time"
import "fmt"
import "math/rand"
import crand "crypto/rand"
import "encoding/base64"
import "sync/atomic"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func port(tag string, host int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "px-"
s += strconv.Itoa(os.Getpid()) + "-"
s += tag + "-"
s += strconv.Itoa(host)
return s
}
func ndecided(t *testing.T, pxa []*Paxos, seq int) int {
count := 0
var v interface{}
for i := 0; i < len(pxa); i++ {
if pxa[i] != nil {
decided, v1 := pxa[i].Status(seq)
if decided == Decided {
if count > 0 && v != v1 {
t.Fatalf("decided values do not match; seq=%v i=%v v=%v v1=%v",
seq, i, v, v1)
}
count++
v = v1
}
}
}
return count
}
func waitn(t *testing.T, pxa []*Paxos, seq int, wanted int) {
to := 10 * time.Millisecond
for iters := 0; iters < 30; iters++ {
if ndecided(t, pxa, seq) >= wanted {
break
}
time.Sleep(to)
if to < time.Second {
to *= 2
}
}
nd := ndecided(t, pxa, seq)
if nd < wanted {
t.Fatalf("too few decided; seq=%v ndecided=%v wanted=%v", seq, nd, wanted)
}
}
func waitmajority(t *testing.T, pxa []*Paxos, seq int) {
waitn(t, pxa, seq, (len(pxa)/2)+1)
}
func checkmax(t *testing.T, pxa []*Paxos, seq int, max int) {
time.Sleep(3 * time.Second)
nd := ndecided(t, pxa, seq)
if nd > max {
t.Fatalf("too many decided; seq=%v ndecided=%v max=%v", seq, nd, max)
}
}
func cleanup(pxa []*Paxos) {
for i := 0; i < len(pxa); i++ {
if pxa[i] != nil {
pxa[i].Kill()
}
}
}
func noTestSpeed(t *testing.T) {
runtime.GOMAXPROCS(4)
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("time", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
t0 := time.Now()
for i := 0; i < 20; i++ {
pxa[0].Start(i, "x")
waitn(t, pxa, i, npaxos)
}
d := time.Since(t0)
fmt.Printf("20 agreements %v seconds\n", d.Seconds())
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("basic", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
fmt.Printf("Test: Single proposer ...\n")
pxa[0].Start(0, "hello")
waitn(t, pxa, 0, npaxos)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Many proposers, same value ...\n")
for i := 0; i < npaxos; i++ {
pxa[i].Start(1, 77)
}
waitn(t, pxa, 1, npaxos)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Many proposers, different values ...\n")
pxa[0].Start(2, 100)
pxa[1].Start(2, 101)
pxa[2].Start(2, 102)
waitn(t, pxa, 2, npaxos)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Out-of-order instances ...\n")
pxa[0].Start(7, 700)
pxa[0].Start(6, 600)
pxa[1].Start(5, 500)
waitn(t, pxa, 7, npaxos)
pxa[0].Start(4, 400)
pxa[1].Start(3, 300)
waitn(t, pxa, 6, npaxos)
waitn(t, pxa, 5, npaxos)
waitn(t, pxa, 4, npaxos)
waitn(t, pxa, 3, npaxos)
if pxa[0].Max() != 7 {
t.Fatalf("wrong Max()")
}
fmt.Printf(" ... Passed\n")
}
func TestDeaf(t *testing.T) {
runtime.GOMAXPROCS(4)
const npaxos = 5
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("deaf", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
fmt.Printf("Test: Deaf proposer ...\n")
pxa[0].Start(0, "hello")
waitn(t, pxa, 0, npaxos)
os.Remove(pxh[0])
os.Remove(pxh[npaxos-1])
pxa[1].Start(1, "goodbye")
waitmajority(t, pxa, 1)
time.Sleep(1 * time.Second)
if ndecided(t, pxa, 1) != npaxos-2 {
t.Fatalf("a deaf peer heard about a decision")
}
pxa[0].Start(1, "xxx")
waitn(t, pxa, 1, npaxos-1)
time.Sleep(1 * time.Second)
if ndecided(t, pxa, 1) != npaxos-1 {
t.Fatalf("a deaf peer heard about a decision")
}
pxa[npaxos-1].Start(1, "yyy")
waitn(t, pxa, 1, npaxos)
fmt.Printf(" ... Passed\n")
}
func TestForget(t *testing.T) {
runtime.GOMAXPROCS(4)
const npaxos = 6
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("gc", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
fmt.Printf("Test: Forgetting ...\n")
// initial Min() correct?
for i := 0; i < npaxos; i++ {
m := pxa[i].Min()
if m > 0 {
t.Fatalf("wrong initial Min() %v", m)
}
}
pxa[0].Start(0, "00")
pxa[1].Start(1, "11")
pxa[2].Start(2, "22")
pxa[0].Start(6, "66")
pxa[1].Start(7, "77")
waitn(t, pxa, 0, npaxos)
// Min() correct?
for i := 0; i < npaxos; i++ {
m := pxa[i].Min()
if m != 0 {
t.Fatalf("wrong Min() %v; expected 0", m)
}
}
waitn(t, pxa, 1, npaxos)
// Min() correct?
for i := 0; i < npaxos; i++ {
m := pxa[i].Min()
if m != 0 {
t.Fatalf("wrong Min() %v; expected 0", m)
}
}
// everyone Done() -> Min() changes?
for i := 0; i < npaxos; i++ {
pxa[i].Done(0)
}
for i := 1; i < npaxos; i++ {
pxa[i].Done(1)
}
for i := 0; i < npaxos; i++ {
pxa[i].Start(8+i, "xx")
}
allok := false
for iters := 0; iters < 12; iters++ {
allok = true
for i := 0; i < npaxos; i++ {
s := pxa[i].Min()
if s != 1 {
allok = false
}
}
if allok {
break
}
time.Sleep(1 * time.Second)
}
if allok != true {
t.Fatalf("Min() did not advance after Done()")
}
fmt.Printf(" ... Passed\n")
}
func TestManyForget(t *testing.T) {
runtime.GOMAXPROCS(4)
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("manygc", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
pxa[i].setunreliable(true)
}
fmt.Printf("Test: Lots of forgetting ...\n")
const maxseq = 20
go func() {
na := rand.Perm(maxseq)
for i := 0; i < len(na); i++ {
seq := na[i]
j := (rand.Int() % npaxos)
v := rand.Int()
pxa[j].Start(seq, v)
runtime.Gosched()
}
}()
done := make(chan bool)
go func() {
for {
select {
case <-done:
return
default:
}
seq := (rand.Int() % maxseq)
i := (rand.Int() % npaxos)
if seq >= pxa[i].Min() {
decided, _ := pxa[i].Status(seq)
if decided == Decided {
pxa[i].Done(seq)
}
}
runtime.Gosched()
}
}()
time.Sleep(5 * time.Second)
done <- true
for i := 0; i < npaxos; i++ {
pxa[i].setunreliable(false)
}
time.Sleep(2 * time.Second)
for seq := 0; seq < maxseq; seq++ {
for i := 0; i < npaxos; i++ {
if seq >= pxa[i].Min() {
pxa[i].Status(seq)
}
}
}
fmt.Printf(" ... Passed\n")
}
//
// does paxos forgetting actually free the memory?
//
func TestForgetMem(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Paxos frees forgotten instance memory ...\n")
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("gcmem", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
pxa[0].Start(0, "x")
waitn(t, pxa, 0, npaxos)
runtime.GC()
var m0 runtime.MemStats
runtime.ReadMemStats(&m0)
// m0.Alloc about a megabyte
for i := 1; i <= 10; i++ {
big := make([]byte, 1000000)
for j := 0; j < len(big); j++ {
big[j] = byte('a' + rand.Int()%26)
}
pxa[0].Start(i, string(big))
waitn(t, pxa, i, npaxos)
}
runtime.GC()
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
// m1.Alloc about 90 megabytes
for i := 0; i < npaxos; i++ {
pxa[i].Done(10)
}
for i := 0; i < npaxos; i++ {
pxa[i].Start(11+i, "z")
}
time.Sleep(3 * time.Second)
for i := 0; i < npaxos; i++ {
if pxa[i].Min() != 11 {
t.Fatalf("expected Min() %v, got %v\n", 11, pxa[i].Min())
}
}
runtime.GC()
var m2 runtime.MemStats
runtime.ReadMemStats(&m2)
// m2.Alloc about 10 megabytes
if m2.Alloc > (m1.Alloc / 2) {
t.Fatalf("memory use did not shrink enough")
}
again := make([]string, 10)
for seq := 0; seq < npaxos && seq < 10; seq++ {
again[seq] = randstring(20)
for i := 0; i < npaxos; i++ {
fate, _ := pxa[i].Status(seq)
if fate != Forgotten {
t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min())
}
pxa[i].Start(seq, again[seq])
}
}
time.Sleep(1 * time.Second)
for seq := 0; seq < npaxos && seq < 10; seq++ {
for i := 0; i < npaxos; i++ {
fate, v := pxa[i].Status(seq)
if fate != Forgotten || v == again[seq] {
t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min())
}
}
}
fmt.Printf(" ... Passed\n")
}
//
// does Max() work after Done()s?
//
func TestDoneMax(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Paxos Max() after Done()s ...\n")
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("donemax", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
pxa[0].Start(0, "x")
waitn(t, pxa, 0, npaxos)
for i := 1; i <= 10; i++ {
pxa[0].Start(i, "y")
waitn(t, pxa, i, npaxos)
}
for i := 0; i < npaxos; i++ {
pxa[i].Done(10)
}
// Propagate messages so everyone knows about Done(10)
for i := 0; i < npaxos; i++ {
pxa[i].Start(10, "z")
}
time.Sleep(2 * time.Second)
for i := 0; i < npaxos; i++ {
mx := pxa[i].Max()
if mx != 10 {
t.Fatalf("Max() did not return correct result %d after calling Done(); returned %d", 10, mx)
}
}
fmt.Printf(" ... Passed\n")
}
func TestRPCCount(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: RPC counts aren't too high ...\n")
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("count", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
}
ninst1 := 5
seq := 0
for i := 0; i < ninst1; i++ {
pxa[0].Start(seq, "x")
waitn(t, pxa, seq, npaxos)
seq++
}
time.Sleep(2 * time.Second)
total1 := int32(0)
for j := 0; j < npaxos; j++ {
total1 += atomic.LoadInt32(&pxa[j].rpcCount)
}
// per agreement:
// 3 prepares
// 3 accepts
// 3 decides
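// i.e. 3+3+3 = 9 = npaxos*npaxos RPCs per instance, so at most 45 for the 5 serial instances.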
expected1 := int32(ninst1 * npaxos * npaxos)
if total1 > expected1 {
t.Fatalf("too many RPCs for serial Start()s; %v instances, got %v, expected %v",
ninst1, total1, expected1)
}
ninst2 := 5
for i := 0; i < ninst2; i++ {
for j := 0; j < npaxos; j++ {
go pxa[j].Start(seq, j+(i*10))
}
waitn(t, pxa, seq, npaxos)
seq++
}
time.Sleep(2 * time.Second)
total2 := int32(0)
for j := 0; j < npaxos; j++ {
total2 += atomic.LoadInt32(&pxa[j].rpcCount)
}
total2 -= total1
// worst case per agreement:
// Proposer 1: 3 prep, 3 acc, 3 decides.
// Proposer 2: 3 prep, 3 acc, 3 prep, 3 acc, 3 decides.
// Proposer 3: 3 prep, 3 acc, 3 prep, 3 acc, 3 prep, 3 acc, 3 decides.
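// i.e. 9 + 15 + 21 = 45 = npaxos*15 RPCs per instance in the worst case, 225 for the 5 instances.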
expected2 := int32(ninst2 * npaxos * 15)
if total2 > expected2 {
t.Fatalf("too many RPCs for concurrent Start()s; %v instances, got %v, expected %v",
ninst2, total2, expected2)
}
fmt.Printf(" ... Passed\n")
}
//
// many agreements (without failures)
//
func TestMany(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Many instances ...\n")
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("many", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
pxa[i].Start(0, 0)
}
const ninst = 50
for seq := 1; seq < ninst; seq++ {
// only 5 active instances, to limit the
// number of file descriptors.
for seq >= 5 && ndecided(t, pxa, seq-5) < npaxos {
time.Sleep(20 * time.Millisecond)
}
for i := 0; i < npaxos; i++ {
pxa[i].Start(seq, (seq*10)+i)
}
}
for {
done := true
for seq := 1; seq < ninst; seq++ {
if ndecided(t, pxa, seq) < npaxos {
done = false
}
}
if done {
break
}
time.Sleep(100 * time.Millisecond)
}
fmt.Printf(" ... Passed\n")
}
//
// a peer starts up, with proposal, after others decide.
// then another peer starts, without a proposal.
//
func TestOld(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Minority proposal ignored ...\n")
const npaxos = 5
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("old", i)
}
pxa[1] = Make(pxh, 1, nil)
pxa[2] = Make(pxh, 2, nil)
pxa[3] = Make(pxh, 3, nil)
pxa[1].Start(1, 111)
waitmajority(t, pxa, 1)
pxa[0] = Make(pxh, 0, nil)
pxa[0].Start(1, 222)
waitn(t, pxa, 1, 4)
if false {
pxa[4] = Make(pxh, 4, nil)
waitn(t, pxa, 1, npaxos)
}
fmt.Printf(" ... Passed\n")
}
//
// many agreements, with unreliable RPC
//
func TestManyUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Many instances, unreliable RPC ...\n")
const npaxos = 3
var pxa []*Paxos = make([]*Paxos, npaxos)
var pxh []string = make([]string, npaxos)
defer cleanup(pxa)
for i := 0; i < npaxos; i++ {
pxh[i] = port("manyun", i)
}
for i := 0; i < npaxos; i++ {
pxa[i] = Make(pxh, i, nil)
pxa[i].setunreliable(true)
pxa[i].Start(0, 0)
}
const ninst = 50
for seq := 1; seq < ninst; seq++ {
// only 3 active instances, to limit the
// number of file descriptors.
for seq >= 3 && ndecided(t, pxa, seq-3) < npaxos {
time.Sleep(20 * time.Millisecond)
}
for i := 0; i < npaxos; i++ {
pxa[i].Start(seq, (seq*10)+i)
}
}
for {
done := true
for seq := 1; seq < ninst; seq++ {
if ndecided(t, pxa, seq) < npaxos {
done = false
}
}
if done {
break
}
time.Sleep(100 * time.Millisecond)
}
fmt.Printf(" ... Passed\n")
}
func pp(tag string, src int, dst int) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
s += "px-" + tag + "-"
s += strconv.Itoa(os.Getpid()) + "-"
s += strconv.Itoa(src) + "-"
s += strconv.Itoa(dst)
return s
}
func cleanpp(tag string, n int) {
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
ij := pp(tag, i, j)
os.Remove(ij)
}
}
}
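// part() places peers into partitions by hard-linking socket files: peer j
// listens on port(tag, j), while peer i addresses its RPCs for peer j to the
// per-pair path pp(tag, i, j) (see how TestPartition builds pxh below).
// Linking pp(tag, i, j) to port(tag, j) for every pair inside the same
// partition -- after cleanpp() has removed all old links -- means RPCs only
// get through between peers that share a partition.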
func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) {
cleanpp(tag, npaxos)
pa := [][]int{p1, p2, p3}
for pi := 0; pi < len(pa); pi++ {
p := pa[pi]
for i := 0; i < len(p); i++ {
for j := 0; j < len(p); j++ {
ij := pp(tag, p[i], p[j])
pj := port(tag, p[j])
err := os.Link(pj, ij)
if err != nil {
// one reason this link can fail is if the
// corresponding Paxos peer has prematurely quit and
// deleted its socket file (e.g., called px.Kill()).
t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err)
}
}
}
}
}
func TestPartition(t *testing.T) {
runtime.GOMAXPROCS(4)
tag := "partition"
const npaxos = 5
var pxa []*Paxos = make([]*Paxos, npaxos)
defer cleanup(pxa)
defer cleanpp(tag, npaxos)
for i := 0; i < npaxos; i++ {
var pxh []string = make([]string, npaxos)
for j := 0; j < npaxos; j++ {
if j == i {
pxh[j] = port(tag, i)
} else {
pxh[j] = pp(tag, i, j)
}
}
pxa[i] = Make(pxh, i, nil)
}
defer part(t, tag, npaxos, []int{}, []int{}, []int{})
seq := 0
fmt.Printf("Test: No decision if partitioned ...\n")
part(t, tag, npaxos, []int{0, 2}, []int{1, 3}, []int{4})
pxa[1].Start(seq, 111)
checkmax(t, pxa, seq, 0)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Decision in majority partition ...\n")
part(t, tag, npaxos, []int{0}, []int{1, 2, 3}, []int{4})
time.Sleep(2 * time.Second)
waitmajority(t, pxa, seq)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: All agree after full heal ...\n")
pxa[0].Start(seq, 1000) // poke them
pxa[4].Start(seq, 1004)
part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{})
waitn(t, pxa, seq, npaxos)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: One peer switches partitions ...\n")
for iters := 0; iters < 20; iters++ {
seq++
part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{})
pxa[0].Start(seq, seq*10)
pxa[3].Start(seq, (seq*10)+1)
waitmajority(t, pxa, seq)
if ndecided(t, pxa, seq) > 3 {
t.Fatalf("too many decided")
}
part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{})
waitn(t, pxa, seq, npaxos)
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: One peer switches partitions, unreliable ...\n")
for iters := 0; iters < 20; iters++ {
seq++
for i := 0; i < npaxos; i++ {
pxa[i].setunreliable(true)
}
part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{})
for i := 0; i < npaxos; i++ {
pxa[i].Start(seq, (seq*10)+i)
}
waitn(t, pxa, seq, 3)
if ndecided(t, pxa, seq) > 3 {
t.Fatalf("too many decided")
}
part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{})
for i := 0; i < npaxos; i++ {
pxa[i].setunreliable(false)
}
waitn(t, pxa, seq, 5)
}
fmt.Printf(" ... Passed\n")
}
func TestLots(t *testing.T) {
runtime.GOMAXPROCS(4)
fmt.Printf("Test: Many requests, changing partitions ...\n")
tag := "lots"
const npaxos = 5
var pxa []*Paxos = make([]*Paxos, npaxos)
defer cleanup(pxa)
defer cleanpp(tag, npaxos)
for i := 0; i < npaxos; i++ {
var pxh []string = make([]string, npaxos)
for j := 0; j < npaxos; j++ {
if j == i {
pxh[j] = port(tag, i)
} else {
pxh[j] = pp(tag, i, j)
}
}
pxa[i] = Make(pxh, i, nil)
pxa[i].setunreliable(true)
}
defer part(t, tag, npaxos, []int{}, []int{}, []int{})
done := int32(0)
// re-partition periodically
ch1 := make(chan bool)
go func() {
defer func() { ch1 <- true }()
for atomic.LoadInt32(&done) == 0 {
var a [npaxos]int
for i := 0; i < npaxos; i++ {
a[i] = (rand.Int() % 3)
}
pa := make([][]int, 3)
for i := 0; i < 3; i++ {
pa[i] = make([]int, 0)
for j := 0; j < npaxos; j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
part(t, tag, npaxos, pa[0], pa[1], pa[2])
time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond)
}
}()
seq := int32(0)
// periodically start a new instance
ch2 := make(chan bool)
go func() {
defer func() { ch2 <- true }()
for atomic.LoadInt32(&done) == 0 {
// how many instances are in progress?
nd := 0
sq := int(atomic.LoadInt32(&seq))
for i := 0; i < sq; i++ {
if ndecided(t, pxa, i) == npaxos {
nd++
}
}
if sq-nd < 10 {
for i := 0; i < npaxos; i++ {
pxa[i].Start(sq, rand.Int()%10)
}
atomic.AddInt32(&seq, 1)
}
time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond)
}
}()
// periodically check that decisions are consistent
ch3 := make(chan bool)
go func() {
defer func() { ch3 <- true }()
for atomic.LoadInt32(&done) == 0 {
for i := 0; i < int(atomic.LoadInt32(&seq)); i++ {
ndecided(t, pxa, i)
}
time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond)
}
}()
time.Sleep(20 * time.Second)
atomic.StoreInt32(&done, 1)
<-ch1
<-ch2
<-ch3
// repair, then check that all instances decided.
for i := 0; i < npaxos; i++ {
pxa[i].setunreliable(false)
}
part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{})
time.Sleep(5 * time.Second)
for i := 0; i < int(atomic.LoadInt32(&seq)); i++ {
waitmajority(t, pxa, i)
}
fmt.Printf(" ... Passed\n")
}

103
src/pbservice/client.go Normal file

@ -0,0 +1,103 @@
package pbservice
import "viewservice"
import "net/rpc"
import "fmt"
import "crypto/rand"
import "math/big"
type Clerk struct {
vs *viewservice.Clerk
// Your declarations here
}
// this may come in handy.
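// (for example, a client could tag each request with nrand() so the servers
// can recognize and discard duplicate RPCs -- just one possible use.)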
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(vshost string, me string) *Clerk {
ck := new(Clerk)
ck.vs = viewservice.MakeClerk(me, vshost)
// Your ck.* initializations here
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
//
// fetch a key's value from the current primary;
// if the key has never been set, return "".
// Get() must keep trying until either the
// primary replies with the value or the primary
// says the key doesn't exist (has never been Put()).
//
func (ck *Clerk) Get(key string) string {
// Your code here.
return "???"
}
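//
// the sketch below is purely illustrative and is NOT the assignment
// solution: it only shows the general shape of one attempt against the
// current primary. it assumes the viewservice Clerk exposes a
// Primary() string method (that API is not shown in this diff). a real
// Get() would wrap something like this in a retry loop with a short sleep
// until it gets a definitive answer; PutAppend() would follow the same
// pattern with PutAppendArgs/PutAppendReply.
//
func (ck *Clerk) exampleGetOnce(key string) (string, bool) {
	primary := ck.vs.Primary() // assumed viewservice API
	if primary == "" {
		// no primary known yet; the caller should retry later.
		return "", false
	}
	args := &GetArgs{Key: key}
	var reply GetReply
	ok := call(primary, "PBServer.Get", args, &reply)
	if ok && reply.Err == OK {
		return reply.Value, true
	}
	if ok && reply.Err == ErrNoKey {
		// the key has never been Put().
		return "", true
	}
	// wrong server, dropped RPC, etc.; the caller should retry.
	return "", false
}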
//
// send a Put or Append RPC
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
// Your code here.
}
//
// tell the primary to update key's value.
// must keep trying until it succeeds.
//
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
//
// tell the primary to append to key's value.
// must keep trying until it succeeds.
//
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

36
src/pbservice/common.go Normal file

@ -0,0 +1,36 @@
package pbservice
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongServer = "ErrWrongServer"
)
type Err string
// Put or Append
type PutAppendArgs struct {
Key string
Value string
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}
// Your RPC definitions here.

138
src/pbservice/server.go Normal file

@ -0,0 +1,138 @@
package pbservice
import "net"
import "fmt"
import "net/rpc"
import "log"
import "time"
import "viewservice"
import "sync"
import "sync/atomic"
import "os"
import "syscall"
import "math/rand"
type PBServer struct {
mu sync.Mutex
l net.Listener
dead int32 // for testing
unreliable int32 // for testing
me string
vs *viewservice.Clerk
// Your declarations here.
}
func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error {
// Your code here.
return nil
}
func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
// Your code here.
return nil
}
//
// ping the viewserver periodically.
// if view changed:
// transition to new view.
// manage transfer of state from primary to new backup.
//
func (pb *PBServer) tick() {
// Your code here.
}
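//
// a rough, hypothetical sketch of the tick() flow (not part of the skeleton;
// it assumes viewservice.Clerk has a Ping(viewnum) method returning the
// latest View, a View with Viewnum/Primary/Backup fields, and a pb.view
// field -- none of which are shown in this diff):
//
//	view, err := pb.vs.Ping(pb.view.Viewnum) // assumed API and field
//	if err == nil && view.Viewnum != pb.view.Viewnum {
//		if view.Primary == pb.me && view.Backup != "" && view.Backup != pb.view.Backup {
//			// transfer the complete key/value state to the new
//			// backup before serving requests in the new view.
//		}
//		pb.view = view
//	}
//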
// tell the server to shut itself down.
// please do not change these two functions.
func (pb *PBServer) kill() {
atomic.StoreInt32(&pb.dead, 1)
pb.l.Close()
}
// call this to find out if the server is dead.
func (pb *PBServer) isdead() bool {
return atomic.LoadInt32(&pb.dead) != 0
}
// please do not change these two functions.
func (pb *PBServer) setunreliable(what bool) {
if what {
atomic.StoreInt32(&pb.unreliable, 1)
} else {
atomic.StoreInt32(&pb.unreliable, 0)
}
}
func (pb *PBServer) isunreliable() bool {
return atomic.LoadInt32(&pb.unreliable) != 0
}
func StartServer(vshost string, me string) *PBServer {
pb := new(PBServer)
pb.me = me
pb.vs = viewservice.MakeClerk(me, vshost)
// Your pb.* initializations here.
rpcs := rpc.NewServer()
rpcs.Register(pb)
os.Remove(pb.me)
l, e := net.Listen("unix", pb.me)
if e != nil {
log.Fatal("listen error: ", e)
}
pb.l = l
// please do not change any of the following code,
// or do anything to subvert it.
go func() {
for pb.isdead() == false {
conn, err := pb.l.Accept()
if err == nil && pb.isdead() == false {
if pb.isunreliable() && (rand.Int63()%1000) < 100 {
// discard the request.
conn.Close()
} else if pb.isunreliable() && (rand.Int63()%1000) < 200 {
// process the request but force discard of reply.
c1 := conn.(*net.UnixConn)
f, _ := c1.File()
err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
if err != nil {
fmt.Printf("shutdown: %v\n", err)
}
go rpcs.ServeConn(conn)
} else {
go rpcs.ServeConn(conn)
}
} else if err == nil {
conn.Close()
}
if err != nil && pb.isdead() == false {
fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error())
pb.kill()
}
}
}()
go func() {
for pb.isdead() == false {
pb.tick()
time.Sleep(viewservice.PingInterval)
}
}()
return pb
}

1143
src/pbservice/test_test.go Normal file

File diff suppressed because it is too large

135
src/shardkv/client.go Normal file

@ -0,0 +1,135 @@
package shardkv
//
// client code to talk to a sharded key/value service.
//
// the client first talks to the shardmaster to find out
// the assignment of shards (keys) to groups, and then
// talks to the group that holds the key's shard.
//
import "labrpc"
import "crypto/rand"
import "math/big"
import "shardmaster"
import "time"
//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
func key2shard(key string) int {
shard := 0
if len(key) > 0 {
shard = int(key[0])
}
shard %= shardmaster.NShards
return shard
}
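// for example (NShards == 10): key2shard("") == 0, key2shard("3") == 1
// (byte '3' is 51, 51 % 10 == 1), and key2shard("apple") == 7 (byte 'a' is
// 97, 97 % 10 == 7) -- only the first byte of the key matters.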
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
type Clerk struct {
sm *shardmaster.Clerk
config shardmaster.Config
make_end func(string) *labrpc.ClientEnd
// You will have to modify this struct.
}
//
// the tester calls MakeClerk.
//
// masters[] is needed to call shardmaster.MakeClerk().
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs.
//
func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.sm = shardmaster.MakeClerk(masters)
ck.make_end = make_end
// You'll have to add code here.
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
// You will have to modify this function.
//
func (ck *Clerk) Get(key string) string {
args := GetArgs{}
args.Key = key
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
// try each server for the shard.
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply GetReply
ok := srv.Call("ShardKV.Get", &args, &reply)
if ok && reply.WrongLeader == false && (reply.Err == OK || reply.Err == ErrNoKey) {
return reply.Value
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for the latest configuration.
ck.config = ck.sm.Query(-1)
}
return ""
}
//
// shared by Put and Append.
// You will have to modify this function.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
args := PutAppendArgs{}
args.Key = key
args.Value = value
args.Op = op
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply PutAppendReply
ok := srv.Call("ShardKV.PutAppend", &args, &reply)
if ok && reply.WrongLeader == false && reply.Err == OK {
return
}
if ok && reply.Err == ErrWrongGroup {
break
}
}
}
time.Sleep(100 * time.Millisecond)
// ask master for the latest configuration.
ck.config = ck.sm.Query(-1)
}
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

45
src/shardkv/common.go Normal file

@ -0,0 +1,45 @@
package shardkv
//
// Sharded key/value server.
// Lots of replica groups, each running op-at-a-time Raft.
// Shardmaster decides which group serves each shard.
// Shardmaster may change shard assignment from time to time.
//
// You will have to modify these definitions.
//
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongGroup = "ErrWrongGroup"
)
type Err string
// Put or Append
type PutAppendArgs struct {
// You'll have to add definitions here.
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
WrongLeader bool
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
WrongLeader bool
Err Err
Value string
}

350
src/shardkv/config.go Normal file

@ -0,0 +1,350 @@
package shardkv
import "shardmaster"
import "labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "raft"
import "strconv"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type group struct {
gid int
servers []*ShardKV
saved []*raft.Persister
endnames [][]string
mendnames [][]string
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
nmasters int
masterservers []*shardmaster.ShardMaster
mck *shardmaster.Clerk
ngroups int
n int // servers per k/v group
groups []*group
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
}
func (cfg *config) cleanup() {
for gi := 0; gi < cfg.ngroups; gi++ {
cfg.ShutdownGroup(gi)
}
}
// check that no server's log is too big.
func (cfg *config) checklogs() {
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
if cfg.maxraftstate >= 0 && raft > 2*cfg.maxraftstate {
cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
raft, cfg.maxraftstate)
}
if cfg.maxraftstate < 0 && snap > 0 {
cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
}
}
}
}
// master server name for labrpc.
func (cfg *config) mastername(i int) string {
return "master" + strconv.Itoa(i)
}
// shard server name for labrpc.
// i'th server of group gid.
func (cfg *config) servername(gid int, i int) string {
return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
}
func (cfg *config) makeClient() *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// ClientEnds to talk to master service.
ends := make([]*labrpc.ClientEnd, cfg.nmasters)
endnames := make([]string, cfg.nmasters)
for j := 0; j < cfg.nmasters; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], cfg.mastername(j))
cfg.net.Enable(endnames[j], true)
}
ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
cfg.clerks[ck] = endnames
cfg.nextClientId++
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// Shutdown i'th server of gi'th group, by isolating it
func (cfg *config) ShutdownServer(gi int, i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
gg := cfg.groups[gi]
// prevent this server from sending
for j := 0; j < len(gg.servers); j++ {
name := gg.endnames[i][j]
cfg.net.Enable(name, false)
}
for j := 0; j < len(gg.mendnames[i]); j++ {
name := gg.mendnames[i][j]
cfg.net.Enable(name, false)
}
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(cfg.servername(gg.gid, i))
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
}
kv := gg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
gg.servers[i] = nil
}
}
func (cfg *config) ShutdownGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.ShutdownServer(gi, i)
}
}
// start i'th server in gi'th group
func (cfg *config) StartServer(gi int, i int) {
cfg.mu.Lock()
gg := cfg.groups[gi]
// a fresh set of outgoing ClientEnd names
// to talk to other servers in this group.
gg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
gg.endnames[i][j] = randstring(20)
}
// and the connections to other servers in this group.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
cfg.net.Enable(gg.endnames[i][j], true)
}
// ends to talk to shardmaster service
mends := make([]*labrpc.ClientEnd, cfg.nmasters)
gg.mendnames[i] = make([]string, cfg.nmasters)
for j := 0; j < cfg.nmasters; j++ {
gg.mendnames[i][j] = randstring(20)
mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j))
cfg.net.Enable(gg.mendnames[i][j], true)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartKVServer()
// the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
} else {
gg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
gg.gid, mends,
func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
kvsvc := labrpc.MakeService(gg.servers[i])
rfsvc := labrpc.MakeService(gg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
}
func (cfg *config) StartGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
func (cfg *config) StartMasterServer(i int) {
// ClientEnds to talk to other master replicas.
ends := make([]*labrpc.ClientEnd, cfg.nmasters)
for j := 0; j < cfg.nmasters; j++ {
endname := randstring(20)
ends[j] = cfg.net.MakeEnd(endname)
cfg.net.Connect(endname, cfg.mastername(j))
cfg.net.Enable(endname, true)
}
p := raft.MakePersister()
cfg.masterservers[i] = shardmaster.StartServer(ends, i, p)
msvc := labrpc.MakeService(cfg.masterservers[i])
rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft())
srv := labrpc.MakeServer()
srv.AddService(msvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.mastername(i), srv)
}
func (cfg *config) shardclerk() *shardmaster.Clerk {
// ClientEnds to talk to master service.
ends := make([]*labrpc.ClientEnd, cfg.nmasters)
for j := 0; j < cfg.nmasters; j++ {
name := randstring(20)
ends[j] = cfg.net.MakeEnd(name)
cfg.net.Connect(name, cfg.mastername(j))
cfg.net.Enable(name, true)
}
return shardmaster.MakeClerk(ends)
}
// tell the shardmaster that a group is joining.
func (cfg *config) join(gi int) {
cfg.joinm([]int{gi})
}
func (cfg *config) joinm(gis []int) {
m := make(map[int][]string, len(gis))
for _, g := range gis {
gid := cfg.groups[g].gid
servernames := make([]string, cfg.n)
for i := 0; i < cfg.n; i++ {
servernames[i] = cfg.servername(gid, i)
}
m[gid] = servernames
}
cfg.mck.Join(m)
}
// tell the shardmaster that a group is leaving.
func (cfg *config) leave(gi int) {
cfg.leavem([]int{gi})
}
func (cfg *config) leavem(gis []int) {
gids := make([]int, 0, len(gis))
for _, g := range gis {
gids = append(gids, cfg.groups[g].gid)
}
cfg.mck.Leave(gids)
}
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.maxraftstate = maxraftstate
cfg.net = labrpc.MakeNetwork()
// master
cfg.nmasters = 3
cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters)
for i := 0; i < cfg.nmasters; i++ {
cfg.StartMasterServer(i)
}
cfg.mck = cfg.shardclerk()
cfg.ngroups = 3
cfg.groups = make([]*group, cfg.ngroups)
cfg.n = n
for gi := 0; gi < cfg.ngroups; gi++ {
gg := &group{}
cfg.groups[gi] = gg
gg.gid = 100 + gi
gg.servers = make([]*ShardKV, cfg.n)
gg.saved = make([]*raft.Persister, cfg.n)
gg.endnames = make([][]string, cfg.n)
gg.mendnames = make([][]string, cfg.nmasters)
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.net.Reliable(!unreliable)
return cfg
}

102
src/shardkv/server.go Normal file

@ -0,0 +1,102 @@
package shardkv
// import "shardmaster"
import "labrpc"
import "raft"
import "sync"
import "encoding/gob"
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
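// (illustrative only -- the lab doesn't prescribe a layout, but one common
// shape is something like
//	Type     string // hypothetical: "Get", "Put", "Append", "Reconfigure", ...
//	Key      string
//	Value    string
//	ClientId int64  // hypothetical duplicate-detection tags
//	SeqNum   int64
// with every field exported so encoding/gob can marshal it.)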
}
type ShardKV struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
make_end func(string) *labrpc.ClientEnd
gid int
masters []*labrpc.ClientEnd
maxraftstate int // snapshot if log grows this big
// Your definitions here.
}
func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) {
// Your code here.
}
func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardKV instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (kv *ShardKV) Kill() {
kv.rf.Kill()
// Your code here, if desired.
}
//
// servers[] contains the ports of the servers in this group.
//
// me is the index of the current server in servers[].
//
// the k/v server should store snapshots with
// persister.SaveSnapshot(), and Raft should save its state (including
// log) with persister.SaveRaftState().
//
// the k/v server should snapshot when Raft's saved state exceeds
// maxraftstate bytes, in order to allow Raft to garbage-collect its
// log. if maxraftstate is -1, you don't need to snapshot.
//
// gid is this group's GID, for interacting with the shardmaster.
//
// pass masters[] to shardmaster.MakeClerk() so you can send
// RPCs to the shardmaster.
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs. You'll need this to send RPCs to other groups.
//
// look at client.go for examples of how to use masters[]
// and make_end() to send RPCs to the group owning a specific shard.
//
// StartServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV {
// call gob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
gob.Register(Op{})
kv := new(ShardKV)
kv.me = me
kv.maxraftstate = maxraftstate
kv.make_end = make_end
kv.gid = gid
kv.masters = masters
// Your initialization code here.
// Use something like this to talk to the shardmaster:
// kv.mck = shardmaster.MakeClerk(kv.masters)
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
return kv
}
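//
// a minimal, hypothetical sketch of a configuration-polling goroutine that
// StartServer could launch before returning (not part of the skeleton; it
// assumes a kv.mck *shardmaster.Clerk field initialized as hinted above, and
// it would need the "shardmaster" and "time" imports enabled):
//
//	go func() {
//		for {
//			latest := kv.mck.Query(-1)
//			// a real server would compare latest.Num with its current
//			// config, migrate shards accordingly, and only then adopt it.
//			_ = latest
//			time.Sleep(100 * time.Millisecond)
//		}
//	}()
//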

830
src/shardkv/test_test.go Normal file

@ -0,0 +1,830 @@
package shardkv
import "testing"
import "strconv"
import "time"
import "fmt"
import "sync/atomic"
import "math/rand"
func check(t *testing.T, ck *Clerk, key string, value string) {
v := ck.Get(key)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
//
// test static 2-way sharding, without shard movement.
//
func TestStaticShards(t *testing.T) {
fmt.Printf("Test: static shards ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
cfg.join(1)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
// make sure that the data really is sharded by
// shutting down one replica group and checking
// that some Get()s don't succeed.
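// (with two joined groups the 10 shards are split 5/5, and the keys "0".."9"
// map to 10 distinct shards, so exactly 5 of the Gets below should block
// while group 1 is down -- hence the ndone == 5 check.)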
cfg.ShutdownGroup(1)
cfg.checklogs() // forbid snapshots
ch := make(chan bool)
for xi := 0; xi < n; xi++ {
ck1 := cfg.makeClient() // only one call allowed per client
go func(i int) {
defer func() { ch <- true }()
check(t, ck1, ka[i], va[i])
}(xi)
}
// wait a bit, only about half the Gets should succeed.
ndone := 0
done := false
for done == false {
select {
case <-ch:
ndone += 1
case <-time.After(time.Second * 2):
done = true
break
}
}
if ndone != 5 {
t.Fatalf("expected 5 completions with one shard dead; got %v\n", ndone)
}
// bring the crashed shard/group back to life.
cfg.StartGroup(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestJoinLeave(t *testing.T) {
fmt.Printf("Test: join then leave ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
// allow time for shards to transfer.
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestSnapshot(t *testing.T) {
fmt.Printf("Test: snapshots, join, and leave ...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(1)
cfg.join(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(1 * time.Second)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestMissChange(t *testing.T) {
fmt.Printf("Test: servers miss configuration changes...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.ShutdownServer(0, 0)
cfg.ShutdownServer(1, 0)
cfg.ShutdownServer(2, 0)
cfg.join(2)
cfg.leave(1)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 0)
cfg.StartServer(1, 0)
cfg.StartServer(2, 0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(2 * time.Second)
cfg.ShutdownServer(0, 1)
cfg.ShutdownServer(1, 1)
cfg.ShutdownServer(2, 1)
cfg.join(0)
cfg.leave(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 1)
cfg.StartServer(1, 1)
cfg.StartServer(2, 1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestConcurrent1(t *testing.T) {
fmt.Printf("Test: concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(10 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
cfg.ShutdownGroup(0)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(1)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(2)
cfg.leave(2)
time.Sleep(100 * time.Millisecond)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(100 * time.Millisecond)
cfg.join(0)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
time.Sleep(1 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// this tests the various sources from which a re-starting
// group might need to fetch shard contents.
//
func TestConcurrent2(t *testing.T) {
fmt.Printf("Test: more concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(1)
cfg.join(0)
cfg.join(2)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(50 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.join(0)
cfg.join(2)
cfg.leave(1)
time.Sleep(3000 * time.Millisecond)
cfg.join(1)
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
time.Sleep(1000 * time.Millisecond)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable1(t *testing.T) {
fmt.Printf("Test: unreliable 1...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(0)
cfg.leave(1)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable2(t *testing.T) {
fmt.Printf("Test: unreliable 2...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
time.Sleep(500 * time.Millisecond)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
cfg.join(0)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
cfg.net.Reliable(true)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers are deleting
// shards for which they are no longer responsible.
//
func TestChallenge1Delete(t *testing.T) {
fmt.Printf("Test: shard deletion (challenge 1) ...\n")
// "1" means force snapshot after every log entry.
cfg := make_config(t, 3, false, 1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
// 30,000 bytes of total values.
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1000)
ck.Put(ka[i], va[i])
}
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
for iters := 0; iters < 2; iters++ {
cfg.join(1)
cfg.leave(0)
cfg.join(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
cfg.leave(1)
cfg.join(0)
cfg.leave(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
}
cfg.join(1)
cfg.join(2)
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
total := 0
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
total += raft + snap
}
}
// 27 keys should be stored once.
// 3 keys should also be stored in client dup tables.
// everything on 3 replicas.
// plus slop.
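// with n == 30 that allows 3 * (27*1000 + 2*3*1000 + 6000) = 117,000 bytes.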
expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000)
if total > expected {
t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected)
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestChallenge1Concurrent(t *testing.T) {
fmt.Printf("Test: concurrent configuration change and restart (challenge 1)...\n")
cfg := make_config(t, 3, false, 300)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
t0 := time.Now()
for time.Since(t0) < 12*time.Second {
cfg.join(2)
cfg.join(1)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.leave(1)
cfg.leave(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
}
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle
// shards that are not affected by a config change
// while the config change is underway
//
func TestChallenge2Unaffected(t *testing.T) {
fmt.Printf("Test: unaffected shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100
cfg.join(0)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// JOIN 101
cfg.join(1)
// QUERY to find shards now owned by 101
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[1].gid
}
// Wait for migration to new config to complete, and for clients to
// start using this updated config. Gets to any key k such that
// owned[shard(k)] == true should now be served by group 101.
<-time.After(1 * time.Second)
for i := 0; i < n; i++ {
if owned[i] {
va[i] = "101"
ck.Put(ka[i], va[i])
}
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100
// 101 doesn't get a chance to migrate things previously owned by 100
cfg.leave(0)
// Wait to make sure clients see new config
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys still complete
for i := 0; i < n; i++ {
shard := key2shard(ka[i])
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-1")
check(t, ck, ka[i], va[i]+"-1")
}
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle operations on shards that
// have been received as a part of a config migration when the entire migration
// has not yet completed.
//
func TestChallenge2Partial(t *testing.T) {
fmt.Printf("Test: partial migration shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100 + 101 + 102
cfg.joinm([]int{0, 1, 2})
// Give the implementation some time to reconfigure
<-time.After(1 * time.Second)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// QUERY to find shards owned by 102
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[2].gid
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100 + 102
// 101 can get old shards from 102, but not from 100. 101 should start
// serving shards that used to belong to 102 as soon as possible
cfg.leavem([]int{0, 2})
// Give the implementation some time to start reconfiguration
// And to migrate 102 -> 101
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys now complete
for i := 0; i < n; i++ {
shard := key2shard(ka[i])
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-2")
check(t, ck, ka[i], va[i]+"-2")
}
}
fmt.Printf(" ... Passed\n")
}

101
src/shardmaster/client.go Normal file

@ -0,0 +1,101 @@
package shardmaster
//
// Shardmaster clerk.
//
import "labrpc"
import "time"
import "crypto/rand"
import "math/big"
type Clerk struct {
servers []*labrpc.ClientEnd
// Your data here.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// Your code here.
return ck
}
func (ck *Clerk) Query(num int) Config {
args := &QueryArgs{}
// Your code here.
args.Num = num
for {
// try each known server.
for _, srv := range ck.servers {
var reply QueryReply
ok := srv.Call("ShardMaster.Query", args, &reply)
if ok && reply.WrongLeader == false {
return reply.Config
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Join(servers map[int][]string) {
args := &JoinArgs{}
// Your code here.
args.Servers = servers
for {
// try each known server.
for _, srv := range ck.servers {
var reply JoinReply
ok := srv.Call("ShardMaster.Join", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Leave(gids []int) {
args := &LeaveArgs{}
// Your code here.
args.GIDs = gids
for {
// try each known server.
for _, srv := range ck.servers {
var reply LeaveReply
ok := srv.Call("ShardMaster.Leave", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Move(shard int, gid int) {
args := &MoveArgs{}
// Your code here.
args.Shard = shard
args.GID = gid
for {
// try each known server.
for _, srv := range ck.servers {
var reply MoveReply
ok := srv.Call("ShardMaster.Move", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}

76
src/shardmaster/common.go Normal file

@ -0,0 +1,76 @@
package shardmaster
//
// Master shard server: assigns shards to replication groups.
//
// RPC interface:
// Join(servers) -- add a set of groups (gid -> server-list mapping).
// Leave(gids) -- delete a set of groups.
// Move(shard, gid) -- hand off one shard from current owner to gid.
// Query(num) -> fetch Config # num, or latest config if num==-1.
//
// A Config (configuration) describes a set of replica groups, and the
// replica group responsible for each shard. Configs are numbered. Config
// #0 is the initial configuration, with no groups and all shards
// assigned to group 0 (the invalid group).
//
// A GID is a replica group ID. GIDs must be unique and > 0.
// Once a GID joins, and leaves, it should never join again.
//
// You will need to add fields to the RPC arguments.
//
// The number of shards.
const NShards = 10
// A configuration -- an assignment of shards to groups.
// Please don't change this.
type Config struct {
Num int // config number
Shards [NShards]int // shard -> gid
Groups map[int][]string // gid -> servers[]
}
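// For example (illustrative only): right after a single group with GID 1
// joins, a legal configuration could look like
//
//	Config{
//		Num:    1,
//		Shards: [NShards]int{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
//		Groups: map[int][]string{1: {"server-1-0", "server-1-1", "server-1-2"}},
//	}
//
// i.e. every shard is assigned to the only live group (the server names are
// made up for the example).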
const (
OK = "OK"
)
type Err string
type JoinArgs struct {
Servers map[int][]string // new GID -> servers mappings
}
type JoinReply struct {
WrongLeader bool
Err Err
}
type LeaveArgs struct {
GIDs []int
}
type LeaveReply struct {
WrongLeader bool
Err Err
}
type MoveArgs struct {
Shard int
GID int
}
type MoveReply struct {
WrongLeader bool
Err Err
}
type QueryArgs struct {
Num int // desired config number
}
type QueryReply struct {
WrongLeader bool
Err Err
Config Config
}

343
src/shardmaster/config.go Normal file

@ -0,0 +1,343 @@
package shardmaster
import "labrpc"
import "raft"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
servers []*ShardMaster
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.servers); i++ {
if cfg.servers[i] != nil {
cfg.servers[i].Kill()
}
}
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[from[j]] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.servers[i] = nil
}
}
// If restart servers, first call ShutdownServer
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartKVServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.servers[i] = StartServer(ends, i, cfg.saved[i])
kvsvc := labrpc.MakeService(cfg.servers[i])
rfsvc := labrpc.MakeService(cfg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
_, is_leader := cfg.servers[i].rf.GetState()
if is_leader {
return true, i
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
func make_config(t *testing.T, n int, unreliable bool) *config {
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.servers = make([]*ShardMaster, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
	cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest server id
	// create a full set of ShardMaster servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}

80
src/shardmaster/server.go Normal file
View File

@ -0,0 +1,80 @@
package shardmaster
import "raft"
import "labrpc"
import "sync"
import "encoding/gob"
type ShardMaster struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
// Your data here.
configs []Config // indexed by config num
}
type Op struct {
// Your data here.
}
func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) {
// Your code here.
}
func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) {
// Your code here.
}
func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) {
// Your code here.
}
func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardMaster instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (sm *ShardMaster) Kill() {
sm.rf.Kill()
// Your code here, if desired.
}
// needed by shardkv tester
func (sm *ShardMaster) Raft() *raft.Raft {
return sm.rf
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant shardmaster service.
// me is the index of the current server in servers[].
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster {
sm := new(ShardMaster)
sm.me = me
sm.configs = make([]Config, 1)
sm.configs[0].Groups = map[int][]string{}
gob.Register(Op{})
sm.applyCh = make(chan raft.ApplyMsg)
sm.rf = raft.Make(servers, me, persister, sm.applyCh)
// Your code here.
return sm
}
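// Illustrative sketch, not part of the skeleton: StartServer could launch a
// goroutine like this to consume commands that Raft has committed on applyCh
// and apply them to sm.configs. How Op is filled in, and how duplicate client
// requests are detected, is left to the lab; this only shows the shape of the
// apply loop.
func (sm *ShardMaster) applyLoopSketch() {
	for msg := range sm.applyCh {
		op, ok := msg.Command.(Op)
		if !ok {
			continue // ignore entries that do not carry an Op
		}
		sm.mu.Lock()
		// Apply op here: a Join/Leave/Move appends a new Config to
		// sm.configs; a Query reads an existing one.
		_ = op
		sm.mu.Unlock()
	}
}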

377
src/shardmaster/test_test.go Normal file
View File

@ -0,0 +1,377 @@
package shardmaster
import (
"sync"
"testing"
)
// import "time"
import "fmt"
func check(t *testing.T, groups []int, ck *Clerk) {
c := ck.Query(-1)
if len(c.Groups) != len(groups) {
t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
}
// are the groups as expected?
for _, g := range groups {
_, ok := c.Groups[g]
if ok != true {
t.Fatalf("missing group %v", g)
}
}
// any un-allocated shards?
if len(groups) > 0 {
for s, g := range c.Shards {
_, ok := c.Groups[g]
if ok == false {
t.Fatalf("shard %v -> invalid group %v", s, g)
}
}
}
// more or less balanced sharding?
counts := map[int]int{}
for _, g := range c.Shards {
counts[g] += 1
}
min := 257
max := 0
for g, _ := range c.Groups {
if counts[g] > max {
max = counts[g]
}
if counts[g] < min {
min = counts[g]
}
}
if max > min+1 {
t.Fatalf("max %v too much larger than min %v", max, min)
}
}
func check_same_config(t *testing.T, c1 Config, c2 Config) {
if c1.Num != c2.Num {
t.Fatalf("Num wrong")
}
if c1.Shards != c2.Shards {
t.Fatalf("Shards wrong")
}
if len(c1.Groups) != len(c2.Groups) {
t.Fatalf("number of Groups is wrong")
}
for gid, sa := range c1.Groups {
sa1, ok := c2.Groups[gid]
if ok == false || len(sa1) != len(sa) {
t.Fatalf("len(Groups) wrong")
}
if ok && len(sa1) == len(sa) {
for j := 0; j < len(sa); j++ {
if sa[j] != sa1[j] {
t.Fatalf("Groups wrong")
}
}
}
}
}
func TestBasic(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Basic leave/join ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}})
check(t, []int{gid1}, ck)
cfa[1] = ck.Query(-1)
var gid2 int = 2
ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
check(t, []int{gid1, gid2}, ck)
cfa[2] = ck.Query(-1)
ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
check(t, []int{gid1, gid2}, ck)
cfa[3] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave([]int{gid1})
check(t, []int{gid2}, ck)
cfa[4] = ck.Query(-1)
ck.Leave([]int{gid1})
check(t, []int{gid2}, ck)
cfa[5] = ck.Query(-1)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Historical queries ...\n")
for s := 0; s < nservers; s++ {
cfg.ShutdownServer(s)
for i := 0; i < len(cfa); i++ {
c := ck.Query(cfa[i].Num)
check_same_config(t, c, cfa[i])
}
cfg.StartServer(s)
cfg.ConnectAll()
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Move ...\n")
{
var gid3 int = 503
ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}})
var gid4 int = 504
ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}})
for i := 0; i < NShards; i++ {
cf := ck.Query(-1)
if i < NShards/2 {
ck.Move(i, gid3)
if cf.Shards[i] != gid3 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
} else {
ck.Move(i, gid4)
if cf.Shards[i] != gid4 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
}
}
cf2 := ck.Query(-1)
for i := 0; i < NShards; i++ {
if i < NShards/2 {
if cf2.Shards[i] != gid3 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid3, cf2.Shards[i])
}
} else {
if cf2.Shards[i] != gid4 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid4, cf2.Shards[i])
}
}
}
ck.Leave([]int{gid3})
ck.Leave([]int{gid4})
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
ch := make(chan bool)
for xi := 0; xi < npara; xi++ {
gids[xi] = int(xi + 1)
go func(i int) {
defer func() { ch <- true }()
var gid int = gids[i]
cka[i].Join(map[int][]string{gid + 1000: []string{"a", "b", "c"}})
cka[i].Join(map[int][]string{gid: []string{"a", "b", "c"}})
cka[i].Leave([]int{gid + 1000})
}(xi)
}
for i := 0; i < npara; i++ {
<-ch
}
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after joins ...\n")
c1 := ck.Query(-1)
for i := 0; i < 5; i++ {
ck.Join(map[int][]string{int(npara + 1 + i): []string{"a", "b", "c"}})
}
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after leaves ...\n")
for i := 0; i < 5; i++ {
ck.Leave([]int{int(npara + 1 + i)})
}
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
}
func TestMulti(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Multi-group join/leave ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
var gid2 int = 2
ck.Join(map[int][]string{
gid1: []string{"x", "y", "z"},
gid2: []string{"a", "b", "c"},
})
check(t, []int{gid1, gid2}, ck)
cfa[1] = ck.Query(-1)
var gid3 int = 3
ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}})
check(t, []int{gid1, gid2, gid3}, ck)
cfa[2] = ck.Query(-1)
ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
check(t, []int{gid1, gid2, gid3}, ck)
cfa[3] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
sa3 := cfx.Groups[gid3]
if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" {
t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3)
}
ck.Leave([]int{gid1, gid3})
check(t, []int{gid2}, ck)
cfa[4] = ck.Query(-1)
cfx = ck.Query(-1)
sa2 = cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent multi leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
var wg sync.WaitGroup
for xi := 0; xi < npara; xi++ {
wg.Add(1)
gids[xi] = int(xi + 1)
go func(i int) {
defer wg.Done()
var gid int = gids[i]
cka[i].Join(map[int][]string{
gid: []string{"a", "b", "c"},
gid + 1000: []string{"a", "b", "c"},
gid + 2000: []string{"a", "b", "c"},
})
cka[i].Leave([]int{gid + 1000, gid + 2000})
}(xi)
}
wg.Wait()
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multijoins ...\n")
c1 := ck.Query(-1)
m := make(map[int][]string)
for i := 0; i < 5; i++ {
m[npara+1+i] = []string{"a", "b", "c"}
}
ck.Join(m)
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multileaves ...\n")
var l []int
for i := 0; i < 5; i++ {
l = append(l, npara+1+i)
}
ck.Leave(l)
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
}

88
src/viewservice/client.go Normal file
View File

@ -0,0 +1,88 @@
package viewservice
import "net/rpc"
import "fmt"
//
// the viewservice Clerk lives in the client
// and maintains a little state.
//
type Clerk struct {
me string // client's name (host:port)
server string // viewservice's host:port
}
func MakeClerk(me string, server string) *Clerk {
ck := new(Clerk)
ck.me = me
ck.server = server
return ck
}
//
// call() sends an RPC to the rpcname handler on server srv
// with arguments args, waits for the reply, and leaves the
// reply in reply. the reply argument should be a pointer
// to a reply structure.
//
// the return value is true if the server responded, and false
// if call() was not able to contact the server. in particular,
// the reply's contents are only valid if call() returned true.
//
// you should assume that call() will return an
// error after a while if the server is dead.
// don't provide your own time-out mechanism.
//
// please use call() to send all RPCs, in client.go and server.go.
// please don't change this function.
//
func call(srv string, rpcname string,
args interface{}, reply interface{}) bool {
c, errx := rpc.Dial("unix", srv)
if errx != nil {
return false
}
defer c.Close()
err := c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}
func (ck *Clerk) Ping(viewnum uint) (View, error) {
// prepare the arguments.
args := &PingArgs{}
args.Me = ck.me
args.Viewnum = viewnum
var reply PingReply
// send an RPC request, wait for the reply.
ok := call(ck.server, "ViewServer.Ping", args, &reply)
if ok == false {
return View{}, fmt.Errorf("Ping(%v) failed", viewnum)
}
return reply.View, nil
}
func (ck *Clerk) Get() (View, bool) {
args := &GetArgs{}
var reply GetReply
ok := call(ck.server, "ViewServer.Get", args, &reply)
if ok == false {
return View{}, false
}
return reply.View, true
}
func (ck *Clerk) Primary() string {
v, ok := ck.Get()
if ok {
return v.Primary
}
return ""
}
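// Illustrative usage, not part of the skeleton: a client of the p/b service
// could poll the view service until a primary has been elected. The sleep
// function is injected so this sketch compiles with this file's existing
// imports; a caller might pass func() { time.Sleep(PingInterval) }.
func waitForPrimarySketch(ck *Clerk, sleep func()) string {
	for {
		if p := ck.Primary(); p != "" {
			return p
		}
		sleep()
	}
}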

80
src/viewservice/common.go Normal file
View File

@ -0,0 +1,80 @@
package viewservice
import "time"
//
// This is a non-replicated view service for a simple
// primary/backup system.
//
// The view service goes through a sequence of numbered
// views, each with a primary and (if possible) a backup.
// A view consists of a view number and the host:port of
// the view's primary and backup p/b servers.
//
// The primary in a view is always either the primary
// or the backup of the previous view (in order to ensure
// that the p/b service's state is preserved).
//
// Each p/b server should send a Ping RPC once per PingInterval.
// The view server replies with a description of the current
// view. The Pings let the view server know that the p/b
// server is still alive; inform the p/b server of the current
// view; and inform the view server of the most recent view
// that the p/b server knows about.
//
// The view server proceeds to a new view when either it hasn't
// received a ping from the primary or backup for a while, or
// if there is no backup and a new server starts Pinging.
//
// The view server will not proceed to a new view until
// the primary from the current view acknowledges
// that it is operating in the current view. This helps
// ensure that there's at most one p/b primary operating at
// a time.
//
type View struct {
Viewnum uint
Primary string
Backup string
}
// clients should send a Ping RPC this often,
// to tell the viewservice that the client is alive.
const PingInterval = time.Millisecond * 100
// the viewserver will declare a client dead if it misses
// this many Ping RPCs in a row.
const DeadPings = 5
//
// Ping(): called by a primary/backup server to tell the
// view service it is alive, to indicate whether p/b server
// has seen the latest view, and for p/b server to learn
// the latest view.
//
// If Viewnum is zero, the caller is signalling that it is
// alive and could become backup if needed.
//
type PingArgs struct {
Me string // "host:port"
Viewnum uint // caller's notion of current view #
}
type PingReply struct {
View View
}
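// Illustrative sketch, not part of the skeleton: how a primary/backup server
// might drive this protocol. It Pings once per PingInterval, reporting the
// highest view number it has seen (0 at startup, signalling that it is alive
// and could become backup), and remembers the view the viewservice returns.
// The done channel is an assumption used only to stop the sketch.
func pingLoopSketch(ck *Clerk, done chan struct{}) {
	viewnum := uint(0)
	for {
		select {
		case <-done:
			return
		default:
		}
		if v, err := ck.Ping(viewnum); err == nil {
			viewnum = v.Viewnum // acknowledge the view we now know about
		}
		time.Sleep(PingInterval)
	}
}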
//
// Get(): fetch the current view, without volunteering
// to be a server. mostly for clients of the p/b service,
// and for testing.
//
type GetArgs struct {
}
type GetReply struct {
View View
}

123
src/viewservice/server.go Normal file
View File

@ -0,0 +1,123 @@
package viewservice
import "net"
import "net/rpc"
import "log"
import "time"
import "sync"
import "fmt"
import "os"
import "sync/atomic"
type ViewServer struct {
mu sync.Mutex
l net.Listener
dead int32 // for testing
rpccount int32 // for testing
me string
// Your declarations here.
}
//
// server Ping RPC handler.
//
func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error {
// Your code here.
return nil
}
//
// server Get() RPC handler.
//
func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error {
// Your code here.
return nil
}
//
// tick() is called once per PingInterval; it should notice
// if servers have died or recovered, and change the view
// accordingly.
//
func (vs *ViewServer) tick() {
// Your code here.
}
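// Illustrative sketch, not part of the skeleton: tick() needs some rule for
// declaring a server dead. Given the time of a server's most recent Ping
// (state the ViewServer would have to record itself), this is one possible
// test, matching the DeadPings threshold defined in common.go.
func missedTooManyPings(lastPing time.Time, now time.Time) bool {
	return now.Sub(lastPing) > DeadPings*PingInterval
}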
//
// tell the server to shut itself down.
// for testing.
// please don't change these two functions.
//
func (vs *ViewServer) Kill() {
atomic.StoreInt32(&vs.dead, 1)
vs.l.Close()
}
//
// has this server been asked to shut down?
//
func (vs *ViewServer) isdead() bool {
return atomic.LoadInt32(&vs.dead) != 0
}
// please don't change this function.
func (vs *ViewServer) GetRPCCount() int32 {
return atomic.LoadInt32(&vs.rpccount)
}
func StartServer(me string) *ViewServer {
vs := new(ViewServer)
vs.me = me
// Your vs.* initializations here.
// tell net/rpc about our RPC server and handlers.
rpcs := rpc.NewServer()
rpcs.Register(vs)
// prepare to receive connections from clients.
// change "unix" to "tcp" to use over a network.
os.Remove(vs.me) // only needed for "unix"
l, e := net.Listen("unix", vs.me)
if e != nil {
log.Fatal("listen error: ", e)
}
vs.l = l
// please don't change any of the following code,
// or do anything to subvert it.
// create a thread to accept RPC connections from clients.
go func() {
for vs.isdead() == false {
conn, err := vs.l.Accept()
if err == nil && vs.isdead() == false {
atomic.AddInt32(&vs.rpccount, 1)
go rpcs.ServeConn(conn)
} else if err == nil {
conn.Close()
}
if err != nil && vs.isdead() == false {
fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error())
vs.Kill()
}
}
}()
// create a thread to call tick() periodically.
go func() {
for vs.isdead() == false {
vs.tick()
time.Sleep(PingInterval)
}
}()
return vs
}

235
src/viewservice/test_test.go Normal file
View File

@ -0,0 +1,235 @@
package viewservice
import "testing"
import "runtime"
import "time"
import "fmt"
import "os"
import "strconv"
func check(t *testing.T, ck *Clerk, p string, b string, n uint) {
view, _ := ck.Get()
if view.Primary != p {
t.Fatalf("wanted primary %v, got %v", p, view.Primary)
}
if view.Backup != b {
t.Fatalf("wanted backup %v, got %v", b, view.Backup)
}
if n != 0 && n != view.Viewnum {
t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum)
}
if ck.Primary() != p {
t.Fatalf("wanted primary %v, got %v", p, ck.Primary())
}
}
func port(suffix string) string {
s := "/var/tmp/824-"
s += strconv.Itoa(os.Getuid()) + "/"
os.Mkdir(s, 0777)
s += "viewserver-"
s += strconv.Itoa(os.Getpid()) + "-"
s += suffix
return s
}
func Test1(t *testing.T) {
runtime.GOMAXPROCS(4)
vshost := port("v")
vs := StartServer(vshost)
ck1 := MakeClerk(port("1"), vshost)
ck2 := MakeClerk(port("2"), vshost)
ck3 := MakeClerk(port("3"), vshost)
//
if ck1.Primary() != "" {
t.Fatalf("there was a primary too soon")
}
// very first primary
fmt.Printf("Test: First primary ...\n")
for i := 0; i < DeadPings*2; i++ {
view, _ := ck1.Ping(0)
if view.Primary == ck1.me {
break
}
time.Sleep(PingInterval)
}
check(t, ck1, ck1.me, "", 1)
fmt.Printf(" ... Passed\n")
// very first backup
fmt.Printf("Test: First backup ...\n")
{
vx, _ := ck1.Get()
for i := 0; i < DeadPings*2; i++ {
ck1.Ping(1)
view, _ := ck2.Ping(0)
if view.Backup == ck2.me {
break
}
time.Sleep(PingInterval)
}
check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1)
}
fmt.Printf(" ... Passed\n")
// primary dies, backup should take over
fmt.Printf("Test: Backup takes over if primary fails ...\n")
{
ck1.Ping(2)
vx, _ := ck2.Ping(2)
for i := 0; i < DeadPings*2; i++ {
v, _ := ck2.Ping(vx.Viewnum)
if v.Primary == ck2.me && v.Backup == "" {
break
}
time.Sleep(PingInterval)
}
check(t, ck2, ck2.me, "", vx.Viewnum+1)
}
fmt.Printf(" ... Passed\n")
// revive ck1, should become backup
fmt.Printf("Test: Restarted server becomes backup ...\n")
{
vx, _ := ck2.Get()
ck2.Ping(vx.Viewnum)
for i := 0; i < DeadPings*2; i++ {
ck1.Ping(0)
v, _ := ck2.Ping(vx.Viewnum)
if v.Primary == ck2.me && v.Backup == ck1.me {
break
}
time.Sleep(PingInterval)
}
check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1)
}
fmt.Printf(" ... Passed\n")
// start ck3, kill the primary (ck2), the previous backup (ck1)
// should become the primary, and ck3 the backup.
// this should happen in a single view change, without
// any period in which there's no backup.
fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n")
{
vx, _ := ck2.Get()
ck2.Ping(vx.Viewnum)
for i := 0; i < DeadPings*2; i++ {
ck3.Ping(0)
v, _ := ck1.Ping(vx.Viewnum)
if v.Primary == ck1.me && v.Backup == ck3.me {
break
}
vx = v
time.Sleep(PingInterval)
}
check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1)
}
fmt.Printf(" ... Passed\n")
// kill and immediately restart the primary -- does viewservice
// conclude primary is down even though it's pinging?
fmt.Printf("Test: Restarted primary treated as dead ...\n")
{
vx, _ := ck1.Get()
ck1.Ping(vx.Viewnum)
for i := 0; i < DeadPings*2; i++ {
ck1.Ping(0)
ck3.Ping(vx.Viewnum)
v, _ := ck3.Get()
if v.Primary != ck1.me {
break
}
time.Sleep(PingInterval)
}
vy, _ := ck3.Get()
if vy.Primary != ck3.me {
t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary)
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Dead backup is removed from view ...\n")
	// set up a view with just ck3 as primary,
// to prepare for the next test.
{
for i := 0; i < DeadPings*3; i++ {
vx, _ := ck3.Get()
ck3.Ping(vx.Viewnum)
time.Sleep(PingInterval)
}
v, _ := ck3.Get()
if v.Primary != ck3.me || v.Backup != "" {
t.Fatalf("wrong primary or backup")
}
}
fmt.Printf(" ... Passed\n")
// does viewserver wait for ack of previous view before
// starting the next one?
fmt.Printf("Test: Viewserver waits for primary to ack view ...\n")
{
		// set up p=ck3 b=ck1, but do not ack
vx, _ := ck1.Get()
for i := 0; i < DeadPings*3; i++ {
ck1.Ping(0)
ck3.Ping(vx.Viewnum)
v, _ := ck1.Get()
if v.Viewnum > vx.Viewnum {
break
}
time.Sleep(PingInterval)
}
check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1)
vy, _ := ck1.Get()
// ck3 is the primary, but it never acked.
// let ck3 die. check that ck1 is not promoted.
for i := 0; i < DeadPings*3; i++ {
v, _ := ck1.Ping(vy.Viewnum)
if v.Viewnum > vy.Viewnum {
break
}
time.Sleep(PingInterval)
}
check(t, ck2, ck3.me, ck1.me, vy.Viewnum)
}
fmt.Printf(" ... Passed\n")
// if old servers die, check that a new (uninitialized) server
// cannot take over.
fmt.Printf("Test: Uninitialized server can't become primary ...\n")
{
for i := 0; i < DeadPings*2; i++ {
v, _ := ck1.Get()
ck1.Ping(v.Viewnum)
ck2.Ping(0)
ck3.Ping(v.Viewnum)
time.Sleep(PingInterval)
}
for i := 0; i < DeadPings*2; i++ {
ck2.Ping(0)
time.Sleep(PingInterval)
}
vz, _ := ck2.Get()
if vz.Primary == ck2.me {
t.Fatalf("uninitialized backup promoted to primary")
}
}
fmt.Printf(" ... Passed\n")
vs.Kill()
}