其他分享
首页 > 其他分享 > 8. Lab: locks

8. Lab: locks

作者:互联网

https://pdos.csail.mit.edu/6.S081/2021/labs/lock.html

1. Memory allocator (moderate)

1.1 要求

Your job is to implement per-CPU freelists, and stealing when a CPU's free list is empty. You must give all of your locks names that start with "kmem". That is, you should call initlock for each of your locks, and pass a name that starts with "kmem". Run kalloctest to see if your implementation has reduced lock contention. To check that it can still allocate all of memory, run usertests sbrkmuch. Your output will look similar to that shown below, with much-reduced contention in total on kmem locks, although the specific numbers will differ. Make sure all tests in usertests pass. make grade should say that the kalloctests pass.

阅读原本的 kalloc.c 代码,可以看到内存管理的结构是一个链表,如下:

// Original allocator state: one free list shared by every CPU,
// protected by a single spinlock.
struct {
  struct spinlock lock;     // taken around every free-list access
  struct run *freelist;     // head of the linked list of free pages
} kmem;

当多个 cpu 同时申请内存时,只能串行执行,因为只有一个 spinlock。因此,我们需要将这个 freelist 拆分给多个 cpu,让每个 cpu 都独自拥有一个上述的 kmem 结构,这样多个 cpu 就能并行申请内存了。但此时还需要注意如下情况:
当 CPU1 的 freelist 为空时,表示它自己的空闲内存已用完,这个时候需要去访问其他 cpu 的 freelist,将它们的空闲内存块挪给 CPU1 使用。

1.2 实现

由于要求比较简单,且 lab 中的 hints 基本提供了大部分信息,因此略过分析,直接上实现。

这里自定义了一个初始化函数 kfree_specific 将指定 pa 分配给指定 cpu 的 freelist

// Per-CPU allocator state: a free list of pages plus the spinlock
// that protects it.
struct mem{
  struct spinlock lock;     // held while reading or modifying freelist
  struct run *freelist;     // head of this CPU's list of free pages
};

// One allocator instance per CPU, indexed by the value of cpuid().
struct mem kmem[NCPU];

// Hand the page at pa to the free list owned by CPU `cpuid`.
// The page is overwritten with junk first so that stale references to
// it are more likely to be noticed.
void
kfree_specific(void *pa, int cpuid)
{
  struct mem *owner = &kmem[cpuid];
  struct run *page;

  memset(pa, 1, PGSIZE);
  page = (struct run*)pa;

  // Push the page onto the front of the owner's list.
  acquire(&owner->lock);
  page->next = owner->freelist;
  owner->freelist = page;
  release(&owner->lock);
}

// Give every whole page in [pa_start, pa_end) to the free list of the
// CPU that is running this call.
void
freerange(void *pa_start, void *pa_end)
{
  int hart;
  char *page;
  char *limit = (char*)pa_end;

  // cpuid() is read between push_off()/pop_off().
  push_off();
  hart = cpuid();
  pop_off();

  // Start at the first page boundary at or after pa_start and stop once
  // a full page no longer fits before pa_end.
  page = (char*)PGROUNDUP((uint64)pa_start);
  while(page + PGSIZE <= limit){
    kfree_specific(page, hart);
    page += PGSIZE;
  }
}
// Allocate one 4096-byte page of physical memory.
// The current CPU's free list is tried first; when it is empty, the
// lists of all CPUs are probed in index order and the first page found
// is taken ("stolen").
// Returns a pointer that the kernel can use, or 0 if no page is free.
void *
kalloc(void)
{
  struct run *page;
  struct mem *list;
  int hart;
  int i;

  push_off();
  hart = cpuid();
  pop_off();

  // Local attempt.
  list = &kmem[hart];
  acquire(&list->lock);
  page = list->freelist;
  if(page)
    list->freelist = page->next;
  release(&list->lock);

  // Local list was empty: walk every CPU's list until a page turns up.
  // Only one list lock is held at any moment.
  for(i = 0; page == 0 && i < NCPU; i++){
    list = &kmem[i];
    acquire(&list->lock);
    page = list->freelist;
    if(page)
      list->freelist = page->next;
    release(&list->lock);
  }

  if(page)
    memset((char*)page, 5, PGSIZE); // fill with junk
  return (void*)page;
}
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  push_off();
  int hart = cpuid();
  pop_off();

  r = (struct run*)pa;
  //int hart = get_pa_cpu_id((uint64)r);
  struct mem* cpu_mem = &kmem[hart];
  acquire(&cpu_mem->lock);
  r->next = cpu_mem->freelist;
  cpu_mem->freelist = r;
  release(&cpu_mem->lock);
}

2. Buffer cache (hard)

这个 part 的实现思路也比较清晰,但是由于细节没注意,实现的时候各种 test case 不通过 =-=。

2.1 要求

Modify the block cache so that the number of acquire loop iterations for all locks in the bcache is close to zero when running bcachetest. Ideally the sum of the counts for all locks involved in the block cache should be zero, but it's OK if the sum is less than 500. Modify bget and brelse so that concurrent lookups and releases for different blocks that are in the bcache are unlikely to conflict on locks (e.g., don't all have to wait for bcache.lock). You must maintain the invariant that at most one copy of each block is cached. When you are done, your output should be similar to that shown below (though not identical). Make sure usertests still passes. make grade should pass all tests when you are done.

该 part 的目的也是通过重新设计文件系统的 buffer & cache 的数据结构来降低锁的竞争,原先的结构如下:

// Original buffer cache: a fixed pool of buffers threaded onto a single
// list, with one spinlock for the whole structure.
struct {
  struct spinlock lock;
  struct buf buf[NBUF];

  // Linked list of all buffers, through prev/next.
  // Sorted by how recently the buffer was used.
  // head.next is most recent, head.prev is least.
  struct buf head;
} bcache;

bcache.head 是一个 LRU 链表:head.prev 是最久未使用的 buf,head.next 是最近使用的 buf。通过维护该 LRU 链表,利用时间局部性原理,来达到提升效率的目的。
但是由于 bcache 是多个 cpu 共用的,head 链表每次访问都需要加锁,这样导致多 cpu 访问缓存时只能串行工作。因此需要重新设计该结构,在利用时间局部性原理的同时,减少锁的竞争。

2.2 分析

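根据 hints,要做如下操作:
1. 用哈希表代替全局链表:按 blockno 将 buf 分散到若干个桶中,每个桶有自己的锁;
2. 去掉全局 LRU 链表,改为在每个 buf 上记录最近一次使用的时间戳(ticks),通过比较时间戳找出最久未使用的 buf;
3. 当目标桶中没有空闲 buf 时,从其他桶中挪一个空闲 buf 过来,此过程需要注意加锁顺序,避免死锁。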

2.3 实现

binit 将原来的 bcache.buf 所有 buf 平摊到每个桶中。

// Number of hash buckets the buffer cache is divided into.
#define BUCKET_CNT 13
// Total number of buffers in the cache (three per bucket on average).
#define NBUF (BUCKET_CNT * 3)

// One hash bucket: a singly linked chain of buffers (through buf.next)
// hanging off head.next, plus the spinlock that guards the chain.
struct bcache_bucket{
  struct buf head;
  struct spinlock lock;
};
// The buffer cache: the buffer pool, the hash buckets the buffers are
// linked into, and a global lock taken when a buffer has to be moved
// from one bucket to another.
struct {
  struct spinlock lock;
  struct buf buf[NBUF];
  struct bcache_bucket bucket[BUCKET_CNT];
} bcache;

// Map a block number to a bucket index in [0, BUCKET_CNT).
// Callers pass unsigned block numbers through the int parameter, so a
// value above INT_MAX arrives negative. The modulo is therefore done on
// the unsigned value; a signed % would give a negative, out-of-range
// index. Non-negative inputs map exactly as before.
int hash_key(int blockno){
  return (int)((unsigned int)blockno % BUCKET_CNT);
}

// Initialize the buffer cache: set up the global lock and one named
// lock per bucket, then spread all buffers evenly over the buckets.
void binit(void)
{
  // initlock() keeps the name pointer instead of copying the string, so
  // every bucket lock needs its own name buffer that outlives this
  // function. A single stack buffer would leave all locks pointing at
  // the same, soon-to-be-dead storage.
  static char lock_names[BUCKET_CNT][32];
  struct buf *b;

  initlock(&bcache.lock, "bcache");

  for (int i = 0; i < BUCKET_CNT; i++){
    snprintf(lock_names[i], sizeof(lock_names[i]), "bcache.bucket_%d", i);
    initlock(&bcache.bucket[i].lock, lock_names[i]);
  }

  // Link every buffer into a bucket chain. Each buffer gets a distinct
  // placeholder block number so that hash_key() distributes the pool
  // round-robin across the buckets.
  int blockcnt = 0;
  struct bcache_bucket* bucket;
  for(b = bcache.buf; b < bcache.buf+NBUF; b++){
    initsleeplock(&b->lock, "buffer");
    b->access_time = ticks;
    b->blockno = blockcnt++;
    bucket = &bcache.bucket[hash_key(b->blockno)];
    // Push onto the front of the bucket's chain.
    b->next = bucket->head.next;
    bucket->head.next = b;
  }
}

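获取 buf 的策略分为如下几步:
1. 先在 blockno 对应的桶中查找该 block 是否已被缓存,命中则增加引用计数后直接返回;
2. 未命中时,在本桶内寻找 refcnt 为 0 且 access_time 最小(最久未使用)的 buf 进行复用;
3. 本桶没有空闲 buf 时,获取全局锁 bcache.lock,从其他桶中找一个空闲 buf,将其从原桶摘下并挂到目标桶中。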

static struct buf*
bget(uint dev, uint blockno)
{
  struct buf *b, *lrub;
  struct bcache_bucket* bucket = &bcache.bucket[hash_key(blockno)];

  acquire(&bucket->lock);
  // Is the block already cached?
  for(b = &bucket->head; b; b = b->next){
    if(b->dev == dev && b->blockno == blockno){
      b->refcnt++;
      b->access_time = ticks;
      release(&bucket->lock);
      acquiresleep(&b->lock);
      return b;
    }
  }
  
  // find bucket lru buffer
  lrub = 0;
  uint min_time = 0x8ffffff;
  for(b = &bucket->head; b; b = b->next){
    if (b->refcnt == 0 && b->access_time < min_time){
      min_time = b->access_time;
      lrub = b;
    }
  }
  if (lrub) {
    goto setup;
  }

  // Not cached.
  // find in the global array
  acquire(&bcache.lock);

findbucket:
  lrub = 0;
  for(b = bcache.buf; b < bcache.buf+NBUF; b++){
    if(b->refcnt == 0 && b->access_time < min_time) {
      lrub = b;
    }
  }

  if (lrub) {
    // step 1 : release from the old bucket
    // need to hold the old bucket lock
    struct bcache_bucket* old_bucket = &bcache.bucket[hash_key(lrub->blockno)];
    acquire(&old_bucket->lock);
      
    
    if (lrub->refcnt != 0){
      release(&old_bucket->lock);
      goto findbucket;
    }

    b = &old_bucket->head;
    struct buf* bnext = b->next;
    while (bnext != lrub) {
      b = bnext;
      bnext = bnext->next;
    }
    b->next = bnext->next;

    // we don't need to modify bcache.bucket , so we release the lock
    release(&old_bucket->lock);
    // step 2 : add to target bucket 
    lrub->next = bucket->head.next;
    bucket->head.next = lrub;
    release(&bcache.lock);

setup:
    lrub->dev = dev;
    lrub->blockno = blockno;
    lrub->valid = 0;
    lrub->refcnt = 1;
    lrub->access_time = ticks;
    release(&bucket->lock);
    acquiresleep(&lrub->lock);
    return lrub;
  }
  panic("bget: no buffers");
}

这块内容较为简单,将锁从全局锁替换为桶锁即可

// Release a locked buffer: give up its sleep-lock, then drop the
// caller's reference under the lock of the bucket the buffer hashes to.
// Panics if the caller does not hold the buffer's sleep-lock.
void
brelse(struct buf *b)
{
  struct spinlock *lk;

  if(holdingsleep(&b->lock) == 0)
    panic("brelse");
  releasesleep(&b->lock);

  lk = &bcache.bucket[hash_key(b->blockno)].lock;
  acquire(lk);
  b->refcnt -= 1;
  release(lk);
}

同上,替换锁即可

// Take an extra reference on b, under the lock of the bucket that
// b's block number hashes to.
void
bpin(struct buf *b)
{
  struct spinlock *lk = &bcache.bucket[hash_key(b->blockno)].lock;

  acquire(lk);
  b->refcnt += 1;
  release(lk);
}

// Drop one reference on b, under the lock of the bucket that
// b's block number hashes to.
void
bunpin(struct buf *b)
{
  struct spinlock *lk = &bcache.bucket[hash_key(b->blockno)].lock;

  acquire(lk);
  b->refcnt -= 1;
  release(lk);
}

3. 总结

该 lab 的坑比较多,在测试 usertests 时,出现过一些异常错误,如 out of blocks。这是因为文件系统默认的 block 数量太少了:默认配置(如下所示)为 1000,需要将该值调大为 10000。

// Default value shown here; the write-up reports that usertests hit
// "out of blocks" with it and that it had to be raised to 10000.
#define FSSIZE       1000  // size of file system in blocks

此外还有一些错误如 freeing free block,这可能是因为在执行 bget 时,从其他桶挪移 buf 到新桶的时候,没有在持有原桶锁的情况下再次判断 refcnt 是否为 0,从而覆盖了已经被引用的块。

标签:struct,lock,bucket,Lab,locks,bcache,buf,cpu
来源: https://www.cnblogs.com/lawliet12/p/16101524.html