// Goto sanos source index

//
// super.c
//
// Disk filesystem superblock routines
//
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 
// 1. Redistributions of source code must retain the above copyright 
//    notice, this list of conditions and the following disclaimer.  
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.  
// 3. Neither the name of the project nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission. 
// 
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
// SUCH DAMAGE.
// 

#include <os/krnl.h>

// Defaults used by parse_options() when the corresponding option is absent

// Default for the "blocksize" option (filesystem block size in bytes)
#define DEFAULT_BLOCKSIZE       4096
// Default for the "inoderatio" option (bytes of group space per inode)
#define DEFAULT_INODE_RATIO     4096
// Number of cache buffers used when neither the "cache" option nor the
// super block specifies one
#define DEFAULT_CACHE_BUFFERS   1024
// Default for the "resvblks" option
#define DEFAULT_RESERVED_BLOCKS 16
// Default for the "resvinodes" option
#define DEFAULT_RESERVED_INODES 16

// Size in bytes of the zero-filled buffer written per I/O during a full format
#define FORMAT_BLOCKSIZE        (64 * 1024)

//
// mark_group_desc_dirty
//
// Marks the cache buffer that holds the descriptor for the given group as
// updated. Descriptors are packed groupdescs_per_block to a buffer, so the
// buffer index is the group number divided by that count.
//

static void mark_group_desc_dirty(struct filsys *fs, int group) {
  struct buf *descbuf = fs->groupdesc_buffers[group / fs->groupdescs_per_block];

  mark_buffer_updated(fs->cache, descbuf);
}

//
// log2
//
// Returns the integer base 2 logarithm of n rounded down, i.e. the index of
// the highest set bit, for n >= 1. Zero and negative values yield 0.
//

static int log2(int n) {
  int l = 0;

  // Right shifting a negative int is implementation-defined and normally
  // preserves the sign bit, so shifting until zero would never terminate.
  if (n <= 0) return 0;

  while (n > 1) {
    n >>= 1;
    l++;
  }

  return l;
}

static void dfs_sync(void *arg) {
  struct filsys *fs = (struct filsys *) arg;

  // Write super block
  if (fs->super_dirty) {
    dev_write(fs->devno, fs->super, SECTORSIZE, 1, 0);
    fs->super_dirty = 0;
  }
}

//
// parse_options
//
// Fills fsopts from the option string opts. Numeric options fall back to the
// DEFAULT_* constants ("cache" falls back to 0), and the boolean options
// "quick", "progress" and "format" are collected into fsopts->flags.
// Always returns 0.
//

static int parse_options(char *opts, struct fsoptions *fsopts) {
  int flags = 0;

  // Numeric options
  fsopts->cache = get_num_option(opts, "cache", 0);
  fsopts->blocksize = get_num_option(opts, "blocksize", DEFAULT_BLOCKSIZE);
  fsopts->inode_ratio = get_num_option(opts, "inoderatio", DEFAULT_INODE_RATIO);
  fsopts->reserved_blocks = get_num_option(opts, "resvblks", DEFAULT_RESERVED_BLOCKS);
  fsopts->reserved_inodes = get_num_option(opts, "resvinodes", DEFAULT_RESERVED_INODES);

  // Boolean options
  if (get_option(opts, "quick", NULL, 0, NULL)) {
    flags |= FSOPT_QUICK;
  }
  if (get_option(opts, "progress", NULL, 0, NULL)) {
    flags |= FSOPT_PROGRESS;
  }
  if (get_option(opts, "format", NULL, 0, NULL)) {
    flags |= FSOPT_FORMAT;
  }
  fsopts->flags = flags;

  return 0;
}

static struct filsys *create_filesystem(char *devname, struct fsoptions *fsopts) {
  struct filsys *fs;
  dev_t devno;
  unsigned int sectcount;
  unsigned int blocks;
  unsigned int first_block;
  struct groupdesc *gd;
  struct buf *buf;
  unsigned int i, j;
  ino_t ino;
  struct inode *root;
  char *buffer;

  // Check device
  devno = dev_open(devname);
  if (devno == NODEV) return NULL;
  if (device(devno)->driver->type != DEV_TYPE_BLOCK) return NULL;
  sectcount = dev_ioctl(devno, IOCTL_GETDEVSIZE, NULL, 0);
  if (sectcount < 0) return NULL;

  // Allocate file system
  fs = (struct filsys *) kmalloc(sizeof(struct filsys));
  memset(fs, 0, sizeof(struct filsys));

  // Allocate super block
  fs->super = (struct superblock *) kmalloc(SECTORSIZE);
  memset(fs->super, 0, SECTORSIZE);
  fs->super_dirty = 1;

  // Set device number and block size
  fs->devno = devno;
  fs->blocksize = fsopts->blocksize;

  // Set signature, version and block size in super block
  fs->super->signature = DFS_SIGNATURE;
  fs->super->version = DFS_VERSION;
  fs->super->log_block_size = log2(fsopts->blocksize);

  // Each group has as many blocks as can be represented by the block bitmap block
  fs->super->blocks_per_group = fs->blocksize * 8;

  // Get the device size in sectors from the device and convert it to blocks
  fs->super->block_count =  sectcount / (fs->blocksize / SECTORSIZE);

  // Set cache size
  if (fsopts->cache == 0) {
    fs->super->cache_buffers = DEFAULT_CACHE_BUFFERS;
  } else {
    fs->super->cache_buffers = fsopts->cache;
  }
  if (fs->super->cache_buffers > fs->super->block_count) fs->super->cache_buffers = fs->super->block_count;

  // The number of inodes in a group is computed as a ratio of the size of the group.
  // If the device has only one group the inode count is based on size of device.
  // The number of inodes per block is then rounded up to fit a whole number of blocks.
  fs->inodes_per_block = fs->blocksize / sizeof(struct inodedesc);
  if (fs->super->blocks_per_group < fs->super->block_count) {
    fs->super->inodes_per_group = fs->blocksize * fs->super->blocks_per_group / fsopts->inode_ratio;
  } else {
    fs->super->inodes_per_group = fs->blocksize * fs->super->block_count / fsopts->inode_ratio;
  }
  if (fs->super->inodes_per_group > fs->blocksize * 8) fs->super->inodes_per_group = fs->blocksize * 8;
  fs->super->inodes_per_group = (fs->super->inodes_per_group + fs->inodes_per_block - 1) / fs->inodes_per_block * fs->inodes_per_block;
  fs->inode_blocks_per_group = (fs->super->inodes_per_group * sizeof(struct inodedesc) + fs->blocksize - 1) / fs->blocksize;

  // Calculate the number of block pointers per block directory page
  fs->log_blkptrs_per_block = fs->super->log_block_size - 2;

  // Calculate the number of group descriptors and the number of blocks to store them
  fs->super->group_count = (fs->super->block_count + fs->super->blocks_per_group - 1) / fs->super->blocks_per_group;
  fs->groupdescs_per_block = fs->blocksize / sizeof(struct groupdesc);
  fs->groupdesc_blocks = (fs->super->group_count * sizeof(struct groupdesc) + fs->blocksize - 1) / fs->blocksize;

  // The reserved blocks are allocated right after the super block
  fs->super->first_reserved_block = 1;
  if (fs->blocksize <= SECTORSIZE) fs->super->first_reserved_block++;
  fs->super->reserved_blocks = fsopts->reserved_blocks;
  
  // The group descriptor table starts after the superblock and reserved blocks
  fs->super->groupdesc_table_block = fs->super->first_reserved_block + fs->super->reserved_blocks;

  // If the last group is too small to hold the bitmaps and inode table skip it
  blocks = fs->super->block_count % fs->super->blocks_per_group;
  if (blocks > 0 && blocks < fs->inode_blocks_per_group + 2) fs->super->group_count--;
  if (fs->super->group_count == 0) {
    kprintf(KERN_ERR "dfs: filesystem too small\n");
    return NULL;
  }

  // Initialize buffer cache
  fs->cache = init_buffer_pool(devno, fs->super->cache_buffers, fs->blocksize, dfs_sync, fs);
  if (!fs->cache) return NULL;
  fs->cache->nosync = 1;

  // Zero all blocks on disk
  if ((fsopts->flags & FSOPT_QUICK) == 0) {
    int percent;
    int prev_percent;
    int blocks_per_io;

    blocks_per_io = FORMAT_BLOCKSIZE / fs->blocksize;
    buffer = (char *) kmalloc(FORMAT_BLOCKSIZE);
    memset(buffer, 0, FORMAT_BLOCKSIZE);

    prev_percent = -1;
    for (i = fs->super->groupdesc_table_block + fs->groupdesc_blocks; i < fs->super->block_count; i += blocks_per_io) {
      int rc;

      if (fsopts->flags & FSOPT_PROGRESS) {
        percent = (i / 100) * 100 / (fs->super->block_count / 100);
        if (percent != prev_percent) kprintf("%d%% complete\r", percent);
        prev_percent = percent;
      }

      if (i + blocks_per_io > fs->super->block_count) {
        rc = dev_write(fs->devno, buffer, (fs->super->block_count - i) * fs->blocksize, i, 0);
      } else {
        rc = dev_write(fs->devno, buffer, FORMAT_BLOCKSIZE, i, 0);
      }

      if (rc < 0) {
        kprintf("dfs: error %d in format\n", rc);
        return NULL;
      }
    }
    if (fsopts->flags & FSOPT_PROGRESS) kprintf("100%% complete\r");

    kfree(buffer);
  }

  // Allocate group descriptors
  fs->groupdesc_buffers = (struct buf **) kmalloc(sizeof(struct buf *) * fs->groupdesc_blocks);
  fs->groups = (struct blkgroup *) kmalloc(sizeof(struct group) * fs->super->group_count);

  for (i = 0; i < fs->groupdesc_blocks; i++) {
    fs->groupdesc_buffers[i] = alloc_buffer(fs->cache, fs->super->groupdesc_table_block + i);
    if (!fs->groupdesc_buffers[i]) return NULL;
  }

  for (i = 0; i < fs->super->group_count; i++) {
    gd = (struct groupdesc *) fs->groupdesc_buffers[i / fs->groupdescs_per_block]->data;
    gd += (i % fs->groupdescs_per_block);

    fs->groups[i].desc = gd;
    fs->groups[i].first_free_block = 0;
    fs->groups[i].first_free_inode = 0;
  }

  // Reserve inode for root directory
  fs->super->reserved_inodes = fsopts->reserved_inodes;

  // Set inode count based on group count
  fs->super->inode_count = fs->super->inodes_per_group * fs->super->group_count;

  // All blocks and inodes initially free
  fs->super->free_inode_count = fs->super->inode_count;
  fs->super->free_block_count = fs->super->block_count;

  // Initialize block bitmaps
  for (i = 0; i < fs->super->group_count; i++) {
    gd = fs->groups[i].desc;
    blocks = 0;
    first_block = fs->super->blocks_per_group * i;

    // The first group needs blocks for the super block, reserved blocks and the group descriptors
    if (i == 0) blocks = fs->super->groupdesc_table_block + fs->groupdesc_blocks;

    // Next blocks in group are the block bitmap, inode bitmap and the inode table
    gd->block_bitmap_block = first_block + blocks++;
    gd->inode_bitmap_block = first_block + blocks++;
    gd->inode_table_block = first_block + blocks;
    blocks += fs->inode_blocks_per_group;

    // Update block bitmap
    buf = alloc_buffer(fs->cache, gd->block_bitmap_block);
    if (!buf) return NULL;
    set_bits(buf->data, 0, blocks);
    mark_buffer_updated(fs->cache, buf);
    release_buffer(fs->cache, buf);

    // Determine the block count for the group. The last group may be truncated
    if (fs->super->blocks_per_group * (i + 1) > fs->super->block_count) {
      gd->block_count = fs->super->block_count - fs->super->blocks_per_group * i;
    } else {
      gd->block_count = fs->super->blocks_per_group;
    }

    // Set the count of free blocks and inodes for group
    gd->free_inode_count = fs->super->inodes_per_group;
    gd->free_block_count = gd->block_count - blocks;

    // Update super block
    fs->super->free_block_count -= blocks;

    mark_group_desc_dirty(fs, i);
  }

  // Zero out block and inode bitmaps and inode tables
  if (fsopts->flags & FSOPT_QUICK) {
    buffer = (char *) kmalloc(fs->blocksize);
    memset(buffer, 0, fs->blocksize);

    for (i = 0; i < fs->super->group_count; i++) {
      gd = fs->groups[i].desc;

      dev_write(fs->devno, buffer, fs->blocksize, gd->block_bitmap_block, 0);
      dev_write(fs->devno, buffer, fs->blocksize, gd->inode_bitmap_block, 0);
      for (j = 0; j < fs->inode_blocks_per_group; j++) {
        dev_write(fs->devno, buffer, fs->blocksize, gd->inode_table_block + j, 0);
      }
    }

    kfree(buffer);
  }

  // Reserve inodes
  for (i = 0; i < fs->super->reserved_inodes; i++) {
    ino = new_inode(fs, 0, 0);
    if (ino != i)  {
      kprintf(KERN_ERR "dfs: format expected inode %d, got %d\n", i, ino);
      return NULL;
    }
  }

  // Create root directory
  if (get_inode(fs, DFS_INODE_ROOT, &root) < 0) return NULL;
  root->desc->mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO;
  root->desc->ctime = root->desc->mtime = time(NULL);
  root->desc->linkcount = 1;
  mark_buffer_updated(fs->cache, root->buf);
  release_inode(root);

  // Reenable buffer cache sync
  fs->cache->nosync = 0;

  return fs;
}

static struct filsys *open_filesystem(char *devname, struct fsoptions *fsopts) {
  struct filsys *fs;
  dev_t devno;
  struct groupdesc *gd;
  unsigned int i;
  unsigned int cache_buffers;

  // Check device
  devno = dev_open(devname);
  if (devno == NODEV) return NULL;
  if (device(devno)->driver->type != DEV_TYPE_BLOCK) return NULL;

  // Allocate file system
  fs = (struct filsys *) kmalloc(sizeof(struct filsys));
  memset(fs, 0, sizeof(struct filsys));

  // Allocate and read super block
  fs->super = (struct superblock *) kmalloc(SECTORSIZE);
  memset(fs->super, 0, SECTORSIZE);
  if (dev_read(devno, fs->super, SECTORSIZE, 1, 0) != SECTORSIZE) {
    kprintf(KERN_ERR "dfs: unable to read superblock on device %s\n", device(devno)->name);
    free(fs->super);
    free(fs);
    return NULL;
  }
  fs->super_dirty = 0;

  // Check signature and version
  if (fs->super->signature != DFS_SIGNATURE) {
    kprintf(KERN_ERR "dfs: invalid DFS signature on device %s\n", device(devno)->name);
    free(fs->super);
    free(fs);
    return NULL;
  }

  if (fs->super->version != DFS_VERSION) {
    kprintf(KERN_ERR "dfs: invalid DFS version on device %s\n", device(devno)->name);
    free(fs->super);
    free(fs);
    return NULL;
  }

  // Set device number and block size
  fs->devno = devno;
  fs->blocksize = 1 << fs->super->log_block_size;
  fs->inodes_per_block = fs->blocksize / sizeof(struct inodedesc);

  // Initialize buffer cache
  cache_buffers = (unsigned int) fsopts->cache;
  if (cache_buffers == 0) cache_buffers = fs->super->cache_buffers;
  if (cache_buffers == 0) cache_buffers = DEFAULT_CACHE_BUFFERS;
  if (cache_buffers > fs->super->block_count) cache_buffers = fs->super->block_count;
  fs->cache = init_buffer_pool(devno, cache_buffers, fs->blocksize, dfs_sync, fs);
  if (!fs->cache) return NULL;

  // Calculate the number of group descriptors blocks
  fs->groupdescs_per_block = fs->blocksize / sizeof(struct groupdesc);
  fs->groupdesc_blocks = (fs->super->group_count * sizeof(struct groupdesc) + fs->blocksize - 1) / fs->blocksize;

  // Calculate the number of block pointers per block directory page
  fs->log_blkptrs_per_block = fs->super->log_block_size - 2;

  // Read group descriptors
  fs->groupdesc_buffers = (struct buf **) kmalloc(sizeof(struct buf *) * fs->groupdesc_blocks);
  fs->groups = (struct blkgroup *) kmalloc(sizeof(struct group) * fs->super->group_count);
  for (i = 0; i < fs->groupdesc_blocks; i++) {
    fs->groupdesc_buffers[i] = get_buffer(fs->cache, fs->super->groupdesc_table_block + i);
    if (!fs->groupdesc_buffers[i]) return NULL;
  }

  for (i = 0; i < fs->super->group_count; i++) {
    gd = (struct groupdesc *) fs->groupdesc_buffers[i / fs->groupdescs_per_block]->data;
    gd += (i % fs->groupdescs_per_block);

    fs->groups[i].desc = gd;
    fs->groups[i].first_free_block = -1;
    fs->groups[i].first_free_inode = -1;
  }

  return fs;
}

//
// close_filesystem
//
// Tears down an open filesystem: releases the held group descriptor buffers,
// flushes and syncs the buffer cache, frees the cache, writes the super block
// if it is still dirty, closes the device and frees the filesystem itself.
// fs must not be used after this call.
//

static void close_filesystem(struct filsys *fs) {
  unsigned int n = 0;

  // Drop the group descriptor buffers and the tables that refer to them
  while (n < fs->groupdesc_blocks) {
    release_buffer(fs->cache, fs->groupdesc_buffers[n]);
    n++;
  }
  kfree(fs->groupdesc_buffers);
  kfree(fs->groups);

  // Push everything in the buffer cache out before the cache goes away
  flush_buffers(fs->cache, 0);
  sync_buffers(fs->cache, 0);
  free_buffer_pool(fs->cache);

  // Make sure a pending super block update reaches the device
  if (fs->super_dirty) {
    dev_write(fs->devno, fs->super, SECTORSIZE, 1, 0);
  }
  kfree(fs->super);

  // Finally give up the device and the filesystem structure
  dev_close(fs->devno);
  kfree(fs);
}

//
// get_filesystem_status
//
// Fills buf with block size, block and inode totals, free counts and the
// cache size (pool size times buffer size) for the filesystem fs.
//

static void get_filesystem_status(struct filsys *fs, struct statfs *buf) {
  // Block size is reported both as block size and as I/O size
  buf->bsize = buf->iosize = fs->blocksize;

  // Totals and free counts come straight from the super block
  buf->blocks = fs->super->block_count;
  buf->files = fs->super->inode_count;
  buf->bfree = fs->super->free_block_count;
  buf->ffree = fs->super->free_inode_count;

  buf->cachesize = fs->cache->poolsize * fs->cache->bufsize;
}

int dfs_mkfs(char *devname, char *opts) {
  struct fsoptions fsopts;
  struct filsys *fs;

  if (parse_options(opts, &fsopts) != 0) return -EINVAL;
  if (!devname) return -EINVAL;

  fs = create_filesystem(devname, &fsopts);
  if (!fs) return -EIO;
  close_filesystem(fs);
  return 0;
}

//
// dfs_mount
//
// Mounts the device fs->mntfrom and stores the resulting filesystem in
// fs->data. With the "format" option the device is formatted first,
// otherwise the existing filesystem is opened. Returns 0 on success,
// -EINVAL if the options cannot be parsed and -EIO if the filesystem could
// not be created or opened.
//

int dfs_mount(struct fs *fs, char *opts) {
  struct fsoptions fsopts;
  int format;

  if (parse_options(opts, &fsopts) != 0) {
    return -EINVAL;
  }

  format = (fsopts.flags & FSOPT_FORMAT) != 0;
  fs->data = format ? create_filesystem(fs->mntfrom, &fsopts)
                    : open_filesystem(fs->mntfrom, &fsopts);

  return fs->data ? 0 : -EIO;
}

//
// dfs_umount
//
// Unmounts the filesystem by closing the struct filsys stored in fs->data.
// Always returns 0.
//

int dfs_umount(struct fs *fs) {
  struct filsys *filsys = (struct filsys *) fs->data;

  close_filesystem(filsys);
  return 0;
}

//
// dfs_statfs
//
// Reports filesystem statistics for the struct filsys stored in fs->data
// into buf. Always returns 0.
//

int dfs_statfs(struct fs *fs, struct statfs *buf) {
  struct filsys *filsys = (struct filsys *) fs->data;

  get_filesystem_status(filsys, buf);
  return 0;
}