/*
 * Filesystem using a swap partition as backing storage.
 *
 * Copyright (C) 2001 Kasper Dupont.
 *
 * Parts of the code are taken from the filesystems:
 * ramfs and minix which are copyrighted by
 * Linus Torvalds and others.
 *
 * This file is released under the GPL.
 */

/* TODO:
 * - Write the rest of this TODO list
 */

#ifndef __KERNEL__
#define __KERNEL__
#endif

#ifndef MODULE
#define MODULE
#endif

/* NOTE(review): the header names of the ten #include directives below
 * are missing from this copy of the file (the text between '<' and '>'
 * appears to have been stripped).  Restore them from a pristine copy.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* some random number */
#define SWAPFS_MAGIC 0x53574150

/* Debug output: flip the "#if 0" to 1 to route DBG() to printk. */
#if 0
#define DBG printk
#else
#define DBG(x...) do { } while (0)
#endif

/* Slab caches; this file allocates tree nodes of the free-block bitmap
 * from bitmap_cache and out-of-inode meta entries from meta_cache.
 */
static kmem_cache_t *meta_cache,*bitmap_cache;

/* View the filesystem-private union of a struct super_block as the
 * swapfs super block.
 */
#define SUPER(s) ((struct swapfs_super_block*)(&(s)->u))

/* Number of longs that fit in the inode's private union after
 * reserving room for two pointers (presumably the left/right links of
 * struct meta_entry; its definition is not visible in this copy).
 */
#define BLOCKS_PER_META ((((int)sizeof(((struct inode*)NULL)->u))\
	-2*(int)sizeof(void*))/(int)sizeof(long))

/* Number of 512 byte units per page; used for i_blocks accounting. */
#define BLOCKS_PER_PAGE (PAGE_SIZE/512)

#define BIT_SHIFT 5
#ifndef BITS_PER_LONG
/* NOTE(review): source text was lost on the next line.  The rest of the
 * BITS_PER_LONG definition, the matching #endif, the struct definitions
 * (struct bitmap, struct meta_entry, struct swapfs_super_block are used
 * below but never defined in this copy), bitsucc() and the head of
 * empty() are all missing.  What follows the damaged token is the tail
 * of empty(), which flip() calls as empty(N, S).
 */
#define BITS_PER_LONG (1<BITS_PER_LONG) {
		unsigned long *t = (unsigned long*)S;
		if (!*t) return 1;
	}
	return 0;
}

/*
 * Toggle bit i in the bitmap tree of N bits rooted at *S.
 *
 * For N > BITS_PER_LONG, *S holds a pointer to a struct bitmap node
 * (or 0 when no node exists).  The node is allocated on demand, the
 * call recurses into submasks[i/Nrek] with index i%Nrek, and the
 * matching topmask bit is updated to record whether that submask is
 * now empty.  A node whose topmask becomes zero is freed and *S is
 * reset to 0.
 *
 * On allocation failure a message is printed and the bit is NOT
 * toggled; the caller is not told.
 */
static inline void flip(unsigned long N, unsigned long *S, unsigned long i)
{
	if (N>BITS_PER_LONG) {
		unsigned long Nrek=N>>BIT_SHIFT;
		struct bitmap *t;
		Nrek=N>>BIT_SHIFT;
		if (*S){
			t=(struct bitmap*)*S;
		} else {
			/* NOTE(review): assigning through a cast
			 * ("(unsigned long)t = ...") relies on an old GCC
			 * extension. */
			(*S)=(unsigned long)t=kmem_cache_alloc(bitmap_cache,0);
			if (!t) {
				printk("SWAPS: Out of memory. This is going to hurt. :-(\n");
				return;
			}
			memset(t,0,sizeof(*t));
		}
		flip(Nrek,t->submasks+(i/Nrek),i%Nrek);
		if (empty(Nrek,t->submasks[(i/Nrek)])) {
			t->topmask &= ~(1<<(i/Nrek));
			if (!t->topmask) {
				kmem_cache_free(bitmap_cache,t);
				*S=0;
			}
		} else
			t->topmask |= (1<<(i/Nrek));
	} else {
		/* NOTE(review): source text was lost on the next line: the
		 * end of flip() (presumably toggling bit i of the leaf
		 * word), all of bitsucc() and the head of succ() are
		 * missing.  The code after the damaged token is the body
		 * of succ(N, S, i).
		 *
		 * As visible here, succ() searches the subtree selected by
		 * i first, then uses bitsucc() on topmask to find a later
		 * subtree and searches that from its index 0.  It returns
		 * N when nothing is found.  The swapfs_alloc_block()
		 * comments describe it as a forward scan.
		 */
		*S ^= 1<BITS_PER_LONG) {
		unsigned long Nrek=N>>BIT_SHIFT;
		struct bitmap *t = (struct bitmap*)S;
		unsigned long r;
		if (!t) return N;
		r=succ(Nrek,t->submasks[(i/Nrek)],i%Nrek);
		if (r==Nrek) {
			r=bitsucc(t->topmask,i/Nrek+1);
			if (r==BITS_PER_LONG) return N;
			return succ(Nrek,t->submasks[r],0)+r*Nrek;
		} else {
			return r+(i/Nrek)*Nrek;
		}
	} else {
		int r=bitsucc(S,i);
		if (r==BITS_PER_LONG) return N;
		return r;
	}
}

/* NOTE(review): source text was lost inside the loop condition below:
 * the rest of bitpred() and the head of pred() are missing.  The code
 * after the damaged token is the body of pred(N, S, i), the backward
 * counterpart of succ(): it searches the subtree selected by i, then
 * uses bitpred() on topmask to pick an earlier subtree and searches
 * that from index Nrek-1.  It returns N when nothing is found.
 */
static inline int bitpred(unsigned long V, int i)
{
	while ((i>=0)&&((V&(1<BITS_PER_LONG) {
		unsigned long Nrek=N>>BIT_SHIFT;
		struct bitmap *t = (struct bitmap*)S;
		unsigned long r;
		if (!t) return N;
		r=pred(Nrek,t->submasks[(i/Nrek)],i%Nrek);
		if (r==Nrek) {
			r=bitpred(t->topmask,i/Nrek-1);
			if (r==-1) return N;
			return pred(Nrek,t->submasks[r],Nrek-1)+r*Nrek;
		} else {
			return r+(i/Nrek)*Nrek;
		}
	} else {
		int r=bitpred(S,i);
		if (r==-1) return N;
		return r;
	}
}

/*
 * Release the bitmap tree of N bits rooted at S.  Nothing is allocated
 * for N <= BITS_PER_LONG, and a zero S means an absent subtree.
 *
 * NOTE(review): the loop header below is damaged (text lost between
 * '<' and '>').  It presumably iterated over all submasks and recursed
 * with Nrek before freeing the node itself.
 */
void free_bitmap(unsigned long N, unsigned long S)
{
	if (N>BITS_PER_LONG) {
		unsigned long Nrek=N>>BIT_SHIFT;
		int i;
		struct bitmap *t=(struct bitmap*)S;
		if (!t) return;
		for (i=0;isubmasks[i]);
		kmem_cache_free(bitmap_cache,t);
	}
}

/*
 * Here ends bitmap operations
 */

/*
 * Allocate one block and return its number, or 0 when no block is free
 * (block 0 is never handed out; it holds the swap header).
 *
 * Blocks are taken from the current free range
 * [range_start, range_end) while it is non-empty.  When it runs out, a
 * new range is located through the bitmap with succ() and removed from
 * the bitmap with two flip() calls, one at each of its ends.
 */
static int swapfs_alloc_block(struct swapfs_super_block *s)
{
	long start,end;
	unsigned long N=s->N;
	if (!s->free_blocks) return 0;
	s->free_blocks--;
	if (s->range_start < s->range_end)
		return s->range_start++;
	/* There are no more pages in the range,
	 * the page just before range_start must
	 * be allocated. If it had been freed it
	 * would have been added to the range.
	 * I just have to start the scan from an
	 * allocated page, then I will find a free
	 * page.
	 */
	start = succ(N,s->bitmap,s->range_start-1);
	/* If no range found start from the beginning,
	 * page 0 is always allocated for the swap
	 * header.
	 */
	if (start == N) start = succ(N,s->bitmap,0);
	/* If still no range found the disk is full,
	 * just return and leave the empty range
	 * unchanged. Next allocation will continue
	 * from last allocation (if possible.)
	 */
	/* Now that I count free blocks this should
	 * no more happen.
	 */
	if (start == N) {
		printk("SWAPFS: Free block count wrong\n");
		return 0;
	}
	flip(N,&s->bitmap,start);
	end=succ(N,s->bitmap,start);
	flip(N,&s->bitmap,end);
	/* "start" itself is returned to the caller, so the remaining
	 * range begins one block later. */
	s->range_start=start+1;
	s->range_end=end;
	return start;
}

/*
 * Return "block" to the free pool of super block sb.
 *
 * A block adjacent to the current range extends it, absorbing any free
 * blocks the bitmap records next to it.  Any other block is recorded in
 * the bitmap by flipping the bits at block and block+1.
 */
static void swapfs_free_block(struct super_block *sb, long block)
{
	struct swapfs_super_block *s=SUPER(sb);
	unsigned long N = s->N;
	s->free_blocks++;
	if (block == s->range_end) {
		flip(N,&s->bitmap,block+1);
		/* There is at least one free block
		 * after the range, include as many blocks as
		 * possible in the range.
		 */
		s->range_end=succ(N,s->bitmap,block);
		flip(N,&s->bitmap,s->range_end);
	} else {
		flip(N,&s->bitmap,block);
		if (block == s->range_start-1) {
			/* There is at least one free block before
			 * the range, include as many blocks as possible
			 * in the range.
			 */
			s->range_start=pred(N,s->bitmap,block+1);
			flip(N,&s->bitmap,s->range_start);
		} else {
			flip(N,&s->bitmap,block+1);
		}
	}
}

/* free_meta() and flush_meta() are mutually recursive. */
static inline void free_meta(struct super_block *sb, struct meta_entry *e);
static inline void flush_meta(struct super_block *sb, struct meta_entry *e);

/* Release everything reachable from meta entry e, then e itself.
 * A NULL e is ignored.
 */
static inline void free_meta(struct super_block *sb, struct meta_entry *e)
{
	if (!e) return;
	flush_meta(sb,e);
	kmem_cache_free(meta_cache,e);
}

/*
 * Free the data blocks referenced by meta entry e and both of its
 * subtrees, but not e itself (the root entry lives inside the inode).
 *
 * NOTE(review): the loop header below is damaged (text lost between
 * '<' and '>').  It presumably walked idx over BLOCKS_PER_META and
 * freed every non-zero e->userblocks[idx].
 */
static inline void flush_meta(struct super_block *sb, struct meta_entry *e)
{
	int idx;
	for (idx=0;idxuserblocks[idx])
			swapfs_free_block(sb,e->userblocks[idx]);
	free_meta(sb,e->left);
	free_meta(sb,e->right);
}

/* super_operations.delete_inode: release all blocks and meta entries
 * of the inode, return the inode to the free count and clear it.
 */
static void swapfs_delete_inode(struct inode *inode)
{
	flush_meta(inode->i_sb,(struct meta_entry*)&inode->u);
	++SUPER(inode->i_sb)->free_inodes;
	clear_inode(inode);
}

/* Forward declarations; the tables are defined after the functions
 * they refer to.
 */
static struct super_operations swapfs_ops;
static struct address_space_operations swapfs_aops;
static struct file_operations swapfs_dir_operations;
static struct file_operations swapfs_file_operations;
static struct inode_operations swapfs_dir_inode_operations;

/* super_operations.statfs: report the block and inode counters kept in
 * the swapfs super block.  Always returns 0.
 */
static int swapfs_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = SWAPFS_MAGIC;
	buf->f_bsize = PAGE_CACHE_SIZE;
	buf->f_namelen = 255;
	buf->f_blocks=SUPER(sb)->total_blocks;
	buf->f_ffree=SUPER(sb)->free_inodes;
	buf->f_files=SUPER(sb)->total_inodes;
	buf->f_bfree=SUPER(sb)->free_blocks;
	buf->f_bavail=SUPER(sb)->free_blocks;
	return 0;
}

/*
 * Lookup the data. This is trivial - if the dentry didn't already
 * exist, we know it is negative.
*/ static struct dentry * swapfs_lookup(struct inode *dir, struct dentry *dentry) { d_add(dentry, NULL); return NULL; } static inline struct meta_entry* find_meta(struct inode * inode, long number, int create) { struct meta_entry *parent; struct meta_entry **new; if (!number) return (struct meta_entry*)&inode->u; parent=find_meta(inode,(number-1)>>1,create); new=&((number&1)?(parent->left):(parent->right)); if ((!*new)&&(create)) { *new=kmem_cache_alloc(meta_cache,0); if (*new) memset(*new,0,sizeof(**new)); } return *new; } static inline int swapfs_get_block(struct inode * inode, long block, struct buffer_head *bh_result, int create) { struct meta_entry * m; long idx=block%BLOCKS_PER_META; m=find_meta(inode,block/BLOCKS_PER_META,create); if (!m) return ((create)?(-ENOSPC):(0)); if (!(m->userblocks[idx])) { long block; if (!create) return 0; block=swapfs_alloc_block(SUPER(inode->i_sb)); if (!block) return -ENOSPC; m->userblocks[idx]=block; inode->i_blocks+=BLOCKS_PER_PAGE; } bh_result->b_dev = inode->i_dev; bh_result->b_blocknr = m->userblocks[idx]; bh_result->b_state |= (1UL << BH_Mapped); return 0; } /* Generic address space operations, I have a get_block */ static int swapfs_writepage(struct page *page) { return block_write_full_page(page,swapfs_get_block); } static int swapfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,swapfs_get_block); } static int swapfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page,from,to,swapfs_get_block); } static int swapfs_bmap(struct address_space *mapping, long block) { return generic_block_bmap(mapping,block,swapfs_get_block); } static struct inode *swapfs_get_inode(struct super_block *sb, int mode, int dev) { struct inode * inode; if (!SUPER(sb)->free_inodes) return NULL; --SUPER(sb)->free_inodes; inode = new_inode(sb); if (inode) { inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; 
inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_rdev = to_kdev_t(dev); inode->i_mapping->a_ops = &swapfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); break; case S_IFREG: inode->i_fop = &swapfs_file_operations; break; case S_IFDIR: inode->i_op = &swapfs_dir_inode_operations; inode->i_fop = &swapfs_dir_operations; break; case S_IFLNK: inode->i_op = &page_symlink_inode_operations; break; } memset(&inode->u,0,sizeof(inode->u)); } return inode; } /* * File creation. Allocate an inode, and we're done.. */ static int swapfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) { struct inode * inode = swapfs_get_inode(dir->i_sb, mode, dev); int error = -ENOSPC; if (inode) { d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ error = 0; } return error; } static int swapfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) { return swapfs_mknod(dir, dentry, mode | S_IFDIR, 0); } static int swapfs_create(struct inode *dir, struct dentry *dentry, int mode) { return swapfs_mknod(dir, dentry, mode | S_IFREG, 0); } /* * Link a file.. */ static int swapfs_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) { struct inode *inode = old_dentry->d_inode; if (S_ISDIR(inode->i_mode)) return -EPERM; inode->i_nlink++; atomic_inc(&inode->i_count); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); return 0; } static inline int swapfs_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); } /* * Check that a directory is empty (this works * for regular files too, they'll just always be * considered empty..). * * Note that an empty directory can still have * children, they just all have to be negative.. 
*/
static int swapfs_empty(struct dentry *dentry)
{
	struct list_head *list;

	/* The child list is walked under dcache_lock. */
	spin_lock(&dcache_lock);
	list = dentry->d_subdirs.next;

	while (list != &dentry->d_subdirs) {
		struct dentry *de = list_entry(list, struct dentry, d_child);

		if (swapfs_positive(de)) {
			spin_unlock(&dcache_lock);
			return 0;
		}
		list = list->next;
	}
	spin_unlock(&dcache_lock);
	return 1;
}

/*
 * This works for both directories and regular files.
 * (non-directories will always have empty subdirs)
 * Returns 0 on success and -ENOTEMPTY when the dentry still has
 * positive children.
 */
static int swapfs_unlink(struct inode * dir, struct dentry *dentry)
{
	int retval = -ENOTEMPTY;

	if (swapfs_empty(dentry)) {
		struct inode *inode = dentry->d_inode;

		inode->i_nlink--;
		dput(dentry);	/* Undo the count from "create" - this does all the work */
		retval = 0;
	}
	return retval;
}

#define swapfs_rmdir swapfs_unlink

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
 * gets overwritten.
 * Returns -ENOTEMPTY when the target still has positive children.
 */
static int swapfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry)
{
	int error = -ENOTEMPTY;

	if (swapfs_empty(new_dentry)) {
		struct inode *inode = new_dentry->d_inode;
		if (inode) {
			inode->i_nlink--;
			dput(new_dentry);
		}
		error = 0;
	}
	return error;
}

/* Create a symlink inode with swapfs_mknod(), then hand the target
 * string, including its terminating NUL (l = strlen + 1), to
 * block_symlink().  Returns the first error encountered, 0 on success.
 */
static int swapfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
{
	int error;

	error = swapfs_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
	if (!error) {
		int l = strlen(symname)+1;
		struct inode *inode = dentry->d_inode;
		error = block_symlink(inode, symname, l);
	}
	return error;
}

/* fsync hook: does nothing and reports success. */
static int swapfs_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
	return 0;
}

/* Page cache <-> block mapping hooks; all go through swapfs_get_block. */
static struct address_space_operations swapfs_aops = {
	readpage:	swapfs_readpage,
	writepage:	swapfs_writepage,
	prepare_write:	swapfs_prepare_write,
	commit_write:	generic_commit_write,
	bmap:		swapfs_bmap,
};

/* Regular files use the generic page-cache based file routines. */
static struct file_operations swapfs_file_operations = {
	read:		generic_file_read,
	write:		generic_file_write,
	mmap:		generic_file_mmap,
	fsync:		swapfs_sync_file,
};

/* Directories are served from the dentry cache (dcache_readdir). */
static struct file_operations swapfs_dir_operations = {
	read:		generic_read_dir,
	readdir:	dcache_readdir,
	fsync:		swapfs_sync_file,
};

static struct inode_operations swapfs_dir_inode_operations = {
	create:		swapfs_create,
	lookup:		swapfs_lookup,
	link:		swapfs_link,
	unlink:		swapfs_unlink,
	symlink:	swapfs_symlink,
	mkdir:		swapfs_mkdir,
	rmdir:		swapfs_rmdir,
	mknod:		swapfs_mknod,
	rename:		swapfs_rename,
};

/* super_operations.put_super: release the free-block bitmap tree. */
static void swapfs_put_super(struct super_block *sb)
{
	free_bitmap(SUPER(sb)->N,SUPER(sb)->bitmap);
}

static struct super_operations swapfs_ops = {
	statfs:		swapfs_statfs,
	put_inode:	force_delete,
	put_super:	swapfs_put_super,
	delete_inode:	swapfs_delete_inode,
};

/* Print a message unless "silent" is set, then return NULL from the
 * enclosing function.  It expects a local named "silent" in scope and
 * releases nothing: callers must brelse() any held buffer first.
 */
#define ERROR(s) do { if (!silent) printk("SWAPFS: " s "\n"); \
	return NULL; } while(0)

/*
 * read_super hook: validate the version 1 swap header in block 0 of the
 * device and set up an empty filesystem on it.
 *
 * Returns sb on success and NULL on any failure.  The page count is
 * taken from the header's last_page field and must lie between 4 and
 * 1024*1024*1024-4.  Initially blocks 1..last_page form one free range
 * and the bitmap is empty.
 */
static struct super_block *swapfs_read_super(struct super_block * sb, void * data, int silent)
{
	long last_page;
	struct inode * inode;
	struct dentry * root;
	struct buffer_head * bh;
	union swap_header *swap_header;
	struct swapfs_super_block *p;

	/* Doing this on a ramdisk will destroy its contents.
	 * The ramdisk relies on data staying in the blockcache,
	 * but changing blocksize must flush the blockcache.
	 * Just try to mount the device or enable it as swap
	 * _before_ running mkswap. This is not a problem in this
	 * FS, try mkswap /dev/ram4 ; swapon /dev/ram4
	 */
	set_blocksize(sb->s_dev, PAGE_CACHE_SIZE);

	bh=bread(sb->s_dev, 0, PAGE_CACHE_SIZE);
	if (!bh) ERROR("I/O error reading swap header");

	swap_header = (void *) bh->b_data;
	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) {
		brelse(bh);
		ERROR("Oldstyle swap header is not supported!");
	}
	if (memcmp("SWAPSPACE2",swap_header->magic.magic,10)) {
		brelse(bh);
		ERROR("No swap header found!");
	}
	if (swap_header->info.version != 1) {
		brelse(bh);
		ERROR("Only swap header version 1 is supported");
	}
	last_page=swap_header->info.last_page;

	/* Don't allow less than 4 blocks, just in case
	 * it would break somewhere.
	 */
	if (last_page<4) {
		brelse(bh);
		ERROR("Number of pages too small");
	}

	/* Stop now if the number of pages is too large
	 * for the bitmap tree. The number of bits in the
	 * bitmap must be a power of BITS_PER_LONG. On
	 * 32 bit architectures the limit is 32^6 since
	 * 32^7 would overflow.
	 */
	if (last_page>1024*1024*1024-4) {
		brelse(bh);
		ERROR("Number of pages too big");
	}

	brelse(bh);

	p=SUPER(sb);
	p->bitmap=0;
	p->range_start=1;	/* block 0 holds the swap header */
	p->range_end=last_page+1;
	p->free_inodes=p->total_inodes=2*last_page;
	p->free_blocks=last_page;
	p->total_blocks=last_page;
	/* NOTE(review): the loop header below is damaged (text lost
	 * between '<' and '>').  It presumably grew N in powers of
	 * BITS_PER_LONG until it covered the device; the exact bound
	 * must be restored from a pristine copy.
	 */
	for (p->N=1;p->NN*=BITS_PER_LONG);

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = SWAPFS_MAGIC;
	sb->s_op = &swapfs_ops;
	inode = swapfs_get_inode(sb, S_IFDIR | 0755, 0);
	if (!inode)
		return NULL;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		ERROR("Could not create filesystem root");
	}
	sb->s_root = root;
	return sb;
}

static DECLARE_FSTYPE(swapfs_fs_type, "swapfs", swapfs_read_super, FS_LITTER|FS_REQUIRES_DEV);

/* Create both slab caches.  Returns 0 on success and 1 on failure.
 * A cache created before the failure is left in place, so the caller
 * is expected to call exit_caches().
 */
static int init_caches()
{
	meta_cache=kmem_cache_create("swapfs-meta",
		sizeof(struct meta_entry),
		0, 0, NULL, NULL);
	if (!meta_cache) return 1;
	bitmap_cache=kmem_cache_create("swapfs-bitmap",
		sizeof(struct bitmap),
		0, 0, NULL, NULL);
	if (!bitmap_cache) return 1;
	return 0;
}

/* Destroy whichever slab caches exist.  The pointers are not reset. */
static void exit_caches()
{
	if (meta_cache) kmem_cache_destroy(meta_cache);
	if (bitmap_cache) kmem_cache_destroy(bitmap_cache);
}

/* These references are removed by the optimizer.
 * They are used to verify that type casting the
 * unions to my own types does not write outside
 * available memory.
*/
extern void ERROR_META_DOES_NOT_FIT_IN_STRUCT_INODE();
extern void ERROR_SWAPFS_DOES_NOT_FIT_IN_STRUCT_SUPER_BLOCK();

/*
 * Module entry point: check the size assumptions, create the slab
 * caches and register the filesystem type.
 *
 * Returns 0 on success or a negative errno: -ENOMEM when the caches
 * cannot be created, otherwise the error from register_filesystem().
 * Module init functions are expected to report failure with a negative
 * errno, so the real cause is passed on instead of a bare 1.
 */
static int __init init_swapfs_fs(void)
{
	int err;

	/* Both conditions are compile-time constants.  When they hold,
	 * the calls are optimised away; when they do not, the undefined
	 * symbol makes the build fail. */
	if (BLOCKS_PER_META < 1)
		ERROR_META_DOES_NOT_FIT_IN_STRUCT_INODE();
	if (sizeof(struct swapfs_super_block) >
	    sizeof(((struct super_block*)NULL)->u))
		ERROR_SWAPFS_DOES_NOT_FIT_IN_STRUCT_SUPER_BLOCK();

	if (init_caches()) {
		printk("swapfs: Could not create slab caches\n");
		/* One of the two caches may already exist. */
		exit_caches();
		return -ENOMEM;
	}

	err = register_filesystem(&swapfs_fs_type);
	if (err) {
		printk("swapfs: Could not register filesystem\n");
		exit_caches();
		return err;
	}
	return 0;
}

/* Module exit point: undo init_swapfs_fs() in reverse order. */
static void __exit exit_swapfs_fs(void)
{
	unregister_filesystem(&swapfs_fs_type);
	exit_caches();
}

module_init(init_swapfs_fs)
module_exit(exit_swapfs_fs)