/*
 * Filesystem using a swap partition as backing storage.
 *
 * Copyright (C) 2001 Kasper Dupont.
 *
 * Parts of the code are taken from the filesystems:
 * ramfs and minix which are copyrighted by
 * Linus Torvalds and others.
 *
 * This file is released under the GPL.
 */

/* TODO:
 * - Write the rest of this TODO list
 */

/* Define the kernel/module build macros here when the compiler command
 * line did not already supply them (presumably so the kernel headers
 * included below expose their in-kernel, modular definitions).
 */
#ifndef __KERNEL__
#define __KERNEL__
#endif
#ifndef MODULE
#define MODULE
#endif

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include <asm/uaccess.h>

/* Filesystem magic number: the bytes 0x53 0x57 0x41 0x50 are ASCII "SWAP".
 * It is reported through statfs and stored in the superblock's s_magic.
 */
#define SWAPFS_MAGIC	0x53574150

/* Debug tracing switch: change the "#if 0" to "#if 1" to route DBG()
 * to printk; in the default configuration DBG() expands to an empty
 * statement and its arguments are discarded.
 */
#if 0
#define DBG printk
#else
#define DBG(x...) do { } while (0)
#endif

/* Slab caches created by init_caches(): meta_cache holds struct meta_entry
 * objects, bitmap_cache holds struct bitmap objects.
 */
static kmem_cache_t *meta_cache,*bitmap_cache;

/* SUPER(s): view the union embedded in a struct super_block as this
 * filesystem's private struct swapfs_super_block.
 */
#define SUPER(s) ((struct swapfs_super_block*)(&(s)->u))
/* BLOCKS_PER_META: how many long-sized block numbers fit in the union
 * embedded in struct inode once room for two pointers has been set aside
 * (the left/right links of struct meta_entry).
 */
#define BLOCKS_PER_META ((((int)sizeof(((struct inode*)NULL)->u))\
			  -2*(int)sizeof(void*))/(int)sizeof(long))
/* BLOCKS_PER_PAGE: number of 512-byte units in one page; added to
 * i_blocks each time a page-sized block is allocated to a file.
 */
#define BLOCKS_PER_PAGE (PAGE_SIZE/512)
/* BIT_SHIFT: log2 of the bitmap tree fan-out used when descending one
 * level (N >> BIT_SHIFT).  The fallback BITS_PER_LONG below is 1<<5 = 32.
 * NOTE(review): if the kernel headers already define BITS_PER_LONG as 64,
 * it no longer matches BIT_SHIFT -- confirm this code is 32-bit only.
 */
#define BIT_SHIFT 5
#ifndef BITS_PER_LONG
#define BITS_PER_LONG (1<<BIT_SHIFT)
#endif

/* One node of the per-file block map.  Nodes form a binary tree indexed
 * heap-style: node 0 is overlaid on the inode's embedded union, and node k
 * hangs off node (k-1)>>1 (see find_meta()).
 */
struct meta_entry {
	/* Device block numbers for this node's slice of the file;
	 * 0 means "no block allocated". */
	long userblocks[BLOCKS_PER_META];
	/* Children: odd-numbered nodes are reached through 'left',
	 * even-numbered ones through 'right'; NULL when absent. */
	struct meta_entry *left,*right;
};

/* Interior node of the sparse bitmap tree.  A tree over N bits has
 * BITS_PER_LONG children each covering N>>BIT_SHIFT bits.
 */
struct bitmap {
	/* Bit k is set while submasks[k] is non-empty (maintained by flip()). */
	unsigned long topmask;
	/* Child k: a raw bit word when the child covers at most BITS_PER_LONG
	 * bits, otherwise a struct bitmap pointer stored as an integer
	 * (0 when the child has no bits set). */
	unsigned long submasks[BITS_PER_LONG];
};

/* Per-mount private state, stored inside the union of struct super_block
 * and reached through SUPER().
 */
struct swapfs_super_block {
	/* N: number of bits the bitmap tree covers (a power of BITS_PER_LONG);
	 * bitmap: root of that tree, 0 when no bit is set. */
	unsigned long N,bitmap;
	/* Current run of free blocks [range_start, range_end) from which
	 * swapfs_alloc_block() hands out blocks sequentially. */
	long range_start,range_end;
	/* Inode accounting reported by statfs. */
	long total_inodes,free_inodes;
	/* Block accounting reported by statfs. */
	long total_blocks,free_blocks;
};

/*********************************\
 * Here starts bitmap operations *
\*********************************/

/*
 * Report whether the (sub)bitmap S, covering N bits, has no bit set.
 *
 * N: number of bits covered by S.
 * S: a raw bit word when N <= BITS_PER_LONG, otherwise the address of a
 *    struct bitmap stored as an integer (0 meaning "no node").
 *
 * Returns 1 when empty, 0 otherwise.
 */
static inline int empty(unsigned long N,
			unsigned long S)
{
	const unsigned long *first_word;

	/* A zero word and a missing node are both empty. */
	if (S == 0)
		return 1;

	/* A non-zero leaf word has at least one bit set. */
	if (N <= BITS_PER_LONG)
		return 0;

	/* The first word of a struct bitmap is its topmask. */
	first_word = (const unsigned long *)S;
	return *first_word == 0;
}

/*
 * Toggle bit i in the bitmap tree rooted at *S, which covers N bits.
 *
 * N: number of bits covered by *S.
 * S: address of the slot holding the tree: a raw bit word when
 *    N <= BITS_PER_LONG, otherwise a struct bitmap pointer stored as an
 *    integer (0 when no node exists yet).
 * i: index of the bit to toggle, relative to this subtree.
 *
 * Interior nodes are allocated on demand and released again as soon as
 * their topmask becomes zero, at which point *S is reset to 0.
 * If a node cannot be allocated a message is logged and the bit is NOT
 * toggled; the function has no way to report this to the caller.
 */
static inline void flip(unsigned long N,
			unsigned long *S,
			unsigned long i)
{
	unsigned long Nrek;
	unsigned long slot;
	struct bitmap *t;

	if (N <= BITS_PER_LONG) {
		/* Leaf: use an unsigned long constant so the shift is done
		 * at word width rather than in (signed) int. */
		*S ^= 1UL << i;
		return;
	}

	Nrek = N >> BIT_SHIFT;		/* bits covered by each child */
	slot = i / Nrek;		/* child that contains bit i */

	if (*S) {
		t = (struct bitmap *)*S;
	} else {
		/* Plain assignments instead of the cast-as-lvalue GCC
		 * extension used previously. */
		t = kmem_cache_alloc(bitmap_cache, 0);
		if (!t) {
			printk("SWAPS: Out of memory. This is going to hurt. :-(\n");
			return;
		}
		memset(t, 0, sizeof(*t));
		*S = (unsigned long)t;
	}

	flip(Nrek, t->submasks + slot, i % Nrek);

	/* Keep topmask in sync with the child just modified. */
	if (empty(Nrek, t->submasks[slot])) {
		t->topmask &= ~(1UL << slot);
		if (!t->topmask) {
			/* The whole node became empty: release it. */
			kmem_cache_free(bitmap_cache, t);
			*S = 0;
		}
	} else {
		t->topmask |= 1UL << slot;
	}
}

/*
 * Find the lowest set bit of V at position i or above.
 *
 * V: word to scan.
 * i: first bit position to examine.
 *
 * Returns the position found, or BITS_PER_LONG when no bit at or above
 * i is set (including when i is already >= BITS_PER_LONG).
 */
static inline int bitsucc(unsigned long V,
			  int i)
{
	/* 1UL keeps the shift at word width; a plain int 1 overflows at
	 * bit 31 and cannot reach the upper half of a 64-bit word. */
	while ((i < BITS_PER_LONG) && ((V & (1UL << i)) == 0))
		++i;
	return i;
}

/*
 * Find the smallest set bit with index >= i in the bitmap tree S that
 * covers N bits.
 *
 * N: number of bits covered by S.
 * S: raw bit word (N <= BITS_PER_LONG) or struct bitmap pointer stored as
 *    an integer.
 * i: index to start searching from, relative to this subtree.
 *
 * Returns the index found, or N when there is no such bit.
 */
static inline unsigned long succ(unsigned long N,
				 unsigned long S,
				 unsigned long i)
{
	unsigned long child_span, slot, found;
	struct bitmap *node;
	int bit;

	/* Leaf level: scan the word directly. */
	if (N <= BITS_PER_LONG) {
		bit = bitsucc(S, i);
		if (bit == BITS_PER_LONG)
			return N;
		return bit;
	}

	node = (struct bitmap *)S;
	if (!node)
		return N;

	child_span = N >> BIT_SHIFT;
	slot = i / child_span;

	/* First look in the remainder of the child that contains i. */
	found = succ(child_span, node->submasks[slot], i % child_span);
	if (found != child_span)
		return found + slot * child_span;

	/* Nothing there: jump to the next non-empty child, if any. */
	found = bitsucc(node->topmask, slot + 1);
	if (found == BITS_PER_LONG)
		return N;
	return succ(child_span, node->submasks[found], 0) + found * child_span;
}

/*
 * Find the highest set bit of V at position i or below.
 *
 * V: word to scan.
 * i: first (highest) bit position to examine; a negative value means
 *    there is nothing to scan.
 *
 * Returns the position found, or -1 when no bit at or below i is set.
 */
static inline int bitpred(unsigned long V,
			  int i)
{
	/* 1UL keeps the shift at word width; a plain int 1 overflows at
	 * bit 31 and cannot reach the upper half of a 64-bit word. */
	while ((i >= 0) && ((V & (1UL << i)) == 0))
		--i;
	return i;
}

/*
 * Find the largest set bit with index <= i in the bitmap tree S that
 * covers N bits.
 *
 * N: number of bits covered by S.
 * S: raw bit word (N <= BITS_PER_LONG) or struct bitmap pointer stored as
 *    an integer.
 * i: index to start searching from (downwards), relative to this subtree.
 *
 * Returns the index found, or N when there is no such bit.
 */
static inline unsigned long pred(unsigned long N,
				 unsigned long S,
				 unsigned long i)
{
	unsigned long child_span, slot, found;
	struct bitmap *node;
	int bit;

	/* Leaf level: scan the word directly. */
	if (N <= BITS_PER_LONG) {
		bit = bitpred(S, i);
		if (bit == -1)
			return N;
		return bit;
	}

	node = (struct bitmap *)S;
	if (!node)
		return N;

	child_span = N >> BIT_SHIFT;
	slot = i / child_span;

	/* First look in the lower part of the child that contains i. */
	found = pred(child_span, node->submasks[slot], i % child_span);
	if (found != child_span)
		return found + slot * child_span;

	/* Nothing there: step down to the previous non-empty child.  For
	 * slot 0 the argument wraps to -1 and bitpred() reports "none". */
	found = bitpred(node->topmask, slot - 1);
	if (found == (unsigned long)-1)
		return N;
	return pred(child_span, node->submasks[found], child_span - 1)
		+ found * child_span;
}

/*
 * Release every interior node of the bitmap tree S covering N bits.
 *
 * N: number of bits covered by S.
 * S: raw bit word (N <= BITS_PER_LONG, nothing to release) or struct
 *    bitmap pointer stored as an integer (0 when there is no node).
 *
 * Children are released before their parent.
 */
void free_bitmap(unsigned long N,
		 unsigned long S)
{
	struct bitmap *node = (struct bitmap *)S;
	unsigned long child_span;
	int slot;

	/* Leaf words own no memory; a missing node has nothing to free. */
	if (N <= BITS_PER_LONG || !node)
		return;

	child_span = N >> BIT_SHIFT;
	slot = 0;
	while (slot < BITS_PER_LONG) {
		free_bitmap(child_span, node->submasks[slot]);
		++slot;
	}

	kmem_cache_free(bitmap_cache, node);
}
	
/*******************************\
 * Here ends bitmap operations *
\*******************************/

/*
 * Allocate one block.
 *
 * s: the filesystem's private superblock data.
 *
 * Returns the block number, or 0 when no block is available (0 is never
 * handed out; the original comments note it holds the swap header).
 *
 * Blocks are normally taken from the front of the current free range
 * [range_start, range_end).  When that range is exhausted, the bitmap is
 * searched for another run of free blocks, which then becomes the new
 * current range.
 */
static int swapfs_alloc_block(struct swapfs_super_block *s)
{
	unsigned long N = s->N;
	long first, limit;

	if (s->free_blocks == 0)
		return 0;
	s->free_blocks--;

	/* Fast path: the current range still has blocks left. */
	if (s->range_start < s->range_end) {
		first = s->range_start;
		s->range_start = first + 1;
		return first;
	}

	/* The range is used up.  Look for the next marked position,
	 * starting at the block just before the (empty) range.
	 */
	first = succ(N, s->bitmap, s->range_start - 1);

	/* Nothing further up: wrap around and search from the beginning. */
	if (first == N)
		first = succ(N, s->bitmap, 0);

	/* Still nothing although free_blocks was non-zero: the counter
	 * and the bitmap disagree.  Leave the empty range untouched.
	 */
	if (first == N) {
		printk("SWAPFS: Free block count wrong\n");
		return 0;
	}

	/* Remove both marks delimiting the run that was found; the run
	 * minus its first block becomes the new current range.
	 */
	flip(N, &s->bitmap, first);
	limit = succ(N, s->bitmap, first);
	flip(N, &s->bitmap, limit);

	s->range_start = first + 1;
	s->range_end = limit;

	return first;
}

/*
 * Return a block to the free pool.
 *
 * sb:    superblock of the filesystem the block belongs to.
 * block: number of the block being released.
 *
 * Three cases are distinguished:
 *  - the block sits directly behind the current range: the range grows
 *    upwards over it and over any free run that follows;
 *  - the block sits directly in front of the current range: the range
 *    grows downwards over it and over any free run that precedes it;
 *  - otherwise the bits at block and block+1 are toggled in the bitmap.
 */
static void swapfs_free_block(struct super_block *sb,
			      long block)
{
	struct swapfs_super_block *s = SUPER(sb);
	unsigned long N = s->N;
	long edge;

	s->free_blocks++;

	if (block == s->range_end) {
		/* Extend the range upwards as far as possible. */
		flip(N, &s->bitmap, block + 1);
		edge = succ(N, s->bitmap, block);
		s->range_end = edge;
		flip(N, &s->bitmap, edge);
		return;
	}

	flip(N, &s->bitmap, block);

	if (block != s->range_start - 1) {
		/* Not adjacent to the range: toggle the upper mark too. */
		flip(N, &s->bitmap, block + 1);
		return;
	}

	/* Extend the range downwards as far as possible. */
	edge = pred(N, s->bitmap, block + 1);
	s->range_start = edge;
	flip(N, &s->bitmap, edge);
}

/* free_meta() and flush_meta() call each other, hence the prototypes. */
static inline void free_meta(struct super_block *sb, struct meta_entry *e);
static inline void flush_meta(struct super_block *sb, struct meta_entry *e);

/*
 * Release a heap-allocated block-map node together with everything below
 * it: the data blocks it references, its subtrees, and the node itself.
 * A NULL node is ignored.
 */
static inline void free_meta(struct super_block *sb, struct meta_entry *e)
{
	if (e != NULL) {
		flush_meta(sb, e);
		kmem_cache_free(meta_cache, e);
	}
}

/*
 * Release everything a block-map node refers to, but not the node itself:
 * every non-zero entry of userblocks is given back to the allocator and
 * both subtrees are freed.  The node's fields are left as they were.
 */
static inline void flush_meta(struct super_block *sb, struct meta_entry *e)
{
	long *slot = e->userblocks;
	long *end = e->userblocks + BLOCKS_PER_META;

	while (slot < end) {
		if (*slot != 0)
			swapfs_free_block(sb, *slot);
		++slot;
	}

	free_meta(sb, e->left);
	free_meta(sb, e->right);
}

static void swapfs_delete_inode(struct inode *inode)
{
	flush_meta(inode->i_sb,(struct meta_entry*)&inode->u);
	++SUPER(inode->i_sb)->free_inodes;
	clear_inode(inode);
}

/* Tentative definitions of the operation tables, so that the functions
 * below can refer to them before the initialized definitions appear
 * further down in this file.
 */
static struct super_operations swapfs_ops;
static struct address_space_operations swapfs_aops;
static struct file_operations swapfs_dir_operations;
static struct file_operations swapfs_file_operations;
static struct inode_operations swapfs_dir_inode_operations;

static int swapfs_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = SWAPFS_MAGIC;
	buf->f_bsize = PAGE_CACHE_SIZE;
	buf->f_namelen = 255;

	buf->f_blocks=SUPER(sb)->total_blocks;
	buf->f_ffree=SUPER(sb)->free_inodes;
	buf->f_files=SUPER(sb)->total_inodes;
	buf->f_bfree=SUPER(sb)->free_blocks;
	buf->f_bavail=SUPER(sb)->free_blocks;
	return 0;
}

/*
 * Look up a name in a directory.  This is trivial: the function performs
 * no search of its own, so any dentry that reaches it is simply recorded
 * as negative (no inode attached).
 *
 * dir:    directory being searched (unused).
 * dentry: the name being looked up.
 *
 * Always returns NULL.
 */
static struct dentry * swapfs_lookup(struct inode *dir, struct dentry *dentry)
{
	d_add(dentry, NULL);
	return NULL;
}

/*
 * Locate block-map node 'number' of a file, optionally creating it.
 *
 * inode:  file whose block map is searched.
 * number: heap-style node index; 0 is the root node embedded in the
 *         inode, and node k is a child of node (k-1)>>1 (odd k through
 *         'left', even k through 'right').
 * create: non-zero to allocate missing nodes along the path.
 *
 * Returns the node, or NULL when it (or one of its ancestors) does not
 * exist and either create is zero or the allocation failed.
 */
static inline struct meta_entry* find_meta(struct inode * inode, 
					   long number,
					   int create)
{
	struct meta_entry *parent;
	struct meta_entry **link;

	if (!number)
		return (struct meta_entry*)&inode->u;

	parent = find_meta(inode, (number-1)>>1, create);

	/* A missing ancestor (hole in a sparse file, or out of memory)
	 * must not be dereferenced. */
	if (!parent)
		return NULL;

	/* Plain if/else instead of using a conditional expression as an
	 * lvalue, which is a withdrawn GCC extension. */
	if (number & 1)
		link = &parent->left;
	else
		link = &parent->right;

	if (!*link && create) {
		*link = kmem_cache_alloc(meta_cache, 0);
		if (*link)
			memset(*link, 0, sizeof(**link));
	}
	return *link;
}

static inline int swapfs_get_block(struct inode * inode, 
				   long block,
				   struct buffer_head *bh_result, 
				   int create)
{
	struct meta_entry * m;
	long idx=block%BLOCKS_PER_META;

	m=find_meta(inode,block/BLOCKS_PER_META,create);
	if (!m) return ((create)?(-ENOSPC):(0));

	if (!(m->userblocks[idx])) {
		long block;
		if (!create) return 0;
		block=swapfs_alloc_block(SUPER(inode->i_sb));
		if (!block) return -ENOSPC;
		m->userblocks[idx]=block;
		inode->i_blocks+=BLOCKS_PER_PAGE;
	}

	bh_result->b_dev = inode->i_dev;
	bh_result->b_blocknr = m->userblocks[idx];
	bh_result->b_state |= (1UL << BH_Mapped);
	return 0;
}

/* Generic address space operations: each one forwards to the matching
 * block_* helper, supplying swapfs_get_block as the block mapper.
 */

/* writepage: write one page out through block_write_full_page(). */
static int swapfs_writepage(struct page *page)
{
	return block_write_full_page(page,swapfs_get_block);
}
/* readpage: read one page in through block_read_full_page().
 * The file argument is not used.
 */
static int swapfs_readpage(struct file *file, 
			   struct page *page)
{
	return block_read_full_page(page,swapfs_get_block);
}
/* prepare_write: prepare the byte range [from, to) of a page for writing
 * through block_prepare_write().  The file argument is not used.
 */
static int swapfs_prepare_write(struct file *file, 
				struct page *page, 
				unsigned from, 
				unsigned to)
{
	return block_prepare_write(page,from,to,swapfs_get_block);
}
/* bmap: translate a logical file block to a device block through
 * generic_block_bmap().
 */
static int swapfs_bmap(struct address_space *mapping, 
		       long block)
{
	return generic_block_bmap(mapping,block,swapfs_get_block);
}

/*
 * Create and initialise a new in-core inode.
 *
 * sb:   filesystem the inode belongs to.
 * mode: file type and permission bits.
 * dev:  device number, used for special files.
 *
 * Returns the new inode, or NULL when the free-inode counter is zero or
 * new_inode() fails.  The free-inode counter is only charged once an
 * inode has actually been obtained, so a failed new_inode() no longer
 * loses an inode from the count.
 */
static struct inode *swapfs_get_inode(struct super_block *sb, 
				      int mode, 
				      int dev)
{
	struct inode *inode;

	if (!SUPER(sb)->free_inodes)
		return NULL;

	inode = new_inode(sb);
	if (!inode)
		return NULL;

	--SUPER(sb)->free_inodes;

	inode->i_mode = mode;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_blocks = 0;
	inode->i_rdev = to_kdev_t(dev);
	inode->i_mapping->a_ops = &swapfs_aops;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	switch (mode & S_IFMT) {
	default:
		init_special_inode(inode, mode, dev);
		break;
	case S_IFREG:
		inode->i_fop = &swapfs_file_operations;
		break;
	case S_IFDIR:
		inode->i_op = &swapfs_dir_inode_operations;
		inode->i_fop = &swapfs_dir_operations;
		break;
	case S_IFLNK:
		inode->i_op = &page_symlink_inode_operations;
		break;
	}
	/* The embedded union doubles as the root block-map node; start
	 * with no blocks and no children. */
	memset(&inode->u, 0, sizeof(inode->u));

	return inode;
}

/*
 * Create a filesystem object of any type: allocate an inode and attach
 * it to the dentry.
 *
 * dir:    parent directory (only its superblock is used).
 * dentry: name to create.
 * mode:   file type and permission bits.
 * dev:    device number for special files.
 *
 * Returns 0 on success, -ENOSPC when no inode could be obtained.
 */
static int swapfs_mknod(struct inode *dir, 
			struct dentry *dentry, 
			int mode, 
			int dev)
{
	struct inode *inode;

	inode = swapfs_get_inode(dir->i_sb, mode, dev);
	if (!inode)
		return -ENOSPC;

	d_instantiate(dentry, inode);
	/* Extra reference: keeps the dentry pinned in core. */
	dget(dentry);
	return 0;
}

/* mkdir: create a directory by calling swapfs_mknod() with S_IFDIR added
 * to the mode and device number 0.  Returns swapfs_mknod()'s result.
 */
static int swapfs_mkdir(struct inode * dir, 
			struct dentry * dentry, 
			int mode)
{
	return swapfs_mknod(dir, dentry, mode | S_IFDIR, 0);
}

/* create: create a regular file by calling swapfs_mknod() with S_IFREG
 * added to the mode and device number 0.  Returns swapfs_mknod()'s result.
 */
static int swapfs_create(struct inode *dir, 
			 struct dentry *dentry, 
			 int mode)
{
	return swapfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * Create a hard link.
 *
 * old_dentry: existing name whose inode is to be linked.
 * dir:        directory receiving the new name (unused).
 * dentry:     the new name.
 *
 * Returns 0 on success, -EPERM when the target is a directory.
 */
static int swapfs_link(struct dentry *old_dentry,
		       struct inode * dir,
		       struct dentry * dentry)
{
	struct inode *target = old_dentry->d_inode;

	/* Directories may not be hard-linked. */
	if (S_ISDIR(target->i_mode))
		return -EPERM;

	target->i_nlink++;
	/* The new dentry holds its own reference to the inode. */
	atomic_inc(&target->i_count);
	/* Extra reference: keeps the created dentry pinned in core. */
	dget(dentry);
	d_instantiate(dentry, target);

	return 0;
}

/* A dentry counts as "positive" when it has an inode attached and is
 * still hashed.  Returns non-zero for a positive dentry.
 */
static inline int swapfs_positive(struct dentry *dentry)
{
	return dentry->d_inode && !d_unhashed(dentry);
}

/*
 * Decide whether a directory is empty.
 *
 * A directory is empty when none of its child dentries is positive;
 * negative children do not count.  A dentry with no children at all
 * (such as one for a regular file) is therefore always empty.
 *
 * The child list is walked with dcache_lock held.
 *
 * Returns 1 when empty, 0 otherwise.
 */
static int swapfs_empty(struct dentry *dentry)
{
	struct list_head *pos;
	int is_empty = 1;

	spin_lock(&dcache_lock);
	for (pos = dentry->d_subdirs.next;
	     pos != &dentry->d_subdirs;
	     pos = pos->next) {
		struct dentry *child = list_entry(pos, struct dentry, d_child);

		if (swapfs_positive(child)) {
			is_empty = 0;
			break;
		}
	}
	spin_unlock(&dcache_lock);

	return is_empty;
}

/*
 * Remove a name.  Used for regular files and, through the swapfs_rmdir
 * alias, for directories; anything without children counts as empty.
 *
 * dir:    parent directory (unused).
 * dentry: name to remove.
 *
 * Returns 0 on success, -ENOTEMPTY when the dentry still has positive
 * children.
 */
static int swapfs_unlink(struct inode * dir, 
			 struct dentry *dentry)
{
	if (!swapfs_empty(dentry))
		return -ENOTEMPTY;

	dentry->d_inode->i_nlink--;
	/* Drop the extra reference taken at creation time; this is what
	 * actually lets the object go away. */
	dput(dentry);
	return 0;
}

/* rmdir is the same operation as unlink: swapfs_unlink() already refuses
 * to remove a dentry that has positive children.
 */
#define swapfs_rmdir swapfs_unlink

/*
 * Rename.  The dentry manipulation itself is left to the VFS; all that
 * is done here is to drop the link count and the pinning reference of a
 * target that is about to be overwritten, so that it can be freed.
 *
 * old_dir, old_dentry: source (unused).
 * new_dir:             destination directory (unused).
 * new_dentry:          destination name; may or may not have an inode.
 *
 * Returns 0 on success, -ENOTEMPTY when the destination has positive
 * children.
 */
static int swapfs_rename(struct inode * old_dir, 
			 struct dentry *old_dentry, 
			 struct inode * new_dir,
			 struct dentry *new_dentry)
{
	struct inode *victim;

	if (!swapfs_empty(new_dentry))
		return -ENOTEMPTY;

	victim = new_dentry->d_inode;
	if (victim) {
		victim->i_nlink--;
		dput(new_dentry);
	}
	return 0;
}

/*
 * Create a symbolic link: make a symlink inode with full permissions,
 * then store the target string, including its terminating NUL, through
 * block_symlink().
 *
 * Returns 0 on success, or the error from swapfs_mknod() or
 * block_symlink().
 */
static int swapfs_symlink(struct inode * dir,
			  struct dentry *dentry,
			  const char * symname)
{
	int len;
	int error;

	error = swapfs_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
	if (error)
		return error;

	len = strlen(symname) + 1;
	return block_symlink(dentry->d_inode, symname, len);
}

/* fsync handler for files and directories: does nothing and reports
 * success.  All arguments are ignored.
 */
static int swapfs_sync_file(struct file * file,
			    struct dentry *dentry,
			    int datasync)
{
	return 0;
}

/* Page-cache operations installed on every inode's mapping by
 * swapfs_get_inode().
 */
static struct address_space_operations swapfs_aops = {
	.readpage	= swapfs_readpage,
	.writepage	= swapfs_writepage,
	.prepare_write	= swapfs_prepare_write,
	.commit_write	= generic_commit_write,
	.bmap		= swapfs_bmap,
};

/* File operations for regular files: the generic page-cache based
 * read/write/mmap plus the no-op fsync.
 */
static struct file_operations swapfs_file_operations = {
	.read	= generic_file_read,
	.write	= generic_file_write,
	.mmap	= generic_file_mmap,
	.fsync	= swapfs_sync_file,
};

/* File operations for directories: listing is served from the dentry
 * cache; fsync is the no-op handler.
 */
static struct file_operations swapfs_dir_operations = {
	.read		= generic_read_dir,
	.readdir	= dcache_readdir,
	.fsync		= swapfs_sync_file,
};

/* Inode operations for directories (name-space manipulation). */
static struct inode_operations swapfs_dir_inode_operations = {
	.create		= swapfs_create,
	.lookup		= swapfs_lookup,
	.link		= swapfs_link,
	.unlink		= swapfs_unlink,
	.symlink	= swapfs_symlink,
	.mkdir		= swapfs_mkdir,
	.rmdir		= swapfs_rmdir,
	.mknod		= swapfs_mknod,
	.rename		= swapfs_rename,
};

/* put_super: release the bitmap tree rooted in the private superblock
 * data.  Nothing else is freed here.
 */
static void swapfs_put_super(struct super_block *sb)
{
	free_bitmap(SUPER(sb)->N,SUPER(sb)->bitmap);
}

/* Superblock operations.  put_inode is force_delete, which is how inodes
 * end up in swapfs_delete_inode().
 */
static struct super_operations swapfs_ops = {
	.statfs		= swapfs_statfs,
	.put_inode	= force_delete,
	.put_super	= swapfs_put_super,
	.delete_inode	= swapfs_delete_inode,
};

/* ERROR(s): helper for swapfs_read_super().  Logs "SWAPFS: <s>" unless the
 * local variable 'silent' is set, then RETURNS NULL FROM THE ENCLOSING
 * FUNCTION.  s must be a string literal (it is pasted between two others).
 * Any resource held at the call site must be released before using it.
 */
#define ERROR(s) do { if (!silent) printk("SWAPFS: " s "\n");	\
			return NULL; } while(0)

/*
 * Mount entry point: validate the swap header on the device, initialise
 * the private superblock data and create the root directory.
 *
 * sb:     superblock being set up.
 * data:   mount options (unused).
 * silent: non-zero to suppress the diagnostic messages.
 *
 * Returns sb on success, NULL on any failure.
 */
static struct super_block *swapfs_read_super(struct super_block * sb, 
					     void * data, 
					     int silent)
{
	long last_page;
	struct inode * inode;
	struct dentry * root;
	struct buffer_head * bh;
	union swap_header *swap_header;
	struct swapfs_super_block *p;

	/* Doing this on a ramdisk will destroy its contents.
	 * The ramdisk relies on data staying in the block cache,
	 * but changing the block size must flush the block cache.
	 * Just try to mount the device or enable it as swap
	 * _before_ running mkswap. This is not a problem specific
	 * to this FS, try mkswap /dev/ram4 ; swapon /dev/ram4
	 */
	set_blocksize(sb->s_dev, PAGE_CACHE_SIZE);
	bh=bread(sb->s_dev, 0, PAGE_CACHE_SIZE);
	if (!bh) ERROR("I/O error reading swap header");

	/* Every ERROR() below returns, so the buffer is released first. */
	swap_header = (void *) bh->b_data;
	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) {
		brelse(bh);
		ERROR("Oldstyle swap header is not supported!");
	}
	if (memcmp("SWAPSPACE2",swap_header->magic.magic,10)) {
		brelse(bh);
		ERROR("No swap header found!");
	}
	if (swap_header->info.version != 1) {
		brelse(bh);
		ERROR("Only swap header version 1 is supported");
	}
	last_page=swap_header->info.last_page;
	/* Don't allow less than 4 blocks, just in case
	 * it would break somewhere.
	 */
	if (last_page<4) {
		brelse(bh);
		ERROR("Number of pages too small");
	}
	/* Stop now if the number of pages is too large
	 * for the bitmap tree. The number of bits in the
	 * bitmap must be a power of BITS_PER_LONG. On
	 * 32 bit architectures the limit is 32^6 since
	 * 32^7 would overflow.
	 */
	if (last_page>1024*1024*1024-4) {
		brelse(bh);
		ERROR("Number of pages too big");
	}
	brelse(bh);

	/* Block 0 holds the swap header; blocks 1..last_page are free
	 * and form the initial allocation range.
	 */
	p=SUPER(sb);
	p->bitmap=0;
	p->range_start=1;
	p->range_end=last_page+1;
	p->free_inodes=p->total_inodes=2*last_page;
	p->free_blocks=last_page;
	p->total_blocks=last_page;
	/* N must be STRICTLY greater than last_page+1: freeing block
	 * last_page toggles bit last_page+1, and the bitmap routines
	 * only accept indices below N (N itself is their "not found"
	 * value).  With '<' here, a device with last_page+1 equal to a
	 * power of BITS_PER_LONG would index one child past the end of
	 * the top bitmap node.  The size check above keeps N within
	 * 32^6.
	 */
	for (p->N=1;p->N<=last_page+1;p->N*=BITS_PER_LONG);

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = SWAPFS_MAGIC;
	sb->s_op = &swapfs_ops;
	inode = swapfs_get_inode(sb, S_IFDIR | 0755, 0);
	if (!inode)
		return NULL;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		ERROR("Could not create filesystem root");
	}
	sb->s_root = root;
	return sb;
}

/* Filesystem type descriptor registered at module load: name "swapfs",
 * mounted through swapfs_read_super, with the FS_LITTER and
 * FS_REQUIRES_DEV flags.
 */
static DECLARE_FSTYPE(swapfs_fs_type, "swapfs", swapfs_read_super,
		      FS_LITTER|FS_REQUIRES_DEV);

/*
 * Create the two slab caches used by this module.
 *
 * Returns 0 on success and 1 when either cache could not be created.
 * On failure a cache that was already created is left in place; the
 * caller is expected to clean up with exit_caches().
 */
static int init_caches(void)
{
	meta_cache = kmem_cache_create("swapfs-meta",
				       sizeof(struct meta_entry),
				       0,
				       0,
				       NULL,
				       NULL);
	if (!meta_cache)
		return 1;

	bitmap_cache = kmem_cache_create("swapfs-bitmap",
					 sizeof(struct bitmap),
					 0,
					 0,
					 NULL,
					 NULL);
	if (!bitmap_cache)
		return 1;

	return 0;
}

/*
 * Destroy whichever slab caches currently exist.  Safe to call after a
 * partially failed init_caches(); the pointers are cleared afterwards so
 * that a repeated call is harmless.
 */
static void exit_caches(void)
{
	if (meta_cache) {
		kmem_cache_destroy(meta_cache);
		meta_cache = NULL;
	}
	if (bitmap_cache) {
		kmem_cache_destroy(bitmap_cache);
		bitmap_cache = NULL;
	}
}

/* Link-time assertions.  These functions are only declared; no definition
 * is visible in this file.  init_swapfs_fs() calls them under conditions
 * that are compile-time constants, so when the private structures fit in
 * the unions they are overlaid on, the optimizer removes the calls.  If a
 * structure does not fit, the call survives (presumably surfacing as an
 * unresolved symbol whose name describes the problem).
 */
extern void ERROR_META_DOES_NOT_FIT_IN_STRUCT_INODE();
extern void ERROR_SWAPFS_DOES_NOT_FIT_IN_STRUCT_SUPER_BLOCK();

/*
 * Module initialisation: create the slab caches and register the
 * filesystem type.
 *
 * Returns 0 on success or a negative errno: -ENOMEM when the caches
 * cannot be created, otherwise the error from register_filesystem().
 * (A positive return value is not a valid module-init result.)
 */
static int __init init_swapfs_fs(void)
{
	int err;

	/* Size checks on compile-time constants; the calls vanish when
	 * the private structures fit in their host unions. */
	if (BLOCKS_PER_META<1) ERROR_META_DOES_NOT_FIT_IN_STRUCT_INODE();
	if (sizeof(struct swapfs_super_block)>
	    sizeof(((struct super_block*)NULL)->u))
	  ERROR_SWAPFS_DOES_NOT_FIT_IN_STRUCT_SUPER_BLOCK();

	if (init_caches()) {
		printk("swapfs: Could not create slab caches\n");
		exit_caches();
		return -ENOMEM;
	}

	err = register_filesystem(&swapfs_fs_type);
	if (err) {
		printk("swapfs: Could not register filesystem\n");
		exit_caches();
		return err;
	}

	return 0;
}

/* Module unload: unregister the filesystem type first, then destroy the
 * slab caches.
 */
static void __exit exit_swapfs_fs(void)
{
	unregister_filesystem(&swapfs_fs_type);
	exit_caches();
}

/* Hook the functions above up as the module's load and unload handlers. */
module_init(init_swapfs_fs)
module_exit(exit_swapfs_fs)