diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82c19d5ba28dea07018e4fb13a025f..670d1343f914dcd7b26ba381138d1ce0827c58ed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -858,6 +858,7 @@ struct ext4_inode_info {
 	spinlock_t i_completed_io_lock;
 	/* current io_end structure for async DIO write*/
 	ext4_io_end_t *cur_aio_dio;
+	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 
 	/*
 	 * Transactions that contain inode's metadata needed to complete
@@ -2060,6 +2061,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
 extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d9764b82075071c94f661cc65c3675fc..a24c8cca7370304a82d81996ba59986b448e967a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@
 
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
 
+#define WQ_HASH_SZ		37
+#define to_ioend_wq(v)	(&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+
 int __init ext4_init_pageio(void)
 {
+	int i;
+
 	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
 	if (io_page_cachep == NULL)
 		return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
 		kmem_cache_destroy(io_page_cachep);
 		return -ENOMEM;
 	}
+	for (i = 0; i < WQ_HASH_SZ; i++)
+		init_waitqueue_head(&ioend_wq[i]);
 
 	return 0;
 }
@@ -52,9 +60,17 @@ void ext4_exit_pageio(void)
 	kmem_cache_destroy(io_page_cachep);
 }
 
+void ext4_ioend_wait(struct inode *inode)
+{
+	wait_queue_head_t *wq = to_ioend_wq(inode);
+
+	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+}
+
 void ext4_free_io_end(ext4_io_end_t *io)
 {
 	int i;
+	wait_queue_head_t *wq;
 
 	BUG_ON(!io);
 	if (io->page)
@@ -69,7 +85,10 @@ void ext4_free_io_end(ext4_io_end_t *io)
 		}
 	}
 	io->num_io_pages = 0;
-	iput(io->inode);
+	wq = to_ioend_wq(io->inode);
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+	    waitqueue_active(wq))
+		wake_up_all(wq);
 	kmem_cache_free(io_end_cachep, io);
 }
 
@@ -142,8 +161,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 	io = kmem_cache_alloc(io_end_cachep, flags);
 	if (io) {
 		memset(io, 0, sizeof(*io));
-		io->inode = igrab(inode);
-		BUG_ON(!io->inode);
+		atomic_inc(&EXT4_I(inode)->i_ioend_count);
+		io->inode = inode;
 		INIT_WORK(&io->work, ext4_end_io_work);
 		INIT_LIST_HEAD(&io->list);
 	}
@@ -171,35 +190,15 @@ static void ext4_end_bio(struct bio *bio, int error)
 	struct workqueue_struct *wq;
 	struct inode *inode;
 	unsigned long flags;
-	ext4_fsblk_t err_block;
 	int i;
 
 	BUG_ON(!io_end);
-	inode = io_end->inode;
 	bio->bi_private = NULL;
 	bio->bi_end_io = NULL;
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = 0;
-	err_block = bio->bi_sector >> (inode->i_blkbits - 9);
 	bio_put(bio);
 
-	if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
-		pr_err("sb umounted, discard end_io request for inode %lu\n",
-			io_end->inode->i_ino);
-		ext4_free_io_end(io_end);
-		return;
-	}
-
-	if (error) {
-		io_end->flag |= EXT4_IO_END_ERROR;
-		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
-			     "(offset %llu size %ld starting block %llu)",
-			     inode->i_ino,
-			     (unsigned long long) io_end->offset,
-			     (long) io_end->size,
-			     (unsigned long long) err_block);
-	}
-
 	for (i = 0; i < io_end->num_io_pages; i++) {
 		struct page *page = io_end->pages[i]->p_page;
 		struct buffer_head *bh, *head;
@@ -254,8 +253,19 @@ static void ext4_end_bio(struct bio *bio, int error)
 		if (!partial_write)
 			SetPageUptodate(page);
 	}
-
 	io_end->num_io_pages = 0;
+	inode = io_end->inode;
+
+	if (error) {
+		io_end->flag |= EXT4_IO_END_ERROR;
+		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+			     "(offset %llu size %ld starting block %llu)",
+			     inode->i_ino,
+			     (unsigned long long) io_end->offset,
+			     (long) io_end->size,
+			     (unsigned long long)
+			     bio->bi_sector >> (inode->i_blkbits - 9));
+	}
 
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +315,6 @@ static int io_submit_init(struct ext4_io_submit *io,
 	bio->bi_private = io->io_end = io_end;
 	bio->bi_end_io = ext4_end_bio;
 
-	io_end->inode = inode;
 	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
 
 	io->io_bio = bio;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 04352e9729d0d3c9f32fd04434706ce4d22c95e4..45653af889532e9e49e4236392ad78049b66439b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -828,12 +828,14 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->cur_aio_dio = NULL;
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
+	atomic_set(&ei->i_ioend_count, 0);
 
 	return &ei->vfs_inode;
 }
 
 static void ext4_destroy_inode(struct inode *inode)
 {
+	ext4_ioend_wait(inode);
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 		ext4_msg(inode->i_sb, KERN_ERR,
 			 "Inode %lu (%p): orphan list check failed!",