diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 02971e239a182debc69aea1306169be981d97086..ece6926fa2e6aa242e38a189a7b3a8f7ed22e140 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -449,12 +449,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 		return ret;
 
 	rt = (struct rt6_info *)dst;
-	if (ipv6_addr_any(&fl6.saddr)) {
-		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
-					 &fl6.daddr, 0, &fl6.saddr);
-		if (ret)
-			goto put;
-
+	if (ipv6_addr_any(&src_in->sin6_addr)) {
 		src_in->sin6_family = AF_INET6;
 		src_in->sin6_addr = fl6.saddr;
 	}
@@ -471,9 +466,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 
 	*pdst = dst;
 	return 0;
-put:
-	dst_release(dst);
-	return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ebf7be8d4139b87dd8232568603ab1aa8c19d045..08772836fded416e41be8eccfe53cf3e8427691b 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -56,6 +56,10 @@
 #define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_CQ_COUNT		(64 * 1024)
 
+#define BNXT_RE_UD_QP_HW_STALL		0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD	32
+
 struct bnxt_re_work {
 	struct work_struct	work;
 	unsigned long		event;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7ba9e699d7abc65cb82fbd588e9ea12e7f71433b..c7bd68311d0c5317973ecf9c3fb8fce8fe8ffad4 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,48 @@
 #include "ib_verbs.h"
 #include <rdma/bnxt_re-abi.h>
 
+static int __from_ib_access_flags(int iflags)
+{
+	int qflags = 0;
+
+	if (iflags & IB_ACCESS_LOCAL_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_READ)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+	if (iflags & IB_ACCESS_REMOTE_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+	if (iflags & IB_ACCESS_MW_BIND)
+		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+	if (iflags & IB_ZERO_BASED)
+		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+	if (iflags & IB_ACCESS_ON_DEMAND)
+		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+	return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+	enum ib_access_flags iflags = 0;
+
+	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+		iflags |= IB_ACCESS_LOCAL_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+		iflags |= IB_ACCESS_REMOTE_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+		iflags |= IB_ACCESS_REMOTE_READ;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+		iflags |= IB_ACCESS_REMOTE_ATOMIC;
+	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+		iflags |= IB_ACCESS_MW_BIND;
+	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+		iflags |= IB_ZERO_BASED;
+	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+		iflags |= IB_ACCESS_ON_DEMAND;
+	return iflags;
+};
+
 static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
 			     struct bnxt_qplib_sge *sg_list, int num)
 {
@@ -149,8 +191,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_total_mcast_qp_attach = 0;
 	ib_attr->max_ah = dev_attr->max_ah;
 
-	ib_attr->max_fmr = dev_attr->max_fmr;
-	ib_attr->max_map_per_fmr = 1;	/* ? */
+	ib_attr->max_fmr = 0;
+	ib_attr->max_map_per_fmr = 0;
 
 	ib_attr->max_srq = dev_attr->max_srq;
 	ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
@@ -410,6 +452,158 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
 	return IB_LINK_LAYER_ETHERNET;
 }
 
+#define	BNXT_RE_FENCE_PBL_SIZE	DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct ib_mr *ib_mr = &fence->mr->ib_mr;
+	struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+	memset(wqe, 0, sizeof(*wqe));
+	wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+	wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	wqe->bind.zero_based = false;
+	wqe->bind.parent_l_key = ib_mr->lkey;
+	wqe->bind.va = (u64)(unsigned long)fence->va;
+	wqe->bind.length = fence->size;
+	wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+	wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+	/* Save the initial rkey in fence structure for now;
+	 * wqe->bind.r_key will be set at (re)bind time.
+	 */
+	fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+	struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+					     qplib_qp);
+	struct ib_pd *ib_pd = qp->ib_qp.pd;
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+	struct bnxt_qplib_swqe wqe;
+	int rc;
+
+	memcpy(&wqe, fence_wqe, sizeof(wqe));
+	wqe.bind.r_key = fence->bind_rkey;
+	fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+	dev_dbg(rdev_to_dev(qp->rdev),
+		"Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+		wqe.bind.r_key, qp->qplib_qp.id, pd);
+	rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+	if (rc) {
+		dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+		return rc;
+	}
+	bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+	return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = fence->mr;
+
+	if (fence->mw) {
+		bnxt_re_dealloc_mw(fence->mw);
+		fence->mw = NULL;
+	}
+	if (mr) {
+		if (mr->ib_mr.rkey)
+			bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+					     true);
+		if (mr->ib_mr.lkey)
+			bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+		kfree(mr);
+		fence->mr = NULL;
+	}
+	if (fence->dma_addr) {
+		dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+				 DMA_BIDIRECTIONAL);
+		fence->dma_addr = 0;
+	}
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+	int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = NULL;
+	dma_addr_t dma_addr = 0;
+	struct ib_mw *mw;
+	u64 pbl_tbl;
+	int rc;
+
+	dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+				  DMA_BIDIRECTIONAL);
+	rc = dma_mapping_error(dev, dma_addr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+		rc = -EIO;
+		fence->dma_addr = 0;
+		goto fail;
+	}
+	fence->dma_addr = dma_addr;
+
+	/* Allocate a MR */
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	fence->mr = mr;
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+		goto fail;
+	}
+
+	/* Register MR */
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+	mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+	pbl_tbl = dma_addr;
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+			       BNXT_RE_FENCE_PBL_SIZE, false);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+		goto fail;
+	}
+	mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+	/* Create a fence MW only for kernel consumers */
+	mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+	if (!mw) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to create fence-MW for PD: %p\n", pd);
+		rc = -EINVAL;
+		goto fail;
+	}
+	fence->mw = mw;
+
+	bnxt_re_create_fence_wqe(pd);
+	return 0;
+
+fail:
+	bnxt_re_destroy_fence_mr(pd);
+	return rc;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
@@ -417,6 +611,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 	struct bnxt_re_dev *rdev = pd->rdev;
 	int rc;
 
+	bnxt_re_destroy_fence_mr(pd);
 	if (ib_pd->uobject && pd->dpi.dbr) {
 		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
 		struct bnxt_re_ucontext *ucntx;
@@ -498,6 +693,10 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 		}
 	}
 
+	if (!udata)
+		if (bnxt_re_create_fence_mr(pd))
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to create Fence-MR\n");
 	return &pd->ib_pd;
 dbfail:
 	(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -849,12 +1048,16 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
 	/* Shadow QP SQ depth should be same as QP1 RQ depth */
 	qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.sq.max_sge = 2;
+	/* Q full delta can be 1 since it is internal QP */
+	qp->qplib_qp.sq.q_full_delta = 1;
 
 	qp->qplib_qp.scq = qp1_qp->scq;
 	qp->qplib_qp.rcq = qp1_qp->rcq;
 
 	qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+	/* Q full delta can be 1 since it is internal QP */
+	qp->qplib_qp.rq.q_full_delta = 1;
 
 	qp->qplib_qp.mtu = qp1_qp->mtu;
 
@@ -917,10 +1120,6 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
 				  IB_SIGNAL_ALL_WR) ? true : false);
 
-	entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
-	qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-					dev_attr->max_qp_wqes + 1);
-
 	qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
 	if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
 		qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
@@ -959,6 +1158,9 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
 
+		qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						qp_init_attr->cap.max_recv_wr;
+
 		qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -967,6 +1169,12 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 
 	if (qp_init_attr->qp_type == IB_QPT_GSI) {
+		/* Allocate 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_init_attr->cap.max_send_wr;
 		qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -1006,6 +1214,22 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		}
 
 	} else {
+		/* Allocate 128 + 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+					     BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes +
+						BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+		/*
+		 * Reserving one slot for Phantom WQE. Application can
+		 * post one extra entry in this case. But allowing this to avoid
+		 * unexpected Queue full condition
+		 */
+
+		qp->qplib_qp.sq.q_full_delta -= 1;
+
 		qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
 		qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
 		if (udata) {
@@ -1025,6 +1249,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 
 	qp->ib_qp.qp_num = qp->qplib_qp.id;
 	spin_lock_init(&qp->sq_lock);
+	spin_lock_init(&qp->rq_lock);
 
 	if (udata) {
 		struct bnxt_re_qp_resp resp;
@@ -1129,48 +1354,6 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
 	}
 }
 
-static int __from_ib_access_flags(int iflags)
-{
-	int qflags = 0;
-
-	if (iflags & IB_ACCESS_LOCAL_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_READ)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
-	if (iflags & IB_ACCESS_REMOTE_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
-	if (iflags & IB_ACCESS_MW_BIND)
-		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
-	if (iflags & IB_ZERO_BASED)
-		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
-	if (iflags & IB_ACCESS_ON_DEMAND)
-		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
-	return qflags;
-};
-
-static enum ib_access_flags __to_ib_access_flags(int qflags)
-{
-	enum ib_access_flags iflags = 0;
-
-	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
-		iflags |= IB_ACCESS_LOCAL_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
-		iflags |= IB_ACCESS_REMOTE_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
-		iflags |= IB_ACCESS_REMOTE_READ;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
-		iflags |= IB_ACCESS_REMOTE_ATOMIC;
-	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
-		iflags |= IB_ACCESS_MW_BIND;
-	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
-		iflags |= IB_ZERO_BASED;
-	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
-		iflags |= IB_ACCESS_ON_DEMAND;
-	return iflags;
-};
-
 static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
 				    struct bnxt_re_qp *qp1_qp,
 				    int qp_attr_mask)
@@ -1378,11 +1561,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
 		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_attr->cap.max_send_wr;
+		/*
+		 * Reserving one slot for Phantom WQE. Some application can
+		 * post one extra entry in this case. Allowing this to avoid
+		 * unexpected Queue full condition
+		 */
+		qp->qplib_qp.sq.q_full_delta -= 1;
 		qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
 		if (qp->qplib_qp.rq.max_wqe) {
 			entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
 			qp->qplib_qp.rq.max_wqe =
 				min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+			qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						       qp_attr->cap.max_recv_wr;
 			qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
 		} else {
 			/* SRQ was used prior, just ignore the RQ caps */
@@ -1883,6 +2076,22 @@ static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
 	return payload_sz;
 }
 
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+	if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+	     qp->ib_qp.qp_type == IB_QPT_GSI ||
+	     qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+	     qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+		int qp_attr_mask;
+		struct ib_qp_attr qp_attr;
+
+		qp_attr_mask = IB_QP_STATE;
+		qp_attr.qp_state = IB_QPS_RTS;
+		bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+		qp->qplib_qp.wqe_cnt = 0;
+	}
+}
+
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
 				struct ib_send_wr *wr)
@@ -1928,6 +2137,7 @@ bad:
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 	return rc;
 }
@@ -2024,6 +2234,7 @@ bad:
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 
 	return rc;
@@ -2071,7 +2282,10 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0, payload_sz = 0;
+	unsigned long flags;
+	u32 count = 0;
 
+	spin_lock_irqsave(&qp->rq_lock, flags);
 	while (wr) {
 		/* House keeping */
 		memset(&wqe, 0, sizeof(wqe));
@@ -2100,9 +2314,21 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 			*bad_wr = wr;
 			break;
 		}
+
+		/* Ring DB if the RQEs posted reaches a threshold value */
+		if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+			bnxt_qplib_post_recv_db(&qp->qplib_qp);
+			count = 0;
+		}
+
 		wr = wr->next;
 	}
-	bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	if (count)
+		bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	spin_unlock_irqrestore(&qp->rq_lock, flags);
+
 	return rc;
 }
 
@@ -2643,12 +2869,36 @@ static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
 		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 }
 
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+	struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&qp->sq_lock, flags);
+
+	rc = bnxt_re_bind_fence_mw(lib_qp);
+	if (!rc) {
+		lib_qp->sq.phantom_wqe_cnt++;
+		dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+			"qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+			lib_qp->id, lib_qp->sq.hwq.prod,
+			HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+			lib_qp->sq.phantom_wqe_cnt);
+	}
+
+	spin_unlock_irqrestore(&qp->sq_lock, flags);
+	return rc;
+}
+
 int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 {
 	struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
 	struct bnxt_re_qp *qp;
 	struct bnxt_qplib_cqe *cqe;
 	int i, ncqe, budget;
+	struct bnxt_qplib_q *sq;
+	struct bnxt_qplib_qp *lib_qp;
 	u32 tbl_idx;
 	struct bnxt_re_sqp_entries *sqp_entry = NULL;
 	unsigned long flags;
@@ -2661,7 +2911,21 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 	}
 	cqe = &cq->cql[0];
 	while (budget) {
-		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+		lib_qp = NULL;
+		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+		if (lib_qp) {
+			sq = &lib_qp->sq;
+			if (sq->send_phantom) {
+				qp = container_of(lib_qp,
+						  struct bnxt_re_qp, qplib_qp);
+				if (send_phantom_wqe(qp) == -ENOMEM)
+					dev_err(rdev_to_dev(cq->rdev),
+						"Phantom failed! Scheduled to send again\n");
+				else
+					sq->send_phantom = false;
+			}
+		}
+
 		if (!ncqe)
 			break;
 
@@ -2822,6 +3086,12 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 	struct bnxt_re_dev *rdev = mr->rdev;
 	int rc;
 
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+		return rc;
+	}
+
 	if (mr->npages && mr->pages) {
 		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
 							&mr->qplib_frpl);
@@ -2829,8 +3099,6 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 		mr->npages = 0;
 		mr->pages = NULL;
 	}
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-
 	if (!IS_ERR_OR_NULL(mr->ib_umem))
 		ib_umem_release(mr->ib_umem);
 
@@ -2914,97 +3182,52 @@ fail:
 	return ERR_PTR(rc);
 }
 
-/* Fast Memory Regions */
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr)
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata)
 {
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
-	struct bnxt_re_fmr *fmr;
+	struct bnxt_re_mw *mw;
 	int rc;
 
-	if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
-	    fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
-		dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+	if (!mw)
 		return ERR_PTR(-ENOMEM);
-	}
-	fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
-	if (!fmr)
-		return ERR_PTR(-ENOMEM);
-
-	fmr->rdev = rdev;
-	fmr->qplib_fmr.pd = &pd->qplib_pd;
-	fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+	mw->rdev = rdev;
+	mw->qplib_mw.pd = &pd->qplib_pd;
 
-	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
+	mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
 		goto fail;
+	}
+	mw->ib_mw.rkey = mw->qplib_mw.rkey;
 
-	fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
-	fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
-	fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+	atomic_inc(&rdev->mw_count);
+	return &mw->ib_mw;
 
-	atomic_inc(&rdev->mr_count);
-	return &fmr->ib_fmr;
 fail:
-	kfree(fmr);
+	kfree(mw);
 	return ERR_PTR(rc);
 }
 
-int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
-			 u64 iova)
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
 {
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					     ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
+	struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+	struct bnxt_re_dev *rdev = mw->rdev;
 	int rc;
 
-	fmr->qplib_fmr.va = iova;
-	fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
-
-	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
-			       list_len, true);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
-			fmr->ib_fmr.lkey);
-	return rc;
-}
-
-int bnxt_re_unmap_fmr(struct list_head *fmr_list)
-{
-	struct bnxt_re_dev *rdev;
-	struct bnxt_re_fmr *fmr;
-	struct ib_fmr *ib_fmr;
-	int rc = 0;
-
-	/* Validate each FMRs inside the fmr_list */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
-		rdev = fmr->rdev;
-
-		if (rdev) {
-			rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
-						  &fmr->qplib_fmr, true);
-			if (rc)
-				break;
-		}
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+		return rc;
 	}
-	return rc;
-}
-
-int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
-{
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					       ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
-	int rc;
 
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to free FMR");
-
-	kfree(fmr);
-	atomic_dec(&rdev->mr_count);
+	kfree(mw);
+	atomic_dec(&rdev->mw_count);
 	return rc;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c3d71765454f6e8476b86fd329d63b45d7735f6..6c160f6a5398702d4b73a3c319db3660f472c190 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -44,11 +44,23 @@ struct bnxt_re_gid_ctx {
 	u32			refcnt;
 };
 
+#define BNXT_RE_FENCE_BYTES	64
+struct bnxt_re_fence_data {
+	u32 size;
+	u8 va[BNXT_RE_FENCE_BYTES];
+	dma_addr_t dma_addr;
+	struct bnxt_re_mr *mr;
+	struct ib_mw *mw;
+	struct bnxt_qplib_swqe bind_wqe;
+	u32 bind_rkey;
+};
+
 struct bnxt_re_pd {
 	struct bnxt_re_dev	*rdev;
 	struct ib_pd		ib_pd;
 	struct bnxt_qplib_pd	qplib_pd;
 	struct bnxt_qplib_dpi	dpi;
+	struct bnxt_re_fence_data fence;
 };
 
 struct bnxt_re_ah {
@@ -62,6 +74,7 @@ struct bnxt_re_qp {
 	struct bnxt_re_dev	*rdev;
 	struct ib_qp		ib_qp;
 	spinlock_t		sq_lock;	/* protect sq */
+	spinlock_t		rq_lock;	/* protect rq */
 	struct bnxt_qplib_qp	qplib_qp;
 	struct ib_umem		*sumem;
 	struct ib_umem		*rumem;
@@ -181,12 +194,9 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
 			       u32 max_num_sg);
 int bnxt_re_dereg_mr(struct ib_mr *mr);
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr);
-int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
-			 u64 iova);
-int bnxt_re_unmap_fmr(struct list_head *fmr_list);
-int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int mr_access_flags,
 				  struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 5d355401179b8ae5107e411421c673db86164696..1fce5e73216be1bc74c23d2773690dc824d0e251 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -507,10 +507,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->dereg_mr			= bnxt_re_dereg_mr;
 	ibdev->alloc_mr			= bnxt_re_alloc_mr;
 	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
-	ibdev->alloc_fmr		= bnxt_re_alloc_fmr;
-	ibdev->map_phys_fmr		= bnxt_re_map_phys_fmr;
-	ibdev->unmap_fmr		= bnxt_re_unmap_fmr;
-	ibdev->dealloc_fmr		= bnxt_re_dealloc_fmr;
 
 	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 43d08b5e908525eae96b567178fdc8ad15dec9c5..f05500bcdcf1e35d21a88aba70b013d0d0d8a2cf 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -284,7 +284,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_qp1 req;
-	struct creq_create_qp1_resp *resp;
+	struct creq_create_qp1_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct bnxt_qplib_q *sq = &qp->sq;
 	struct bnxt_qplib_q *rq = &qp->rq;
@@ -394,31 +394,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp1_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -442,7 +423,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
 	struct cmdq_create_qp req;
-	struct creq_create_qp_resp *resp;
+	struct creq_create_qp_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct sq_psn_search **psn_search_ptr;
 	unsigned long int psn_search, poff = 0;
@@ -627,31 +608,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	}
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -769,10 +731,11 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_modify_qp req;
-	struct creq_modify_qp_resp *resp;
+	struct creq_modify_qp_resp resp;
 	u16 cmd_flags = 0, pkey;
 	u32 temp32[4];
 	u32 bmask;
+	int rc;
 
 	RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
 
@@ -862,27 +825,10 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
 
-	resp = (struct creq_modify_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	qp->cur_qp_state = qp->state;
 	return 0;
 }
@@ -891,37 +837,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_query_qp req;
-	struct creq_query_qp_resp *resp;
+	struct creq_query_qp_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_qp_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
 
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf)
+		return -ENOMEM;
+	sb = sbuf->sb;
+
 	req.qp_cid = cpu_to_le32(qp->id);
 	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     (void **)&sb, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
 	/* Extract the context from the side buffer */
 	qp->state = sb->en_sqd_async_notify_state &
 			CREQ_QUERY_QP_RESP_SB_STATE_MASK;
@@ -976,7 +911,9 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
 	memcpy(qp->smac, sb->src_mac, 6);
 	qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
-	return 0;
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
@@ -1021,34 +958,18 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_qp req;
-	struct creq_destroy_qp_resp *resp;
+	struct creq_destroy_qp_resp resp;
 	unsigned long flags;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
 
 	req.qp_cid = cpu_to_le32(qp->id);
-	resp = (struct creq_destroy_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 
 	/* Must walk the associated CQs to nullified the QP ptr */
 	spin_lock_irqsave(&qp->scq->hwq.lock, flags);
@@ -1162,8 +1083,12 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
-	    HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+
+	if (bnxt_qplib_queue_full(sq)) {
+		dev_err(&sq->hwq.pdev->dev,
+			"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+			sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+			sq->q_full_delta);
 		rc = -ENOMEM;
 		goto done;
 	}
@@ -1373,6 +1298,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 	}
 
 	sq->hwq.prod++;
+
+	qp->wqe_cnt++;
+
 done:
 	return rc;
 }
@@ -1411,8 +1339,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
-	    HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+	if (bnxt_qplib_queue_full(rq)) {
 		dev_err(&rq->hwq.pdev->dev,
 			"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
 		rc = -EINVAL;
@@ -1483,7 +1410,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_cq req;
-	struct creq_create_cq_resp *resp;
+	struct creq_create_cq_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	u16 cmd_flags = 0;
 	int rc;
@@ -1525,30 +1452,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 			(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
 			 CMDQ_CREATE_CQ_CNQ_ID_SFT);
 
-	resp = (struct creq_create_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	cq->id = le32_to_cpu(resp->xid);
+
+	cq->id = le32_to_cpu(resp.xid);
 	cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
 	cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
 	init_waitqueue_head(&cq->waitq);
@@ -1566,33 +1475,17 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_cq req;
-	struct creq_destroy_cq_resp *resp;
+	struct creq_destroy_cq_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
 
 	req.cq_cid = cpu_to_le32(cq->id);
-	resp = (struct creq_destroy_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
 	return 0;
 }
@@ -1664,14 +1557,113 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
 	return rc;
 }
 
+/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
+ *       CQE is track from sw_cq_cons to max_element but valid only if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+		     u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_swq *swq;
+	u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+	struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+	struct cq_req *peek_req_hwcqe;
+	struct bnxt_qplib_qp *peek_qp;
+	struct bnxt_qplib_q *peek_sq;
+	int i, rc = 0;
+
+	/* Normal mode */
+	/* Check for the psn_search marking before completing */
+	swq = &sq->swq[sw_sq_cons];
+	if (swq->psn_search &&
+	    le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+		/* Unmark */
+		swq->psn_search->flags_next_psn = cpu_to_le32
+			(le32_to_cpu(swq->psn_search->flags_next_psn)
+				     & ~0x80000000);
+		dev_dbg(&cq->hwq.pdev->dev,
+			"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		sq->condition = true;
+		sq->send_phantom = true;
+
+		/* TODO: Only ARM if the previous SQE is ARMALL */
+		bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+		rc = -EAGAIN;
+		goto out;
+	}
+	if (sq->condition) {
+		/* Peek at the completions */
+		peek_raw_cq_cons = cq->hwq.cons;
+		peek_sw_cq_cons = cq_cons;
+		i = cq->hwq.max_elements;
+		while (i--) {
+			peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+			peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+			peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+						     [CQE_IDX(peek_sw_cq_cons)];
+			/* If the next hwcqe is VALID */
+			if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+					  cq->hwq.max_elements)) {
+				/* If the next hwcqe is a REQ */
+				if ((peek_hwcqe->cqe_type_toggle &
+				    CQ_BASE_CQE_TYPE_MASK) ==
+				    CQ_BASE_CQE_TYPE_REQ) {
+					peek_req_hwcqe = (struct cq_req *)
+							 peek_hwcqe;
+					peek_qp = (struct bnxt_qplib_qp *)
+						((unsigned long)
+						 le64_to_cpu
+						 (peek_req_hwcqe->qp_handle));
+					peek_sq = &peek_qp->sq;
+					peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+						peek_req_hwcqe->sq_cons_idx) - 1
+						, &sq->hwq);
+					/* If the hwcqe's sq's wr_id matches */
+					if (peek_sq == sq &&
+					    sq->swq[peek_sq_cons_idx].wr_id ==
+					    BNXT_QPLIB_FENCE_WRID) {
+						/*
+						 *  Unbreak only if the phantom
+						 *  comes back
+						 */
+						dev_dbg(&cq->hwq.pdev->dev,
+							"FP:Got Phantom CQE");
+						sq->condition = false;
+						sq->single = true;
+						rc = 0;
+						goto out;
+					}
+				}
+				/* Valid but not the phantom, so keep looping */
+			} else {
+				/* Not valid yet, just exit and wait */
+				rc = -EINVAL;
+				goto out;
+			}
+			peek_sw_cq_cons++;
+			peek_raw_cq_cons++;
+		}
+		dev_err(&cq->hwq.pdev->dev,
+			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		rc = -EINVAL;
+	}
+out:
+	return rc;
+}
+
 static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 				     struct cq_req *hwcqe,
-				     struct bnxt_qplib_cqe **pcqe, int *budget)
+				     struct bnxt_qplib_cqe **pcqe, int *budget,
+				     u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
 {
 	struct bnxt_qplib_qp *qp;
 	struct bnxt_qplib_q *sq;
 	struct bnxt_qplib_cqe *cqe;
-	u32 sw_cons, cqe_cons;
+	u32 sw_sq_cons, cqe_sq_cons;
+	struct bnxt_qplib_swq *swq;
 	int rc = 0;
 
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -1683,13 +1675,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 	}
 	sq = &qp->sq;
 
-	cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
-	if (cqe_cons > sq->hwq.max_elements) {
+	cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+	if (cqe_sq_cons > sq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: FP: CQ Process req reported ");
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
-			cqe_cons, sq->hwq.max_elements);
+			cqe_sq_cons, sq->hwq.max_elements);
 		return -EINVAL;
 	}
 	/* If we were in the middle of flushing the SQ, continue */
@@ -1698,53 +1690,74 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 
 	/* Require to walk the sq's swq to fabricate CQEs for all previously
 	 * signaled SWQEs due to CQE aggregation from the current sq cons
-	 * to the cqe_cons
+	 * to the cqe_sq_cons
 	 */
 	cqe = *pcqe;
 	while (*budget) {
-		sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
-		if (sw_cons == cqe_cons)
+		sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+		if (sw_sq_cons == cqe_sq_cons)
+			/* Done */
 			break;
+
+		swq = &sq->swq[sw_sq_cons];
 		memset(cqe, 0, sizeof(*cqe));
 		cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
 		cqe->qp_handle = (u64)(unsigned long)qp;
 		cqe->src_qp = qp->id;
-		cqe->wr_id = sq->swq[sw_cons].wr_id;
-		cqe->type = sq->swq[sw_cons].type;
+		cqe->wr_id = swq->wr_id;
+		if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+			goto skip;
+		cqe->type = swq->type;
 
 		/* For the last CQE, check for status.  For errors, regardless
 		 * of the request being signaled or not, it must complete with
 		 * the hwcqe error status
 		 */
-		if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+		if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
 		    hwcqe->status != CQ_REQ_STATUS_OK) {
 			cqe->status = hwcqe->status;
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Processed Req ");
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
-				sw_cons, cqe->wr_id, cqe->status);
+				sw_sq_cons, cqe->wr_id, cqe->status);
 			cqe++;
 			(*budget)--;
 			sq->flush_in_progress = true;
 			/* Must block new posting of SQ and RQ */
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+			sq->condition = false;
+			sq->single = false;
 		} else {
-			if (sq->swq[sw_cons].flags &
-			    SQ_SEND_FLAGS_SIGNAL_COMP) {
+			if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+				/* Before we complete, do WA 9060 */
+				if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+					      cqe_sq_cons)) {
+					*lib_qp = qp;
+					goto out;
+				}
 				cqe->status = CQ_REQ_STATUS_OK;
 				cqe++;
 				(*budget)--;
 			}
 		}
+skip:
 		sq->hwq.cons++;
+		if (sq->single)
+			break;
 	}
+out:
 	*pcqe = cqe;
-	if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+	if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
 		/* Out of budget */
 		rc = -EAGAIN;
 		goto done;
 	}
+	/*
+	 * Back to normal completion mode only after it has completed all of
+	 * the WC for this CQE
+	 */
+	sq->single = false;
 	if (!sq->flush_in_progress)
 		goto done;
 flush:
@@ -2074,7 +2087,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
 }
 
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num_cqes)
+		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe, **hw_cqe_ptr;
 	unsigned long flags;
@@ -2099,7 +2112,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		case CQ_BASE_CQE_TYPE_REQ:
 			rc = bnxt_qplib_cq_process_req(cq,
 						       (struct cq_req *)hw_cqe,
-						       &cqe, &budget);
+						       &cqe, &budget,
+						       sw_cons, lib_qp);
 			break;
 		case CQ_BASE_CQE_TYPE_RES_RC:
 			rc = bnxt_qplib_cq_process_res_rc(cq,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index f0150f8da1e3929b309d14006b68469a965d8945..36b7b7db0e3f9782104ee8b049e674edb830226b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -88,6 +88,7 @@ struct bnxt_qplib_swq {
 
 struct bnxt_qplib_swqe {
 	/* General */
+#define	BNXT_QPLIB_FENCE_WRID	0x46454E43	/* "FENC" */
 	u64				wr_id;
 	u8				reqs_type;
 	u8				type;
@@ -216,9 +217,16 @@ struct bnxt_qplib_q {
 	struct scatterlist		*sglist;
 	u32				nmap;
 	u32				max_wqe;
+	u16				q_full_delta;
 	u16				max_sge;
 	u32				psn;
 	bool				flush_in_progress;
+	bool				condition;
+	bool				single;
+	bool				send_phantom;
+	u32				phantom_wqe_cnt;
+	u32				phantom_cqe_cnt;
+	u32				next_cq_cons;
 };
 
 struct bnxt_qplib_qp {
@@ -242,6 +250,7 @@ struct bnxt_qplib_qp {
 	u8				timeout;
 	u8				retry_cnt;
 	u8				rnr_retry;
+	u64				wqe_cnt;
 	u32				min_rnr_timer;
 	u32				max_rd_atomic;
 	u32				max_dest_rd_atomic;
@@ -301,6 +310,13 @@ struct bnxt_qplib_qp {
 	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
 	   !((raw_cons) & (cp_bit)))
 
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
+	return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+		       &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+						 &qplib_q->hwq);
+}
+
 struct bnxt_qplib_cqe {
 	u8				status;
 	u8				type;
@@ -432,7 +448,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num);
+		       int num, struct bnxt_qplib_qp **qp);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 23fb7260662b134b8d030f6c0ef7c6648976c434..16e42754dbeccfef863509b0bd8226683582df2a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -39,72 +39,55 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_rcfw.h"
 static void bnxt_qplib_service_creq(unsigned long data);
 
 /* Hardware communication channel */
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
 	u16 cbit;
 	int rc;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
-	if (!test_bit(cbit, rcfw->cmdq_bitmap))
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: CMD bit %d for cookie 0x%x is not set?",
-			 cbit, cookie);
-
 	rc = wait_event_timeout(rcfw->waitq,
 				!test_bit(cbit, rcfw->cmdq_bitmap),
 				msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
-	if (!rc) {
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
-			 RCFW_CMD_WAIT_TIME_MS, cookie);
-	}
-
-	return rc;
+	return rc ? 0 : -ETIMEDOUT;
 };
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
-	u32 count = -1;
+	u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
 	u16 cbit;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (!test_bit(cbit, rcfw->cmdq_bitmap))
 		goto done;
 	do {
+		mdelay(1); /* 1m sec */
 		bnxt_qplib_service_creq((unsigned long)rcfw);
 	} while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
 done:
-	return count;
+	return count ? 0 : -ETIMEDOUT;
 };
 
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block)
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+			  struct creq_base *resp, void *sb, u8 is_block)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
-	struct bnxt_qplib_crsqe *crsqe = NULL;
-	struct bnxt_qplib_crsbe **crsb_ptr;
+	struct bnxt_qplib_crsq *crsqe;
 	u32 sw_prod, cmdq_prod;
-	u8 retry_cnt = 0xFF;
-	dma_addr_t dma_addr;
 	unsigned long flags;
 	u32 size, opcode;
 	u16 cookie, cbit;
 	int pg, idx;
 	u8 *preq;
 
-retry:
 	opcode = req->opcode;
 	if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
@@ -112,63 +95,50 @@ retry:
 		dev_err(&rcfw->pdev->dev,
 			"QPLIB: RCFW not initialized, reject opcode 0x%x",
 			opcode);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
-		return NULL;
+		return -EINVAL;
 	}
 
 	/* Cmdq are in 16-byte units, each request can consume 1 or more
 	 * cmdqe
 	 */
 	spin_lock_irqsave(&cmdq->lock, flags);
-	if (req->cmd_size > cmdq->max_elements -
-	    ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
-	     (cmdq->max_elements - 1))) {
+	if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EAGAIN;
 	}
 
-	retry_cnt = 0xFF;
 
-	cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+	cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (is_block)
 		cookie |= RCFW_CMD_IS_BLOCKING;
+
+	set_bit(cbit, rcfw->cmdq_bitmap);
 	req->cookie = cpu_to_le16(cookie);
-	if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW MAX outstanding cmd reached!");
-		atomic_dec(&rcfw->seq_num);
+	crsqe = &rcfw->crsqe_tbl[cbit];
+	if (crsqe->resp) {
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EBUSY;
 	}
-	/* Reserve a resp buffer slot if requested */
-	if (req->resp_size && crsbe) {
-		spin_lock(&crsb->lock);
-		sw_prod = HWQ_CMP(crsb->prod, crsb);
-		crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
-		*crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
-					  [get_crsb_idx(sw_prod)];
-		bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
-		req->resp_addr = cpu_to_le64(dma_addr);
-		crsb->prod++;
-		spin_unlock(&crsb->lock);
-
-		req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
-				  BNXT_QPLIB_CMDQE_UNITS - 1) /
-				 BNXT_QPLIB_CMDQE_UNITS;
+	memset(resp, 0, sizeof(*resp));
+	crsqe->resp = (struct creq_qp_event *)resp;
+	crsqe->resp->cookie = req->cookie;
+	crsqe->req_size = req->cmd_size;
+	if (req->resp_size && sb) {
+		struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
+
+		req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+		req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+				  BNXT_QPLIB_CMDQE_UNITS;
 	}
+
 	cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
 	preq = (u8 *)req;
 	size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
@@ -190,23 +160,24 @@ retry:
 		preq += min_t(u32, size, sizeof(*cmdqe));
 		size -= min_t(u32, size, sizeof(*cmdqe));
 		cmdq->prod++;
+		rcfw->seq_num++;
 	} while (size > 0);
 
+	rcfw->seq_num++;
+
 	cmdq_prod = cmdq->prod;
 	if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
-		/* The very first doorbell write is required to set this flag
-		 * which prompts the FW to reset its internal pointers
+		/* The very first doorbell write
+		 * is required to set this flag
+		 * which prompts the FW to reset
+		 * its internal pointers
 		 */
 		cmdq_prod |= FIRMWARE_FIRST_FLAG;
 		rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
 	}
-	sw_prod = HWQ_CMP(crsq->prod, crsq);
-	crsqe = &crsq->crsq[sw_prod];
-	memset(crsqe, 0, sizeof(*crsqe));
-	crsq->prod++;
-	crsqe->req_size = req->cmd_size;
 
 	/* ring CMDQ DB */
+	wmb();
 	writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
 	       rcfw->cmdq_bar_reg_prod_off);
 	writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
@@ -214,9 +185,56 @@ retry:
 done:
 	spin_unlock_irqrestore(&cmdq->lock, flags);
 	/* Return the CREQ response pointer */
-	return crsqe ? &crsqe->qp_event : NULL;
+	return 0;
 }
 
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req,
+				 struct creq_base *resp,
+				 void *sb, u8 is_block)
+{
+	struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+	u16 cookie;
+	u8 opcode, retry_cnt = 0xFF;
+	int rc = 0;
+
+	do {
+		opcode = req->opcode;
+		rc = __send_message(rcfw, req, resp, sb, is_block);
+		cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+		if (!rc)
+			break;
+
+		if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+			/* send failed */
+			dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+				cookie, opcode);
+			return rc;
+		}
+		is_block ? mdelay(1) : usleep_range(500, 1000);
+
+	} while (retry_cnt--);
+
+	if (is_block)
+		rc = __block_for_resp(rcfw, cookie);
+	else
+		rc = __wait_for_resp(rcfw, cookie);
+	if (rc) {
+		/* timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+		return rc;
+	}
+
+	if (evnt->status) {
+		/* failed with status */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+			cookie, opcode, evnt->status);
+		rc = -EFAULT;
+	}
+
+	return rc;
+}
 /* Completions */
 static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 					 struct creq_func_event *func_event)
@@ -260,12 +278,12 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 				       struct creq_qp_event *qp_event)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_crsqe *crsqe;
-	u16 cbit, cookie, blocked = 0;
+	struct bnxt_qplib_crsq *crsqe;
 	unsigned long flags;
-	u32 sw_cons;
+	u16 cbit, blocked = 0;
+	u16 cookie;
+	__le16  mcookie;
 
 	switch (qp_event->event) {
 	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
@@ -275,24 +293,31 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 	default:
 		/* Command Response */
 		spin_lock_irqsave(&cmdq->lock, flags);
-		sw_cons = HWQ_CMP(crsq->cons, crsq);
-		crsqe = &crsq->crsq[sw_cons];
-		crsq->cons++;
-		memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
-
-		cookie = le16_to_cpu(crsqe->qp_event.cookie);
+		cookie = le16_to_cpu(qp_event->cookie);
+		mcookie = qp_event->cookie;
 		blocked = cookie & RCFW_CMD_IS_BLOCKING;
 		cookie &= RCFW_MAX_COOKIE_VALUE;
 		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		crsqe = &rcfw->crsqe_tbl[cbit];
+		if (crsqe->resp &&
+		    crsqe->resp->cookie  == mcookie) {
+			memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+			crsqe->resp = NULL;
+		} else {
+			dev_err(&rcfw->pdev->dev,
+				"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+				crsqe->resp ? "mismatch" : "collision",
+				crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+		}
 		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
 			dev_warn(&rcfw->pdev->dev,
 				 "QPLIB: CMD bit %d was not requested", cbit);
-
 		cmdq->cons += crsqe->req_size;
-		spin_unlock_irqrestore(&cmdq->lock, flags);
+		crsqe->req_size = 0;
+
 		if (!blocked)
 			wake_up(&rcfw->waitq);
-		break;
+		spin_unlock_irqrestore(&cmdq->lock, flags);
 	}
 	return 0;
 }
@@ -305,12 +330,12 @@ static void bnxt_qplib_service_creq(unsigned long data)
 	struct creq_base *creqe, **creq_ptr;
 	u32 sw_cons, raw_cons;
 	unsigned long flags;
-	u32 type;
+	u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 
-	/* Service the CREQ until empty */
+	/* Service the CREQ until budget is over */
 	spin_lock_irqsave(&creq->lock, flags);
 	raw_cons = creq->cons;
-	while (1) {
+	while (budget > 0) {
 		sw_cons = HWQ_CMP(raw_cons, creq);
 		creq_ptr = (struct creq_base **)creq->pbl_ptr;
 		creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
@@ -320,15 +345,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 		type = creqe->type & CREQ_BASE_TYPE_MASK;
 		switch (type) {
 		case CREQ_BASE_TYPE_QP_EVENT:
-			if (!bnxt_qplib_process_qp_event
-			    (rcfw, (struct creq_qp_event *)creqe))
-				rcfw->creq_qp_event_processed++;
-			else {
-				dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
-				dev_warn(&rcfw->pdev->dev,
-					 "QPLIB: type = 0x%x not handled",
-					 type);
-			}
+			bnxt_qplib_process_qp_event
+				(rcfw, (struct creq_qp_event *)creqe);
+			rcfw->creq_qp_event_processed++;
 			break;
 		case CREQ_BASE_TYPE_FUNC_EVENT:
 			if (!bnxt_qplib_process_func_event
@@ -346,7 +365,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 			break;
 		}
 		raw_cons++;
+		budget--;
 	}
+
 	if (creq->cons != raw_cons) {
 		creq->cons = raw_cons;
 		CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
@@ -375,23 +396,16 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
 /* RCFW */
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
 {
-	struct creq_deinitialize_fw_resp *resp;
 	struct cmdq_deinitialize_fw req;
+	struct creq_deinitialize_fw_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
-	resp = (struct creq_deinitialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp)
-		return -EINVAL;
-
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
-		return -ETIMEDOUT;
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
-		return -EFAULT;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 
 	clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
@@ -417,9 +431,10 @@ static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 			 struct bnxt_qplib_ctx *ctx, int is_virtfn)
 {
-	struct creq_initialize_fw_resp *resp;
 	struct cmdq_initialize_fw req;
+	struct creq_initialize_fw_resp resp;
 	u16 cmd_flags = 0, level;
+	int rc;
 
 	RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
 
@@ -482,37 +497,19 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 
 skip_ctx_setup:
 	req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
-	resp = (struct creq_initialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW failed");
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 	set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
 }
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
-	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
-	kfree(rcfw->crsq.crsq);
+	kfree(rcfw->crsqe_tbl);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
-
 	rcfw->pdev = NULL;
 }
 
@@ -539,21 +536,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 		goto fail;
 	}
 
-	rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
-	rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
-				  sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
-	if (!rcfw->crsq.crsq)
+	rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+				  sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+	if (!rcfw->crsqe_tbl)
 		goto fail;
 
-	rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
-	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
-				      &rcfw->crsb.max_elements,
-				      BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
-				      HWQ_TYPE_CTX)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: HW channel CRSB allocation failed");
-		goto fail;
-	}
 	return 0;
 
 fail:
@@ -606,7 +593,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	int rc;
 
 	/* General */
-	atomic_set(&rcfw->seq_num, 0);
+	rcfw->seq_num = 0;
 	rcfw->flags = FIRMWARE_FIRST_FLAG;
 	bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
 				  sizeof(unsigned long));
@@ -636,10 +623,6 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 
 	rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
 
-	/* CRSQ */
-	rcfw->crsq.prod = 0;
-	rcfw->crsq.cons = 0;
-
 	/* CREQ */
 	rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
 	res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
@@ -692,3 +675,34 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	__iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
 	return 0;
 }
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+		struct bnxt_qplib_rcfw *rcfw,
+		u32 size)
+{
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+	sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+	if (!sbuf)
+		return NULL;
+
+	sbuf->size = size;
+	sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+				       &sbuf->dma_addr, GFP_ATOMIC);
+	if (!sbuf->sb)
+		goto bail;
+
+	return sbuf;
+bail:
+	kfree(sbuf);
+	return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+	if (sbuf->sb)
+		dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+				  sbuf->sb, sbuf->dma_addr);
+	kfree(sbuf);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index d3567d75bf58fe4e5e496c78ec27643af7d80cdc..09ce121770cdafd1af01bd815788f6b25eac3bf0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -73,6 +73,7 @@
 #define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
 #define RCFW_MAX_COOKIE_VALUE		0x7FFF
 #define RCFW_CMD_IS_BLOCKING		0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT	0x4E20
 
 /* Cmdq contains a fix number of a 16-Byte slots */
 struct bnxt_qplib_cmdqe {
@@ -94,32 +95,6 @@ struct bnxt_qplib_crsbe {
 	u8			data[1024];
 };
 
-/* CRSQ SB */
-#define BNXT_QPLIB_CRSBE_MAX_CNT	4
-#define BNXT_QPLIB_CRSBE_UNITS		sizeof(struct bnxt_qplib_crsbe)
-#define BNXT_QPLIB_CRSBE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
-
-#define MAX_CRSB_IDX			(BNXT_QPLIB_CRSBE_MAX_CNT - 1)
-#define MAX_CRSB_IDX_PER_PG		(BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
-
-static inline u32 get_crsb_pg(u32 val)
-{
-	return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
-}
-
-static inline u32 get_crsb_idx(u32 val)
-{
-	return val & MAX_CRSB_IDX_PER_PG;
-}
-
-static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
-					    u32 prod, dma_addr_t *dma_addr)
-{
-		*dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
-		*dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
-			      BNXT_QPLIB_CRSBE_UNITS;
-}
-
 /* CREQ */
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
@@ -158,17 +133,19 @@ static inline u32 get_creq_idx(u32 val)
 #define CREQ_DB(db, raw_cons, cp_bit)				\
 	writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
 
+#define CREQ_ENTRY_POLL_BUDGET		0x100
+
 /* HWQ */
-struct bnxt_qplib_crsqe {
-	struct creq_qp_event	qp_event;
+
+struct bnxt_qplib_crsq {
+	struct creq_qp_event	*resp;
 	u32			req_size;
 };
 
-struct bnxt_qplib_crsq {
-	struct bnxt_qplib_crsqe	*crsq;
-	u32			prod;
-	u32			cons;
-	u32			max_elements;
+struct bnxt_qplib_rcfw_sbuf {
+	void *sb;
+	dma_addr_t dma_addr;
+	u32 size;
 };
 
 /* RCFW Communication Channels */
@@ -185,7 +162,7 @@ struct bnxt_qplib_rcfw {
 	wait_queue_head_t	waitq;
 	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
 					       struct creq_func_event *);
-	atomic_t		seq_num;
+	u32			seq_num;
 
 	/* Bar region info */
 	void __iomem		*cmdq_bar_reg_iomem;
@@ -203,8 +180,7 @@ struct bnxt_qplib_rcfw {
 
 	/* Actual Cmd and Resp Queues */
 	struct bnxt_qplib_hwq	cmdq;
-	struct bnxt_qplib_crsq	crsq;
-	struct bnxt_qplib_hwq	crsb;
+	struct bnxt_qplib_crsq	*crsqe_tbl;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
@@ -219,11 +195,14 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 					(struct bnxt_qplib_rcfw *,
 					 struct creq_func_event *));
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block);
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+				struct bnxt_qplib_rcfw *rcfw,
+				u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req, struct creq_base *resp,
+				 void *sbuf, u8 is_block);
 
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 6277d802ca4bc7e231e010331b5045c92cb57aad..2e4855509719cbf5ffa1725c8bee4d7e8e2c2f34 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -48,6 +48,10 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
 #define HWQ_CMP(idx, hwq)	((idx) & ((hwq)->max_elements - 1))
 
+#define HWQ_FREE_SLOTS(hwq)	(hwq->max_elements - \
+				((HWQ_CMP(hwq->prod, hwq)\
+				- HWQ_CMP(hwq->cons, hwq))\
+				& (hwq->max_elements - 1)))
 enum bnxt_qplib_hwq_type {
 	HWQ_TYPE_CTX,
 	HWQ_TYPE_QUEUE,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 7b31eccedf11fa8f36a29e6251127144e75a59a5..fde18cf0e406b9f78e2af96d70077622f3d02732 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -55,37 +55,30 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr)
 {
 	struct cmdq_query_func req;
-	struct creq_query_func_resp *resp;
+	struct creq_query_func_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_func_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp;
 	u8 *tqm_alloc;
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
 
-	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_func_resp *)
-		bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
-					     0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
+			"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+		return -ENOMEM;
 	}
+
+	sb = sbuf->sb;
+	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
+
 	/* Extract the context from the side buffer */
 	attr->max_qp = le32_to_cpu(sb->max_qp);
 	attr->max_qp_rd_atom =
@@ -95,6 +88,11 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
 		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
 	attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+	/*
+	 * 128 WQEs needs to be reserved for the HW (8916). Prevent
+	 * reporting the max number
+	 */
+	attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
 	attr->max_qp_sges = sb->max_sge;
 	attr->max_cq = le32_to_cpu(sb->max_cq);
 	attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
@@ -130,7 +128,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
 		attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
 	}
-	return 0;
+
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 /* SGID */
@@ -178,8 +179,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	/* Remove GID from the SGID table */
 	if (update) {
 		struct cmdq_delete_gid req;
-		struct creq_delete_gid_resp *resp;
+		struct creq_delete_gid_resp resp;
 		u16 cmd_flags = 0;
+		int rc;
 
 		RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
 		if (sgid_tbl->hw_id[index] == 0xFFFF) {
@@ -188,31 +190,10 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 			return -EINVAL;
 		}
 		req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
-		resp = (struct creq_delete_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
-						     0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
 	}
 	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
 	       sizeof(bnxt_qplib_gid_zero));
@@ -234,7 +215,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 						   struct bnxt_qplib_res,
 						   sgid_tbl);
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-	int i, free_idx, rc = 0;
+	int i, free_idx;
 
 	if (!sgid_tbl) {
 		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
@@ -266,10 +247,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	if (update) {
 		struct cmdq_add_gid req;
-		struct creq_add_gid_resp *resp;
+		struct creq_add_gid_resp resp;
 		u16 cmd_flags = 0;
 		u32 temp32[4];
 		u16 temp16[3];
+		int rc;
 
 		RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
 
@@ -290,31 +272,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		req.src_mac[1] = cpu_to_be16(temp16[1]);
 		req.src_mac[2] = cpu_to_be16(temp16[2]);
 
-		resp = (struct creq_add_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: ADD_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPIB: SP: ADD_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
-		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
+		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
 	}
 	/* Add GID to the sgid_tbl */
 	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
@@ -325,7 +287,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 
 	*index = free_idx;
 	/* unlock */
-	return rc;
+	return 0;
 }
 
 /* pkeys */
@@ -422,10 +384,11 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_ah req;
-	struct creq_create_ah_resp *resp;
+	struct creq_create_ah_resp resp;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
 	u16 temp16[3];
+	int rc;
 
 	RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
 
@@ -450,28 +413,12 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 	req.dest_mac[1] = cpu_to_le16(temp16[1]);
 	req.dest_mac[2] = cpu_to_le16(temp16[2]);
 
-	resp = (struct creq_create_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-	ah->id = le32_to_cpu(resp->xid);
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
+
+	ah->id = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -479,35 +426,19 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_ah req;
-	struct creq_destroy_ah_resp *resp;
+	struct creq_destroy_ah_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	/* Clean up the AH table in the device */
 	RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
 
 	req.ah_cid = cpu_to_le32(ah->id);
 
-	resp = (struct creq_destroy_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
 	return 0;
 }
 
@@ -516,8 +447,9 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deallocate_key req;
-	struct creq_deallocate_key_resp *resp;
+	struct creq_deallocate_key_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	if (mrw->lkey == 0xFFFFFFFF) {
 		dev_info(&res->pdev->dev,
@@ -536,27 +468,11 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	else
 		req.key = cpu_to_le32(mrw->lkey);
 
-	resp = (struct creq_deallocate_key_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
+
 	/* Free the qplib's MRW memory */
 	if (mrw->hwq.max_elements)
 		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
@@ -568,9 +484,10 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_allocate_mrw req;
-	struct creq_allocate_mrw_resp *resp;
+	struct creq_allocate_mrw_resp resp;
 	u16 cmd_flags = 0;
 	unsigned long tmp;
+	int rc;
 
 	RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
 
@@ -584,33 +501,17 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	tmp = (unsigned long)mrw;
 	req.mrw_handle = cpu_to_le64(tmp);
 
-	resp = (struct creq_allocate_mrw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
+
 	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
-		mrw->rkey = le32_to_cpu(resp->xid);
+		mrw->rkey = le32_to_cpu(resp.xid);
 	else
-		mrw->lkey = le32_to_cpu(resp->xid);
+		mrw->lkey = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -619,40 +520,17 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deregister_mr req;
-	struct creq_deregister_mr_resp *resp;
+	struct creq_deregister_mr_resp resp;
 	u16 cmd_flags = 0;
 	int rc;
 
 	RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
 
 	req.lkey = cpu_to_le32(mrw->lkey);
-	resp = (struct creq_deregister_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
-		return -EINVAL;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
+		return rc;
 
 	/* Free the qplib's MR memory */
 	if (mrw->hwq.max_elements) {
@@ -669,7 +547,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_register_mr req;
-	struct creq_register_mr_resp *resp;
+	struct creq_register_mr_resp resp;
 	u16 cmd_flags = 0, level;
 	int pg_ptrs, pages, i, rc;
 	dma_addr_t **pbl_ptr;
@@ -730,36 +608,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 	req.key = cpu_to_le32(mr->lkey);
 	req.mr_size = cpu_to_le64(mr->total_size);
 
-	resp = (struct creq_register_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "SP: REG_MR send failed");
-		rc = -EINVAL;
-		goto fail;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "SP: REG_MR timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
 		goto fail;
-	}
+
 	return 0;
 
 fail:
@@ -804,35 +657,15 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_map_tc_to_cos req;
-	struct creq_map_tc_to_cos_resp *resp;
+	struct creq_map_tc_to_cos_resp resp;
 	u16 cmd_flags = 0;
-	int tleft;
+	int rc = 0;
 
 	RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
 	req.cos0 = cpu_to_le16(cids[0]);
 	req.cos1 = cpu_to_le16(cids[1]);
 
-	resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
-		return -EINVAL;
-	}
-
-	tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
-	if (!tleft) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
-		return -ETIMEDOUT;
-	}
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 1442a617e968e1d367a9fd9e69a0540da645ce6f..a543f959098bd11972383a9d907725c08497a8f4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -40,6 +40,8 @@
 #ifndef __BNXT_QPLIB_SP_H__
 #define __BNXT_QPLIB_SP_H__
 
+#define BNXT_QPLIB_RESERVED_QP_WRS	128
+
 struct bnxt_qplib_dev_attr {
 	char				fw_ver[32];
 	u16				max_sgid;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index f96a96dbcf1ff4e40b75de36122a3efd0405faae..ae0b79aeea2ec141ebc83b9960f59718035eb1bf 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -767,7 +767,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 		kfree(entry);
 	}
 
-	list_for_each_safe(pos, nxt, &uctx->qpids) {
+	list_for_each_safe(pos, nxt, &uctx->cqids) {
 		entry = list_entry(pos, struct c4iw_qid_list, entry);
 		list_del_init(&entry->entry);
 		kfree(entry);
@@ -880,13 +880,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
 	if (!rdev->free_workq) {
 		err = -ENOMEM;
-		goto err_free_status_page;
+		goto err_free_status_page_and_wr_log;
 	}
 
 	rdev->status_page->db_off = 0;
 
 	return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+	if (c4iw_wr_log && rdev->wr_log)
+		kfree(rdev->wr_log);
 	free_page((unsigned long)rdev->status_page);
 destroy_ocqp_pool:
 	c4iw_ocqp_pool_destroy(rdev);
@@ -903,9 +905,11 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
 	destroy_workqueue(rdev->free_workq);
 	kfree(rdev->wr_log);
+	c4iw_release_dev_ucontext(rdev, &rdev->uctx);
 	free_page((unsigned long)rdev->status_page);
 	c4iw_pblpool_destroy(rdev);
 	c4iw_rqtpool_destroy(rdev);
+	c4iw_ocqp_pool_destroy(rdev);
 	c4iw_destroy_resource(&rdev->resource);
 }
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0c79983c8b1a0a4e6189fb5e02439ff3522a9098..9ecc089d4529440fa87929fb74461558db920549 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3692,8 +3692,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
-	dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
-	dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+		dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+		dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	}
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
 		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index aa08c76a42450a2ef86d1854b45a80d7bcd4387c..d961f79b317cba526fe52ca4af6adc40c0781868 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -58,7 +58,10 @@
 #define QEDR_MSG_QP   "  QP"
 #define QEDR_MSG_GSI  " GSI"
 
-#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER	(0x11223344)
+
+#define FW_PAGE_SIZE		(RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT		(12)
 
 struct qedr_dev;
 
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 17685cfea6a2dcb435e659675805dcbc29139e7b..d6723c365c7fba36168bc11c227de8bc34eef862 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -653,14 +653,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
 
 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 			       struct qedr_pbl *pbl,
-			       struct qedr_pbl_info *pbl_info)
+			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+	u32 fw_pg_cnt, fw_pg_per_umem_pg;
 	struct qedr_pbl *pbl_tbl;
 	struct scatterlist *sg;
 	struct regpair *pbe;
+	u64 pg_addr;
 	int entry;
-	u32 addr;
 
 	if (!pbl_info->num_pbes)
 		return;
@@ -683,29 +684,35 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
 	shift = umem->page_shift;
 
+	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
+
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		pages = sg_dma_len(sg) >> shift;
+		pg_addr = sg_dma_address(sg);
 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-			/* store the page address in pbe */
-			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
-					      (pg_cnt << shift));
-			addr = upper_32_bits(sg_dma_address(sg) +
-					     (pg_cnt << shift));
-			pbe->hi = cpu_to_le32(addr);
-			pbe_cnt++;
-			total_num_pbes++;
-			pbe++;
-
-			if (total_num_pbes == pbl_info->num_pbes)
-				return;
-
-			/* If the given pbl is full storing the pbes,
-			 * move to next pbl.
-			 */
-			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-				pbl_tbl++;
-				pbe = (struct regpair *)pbl_tbl->va;
-				pbe_cnt = 0;
+			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+				pbe->lo = cpu_to_le32(pg_addr);
+				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+				pg_addr += BIT(pg_shift);
+				pbe_cnt++;
+				total_num_pbes++;
+				pbe++;
+
+				if (total_num_pbes == pbl_info->num_pbes)
+					return;
+
+				/* If the given pbl is full storing the pbes,
+				 * move to next pbl.
+				 */
+				if (pbe_cnt ==
+				    (pbl_info->pbl_size / sizeof(u64))) {
+					pbl_tbl++;
+					pbe = (struct regpair *)pbl_tbl->va;
+					pbe_cnt = 0;
+				}
+
+				fw_pg_cnt++;
 			}
 		}
 	}
@@ -754,7 +761,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 				       u64 buf_addr, size_t buf_len,
 				       int access, int dmasync)
 {
-	int page_cnt;
+	u32 fw_pages;
 	int rc;
 
 	q->buf_addr = buf_addr;
@@ -766,8 +773,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		return PTR_ERR(q->umem);
 	}
 
-	page_cnt = ib_umem_page_count(q->umem);
-	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+	fw_pages = ib_umem_page_count(q->umem) <<
+	    (q->umem->page_shift - FW_PAGE_SHIFT);
+
+	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
 	if (rc)
 		goto err0;
 
@@ -777,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		goto err0;
 	}
 
-	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+				   FW_PAGE_SHIFT);
 
 	return 0;
 
@@ -2226,7 +2236,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 		goto err1;
 
 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
-			   &mr->info.pbl_info);
+			   &mr->info.pbl_info, mr->umem->page_shift);
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
@@ -3209,6 +3219,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
 		case IB_WC_REG_MR:
 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
 			break;
+		case IB_WC_RDMA_READ:
+		case IB_WC_SEND:
+			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 83d709e74dfb87ab0dcec30c87f0e49cd57022d2..073e66783f1dd8a4b62f9fc59a84319b507b51b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -740,13 +740,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 
 		sge = ibwr->sg_list;
 		for (i = 0; i < num_sge; i++, sge++) {
-			if (qp->is_user && copy_from_user(p, (__user void *)
-					    (uintptr_t)sge->addr, sge->length))
-				return -EFAULT;
-
-			else if (!qp->is_user)
-				memcpy(p, (void *)(uintptr_t)sge->addr,
-				       sge->length);
+			memcpy(p, (void *)(uintptr_t)sge->addr,
+					sge->length);
 
 			p += sge->length;
 		}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0060b2f9f659d7ad1ee9a1767d0029dc8529ff4c..efe7402f48852195efae4fc70cf843d0398dda30 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -863,7 +863,6 @@ dev_stop:
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
-	napi_enable(&priv->napi);
 	ipoib_ib_dev_stop(dev);
 	return -1;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index a115c0b7a310ed630c1c32ffd9e2c17574358f7c..1015a63de6aed6f3a8b88f3816dc5720a2d24092 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1596,6 +1596,8 @@ static void ipoib_dev_uninit_default(struct net_device *dev)
 
 	ipoib_transport_dev_cleanup(dev);
 
+	netif_napi_del(&priv->napi);
+
 	ipoib_cm_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1649,6 +1651,7 @@ out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
 out:
+	netif_napi_del(&priv->napi);
 	return -ENOMEM;
 }
 
@@ -2237,6 +2240,7 @@ event_failed:
 
 device_init_failed:
 	free_netdev(priv->dev);
+	kfree(priv);
 
 alloc_mem_failed:
 	return ERR_PTR(result);
@@ -2277,7 +2281,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-	struct ipoib_dev_priv *priv, *tmp;
+	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
 	struct list_head *dev_list = client_data;
 
 	if (!dev_list)
@@ -2300,7 +2304,14 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
-		free_netdev(priv->dev);
+		if (device->free_rdma_netdev)
+			device->free_rdma_netdev(priv->dev);
+		else
+			free_netdev(priv->dev);
+
+		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+			kfree(cpriv);
+
 		kfree(priv);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 36dc4fcaa3cdbcd2516d44fb9360c5dcccce1404..081b33deff1bcbf6f381c9993af174a3dc5e90b4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -133,13 +133,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
 	if (!priv)
 		return -ENOMEM;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-
 	down_write(&ppriv->vlan_rwsem);
 
 	/*
@@ -167,8 +167,10 @@ out:
 
 	rtnl_unlock();
 
-	if (result)
+	if (result) {
 		free_netdev(priv->dev);
+		kfree(priv);
+	}
 
 	return result;
 }
@@ -209,6 +211,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	if (dev) {
 		free_netdev(dev);
+		kfree(priv);
 		return 0;
 	}