diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 88f5aef7934c77a5213fdc85a7f8fe435fc76e70..e3a375c4cb83c383242ac6b9cc8b3247939e0947 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -19,33 +19,24 @@
 	 *			     u32 *macp, u8 const rk[], u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_auth_data)
-	frame_push	7
-
-	mov	x19, x0
-	mov	x20, x1
-	mov	x21, x2
-	mov	x22, x3
-	mov	x23, x4
-	mov	x24, x5
-
-	ldr	w25, [x22]			/* leftover from prev round? */
+	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
-	cbz	w25, 1f
-	sub	w25, w25, #16
+	cbz	w8, 1f				/* nothing buffered: go to block loop */
+	sub	w8, w8, #16			/* w8 = -(bytes needed to fill block) */
 	eor	v1.16b, v1.16b, v1.16b
-0:	ldrb	w7, [x20], #1			/* get 1 byte of input */
-	subs	w21, w21, #1
-	add	w25, w25, #1
+0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
+	subs	w2, w2, #1			/* one input byte consumed */
+	add	w8, w8, #1			/* one byte closer to a full block */
 	ins	v1.b[0], w7
 	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
 	beq	8f				/* out of input? */
-	cbnz	w25, 0b
+	cbnz	w8, 0b				/* block still incomplete */
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.4s}, [x23]			/* load first round key */
-	prfm	pldl1strm, [x20]
-	cmp	w24, #12			/* which key size? */
-	add	x6, x23, #16
-	sub	w7, w24, #2			/* modified # of rounds */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
+	prfm	pldl1strm, [x1]
+	cmp	w5, #12				/* which key size? */
+	add	x6, x4, #16			/* x6 = address of next round key */
+	sub	w7, w5, #2			/* modified # of rounds */
 	bmi	2f
 	bne	5f
 	mov	v5.16b, v3.16b
@@ -64,43 +55,34 @@ ENTRY(ce_aes_ccm_auth_data)
 	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
-	subs	w21, w21, #16			/* last data? */
+	subs	w2, w2, #16			/* last data? */
 	eor	v0.16b, v0.16b, v5.16b		/* final round */
 	bmi	6f
-	ld1	{v1.16b}, [x20], #16		/* load next input block */
+	ld1	{v1.16b}, [x1], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
-	beq	6f
-
-	if_will_cond_yield_neon
-	st1	{v0.16b}, [x19]			/* store mac */
-	do_cond_yield_neon
-	ld1	{v0.16b}, [x19]			/* reload mac */
-	endif_yield_neon
-
-	b	1b
-6:	st1	{v0.16b}, [x19]			/* store mac */
+	bne	1b				/* more input remains */
+6:	st1	{v0.16b}, [x0]			/* store mac */
 	beq	10f
-	adds	w21, w21, #16
+	adds	w2, w2, #16			/* w2 = size of partial tail */
 	beq	10f
-	mov	w25, w21
-7:	ldrb	w7, [x20], #1
+	mov	w8, w2				/* tail length is the new leftover */
+7:	ldrb	w7, [x1], #1			/* get 1 byte of tail input */
 	umov	w6, v0.b[0]
 	eor	w6, w6, w7
-	strb	w6, [x19], #1
-	subs	w21, w21, #1
+	strb	w6, [x0], #1			/* xor'ed byte back into mac buffer */
+	subs	w2, w2, #1
 	beq	10f
 	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
 	b	7b
-8:	mov	w7, w25
-	add	w25, w25, #16
+8:	cbz	w8, 91f				/* block filled exactly: no rotation */
+	mov	w7, w8
+	add	w8, w8, #16
 9:	ext	v1.16b, v1.16b, v1.16b, #1
 	adds	w7, w7, #1
 	bne	9b
-	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.16b}, [x19]
-10:	str	w25, [x22]
-
-	frame_pop
+91:	eor	v0.16b, v0.16b, v1.16b
+	st1	{v0.16b}, [x0]
+10:	str	w8, [x3]
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
 
@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
-	frame_push	8
-
-	mov	x19, x0
-	mov	x20, x1
-	mov	x21, x2
-	mov	x22, x3
-	mov	x23, x4
-	mov	x24, x5
-	mov	x25, x6
-
-	ldr	x26, [x25, #8]			/* load lower ctr */
-	ld1	{v0.16b}, [x24]			/* load mac */
-CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
+	ldr	x8, [x6, #8]			/* load lower ctr */
+	ld1	{v0.16b}, [x5]			/* load mac */
+CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.8b}, [x25]			/* load upper ctr */
-	prfm	pldl1strm, [x20]
-	add	x26, x26, #1
-	rev	x9, x26
-	cmp	w23, #12			/* which key size? */
-	sub	w7, w23, #2			/* get modified # of rounds */
+	ld1	{v1.8b}, [x6]			/* load upper ctr */
+	prfm	pldl1strm, [x1]
+	add	x8, x8, #1			/* bump counter for this block */
+	rev	x9, x8				/* byte-reversed copy for v1.d[1] */
+	cmp	w4, #12				/* which key size? */
+	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.4s}, [x22]			/* load first round key */
-	add	x10, x22, #16
+	ld1	{v3.4s}, [x3]			/* load first round key */
+	add	x10, x3, #16			/* x10 = address of next round key */
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
-	subs	w21, w21, #16
-	bmi	7f				/* partial block? */
-	ld1	{v2.16b}, [x20], #16		/* load next input block */
+	subs	w2, w2, #16			/* w2 = bytes left after this block */
+	bmi	6f				/* partial block? */
+	ld1	{v2.16b}, [x1], #16		/* load next input block */
 	.if	\enc == 1
 	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
 	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
 	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
 	.endif
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
-	st1	{v1.16b}, [x19], #16		/* write output block */
-	beq	5f
-
-	if_will_cond_yield_neon
-	st1	{v0.16b}, [x24]			/* store mac */
-	do_cond_yield_neon
-	ld1	{v0.16b}, [x24]			/* reload mac */
-	endif_yield_neon
-
-	b	0b
-5:
-CPU_LE(	rev	x26, x26			)
-	st1	{v0.16b}, [x24]			/* store mac */
-	str	x26, [x25, #8]			/* store lsb end of ctr (BE) */
-
-6:	frame_pop
-	ret
-
-7:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+	st1	{v1.16b}, [x0], #16		/* write output block */
+	bne	0b				/* more input: back to outer loop */
+CPU_LE(	rev	x8, x8			)	/* undo the swab before storing */
+	st1	{v0.16b}, [x5]			/* store mac */
+	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
+5:	ret
+
+6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.16b}, [x24]			/* store mac */
-	add	w21, w21, #16			/* process partial tail block */
-8:	ldrb	w9, [x20], #1			/* get 1 byte of input */
+	st1	{v0.16b}, [x5]			/* store mac */
+	add	w2, w2, #16			/* process partial tail block */
+7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */
 	umov	w7, v0.b[0]			/* get top mac byte */
 	.if	\enc == 1
@@ -237,13 +197,13 @@ CPU_LE(	rev	x26, x26			)
 	eor	w9, w9, w6
 	eor	w7, w7, w9
 	.endif
-	strb	w9, [x19], #1			/* store out byte */
-	strb	w7, [x24], #1			/* store mac byte */
-	subs	w21, w21, #1
-	beq	6b
+	strb	w9, [x0], #1			/* store out byte */
+	strb	w7, [x5], #1			/* store mac byte */
+	subs	w2, w2, #1			/* one tail byte done */
+	beq	5b				/* tail finished: return */
 	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
 	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
-	b	8b
+	b	7b				/* next tail byte */
 	.endm
 
 	/*
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index dcffb9e77589cd843bb04691a46d117013f12ba1..c723647b37db0387f58d3ea88f899147fdbc2727 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
 	.endm
 
 	.macro		pmull_gcm_do_crypt, enc
-	frame_push	10
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-	mov		x25, x6
-	mov		x26, x7
-	.if		\enc == 1
-	ldr		x27, [sp, #96]			// first stacked arg
-	.endif
-
-	ldr		x28, [x24, #8]			// load lower counter
-CPU_LE(	rev		x28, x28	)
-
-0:	mov		x0, x25
-	load_round_keys	w26, x0
-	ld1		{SHASH.2d}, [x23]
-	ld1		{XL.2d}, [x20]
+	load_round_keys	w7, x6
 
 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)	// keep swabbed counter in reg
 	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	.if		\enc == 1
-	ld1		{KS.16b}, [x27]
+	ldr		x10, [sp]			// first stacked arg
+	ld1		{KS.16b}, [x10]			// load keystream block
 	.endif
 
-1:	ld1		{CTR.8b}, [x24]			// load upper counter
-	ld1		{INP.16b}, [x22], #16
-	rev		x9, x28
-	add		x28, x28, #1
-	sub		w19, w19, #1
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8				// byte-reversed copy for CTR.d[1]
+	add		x8, x8, #1			// advance counter for next block
+	sub		w0, w0, #1			// one fewer block to go
 	ins		CTR.d[1], x9			// set lower counter
 
 	.if		\enc == 1
 	eor		INP.16b, INP.16b, KS.16b	// encrypt input
-	st1		{INP.16b}, [x21], #16
+	st1		{INP.16b}, [x2], #16
 	.endif
 
 	rev64		T1.16b, INP.16b
 
-	cmp		w26, #12
-	b.ge		4f				// AES-192/256?
+	cmp		w7, #12
+	b.ge		2f				// AES-192/256?
 
-2:	enc_round	CTR, v21
+1:	enc_round	CTR, v21
 
 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE(	rev		x28, x28	)
 
 	.if		\enc == 0
 	eor		INP.16b, INP.16b, KS.16b
-	st1		{INP.16b}, [x21], #16
+	st1		{INP.16b}, [x2], #16
 	.endif
 
-	cbz		w19, 3f
+	cbnz		w0, 0b				// more blocks to process
 
-	if_will_cond_yield_neon
-	st1		{XL.2d}, [x20]
-	.if		\enc == 1
-	st1		{KS.16b}, [x27]
-	.endif
-	do_cond_yield_neon
-	b		0b
-	endif_yield_neon
+CPU_LE(	rev		x8, x8		)	// undo the swab before storing
+	st1		{XL.2d}, [x1]			// store XL back through x1
+	str		x8, [x5, #8]			// store lower counter
 
-	b		1b
-
-3:	st1		{XL.2d}, [x20]
 	.if		\enc == 1
-	st1		{KS.16b}, [x27]
+	st1		{KS.16b}, [x10]			// write back keystream block
 	.endif
 
-CPU_LE(	rev		x28, x28	)
-	str		x28, [x24, #8]			// store lower counter
-
-	frame_pop
 	ret
 
-4:	b.eq		5f				// AES-192?
+2:	b.eq		3f				// AES-192?
 	enc_round	CTR, v17
 	enc_round	CTR, v18
-5:	enc_round	CTR, v19
+3:	enc_round	CTR, v19
 	enc_round	CTR, v20
-	b		2b
+	b		1b				// rejoin the shared round sequence
 	.endm
 
 	/*
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 5de7c0d46edfc56459280519e8987e28349baee9..acd11b3bf639e0a50013014e5eaeca6b11083c28 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128_aesni_alg[] = {
 	}
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_AES),
-	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis128_aesni_module_init(void)
 {
-	if (!x86_match_cpu(aesni_cpu_id))
+	/* SSE needs no XSAVE: OSXSAVE is only required for AVX state. */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_aegis128_aesni_alg,
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 876e4866e63386ec0dd0231c6be35b9f94bc709c..2071c3d1ae07575143cc4d6262e92eaeef9ba560 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128l_aesni_alg[] = {
 	}
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_AES),
-	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis128l_aesni_module_init(void)
 {
-	if (!x86_match_cpu(aesni_cpu_id))
+	/* SSE needs no XSAVE: OSXSAVE is only required for AVX state. */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_aegis128l_aesni_alg,
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 2b5dd3af8f4dc4c20caad2a96825576dc17fe2be..b5f2a8fd5a713ca986e2d3ef24aa1b69d421ced2 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis256_aesni_alg[] = {
 	}
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_AES),
-	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis256_aesni_module_init(void)
 {
-	if (!x86_match_cpu(aesni_cpu_id))
+	/* SSE needs no XSAVE: OSXSAVE is only required for AVX state. */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_aegis256_aesni_alg,
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index f111f36d26dce558ddb1e4ad5427a048011aded2..6634907d6ccdf17eb3c425337ca96bf746927ff7 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 
 MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
 
-static const struct x86_cpu_id avx2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_AVX2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
-
 static int __init crypto_morus1280_avx2_module_init(void)
 {
-	if (!x86_match_cpu(avx2_cpu_id))
+	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE) ||	/* YMM state needs XSAVE */
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_morus1280_avx2_algs,
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 839270aa713cab55dfebc55f9d02c53aa13c9f73..95cf857d2cbb1943ba8ce356c48416837f1655d9 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
 
 MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
 
-static const struct x86_cpu_id sse2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
 static int __init crypto_morus1280_sse2_module_init(void)
 {
-	if (!x86_match_cpu(sse2_cpu_id))
+	/* SSE needs no XSAVE: OSXSAVE is only required for AVX state. */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_morus1280_sse2_algs,
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 26b47e2db8d2149c64b407ee10ccd4f013fcb542..615fb7bc9a323d949d038a8496125eab0c4bc4ba 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
 
 MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
 
-static const struct x86_cpu_id sse2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
 static int __init crypto_morus640_sse2_module_init(void)
 {
-	if (!x86_match_cpu(sse2_cpu_id))
+	/* SSE needs no XSAVE: OSXSAVE is only required for AVX state. */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
 		return -ENODEV;
 
 	return crypto_register_aeads(crypto_morus640_sse2_algs,